[Xen-devel] [PATCH 0/6] libxl: add HVM USB passthrough capability

2016-09-08 Thread Juergen Gross
Add the capability to pass USB devices to HVM domains by using
qemu's emulation of USB controllers.

The user interface via xl is the same as for pvusb passthrough, only
the type of the usbctrl is different: instead of "qusb" (qemu-based
pvusb backend) or "vusb" (kernel-based pvusb backend) the type
"devicemodel" is used.

In particular, the communication with qemu via qmp commands is based
on the patches George Dunlap sent in 2014:

https://lists.xen.org/archives/html/xen-devel/2014-06/msg00085.html

Juergen Gross (6):
  libxl: rename libcl_pvusb.c to libxl_usb.c
  libxl: add libxl__qmp_run_command_flexarray() function
  libxl: dont pass array size to libxl__xs_kvs_of_flexarray()
  libxl: add basic support for devices without backend
  libxl: add HVM usb passthrough support
  docs: add HVM USB passthrough documentation

 docs/man/xl.cfg.pod.5.in   |  12 +-
 tools/libxl/Makefile   |   2 +-
 tools/libxl/libxl.c|  22 +-
 tools/libxl/libxl_device.c |  62 +++--
 tools/libxl/libxl_internal.h   |   5 +-
 tools/libxl/libxl_nic.c|   6 +-
 tools/libxl/libxl_pci.c|   7 +-
 tools/libxl/libxl_qmp.c|  16 ++
 tools/libxl/libxl_types_internal.idl   |   1 +
 tools/libxl/{libxl_pvusb.c => libxl_usb.c} | 423 ++---
 tools/libxl/libxl_vtpm.c   |   6 +-
 tools/libxl/libxl_xshelp.c |   8 +-
 tools/libxl/xl_cmdimpl.c   |   4 +-
 13 files changed, 429 insertions(+), 145 deletions(-)
 rename tools/libxl/{libxl_pvusb.c => libxl_usb.c} (80%)

-- 
2.6.6


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 5/6] libxl: add HVM usb passthrough support

2016-09-08 Thread Juergen Gross
Add HVM usb passthrough support to libxl by using qemu's capability
to emulate standard USB controllers.

A USB controller is added via qmp command to the emulated hardware
when a usbctrl device of type DEVICEMODEL is requested. Depending on
the requested speed the appropriate hardware type is selected. A host
USB device can then be added to the emulated USB controller via qmp
command.

Removing of the devices is done via qmp commands, too.

Signed-off-by: Juergen Gross 
---
 tools/libxl/libxl_device.c |   3 +-
 tools/libxl/libxl_usb.c| 417 +++--
 tools/libxl/xl_cmdimpl.c   |   4 +-
 3 files changed, 331 insertions(+), 93 deletions(-)

diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 5211f20..c6f15db 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -782,8 +782,7 @@ void libxl__devices_destroy(libxl__egc *egc, 
libxl__devices_remove_state *drs)
 aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
 aodev->dev = dev;
 aodev->force = drs->force;
-if (dev->backend_kind == LIBXL__DEVICE_KIND_VUSB ||
-dev->backend_kind == LIBXL__DEVICE_KIND_QUSB)
+if (dev->kind == LIBXL__DEVICE_KIND_VUSB)
 libxl__initiate_device_usbctrl_remove(egc, aodev);
 else
 libxl__initiate_device_generic_remove(egc, aodev);
diff --git a/tools/libxl/libxl_usb.c b/tools/libxl/libxl_usb.c
index 6b30e0f..40c5a84 100644
--- a/tools/libxl/libxl_usb.c
+++ b/tools/libxl/libxl_usb.c
@@ -17,6 +17,7 @@
 
 #include "libxl_internal.h"
 #include 
+#include 
 
 #define USBBACK_INFO_PATH "/libxl/usbback"
 
@@ -43,12 +44,6 @@ static int libxl__device_usbctrl_setdefault(libxl__gc *gc, 
uint32_t domid,
 int rc;
 libxl_domain_type domtype = libxl__domain_type(gc, domid);
 
-if (!usbctrl->version)
-usbctrl->version = 2;
-
-if (!usbctrl->ports)
-usbctrl->ports = 8;
-
 if (usbctrl->type == LIBXL_USBCTRL_TYPE_AUTO) {
 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
 rc = usbback_is_loaded(gc);
@@ -62,6 +57,67 @@ static int libxl__device_usbctrl_setdefault(libxl__gc *gc, 
uint32_t domid,
 }
 }
 
+switch (usbctrl->type) {
+case LIBXL_USBCTRL_TYPE_PV:
+case LIBXL_USBCTRL_TYPE_QUSB:
+if (!usbctrl->version)
+usbctrl->version = 2;
+if (usbctrl->version < 1 || usbctrl->version > 2) {
+LOG(ERROR, "USB version for paravirtualized devices must be 1 or 
2");
+rc = ERROR_INVAL;
+goto out;
+}
+if (!usbctrl->ports)
+usbctrl->ports = 8;
+if (usbctrl->ports < 1 || usbctrl->ports > USBIF_MAX_PORTNR) {
+LOG(ERROR, "Number of ports for USB controller is limited to %u",
+USBIF_MAX_PORTNR);
+rc = ERROR_INVAL;
+goto out;
+}
+break;
+case LIBXL_USBCTRL_TYPE_DEVICEMODEL:
+if (!usbctrl->version)
+usbctrl->version = 2;
+switch (usbctrl->version) {
+case 1:
+/* uhci controller in qemu has fixed number of ports. */
+if (usbctrl->ports && usbctrl->ports != 2) {
+LOG(ERROR, "Number of ports for USB controller of version 1 is 
always 2");
+rc = ERROR_INVAL;
+goto out;
+}
+usbctrl->ports = 2;
+break;
+case 2:
+/* ehci controller in qemu has fixed number of ports. */
+if (usbctrl->ports && usbctrl->ports != 6) {
+LOG(ERROR, "Number of ports for USB controller of version 2 is 
always 6");
+rc = ERROR_INVAL;
+goto out;
+}
+usbctrl->ports = 6;
+break;
+case 3:
+if (!usbctrl->ports)
+usbctrl->ports = 8;
+/* xhci controller in qemu supports up to 15 ports. */
+if (usbctrl->ports > 15) {
+LOG(ERROR, "Number of ports for USB controller of version 3 is 
limited to 15");
+rc = ERROR_INVAL;
+goto out;
+}
+break;
+default:
+LOG(ERROR, "Illegal USB version");
+rc = ERROR_INVAL;
+goto out;
+}
+break;
+default:
+break;
+}
+
 rc = libxl__resolve_domid(gc, usbctrl->backend_domname,
   &usbctrl->backend_domid);
 
@@ -75,9 +131,19 @@ static int libxl__device_from_usbctrl(libxl__gc *gc, 
uint32_t domid,
 {
 device->backend_devid   = usbctrl->devid;
 device->backend_domid   = usbctrl->backend_domid;
-device->backend_kind= (usbctrl->type == LIBXL_USBCTRL_TYPE_PV)
-  ? LIBXL__DEVICE_KIND_VUSB
-  : LIBXL__DEVICE_KIND_QUSB;
+switch (usbctrl->type) {
+case LIBXL_USBCTRL_TY

[Xen-devel] [PATCH 1/6] libxl: rename libcl_pvusb.c to libxl_usb.c

2016-09-08 Thread Juergen Gross
Rename libxl_pvusb.c to libxl_usb.c in order to reflect future support
of USB passthrough via qemu-emulated USB controllers.

Signed-off-by: Juergen Gross 
---
 tools/libxl/Makefile   | 2 +-
 tools/libxl/{libxl_pvusb.c => libxl_usb.c} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename tools/libxl/{libxl_pvusb.c => libxl_usb.c} (100%)

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 6994c58..f32169a 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -108,7 +108,7 @@ LIBXL_OBJS = flexarray.o libxl.o libxl_create.o libxl_dm.o 
libxl_pci.o \
libxl_stream_read.o libxl_stream_write.o \
libxl_save_callout.o _libxl_save_msgs_callout.o \
libxl_qmp.o libxl_event.o libxl_fork.o \
-   libxl_dom_suspend.o libxl_dom_save.o libxl_pvusb.o \
+   libxl_dom_suspend.o libxl_dom_save.o libxl_usb.o \
libxl_vtpm.o libxl_nic.o \
 $(LIBXL_OBJS-y)
 LIBXL_OBJS += libxl_genid.o
diff --git a/tools/libxl/libxl_pvusb.c b/tools/libxl/libxl_usb.c
similarity index 100%
rename from tools/libxl/libxl_pvusb.c
rename to tools/libxl/libxl_usb.c
-- 
2.6.6


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 4/6] libxl: add basic support for devices without backend

2016-09-08 Thread Juergen Gross
With the planned support of HVM USB passthrough via the USB emulation
capabilities of qemu, libxl has to support guest devices which have
neither a backend nor a frontend. Information about those devices will
live in the libxl part of Xenstore only.

Add some basic support to libxl to be able to cope with this scenario.

Signed-off-by: Juergen Gross 
---
 tools/libxl/libxl_device.c   | 59 
 tools/libxl/libxl_types_internal.idl |  1 +
 tools/libxl/libxl_xshelp.c   |  6 +++-
 3 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 9c77b62..5211f20 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -114,15 +114,21 @@ int libxl__device_generic_add(libxl__gc *gc, 
xs_transaction_t t,
 libxl__device *device, char **bents, char **fents, char **ro_fents)
 {
 libxl_ctx *ctx = libxl__gc_owner(gc);
-char *frontend_path, *backend_path, *libxl_path;
+char *frontend_path = NULL, *backend_path = NULL, *libxl_path;
 struct xs_permissions frontend_perms[2];
 struct xs_permissions ro_frontend_perms[2];
 struct xs_permissions backend_perms[2];
 int create_transaction = t == XBT_NULL;
+int libxl_only = device->backend_kind == LIBXL__DEVICE_KIND_NONE;
 int rc;
 
-frontend_path = libxl__device_frontend_path(gc, device);
-backend_path = libxl__device_backend_path(gc, device);
+if (libxl_only) {
+/* bents should be set as this is used to setup libxl_path content. */
+assert(!fents && !ro_fents);
+} else {
+frontend_path = libxl__device_frontend_path(gc, device);
+backend_path = libxl__device_backend_path(gc, device);
+}
 libxl_path = libxl__device_libxl_path(gc, device);
 
 frontend_perms[0].id = device->domid;
@@ -144,13 +150,15 @@ retry_transaction:
 rc = libxl__xs_rm_checked(gc, t, libxl_path);
 if (rc) goto out;
 
-rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/frontend",libxl_path),
- frontend_path);
-if (rc) goto out;
+if (!libxl_only) {
+rc = libxl__xs_write_checked(gc, t, 
GCSPRINTF("%s/frontend",libxl_path),
+ frontend_path);
+if (rc) goto out;
 
-rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/backend",libxl_path),
- backend_path);
-if (rc) goto out;
+rc = libxl__xs_write_checked(gc, t, GCSPRINTF("%s/backend",libxl_path),
+ backend_path);
+if (rc) goto out;
+}
 
 /* xxx much of this function lacks error checks! */
 
@@ -179,12 +187,15 @@ retry_transaction:
 }
 
 if (bents) {
-xs_rm(ctx->xsh, t, backend_path);
-xs_mkdir(ctx->xsh, t, backend_path);
-xs_set_permissions(ctx->xsh, t, backend_path, backend_perms, 
ARRAY_SIZE(backend_perms));
-xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path),
- frontend_path, strlen(frontend_path));
-libxl__xs_writev(gc, t, backend_path, bents);
+if (!libxl_only) {
+xs_rm(ctx->xsh, t, backend_path);
+xs_mkdir(ctx->xsh, t, backend_path);
+xs_set_permissions(ctx->xsh, t, backend_path, backend_perms,
+   ARRAY_SIZE(backend_perms));
+xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path),
+ frontend_path, strlen(frontend_path));
+libxl__xs_writev(gc, t, backend_path, bents);
+}
 
 /*
  * We make a copy of everything for the backend in the libxl
@@ -194,6 +205,9 @@ retry_transaction:
  * instead.  But there are still places in libxl that try to
  * reconstruct a config from xenstore.
  *
+ * For devices without backend (e.g. USB devices emulated via qemu)
+ * only the libxl path is written.
+ *
  * This duplication will typically produces duplicate keys
  * which will go out of date, but that's OK because nothing
  * reads those.  For example, there is usually
@@ -662,12 +676,18 @@ void libxl__multidev_prepared(libxl__egc *egc,
 
 int libxl__device_destroy(libxl__gc *gc, libxl__device *dev)
 {
-const char *be_path = libxl__device_backend_path(gc, dev);
-const char *fe_path = libxl__device_frontend_path(gc, dev);
+const char *be_path = NULL;
+const char *fe_path = NULL;
 const char *libxl_path = libxl__device_libxl_path(gc, dev);
 xs_transaction_t t = 0;
 int rc;
 uint32_t domid;
+int libxl_only = dev->backend_kind == LIBXL__DEVICE_KIND_NONE;
+
+if (!libxl_only) {
+be_path = libxl__device_backend_path(gc, dev);
+fe_path = libxl__device_frontend_path(gc, dev);
+}
 
 rc = libxl__get_domid(gc, &domid);
 if (rc) goto out;
@@ -681,10 +701,11 @@ int libxl__device_destroy(libxl__gc *gc, libxl__device 
*dev)
   

[Xen-devel] [PATCH 2/6] libxl: add libxl__qmp_run_command_flexarray() function

2016-09-08 Thread Juergen Gross
Add a function libxl__qmp_run_command_flexarray() to run a qmp command
with an array of arguments. The arguments are name-value pairs stored
in a flexarray.

Signed-off-by: Juergen Gross 
---
 tools/libxl/libxl_internal.h |  3 +++
 tools/libxl/libxl_qmp.c  | 16 
 2 files changed, 19 insertions(+)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 3f29aa6..ecbfdad 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1753,6 +1753,9 @@ typedef struct libxl__qmp_handler libxl__qmp_handler;
  */
 _hidden libxl__qmp_handler *libxl__qmp_initialize(libxl__gc *gc,
   uint32_t domid);
+_hidden int libxl__qmp_run_command_flexarray(libxl__gc *gc, int domid,
+ const char *cmd,
+ flexarray_t *array);
 /* ask to QEMU the serial port information and store it in xenstore. */
 _hidden int libxl__qmp_query_serial(libxl__qmp_handler *qmp);
 _hidden int libxl__qmp_pci_add(libxl__gc *gc, int d, libxl_device_pci *pcidev);
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 0d8d5f4..f8addf9 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -827,6 +827,22 @@ static int qmp_run_command(libxl__gc *gc, int domid,
 return rc;
 }
 
+int libxl__qmp_run_command_flexarray(libxl__gc *gc, int domid,
+ const char *cmd, flexarray_t *array)
+{
+libxl__json_object *args = NULL;
+int i;
+void *name, *value;
+
+for (i = 0; i < array->count; i += 2) {
+flexarray_get(array, i, &name);
+flexarray_get(array, i + 1, &value);
+qmp_parameters_add_string(gc, &args, (char *)name, (char *)value);
+}
+
+return qmp_run_command(gc, domid, cmd, args, NULL, NULL);
+}
+
 int libxl__qmp_pci_add(libxl__gc *gc, int domid, libxl_device_pci *pcidev)
 {
 libxl__qmp_handler *qmp = NULL;
-- 
2.6.6


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 6/6] docs: add HVM USB passthrough documentation

2016-09-08 Thread Juergen Gross
Update the man page regarding passthrough of USB devices to HVM
domains via qemu USB emulation.

Signed-off-by: Juergen Gross 
---
 docs/man/xl.cfg.pod.5.in | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/man/xl.cfg.pod.5.in b/docs/man/xl.cfg.pod.5.in
index 77a1be3..076b2a6 100644
--- a/docs/man/xl.cfg.pod.5.in
+++ b/docs/man/xl.cfg.pod.5.in
@@ -745,19 +745,25 @@ Specifies the usb controller type.
 
 "qusb" specifies a qemu base backend for pvusb.
 
+"devicemodel" specifies a USB controller emulated by qemu. It will show up as
+a PCI-device in the guest.
+
 "auto" (the default) determines whether a kernel based backend is installed.
 If this is the case, "pv" is selected, "qusb" will be selected if no kernel
-backend is currently available.
+backend is currently available. For HVM domains "devicemodel" is being 
selected.
 
 =item B
 
 Specifies the usb controller version.  Possible values include
-1 (USB1.1) and 2 (USB2.0). Default is 2 (USB2.0).
+1 (USB1.1), 2 (USB2.0) and 3 (USB3.0). Default is 2 (USB2.0). 3 (USB3.0) is
+available for the type "devicemodel" only.
 
 =item B
 
 Specifies the total ports of the usb controller. The maximum
-number is 31. Default is 8.
+number is 31. Default is 8. With the type "devicemodel" the number of ports
+is more limited: a USB1.1 controller always has 2 ports, a USB2.0 controller
+always has 6 ports and a USB3.0 controller can have up to 15 ports.
 
 USB controller ids start from 0.  In line with the USB spec, however,
 ports on a controller start from 1.
-- 
2.6.6


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 3/6] libxl: dont pass array size to libxl__xs_kvs_of_flexarray()

2016-09-08 Thread Juergen Gross
Instead of passing the array size as an argument when calling
libxl__xs_kvs_of_flexarray(), let the function obtain the size from
the array itself.

Signed-off-by: Juergen Gross 
---
 tools/libxl/libxl.c  | 22 +++---
 tools/libxl/libxl_internal.h |  2 +-
 tools/libxl/libxl_nic.c  |  6 ++
 tools/libxl/libxl_pci.c  |  7 +++
 tools/libxl/libxl_usb.c  |  6 +++---
 tools/libxl/libxl_vtpm.c |  6 ++
 tools/libxl/libxl_xshelp.c   |  4 ++--
 7 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 577ed35..b7b8b08 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -2315,8 +2315,8 @@ static void device_disk_add(libxl__egc *egc, uint32_t 
domid,
 }
 
 libxl__device_generic_add(gc, t, device,
-  libxl__xs_kvs_of_flexarray(gc, back, 
back->count),
-  libxl__xs_kvs_of_flexarray(gc, front, 
front->count),
+  libxl__xs_kvs_of_flexarray(gc, back),
+  libxl__xs_kvs_of_flexarray(gc, front),
   NULL);
 
 rc = libxl__xs_transaction_commit(gc, &t);
@@ -2735,7 +2735,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, 
libxl_device_disk *disk,
 goto out;
 }
 
-char **kvs = libxl__xs_kvs_of_flexarray(gc, empty, empty->count);
+char **kvs = libxl__xs_kvs_of_flexarray(gc, empty);
 
 rc = libxl__xs_writev(gc, t, be_path, kvs);
 if (rc) goto out;
@@ -2777,7 +2777,7 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, 
libxl_device_disk *disk,
 rc = libxl__set_domain_configuration(gc, domid, &d_config);
 if (rc) goto out;
 
-char **kvs = libxl__xs_kvs_of_flexarray(gc, insert, insert->count);
+char **kvs = libxl__xs_kvs_of_flexarray(gc, insert);
 
 rc = libxl__xs_writev(gc, t, be_path, kvs);
 if (rc) goto out;
@@ -3147,9 +3147,9 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t 
domid,
 flexarray_append(front, LIBXL_XENCONSOLE_PROTOCOL);
 }
 libxl__device_generic_add(gc, XBT_NULL, device,
-  libxl__xs_kvs_of_flexarray(gc, back, 
back->count),
-  libxl__xs_kvs_of_flexarray(gc, front, 
front->count),
-  libxl__xs_kvs_of_flexarray(gc, ro_front, 
ro_front->count));
+  libxl__xs_kvs_of_flexarray(gc, back),
+  libxl__xs_kvs_of_flexarray(gc, front),
+  libxl__xs_kvs_of_flexarray(gc, ro_front));
 rc = 0;
 out:
 return rc;
@@ -3476,8 +3476,8 @@ int libxl__device_vkb_add(libxl__gc *gc, uint32_t domid,
 flexarray_append(front, GCSPRINTF("%d", XenbusStateInitialising));
 
 libxl__device_generic_add(gc, XBT_NULL, &device,
-  libxl__xs_kvs_of_flexarray(gc, back, 
back->count),
-  libxl__xs_kvs_of_flexarray(gc, front, 
front->count),
+  libxl__xs_kvs_of_flexarray(gc, back),
+  libxl__xs_kvs_of_flexarray(gc, front),
   NULL);
 rc = 0;
 out:
@@ -3589,8 +3589,8 @@ int libxl__device_vfb_add(libxl__gc *gc, uint32_t domid, 
libxl_device_vfb *vfb)
 flexarray_append_pair(front, "state", GCSPRINTF("%d", 
XenbusStateInitialising));
 
 libxl__device_generic_add(gc, XBT_NULL, &device,
-  libxl__xs_kvs_of_flexarray(gc, back, 
back->count),
-  libxl__xs_kvs_of_flexarray(gc, front, 
front->count),
+  libxl__xs_kvs_of_flexarray(gc, back),
+  libxl__xs_kvs_of_flexarray(gc, front),
   NULL);
 rc = 0;
 out:
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index ecbfdad..ec4fc23 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -676,7 +676,7 @@ _hidden int libxl__remove_directory(libxl__gc *gc, const 
char *path);
 _hidden int libxl__remove_file_or_directory(libxl__gc *gc, const char *path);
 
 
-_hidden char **libxl__xs_kvs_of_flexarray(libxl__gc *gc, flexarray_t *array, 
int length);
+_hidden char **libxl__xs_kvs_of_flexarray(libxl__gc *gc, flexarray_t *array);
 
 /* treats kvs as pairs of keys and values and writes each to dir. */
 _hidden int libxl__xs_writev(libxl__gc *gc, xs_transaction_t t,
diff --git a/tools/libxl/libxl_nic.c b/tools/libxl/libxl_nic.c
index d1caa90..220a028 100644
--- a/tools/libxl/libxl_nic.c
+++ b/tools/libxl/libxl_nic.c
@@ -266,10 +266,8 @@ static void libxl__device_nic_add(libxl__egc *egc, 
uint32_t domid,
 }
 
 libxl__device_generic_add(gc, t, device,
-  libxl__xs_kvs_of_flexarray(gc, back,
-

Re: [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation

2016-09-08 Thread Dario Faggioli
On Thu, 2016-09-08 at 13:30 +0800, Dongli Zhang wrote:
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index f34dd56..3641469 100644
> @@ -150,6 +152,12 @@ static void populate_physmap(struct memop_args
> *a)
>  max_order(curr_d)) )
>  return;
>  
> +/* MEMF_no_tlbflush can be set only during vm creation phase
> when
> + * already_scheduled is still 0 before this domain gets
> scheduled for
> + * the first time. */
>
/*
 * Comment style for multi line comments in Xen
 * includes the 'wings'. :-)
 */

Yes, I know there's some inconsistency in this file (and in many others
:-/), but still.

> +if ( d->already_scheduled == 0 )
>
unlikely() maybe?

> +a->memflags |= MEMF_no_tlbflush;
> +
>  for ( i = a->nr_done; i < a->nr_extents; i++ )
>  {
>  if ( i != a->nr_done && hypercall_preempt_check() )
> @@ -214,6 +222,21 @@ static void populate_physmap(struct memop_args
> *a)
>  goto out;
>  }
>  
> +if ( d->already_scheduled == 0 )
> +{
> +for ( j = 0; j < (1U << a->extent_order); j++ )
> +{
> +if ( page[j].u.free.need_tlbflush &&
> + (page[j].tlbflush_timestamp <=
> tlbflush_current_time()) &&
> + (!need_tlbflush ||
> + (page[j].tlbflush_timestamp >
> tlbflush_timestamp)) )
>
This check is long, complicated to read (at least to a non TLBflush
guru), and also appear twice.. can it be put in an inline function with
a talking name?

Oh, and I think you don't need the parenthesis around these twos:

 (page[j].tlbflush_timestamp <= tlbflush_current_time())
 (page[j].tlbflush_timestamp > tlbflush_timestamp)

> +{
> +need_tlbflush = 1;
> +tlbflush_timestamp =
> page[j].tlbflush_timestamp;
> +}
> +}
> +}
> +
>  mfn = page_to_mfn(page);
>  }

> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index 32a300f..593541a 100644
> @@ -1376,6 +1376,11 @@ static void schedule(void)
>  
>  next = next_slice.task;
>  
> +/* Set already_scheduled to 1 when this domain gets scheduled
> for the
> + * first time */
>
Wings again.

And, about the content, it's already clear from the code that this gets
set when a vcpu of a domain is scheduled. What we want here is a
_quick_ explanation of why we need the scheduler to record this
information.

> +if ( next->domain->already_scheduled == 0 )
>
unlikely() (and here I'm sure :-)).

> +next->domain->already_scheduled = 1;
> +
>
And, finally, I'd move this toward the bottom of the function, outside
of the pcpu_schedule_lock() critical section, e.g., around the call to
vcpu_periodic_timer_work(next);

>  sd->curr = next;
>  
>  if ( next_slice.time >= 0 ) /* -ve means no limit */

Regards,
Dario
-- 
<> (Raistlin Majere)
-
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Xen 4.8 Development Update

2016-09-08 Thread Juergen Gross
On 31/08/16 10:30, Wei Liu wrote:
> This email only tracks big items for xen.git tree. Please reply for items you
> woulk like to see in 4.8 so that people have an idea what is going on and
> prioritise accordingly.
> 
> You're welcome to provide description and use cases of the feature you're
> working on.
> 
> = Timeline =
> 
> We now adopt a fixed cut-off date scheme. We will release twice a
> year. The upcoming 4.8 timeline are as followed:
> 
> * Last posting date: September 16, 2016
> * Hard code freeze: September 30, 2016
> * RC1: TBD
> * Release: December 2, 2016
> 
> Note that we don't have freeze exception scheme anymore. All patches
> that wish to go into 4.8 must be posted no later than the last posting
> date. All patches posted after that date will be automatically queued
> into next release.
> 
> RCs will be arranged immediately after freeze.
> 
> = Projects =

> == Toolstack == 

*  Add HVM USB passthrough via qemu
  -  Juergen Gross


Juergen


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 1/2] xen/x86: Convert to hotplug state machine

2016-09-08 Thread Sebastian Andrzej Siewior
On 2016-09-07 13:19:00 [-0400], Boris Ostrovsky wrote:
> * Be more careful with return value of cpuhp_setup_state_nocalls()
>   as it may return a positive (non-error) number. (Which suggests
>   that comment on top of __cpuhp_setup_state() is probably incorrect)

Yes, we need to update that one.
The two patches look good.

Sebastian

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/3] x86: refactor psr implementation in hypervisor.

2016-09-08 Thread Yi Sun
On 16-09-07 03:01:34, Jan Beulich wrote:
> >> >>> On 25.08.16 at 07:22,  wrote:
> >> > + struct psr_socket_alloc_info *info);
> >> > +/*
> >> > + * get_old_set_new is used in set value process to get all features'
> >> > + * COS registers values according to original cos id of the domain.
> >> > + * Then, assemble them into an mask array as feature list order.
> >> 
> >> This sentence in particular doesn't make any sense to me. What
> >> follows below also looks like it is in need of improvement.
> >> 
> > Do you mean the comments are not accurate?
> 
> I simply wasn't able to tell, because of not being able to interpret
> the sentence.
> 
> > How about below description?
> >  
> > get_old_set_new will traverse all features in list. It is used to do below 
> > two
> > things:
> > 1. get old_cos register value of all supported features and
> > 2. set the new value for appointed feature.
> > 
> > All the values are set into mask array according the traversal order, 
> > meaning
> > the same order of feature list members.
> 
> Sounds reasonable. I suppose the example that you gave right
> next wasn't meant to go into the comment.
> 
Great, will replace the comments with the new sentences. The example will not
go in.

> >> > +/*
> >> > + * exceed_range is used to check if the input cos id exceeds the
> >> > + * feature's cos_max and if the input mask value is not the default 
> >> > one.
> >> > + * Even if the associated cos exceeds the cos_max, HW can work as 
> >> > default
> >> > + * value. That is the reason we need check is mask value is default 
> >> > one.
> >> > + * If both criteria are fulfilled, that means the input exceeds the
> >> > + * range.
> >> > + */
> >> > +unsigned int (*exceed_range)(uint64_t *mask, struct feat_list 
> >> > *pFeat,
> >> > + unsigned int cos);
> >> 
> >> According to the comment this is kind of a predicate, which seems
> >> unlikely to return an unsigned value. In fact without a word on the
> >> return value I'd expect such to return bool. And I'd also expect the
> >> name to reflect the purpose, i.e. "exceeds_name()". Plus just like
> >> for compare above I wonder whether come or all of the parameters
> >> should be pointers to const (please go over the entire patch and do
> >> constification wherever possible/sensible).
> >> 
> > Yes, you are right. I will change the function type to bool and add const
> > for not changed input pointers.
> > 
> > This function is used to check if the input cos id exceeds the cos_max. If 
> > yes
> > and the set value is not default value, we should return error. So, I think
> > to change the function name to exceed_cos_max(). How do you think?
> 
> Okay, except that I continue to think you mean "exceeds".
> "exceed_cos_max" to me is kind of a directive, not a predicate.
> 
How about "beyond"?

> >> > +#define MAX_FEAT_INFO_SIZE 8
> >> > +#define MAX_COS_REG_NUM  128
> >> 
> >> Are these numbers arbitrary, or based on something?
> >> 
> > MAX_FEAT_INFO_SIZE is got from the sizeof(struct psr_cat_lvl_info) and
> > consider the extension for future feature.
> 
> In that case please use that sizeof() in the expression here.
> 
Sure. Thanks!

> > MAX_COS_REG_NUM is got from spec that the max COS registers number is 128
> > for all PSR features so far.
> 
> "So far" makes me still wonder: Is this an architectural limit or one
> resulting from current (hardware) implementations. In the former
> case I don't think a comment is strictly needed, but in the latter
> case the choice should be explained.
> 
It is the latter case. I will add comment to explain it. Thanks!

> >> > +struct psr_socket_alloc_info {
> >> 
> >> I've yet to see whether the "alloc" in the name really makes sense.
> 
> And btw., having seen the entire patch I don't think this alloc_ infix
> is warranted both here and in the variable name.
> 
Ok, will consider to remove it in codes. Thanks!

> >> > +/* Common functions for supporting feature callback functions. */
> >> > +static void add_feature(struct feat_list *pHead, struct feat_list *pTmp)
> >> > +{
> >> > +if ( NULL == pHead || NULL == pTmp )
> >> > +return;
> >> > +
> >> > +while ( pHead->pNext )
> >> > +pHead = pHead->pNext;
> >> > +
> >> > +pHead->pNext = pTmp;
> >> > +}
> >> 
> >> Do you really need custom list management here?
> >> 
> > It seems xen list interfaces require the input list be a double linked list 
> > but
> > my list is a single linked list. Furthermore, I only need simple add to tail
> > function and free function. So I create custom list management functions.
> 
> Unless there's a strong need, I'd like you to go with what is there,
> or introduce _generic_ singly linked list management.
> 
I will check below list management interfaces to see if I can reuse them.
Thanks!
xen/include/xen/list.h

> >> > +static void free_feature(struct psr_socket_alloc_info *info)
> >> > +{

Re: [Xen-devel] [PATCH 2/3] x86: add support for L2 CAT in hypervisor.

2016-09-08 Thread Yi Sun
On 16-09-07 03:03:12, Jan Beulich wrote:
> >>> On 07.09.16 at 09:13,  wrote:
> > On 16-09-06 01:43:22, Jan Beulich wrote:
> >> >>> On 25.08.16 at 07:22,  wrote:
> >> 
> >> Please extend the comments given for patch 1 to this one. Just one
> >> extra thing:
> >> 
> >> > @@ -743,7 +744,7 @@ struct xen_sysctl_psr_cat_op {
> >> >  uint32_t cos_max;   /* OUT: Maximum COS */
> >> >  #define XEN_SYSCTL_PSR_CAT_L3_CDP   (1u << 0)
> >> >  uint32_t flags; /* OUT: CAT flags */
> >> > -} l3_info;
> >> > +} info;
> >> 
> >> Such an adjustment breaks the tools build, i.e. can't come without
> >> also minimally adjusting libxc.
> >> 
> >> Jan
> > I thought 4.8 will also make tools version upgrade but not considered
> > to be compatible with old tools. Sorry for that.
> > 
> > Considering the compatibility and to support future feature, I want to
> > add a general structure in union, like below. How do you think? Thanks!
> 
> No, you don't need to be compatible with old tools. But you need to
> avoid build breakage between patches 2 and 3. Please always
> remember that (a) patch series may not get applied in one go and
> (b) even if they do any intermediate build breakage will hinder
> bisection attempts.
> 
> Jan

Got it. Thanks a lot for your explanation!


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [libvirt test] 100802: tolerable FAIL - PUSHED

2016-09-08 Thread osstest service owner
flight 100802 libvirt real [real]
http://logs.test-lab.xenproject.org/osstest/logs/100802/

Failures :-/ but no regressions.

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 14 guest-saverestorefail   never pass
 test-armhf-armhf-libvirt 14 guest-saverestorefail   never pass
 test-armhf-armhf-libvirt-qcow2 11 migrate-support-checkfail never pass
 test-armhf-armhf-libvirt-qcow2 13 guest-saverestorefail never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 13 guest-saverestorefail   never pass

version targeted for testing:
 libvirt  fe94ee5db5461970743f52a85fc295af023f50bf
baseline version:
 libvirt  cbbaa17faf2e8dd92455d69daaa05178be4dce05

Last test of basis   100782  2016-09-07 04:20:15 Z1 days
Testing same since   100802  2016-09-08 04:20:09 Z0 days1 attempts


People who touched revisions under test:
  Erik Skultety 
  Julio Faracco 
  Maxim Nestratov 
  Michal Privoznik 
  Peter Krempa 
  Rufo Dogav 
  Yanqiu Zhang 
  Yuri Pudgorodskiy 

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm   pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsmpass
 test-amd64-amd64-libvirt-xsm pass
 test-armhf-armhf-libvirt-xsm fail
 test-amd64-i386-libvirt-xsm  pass
 test-amd64-amd64-libvirt pass
 test-armhf-armhf-libvirt fail
 test-amd64-i386-libvirt  pass
 test-amd64-amd64-libvirt-pairpass
 test-amd64-i386-libvirt-pair pass
 test-armhf-armhf-libvirt-qcow2   fail
 test-armhf-armhf-libvirt-raw fail
 test-amd64-amd64-libvirt-vhd pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

+ branch=libvirt
+ revision=fe94ee5db5461970743f52a85fc295af023f50bf
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x '!=' x/home/osstest/repos/lock ']'
++ OSSTEST_REPOS_LOCK_LOCKED=/home/osstest/repos/lock
++ exec with-lock-ex -w /home/osstest/repos/lo

Re: [Xen-devel] [PATCH v5 12/16] x86/efi: create new early memory allocator

2016-09-08 Thread Daniel Kiper
On Wed, Sep 07, 2016 at 08:01:31AM -0600, Jan Beulich wrote:
> >>> On 07.09.16 at 14:05,  wrote:
> > On Mon, Sep 05, 2016 at 06:33:57AM -0600, Jan Beulich wrote:
> >> >>> On 20.08.16 at 00:43,  wrote:
> >> > +static char __initdata *ebmalloc_free = NULL;
> >> > +
> >> > +/* EFI boot allocator. */
> >> > +static void __init *ebmalloc(size_t size)
> >> > +{
> >> > +void *ptr;
> >> > +
> >> > +/*
> >> > + * Init ebmalloc_free on runtime. Static initialization
> >> > + * will not work because it puts virtual address there.
> >> > + */
> >>
> >> I don't understand this static allocation comment: We have this issue
> >> elsewhere (and use bootsym() as needed), and we do not have this
> >> issue at all in xen.efi (which this code also gets built for). So I think 
> >> at
> >> the very least the comment needs improvement. And then, if static
> >> initialization indeed can't be used, then a static symbol's initializer of
> >> NULL is pointless and hence should be omitted.
> >
> > You have to remember that xen/arch/x86/efi/efi-boot.h stuff is build
> > into xen.efi and xen.gz. Of course xen.efi with
> >
> > static char __initdata *ebmalloc_free = ebmalloc_mem;
> >
> > works, however, xen.gz does not. Sadly, I have not found simpler
> > solution for that issue, so, I do initialization during runtime.
>
> Which all is in line with my request of improving the comment.

OK.

> >> > +if ( ebmalloc_free == NULL )
> >> > +ebmalloc_free = ebmalloc_mem;
> >> > +
> >> > +ptr = ebmalloc_free;
> >> > +
> >> > +ebmalloc_free += size;
> >>
> >> No minimal (at least pointer size) alignment getting enforced
> >> somewhere here?
> >
> > For what?
>
> To avoid the penalty unaligned accesses incur? And that's alongside
> the fact that it's simply bad practice to knowingly but without actual
> need cause unaligned accesses even if they work fine.

I expected that but I do not think it is very important here. Anyway,
I am still not sure why you say "at least pointer size". Because
sizeof(void *) assures proper alignment on any architecture?
Additionally, will this alignment sufficiently replace alignment
provided by current efi_arch_allocate_mmap_buffer() implementation?

> >> And then - wouldn't this better go into xen/common/efi/boot.c,
> >> even if ARM64 does not have a use for it right away? The code
> >> certainly isn't really x86-specific.
> >
> > Sure thing. However, if it is not used by ARM64 then I think ebmalloc
> > stuff should not be moved to xen/common/efi/boot.c.
>
> Being architecture independent it has all reasons to be moved
> there. Agreed there may be compiler warnings for these then
> being unused static functions, but I'd rather see this code get
> #ifdef-ed out for ARM for the time being than it needing to be

OK.

> moved over later on. And of course a question to be asked first
> is whether in fact there is something in common or ARM specific
> code that could benefit from using this new allocator, if you
> already introduce it.

I think that it is x86 specific stuff and should stay here as is.
However, potentially it can be common allocator for both architectures.
Though I do not see gains on ARM itself.

Daniel

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 6/9] livepatch: Add parsing for the symbol+0x

2016-09-08 Thread Konrad Rzeszutek Wilk
On Wed, Sep 07, 2016 at 02:10:43AM -0600, Jan Beulich wrote:
> >>> On 06.09.16 at 21:56,  wrote:
> > On Wed, Aug 24, 2016 at 03:08:01AM -0600, Jan Beulich wrote:
> >> >>> On 24.08.16 at 04:22,  wrote:
> >> > --- a/xen/common/livepatch.c
> >> > +++ b/xen/common/livepatch.c
> >> > @@ -237,13 +237,34 @@ static const char 
> >> > *livepatch_symbols_lookup(unsigned long addr,
> >> >  static int resolve_old_address(struct livepatch_func *f,
> >> > const struct livepatch_elf *elf)
> >> >  {
> >> > +const char *s;
> >> > +char *plus = NULL;
> >> 
> >> Pointless initializer.
> > 
> > We need that otherwise this part (which is at the bottom of this function):
> > 
> > if ( plus )
> > {
> > *plus = '+';
> > f->old_addr += offset;
> > }
> > 
> > 
> > May be invoked for symbols that that don't have the '+' in them.
> 
> I don't see how it would. This
> 
> plus = strchr(f->name, '+');
> 
> comes ahead of any paths leading to the code you quote.

Ah. Stale information - the earlier patch had 'slash' and 'plus' variables
to look for - and that was why I needed it.

But with the code you are quoting - it is not needed.
> 
> >> > +/* + */
> >> > +plus = strchr(f->name, '+');
> >> 
> >> And I think you should prefer using the local variable here.
> > 
> > 
> > 
> >> 
> >> Furthermore you're losing const here - does f->name really point
> >> to memory that doesn't get mapped r/o?
> > 
> > Yes.
> > 
> > The 'struct livepatch_func' contains the ->opaque array of 31 bytes
> > (from which we use 5 bytes) which the hypervisor uses to stash the original
> > instructions.
> 
> How does the patch name end up in (5 bytes of) the opaque field?

I was (ineptly) saying that the struct livepatch_func has fields that are
modified, hence it ends up in .data section.

Wait a minute. The f->name should have a relocation to point to .rodata
instead of .data! And that should have crashed when I modified it.

Ah, they are all 'static char name[] = "blah"' instead of
'static const char name[] = "blah"'.

Patch queued up.

> In any event the correctness of deliberately stripping const should
> be explained in a comment (if, of course, it can't be avoided in the
> first place).
> 
> >> Overall - are you sure you want to disallow symbol names containing
> >> + characters? I.e. you don't want to add support for some form of
> >> quoting?
> > 
> > Can you actually have + in a function or object?
> 
> Why not? The ELF spec, iirc, doesn't put any restrictions on what
> characters (other than nul of course) can be used in symbol names.
> gas actually has received full quoting support a year or two ago,
> to no longer needlessly restrict the character set available here.

I was thinking of + in the C land. But that is irrelevant to this
discussion.

Let me dig in the gas code to find examples of this - but in the
meantime (and if you recall), you meant something like this:

"do_domain_pause+something"

?
Which would mean for offset purposes I would need to deal with:

"do_domain_pause+something"+0x10

or
'do_domain_pause+something'+0x10



> 
> Jan
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 1/9] livepatch: Clear .bss when payload is reverted

2016-09-08 Thread Konrad Rzeszutek Wilk
On Wed, Sep 07, 2016 at 02:02:44AM -0600, Jan Beulich wrote:
> >>> On 06.09.16 at 18:47,  wrote:
> > On Wed, Aug 24, 2016 at 02:55:21AM -0600, Jan Beulich wrote:
> >> >>> On 24.08.16 at 04:22,  wrote:
> >> > --- a/xen/common/livepatch.c
> >> > +++ b/xen/common/livepatch.c
> >> > @@ -70,6 +70,9 @@ struct payload {
> >> >  unsigned int nsyms;  /* Nr of entries in .strtab 
> >> > and symbols. */
> >> >  struct livepatch_build_id id;/* 
> >> > ELFNOTE_DESC(.note.gnu.build-id) of the payload. */
> >> >  struct livepatch_build_id dep;   /* 
> >> > ELFNOTE_DESC(.livepatch.depends). */
> >> > +void **bss;  /* .bss's of the payload. */
> >> > +size_t *bss_size;/* and their sizes. */
> >> 
> >> Is size_t wide enough in the extreme case? Perhaps yes, because I
> >> don't think we'll ever load 64-bit ELF on a 32-bit platform.
> > 
> > Nonetheless having a huge .bss is a kind of extreme? Perhaps we should
> > have an seperate patch that checks the SHT_NOBITS and disallows .bss's
> > bigger than say 2MB?
> 
> Well, the extra check certainly wouldn't hurt, but I think before
> hitting the size_t limit you'd run out of address space to place
> the payload in (as that's iirc a less than 1Gb area).

True. And on ARM 32|64 even smaller (2MB). Let me force an
even smaller width type - say 'unsigned int'.
> 
> Jan
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Impact of HW vulnerabilities & Implications on Security Vulnerability Process

2016-09-08 Thread George Dunlap
On 07/09/16 22:02, Stefano Stabellini wrote:
> On Wed, 7 Sep 2016, Meng Xu wrote:
>> On Wed, Sep 7, 2016 at 3:08 PM, Stefano Stabellini
>>  wrote:
>>>
>>> On Wed, 7 Sep 2016, Ian Jackson wrote:
> Technical
> =
> On the technical front, it would be good to understand whether
> a) This is a real threat and whether thus, we as a community need to
>take action

 It is unclear what action the Xen upstream community can usefully
 take, other than providing users with information.

 But, users with deployments on actual hardware ought to try to find
 out whether they are vulnerable.  If they are then they could seek
 replacement non-faulty hardware from their vendor, or take unpleasant
 migitation measures (like switching to HVM, perhaps).
>>>
>>> How difficult is to check for it?
>>>
>>> Is there a simple test, maybe a little executable, that users could use
>>> to find out whether their ram is vulnerable? That would be extremely
>>> valuable.
>>
>> Google does have a github repo to do the rowhammer test:
>> https://github.com/google/rowhammer-test
> 
> Nice! It would be good to document this in a Xen Project document
> somewhere.
> 
> The code is small enough that we could even consider pulling it in Xen
> and running it at boot time (obviously it would be a kconfig option to
> compile and a xen command line option to run the test). In case of
> failure we could WARN the sysadmin and refuse to continue.

The rowhammer test takes a long time; on the order of an hour or two.  I
don't think people would appreciate those kinds of boot times. ;-)

Additionally, the default version in the Google repo randomly corrupts
memory -- potentially including Xen memory.  And if you have ECC memory,
the result of an uncorrectable error is often a machine reboot.  So
there would be a risk that adding such a test on a vulnerable system
would cause Xen to always reboot; or worse, to boot but after having
corrupted its own data or text segments.

I've been playing around with it, but "unfortunately" both my test
machine and the machine under my desk have ECC RAM.  I ran the
double-sided rowhammer test for 3 hours yesterday on the machine under
my desk, and the Linux EDAC driver didn't report any errors corrected.
This could either be because no errors happened, or because the errors
weren't being reported to Linux.  If no errors happened, it could be
because I'm not vulnerable, or because the test doesn't work on my hardware.

So unfortunately, there are just too many unknowns at this point to give
useful advice, other than "ECC RAM is probably better than non-ECC RAM".

 -George

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] OVMF for Xen PVH

2016-09-08 Thread Anthony PERARD
Hello,

We are introducing a new virtualisation mode in Xen called PVHv2 (also
called hvmlite in the past). We would like to have a UEFI firmware
running on it to make it easier to start a guest. (Right now, I think it
involves supplying the guest kernel to the guest config, like a PV
guest.)

I'm exploring different possibility of what could be done, and what
should be avoided. It would be nice to have only one binary for both
PVHv2 guest and HVM guest.

Would it be possible to introduce a different entry point in OVMF? The
current one cannot be used at the start of the day of a PVHv2 guest.

If not, we'll try to use the current entry point or create a new package
like it has been done for Xen on ARM.

Thanks for any feedback,

-- 
Anthony PERARD

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [xen-unstable test] 100789: regressions - FAIL

2016-09-08 Thread Wei Liu
On Thu, Sep 08, 2016 at 05:32:00AM +, osstest service owner wrote:
> flight 100789 xen-unstable real [real]
> http://logs.test-lab.xenproject.org/osstest/logs/100789/
> 
> Regressions :-(
> 
> Tests which did not succeed and are blocking,
> including tests which could not be run:
>  test-amd64-amd64-libvirt-vhd  9 debian-di-installfail REGR. vs. 
> 100773
[...]
>  test-armhf-armhf-xl-vhd   9 debian-di-installfail REGR. vs. 
> 100773
> 

Andrew pointed out IRL that these two regressions are unfortunate side
effect of deleting blktap2. In short, the vhd-util used in these tests
comes from Xen's blktap2. :-/

I see three ways to move this forward.

1. Resurrect vhd-util from blktap2.
2. Install blktap-utils shipped in Debian (available from Wheezy
   onwards), the main difficulty would be the package depends on a dkms
   package that seems to require building with kernel header when
   installing.
3. Retire these two tests.

In the meantime, if we want to avoid blocking xen-unstable for too long,
we might want to force push.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [xen-unstable test] 100789: regressions - FAIL

2016-09-08 Thread Andrew Cooper
On 08/09/16 10:43, Wei Liu wrote:
> On Thu, Sep 08, 2016 at 05:32:00AM +, osstest service owner wrote:
>> flight 100789 xen-unstable real [real]
>> http://logs.test-lab.xenproject.org/osstest/logs/100789/
>>
>> Regressions :-(
>>
>> Tests which did not succeed and are blocking,
>> including tests which could not be run:
>>  test-amd64-amd64-libvirt-vhd  9 debian-di-installfail REGR. vs. 
>> 100773
> [...]
>>  test-armhf-armhf-xl-vhd   9 debian-di-installfail REGR. vs. 
>> 100773
>>
> Andrew pointed out IRL that these two regressions are unfortunate side
> effect of deleting blktap2. In short, the vhd-util used in these tests
> comes from Xen's blktap2. :-/
>
> I see three ways to move this forward.
>
> 1. Resurrect vhd-util from blktap2.
> 2. Install blktap-utils shipped in Debian (available from Wheezy
>onwards), the main difficulty would be the package depends on a dkms
>package that seems to require building with kernel header when
>installing.
> 3. Retire these two tests.
>
> In the meantime, if we want to avoid blocking xen-unstable for too long,
> we might want to force push.

+1 to a force push for now.  There are quite a few changes currently
blocked.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH] arm/xen: fix SMP guests boot

2016-09-08 Thread Vitaly Kuznetsov
Commit 88e957d6e47f ("xen: introduce xen_vcpu_id mapping") broke SMP ARM
guests on Xen. When FIFO-based event channels are in use (this is the
default), evtchn_fifo_alloc_control_block() is called on CPU_UP_PREPARE
event and this happens before we set up xen_vcpu_id mapping in
xen_starting_cpu. Temporary fix the issue by setting direct Linux CPU id
<-> Xen vCPU id mapping for all possible CPUs at boot. We don't currently
support kexec/kdump on Xen/ARM so these ids always match.

In future, we have several ways to solve the issue, e.g.:
- Eliminate all hypercalls from CPU_UP_PREPARE, do them from the starting
CPU. This can probably be done for both x86 and ARM and, if done, will
allow us to get Xen's idea of vCPU id from CPUID/MPIDR on the starting CPU
directly, no messing with ACPI/device tree required.
- Save vCPU id information from ACPI/device tree on ARM and use it to
initialize xen_vcpu_id mapping. This is the same trick we currently do on
x86.

Reported-by: Julien Grall 
Tested-by: Wei Chen 
Signed-off-by: Vitaly Kuznetsov 
---
It would be nice if this patch could still make it to 4.8 as all SMP
ARM/Xen guests are currently broken.
---
 arch/arm/xen/enlighten.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 3d2cef6..f193414 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -170,9 +170,6 @@ static int xen_starting_cpu(unsigned int cpu)
pr_info("Xen: initializing cpu%d\n", cpu);
vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
 
-   /* Direct vCPU id mapping for ARM guests. */
-   per_cpu(xen_vcpu_id, cpu) = cpu;
-
info.mfn = virt_to_gfn(vcpup);
info.offset = xen_offset_in_page(vcpup);
 
@@ -330,6 +327,7 @@ static int __init xen_guest_init(void)
 {
struct xen_add_to_physmap xatp;
struct shared_info *shared_info_page = NULL;
+   int cpu;
 
if (!xen_domain())
return 0;
@@ -380,7 +378,8 @@ static int __init xen_guest_init(void)
return -ENOMEM;
 
/* Direct vCPU id mapping for ARM guests. */
-   per_cpu(xen_vcpu_id, 0) = 0;
+   for_each_possible_cpu(cpu)
+   per_cpu(xen_vcpu_id, cpu) = cpu;
 
xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
-- 
2.7.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/3] x86: refactor psr implementation in hypervisor.

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 09:28,  wrote:
> On 16-09-07 03:01:34, Jan Beulich wrote:
>> >> >>> On 25.08.16 at 07:22,  wrote:
>> >> > +unsigned int (*exceed_range)(uint64_t *mask, struct feat_list 
>> >> > *pFeat,
>> >> > + unsigned int cos);
>> >> 
>> >> According to the comment this is kind of a predicate, which seems
>> >> unlikely to return an unsigned value. In fact without a word on the
>> >> return value I'd expect such to return bool. And I'd also expect the
>> >> name to reflect the purpose, i.e. "exceeds_name()". Plus just like
>> >> for compare above I wonder whether come or all of the parameters
>> >> should be pointers to const (please go over the entire patch and do
>> >> constification wherever possible/sensible).
>> >> 
>> > Yes, you are right. I will change the function type to bool and add const
>> > for not changed input pointers.
>> > 
>> > This function is used to check if the input cos id exceeds the cos_max. If 
>> > yes
>> > and the set value is not default value, we should return error. So, I think
>> > to change the function name to exceed_cos_max(). How do you think?
>> 
>> Okay, except that I continue to think you mean "exceeds".
>> "exceed_cos_max" to me is kind of a directive, not a predicate.
>> 
> How about "beyond"?

What's wrong with "exceeds"?

>> >> > +static int l3_cat_compare_mask(uint64_t *mask, struct feat_list *pFeat,
>> >> > +   unsigned int cos, bool_t *found)
>> >> > +{
>> >> > +struct psr_cat_lvl_info cat_info;
>> >> > +uint64_t l3_def_cbm;
>> >> > +
>> >> > +memcpy(&cat_info, pFeat->feat_info, sizeof(struct 
>> >> > psr_cat_lvl_info));
>> >> 
>> >> Already here I think this memcpy()ing gets unwieldy. Can't you
>> >> simply make the structure field a union of all types of interest?
>> >> 
>> > Sorry that I am not very clear about your meaning to make a union. Do you 
>> > mean
>> > make feat_info a union? If so, it will lose the universality to cover all
>> > features. Future feature may have different info.
>> 
>> Which is the purpose of a union - you'd simply add a new member
>> then.
>>
> I guess your idea likes below. Right?
> union {
> struct l3_info {
> union {
> uint64_t cbm;
> struct {
> uint64_t code;
> uint64_t data;
> };
> };
> 
> };
> 
> struct l2_info {
> uint64_t cbm;
> };
> };
>  
> My original design is to use this feat_info to cover all features and eliminate
> the feature's specific properties. If using above union, we still need to
> know the current feature is which when handles feat_info. That loses the
> abstraction.
> 
> If my thought is not right, please correct me. Thanks!

I don't understand what abstraction you would lose with the above
layout. The memcpy()int you currently do is, I'm sorry to say that,
horrible.

>> > I think I can replace the memcpy() to directly assign value to cat_info.
>> 
>> No, this copying (done in _many_ places) really should go away.
>> 
> I want to replace memcpy() to below codes.
> cat_info.cbm_len = feat_info[0];
> cat_info.cos_max = feat_info[1];

And again do that in a dozen places? No, please don't.

>> >> > +if ( type == PSR_MASK_TYPE_L3_CBM )
>> >> > +mask[0] = m;
>> >> 
>> >> This overwriting behavior also looks quite strange to me. What's
>> >> the purpose? And if this really is meant to be that way, why is
>> >> this not (leaving aside the other suggested adjustment)
>> >> 
>> >> if ( type == PSR_MASK_TYPE_L3_CBM )
>> >> mask[0] = m;
>> >> else if ( old_cos > cat_info.cos_max )
>> >> mask[0] = pFeat->cos_reg_val[0];
>> >> else
>> >> mask[0] = pFeat->cos_reg_val[old_cos];
>> >> 
>> >> ?
>> >> 
>> > get_old_set_new() is used to do below two things:
>> > 1. get old_cos register value of all supported features and
>> > 2. set the new value for appointed feature.
>> > 
>> > So, if the appointed feature is L3 CAT, we should set input vallue for it 
>> > here.
>> 
>> Okay, that answers the "what" aspect, but leaves open _why_ it
>> needs to be that way.
>> 
> A scenario here to help to understand _why_. 
> 
> Like the example for explaining get_old_set_new(), old_cos of the domain is 
> 1.
> Then, User wants to set L3 CAT CBM to 0x1ff and L2 CAT 0x3f. The original
> COS registers like below.
> 
> ---
> | COS 0 | COS 1 | COS 2 | ... |
> ---
> L3 CAT  | 0x7ff | 0x3ff | 0x1ff | ... |
> ---
> L2 CAT  | 0xff  | 0x3f  | 0x3f  | ... |
> ---
> 
> Then, mask array should be assembled in get_old_set_new() like below:
> mask[0]: 0x1ff
> mask[1]: 0x3f
> 
> Then, we can use this mask array to find if there is matching COS through
> compare_mask(). We can find COS 2 is the matching one. 
> 
> If there is already a COS registers combination (e.g. L3 COS 2

[Xen-devel] [PATCH] x86/paging: Make paging_mode_*() predecates behave like predicates

2016-09-08 Thread Andrew Cooper
Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Tim Deegan 
CC: George Dunlap 
---
 xen/include/asm-x86/paging.h | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index a1401ab..56eef6b 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -57,14 +57,14 @@
  * requires VT or similar mechanisms */
 #define PG_external(XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
 
-#define paging_mode_enabled(_d)   ((_d)->arch.paging.mode)
-#define paging_mode_shadow(_d)((_d)->arch.paging.mode & PG_SH_enable)
-#define paging_mode_hap(_d)   ((_d)->arch.paging.mode & PG_HAP_enable)
-
-#define paging_mode_refcounts(_d) ((_d)->arch.paging.mode & PG_refcounts)
-#define paging_mode_log_dirty(_d) ((_d)->arch.paging.mode & PG_log_dirty)
-#define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate)
-#define paging_mode_external(_d)  ((_d)->arch.paging.mode & PG_external)
+#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
+#define paging_mode_shadow(_d)(!!((_d)->arch.paging.mode & PG_SH_enable))
+#define paging_mode_hap(_d)   (!!((_d)->arch.paging.mode & PG_HAP_enable))
+
+#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
+#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
+#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
+#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))
 
 /* flags used for paging debug */
 #define PAGING_DEBUG_LOGDIRTY 0
-- 
2.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v5 12/16] x86/efi: create new early memory allocator

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 10:29,  wrote:
> On Wed, Sep 07, 2016 at 08:01:31AM -0600, Jan Beulich wrote:
>> >>> On 07.09.16 at 14:05,  wrote:
>> > On Mon, Sep 05, 2016 at 06:33:57AM -0600, Jan Beulich wrote:
>> >> >>> On 20.08.16 at 00:43,  wrote:
>> >> > +if ( ebmalloc_free == NULL )
>> >> > +ebmalloc_free = ebmalloc_mem;
>> >> > +
>> >> > +ptr = ebmalloc_free;
>> >> > +
>> >> > +ebmalloc_free += size;
>> >>
>> >> No minimal (at least pointer size) alignment getting enforced
>> >> somewhere here?
>> >
>> > For what?
>>
>> To avoid the penalty unaligned accesses incur? And that's alongside
>> the fact that it's simply bad practice to knowingly but without actual
>> need cause unaligned accesses even if they work fine.
> 
> I expected that but I do not think it is very important here. Anyway,
> I am still not sure why you say "at least pointer size". Because
> sizeof(void *) assures proper alignment on any architecture?

Yes, this gives (on "normal" architectures at least) machine word
size alignment, which commonly is good enough for everything
except SIMD data (or things similar to it).

> Additionally, will this alignment sufficiently replace alignment
> provided by current efi_arch_allocate_mmap_buffer() implementation?

Just compare __alignof__(EFI_MEMORY_DESCRIPTOR) and
__alignof__(void *).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [ovmf test] 100805: all pass - PUSHED

2016-09-08 Thread osstest service owner
flight 100805 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/100805/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 ovmf 4ac14ceae076439dcea926bc47cda4e1d2779cae
baseline version:
 ovmf ad8a2f5e68fd9850c10740a6ace2ab785cb99818

Last test of basis   100801  2016-09-08 03:01:09 Z0 days
Testing same since   100805  2016-09-08 05:50:03 Z0 days1 attempts


People who touched revisions under test:
  Liming Gao 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

+ branch=ovmf
+ revision=4ac14ceae076439dcea926bc47cda4e1d2779cae
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x '!=' x/home/osstest/repos/lock ']'
++ OSSTEST_REPOS_LOCK_LOCKED=/home/osstest/repos/lock
++ exec with-lock-ex -w /home/osstest/repos/lock ./ap-push ovmf 
4ac14ceae076439dcea926bc47cda4e1d2779cae
+ branch=ovmf
+ revision=4ac14ceae076439dcea926bc47cda4e1d2779cae
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x/home/osstest/repos/lock '!=' x/home/osstest/repos/lock ']'
+ . ./cri-common
++ . ./cri-getconfig
++ umask 002
+ select_xenbranch
+ case "$branch" in
+ tree=ovmf
+ xenbranch=xen-unstable
+ '[' xovmf = xlinux ']'
+ linuxbranch=
+ '[' x = x ']'
+ qemuubranch=qemu-upstream-unstable
+ select_prevxenbranch
++ ./cri-getprevxenbranch xen-unstable
+ prevxenbranch=xen-4.7-testing
+ '[' x4ac14ceae076439dcea926bc47cda4e1d2779cae = x ']'
+ : tested/2.6.39.x
+ . ./ap-common
++ : osst...@xenbits.xen.org
+++ getconfig OsstestUpstream
+++ perl -e '
use Osstest;
readglobalconfig();
print $c{"OsstestUpstream"} or die $!;
'
++ :
++ : git://xenbits.xen.org/xen.git
++ : osst...@xenbits.xen.org:/home/xen/git/xen.git
++ : git://xenbits.xen.org/qemu-xen-traditional.git
++ : git://git.kernel.org
++ : git://git.kernel.org/pub/scm/linux/kernel/git
++ : git
++ : git://xenbits.xen.org/libvirt.git
++ : osst...@xenbits.xen.org:/home/xen/git/libvirt.git
++ : git://xenbits.xen.org/libvirt.git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/rumprun.git
++ : git://git.seabios.org/seabios.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/seabios.git
++ : git://xenbits.xen.org/osstest/seabios.git
++ : https://github.com/tianocore/edk2.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/ovmf.git
++ : git://xenbits.xen.org/osstest/ovmf.git
++ : git://xenbits.xen.org/osstest/linux-firmware.git
++ : osst...@xenbits.xen.org:/home/osstest/ext/linux-firmware.git
++ : git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
++ : osst...@xenbits.xen.org:/home/xen/git/linux-pvops.git
++ : git://xenbits.xen.org/linux-pvops.git
++ : tested/linux-3.14
++ : tested/linux-arm-xen
++ '[' xgit://xenbits.xen.

Re: [Xen-devel] [PATCH] x86/paging: Make paging_mode_*() predecates behave like predicates

2016-09-08 Thread Tim Deegan
At 10:55 +0100 on 08 Sep (1473332146), Andrew Cooper wrote:
> Signed-off-by: Andrew Cooper 

s/predecates/predicates/, and Acked-by: Tim Deegan 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86/paging: Make paging_mode_*() predecates behave like predicates

2016-09-08 Thread Andrew Cooper
On 08/09/16 11:00, Tim Deegan wrote:
> At 10:55 +0100 on 08 Sep (1473332146), Andrew Cooper wrote:
>> Signed-off-by: Andrew Cooper 
> s/predecates/predicates/, and Acked-by: Tim Deegan 

Ah - so it is.  Will fix.

Thanks.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v4 6/9] livepatch: Add parsing for the symbol+0x

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 11:22,  wrote:
> On Wed, Sep 07, 2016 at 02:10:43AM -0600, Jan Beulich wrote:
>> >>> On 06.09.16 at 21:56,  wrote:
>> > On Wed, Aug 24, 2016 at 03:08:01AM -0600, Jan Beulich wrote:
>> >> Overall - are you sure you want to disallow symbol names containing
>> >> + characters? I.e. you don't want to add support for some form of
>> >> quoting?
>> > 
>> > Can you actually have + in a function or object?
>> 
>> Why not? The ELF spec, iirc, doesn't put any restrictions on what
>> characters (other than nul of course) can be used in symbol names.
>> gas actually has received full quoting support a year or two ago,
>> to no longer needlessly restrict the character set available here.
> 
> I was thinking of + in the C land. But that is irrelevant to this
> discussion.
> 
> Let me dig in the gas code to find examples of this - but in the
> meantime (and if you recall), you meant something like this:
> 
> "do_domain_pause+something"
> 
> ?

Yes.

> Which would mean for offset purposes I would need to deal with:
> 
> "do_domain_pause+something"+0x10
> 
> or
> 'do_domain_pause+something'+0x10

Yes. Or, as said - at least queue it up as a work item, as I certainly
agree it's not the highest priority thing to deal with right away.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] x86/paging: Make paging_mode_*() predecates behave like predicates

2016-09-08 Thread George Dunlap
On 08/09/16 10:55, Andrew Cooper wrote:
> Signed-off-by: Andrew Cooper 

In case it needs it:

Acked-by: George Dunlap 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [qemu-upstream-unstable baseline-only test] 67670: regressions - FAIL

2016-09-08 Thread Platform Team regression test user
This run is configured for baseline tests only.

flight 67670 qemu-upstream-unstable real [real]
http://osstest.xs.citrite.net/~osstest/testlogs/logs/67670/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-armhf-armhf-xl-vhd  14 guest-start/debian.repeat fail REGR. vs. 66870
 test-armhf-armhf-libvirt-raw  9 debian-di-install fail REGR. vs. 66870
 test-amd64-amd64-qemuu-nested-intel 16 debian-hvm-install/l1/l2 fail REGR. vs. 
66870

Regressions which are regarded as allowable (not blocking):
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stop   fail blocked in 66870
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stopfail blocked in 66870

Tests which did not succeed, but are not blocking:
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-armhf-armhf-xl-midway   12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-midway   13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 14 guest-saverestorefail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 14 guest-saverestorefail   never pass
 test-armhf-armhf-xl-rtds 12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-rtds 13 saverestore-support-checkfail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-armhf-armhf-libvirt-qcow2 11 migrate-support-checkfail never pass
 test-armhf-armhf-libvirt-qcow2 13 guest-saverestorefail never pass
 test-armhf-armhf-xl-vhd  11 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-vhd  12 saverestore-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-libvirt-vhd 11 migrate-support-checkfail   never pass

version targeted for testing:
 qemuu570117996772b762e9654e58e708943a4db68b4f
baseline version:
 qemuud145386f52950c0c5d4587dbb6c3b9cdf3a58309

Last test of basis66870  2016-07-30 22:17:31 Z   39 days
Testing same since67670  2016-09-08 00:15:01 Z0 days1 attempts


421 people touched revisions under test,
not listing them all

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl  pass
 test-armhf-armhf-xl  pass
 test-amd64-i386-xl   pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm   pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsmpass
 test-amd64-amd64-xl-qemuu-debianhvm-amd64-xsmpass
 test-amd64-i386-xl-qemuu-debianhvm-amd64-xsm pass

Re: [Xen-devel] OVMF for Xen PVH

2016-09-08 Thread Laszlo Ersek
On 09/08/16 11:38, Anthony PERARD wrote:
> Hello,
> 
> We are introducing a new virtualisation mode in Xen called PVHv2 (also
> called hvmlite in the past). We would like to have a UEFI firmware
> running on it to make it easier to start a guest. (Right now, I think it
> involves supplying the guest kernel to the guest config, like a PV
> guest.)
> 
> I'm exploring different possibility of what could be done, and what
> should be avoided. It would be nice to have only one binary for both
> PVHv2 guest and HVM guest.
> 
> Would it be possible to introduce a different entry point in OVMF? The
> current one cannot be used at the start of the day of a PVHv2 guest.
> 
> If not, we'll try to use the current entry point or create a new package
> like it has been done for Xen on ARM.
> 
> Thanks for any feedback,
> 

I've been thinking about having a shared OVMF binary for Xen and
QEMU/KVM (from a different perspective), and I did recall that ArmVirt
has separate platform DSCs / FDFs for Xen and QEMU.

The question that made me think about this is the number and size of
modules that we now build into the OVMF binary. The binary has been
continuously growing (internally), and while Ard did some fantastic work
on enabling -Os for a bunch of edk2 compilers and platforms, the
compressed size (= the ultimate utilization of the flash chip) has not
gone down significantly, if I recall correctly.

Growing the non-compressed DXEFV (which -Os mitigates significantly) is
not terribly hard, as long as we don't outgrow OVMF_CODE.fd (1920 KB),
i.e., the external thingy after compression. Outgrowing OVMF_CODE.fd
might be major pain for distros however, so I've been thinking about
trimming the builds statically.

There's some low hanging fruit for that; for example the virtio drivers
should only go into the qemu/KVM build, same for the SMM driver stack,
same for the pflash driver. Whereas the XenPV drivers, the FS-based
varstore emulation, etc should go only into the Xen build.

So, from this (independent) POV, I'd prefer separate builds for Xen and
qemu/KVM.

Regarding the entry point itself, the SMM work has complicated those
early (= SEC / PEI) modules quite a bit (for example, grep OvmfPkg for
"PcdSmmSmramRequire"). I think if you start with a separate platform,
that will make your work easier (giving you more freedom in
accommodating both PVHv2 and HVM, without regard to qemu/KVM), and allow
me to keep my sanity -- think regressions, reviews, etc :)

Here's another point I've been thinking about, on-and-off: I find it
regrettable that we don't have any official co-maintainer in
Maintainers.txt for OvmfPkg's Xen parts. We've regressed Xen a few times
in the past because none of the OvmfPkg co-maintainers run Xen. This
should certainly be fixed.

Now, if you create a new platform (DSC + FDF) for Xen, that sort of
forces someone from the Xen community to assume co-maintainership for
the Xen bits. (Hopefully those bits would be easily identifiable by
pathname.) I'd welcome that *very much*.

So, I prefer a separate platform. I'd suggest to extract the Xen
platform with the current functionality first (with all those additional
benefits), then rework the new Xen platform to accommodate PVHv2 as well
(possibly with different Sec / PlatformPei modules etc).

Do wait for feedback from Jordan please.

Thanks
Laszlo

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH Altp2m cleanup v4 1/4] x86/HVM: adjust feature checking in MSR intercept handling

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 00:04,  wrote:
> From: Jan Beulich 
> 
> Consistently consult hvm_cpuid(). With that, BNDCFGS gets better
> handled outside of VMX specific code, just like XSS. Don't needlessly
> check for MTRR support when the MSR being accessed clearly is not an
> MTRR one.
> 
> Signed-off-by: Jan Beulich 
> Reviewed-by: Andrew Cooper 

Why did you (re)send this? It went in yesterday together with its
VMX prereq. Without that prereq it's useless (as it won't apply),
and if you worked on a tree where the prereq was already present,
then this one would have been present too (as they got pushed at
the same time).

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2 20/20] livepatch: ARM32 support.

2016-09-08 Thread Konrad Rzeszutek Wilk
On Thu, Aug 25, 2016 at 09:37:35AM -0400, Konrad Rzeszutek Wilk wrote:
> The patch piggybacks on: livepatch: Initial ARM64 support, which
> brings up all of the necessary livepatch infrastructure pieces in.
> 
> This patch adds three major pieces:
> 
>  1) ELF relocations. ARM32 uses SHT_REL instead of SHT_RELA which
> means the adddendum had to be extracted from within the
> instruction. Which required parsing BL/BLX, B/BL,
> MOVT, and MOVW instructions.
> 
> The code was written from scratch using the ARM ELF manual
> (and the ARM Architecture Reference Manual)
> 
>  2) Inserting an trampoline. We use the B (branch to address)
> which uses an offset that is based on the PC value: PC + imm32.
> Because we insert the branch at the start of the old function
> we have to account for the instruction already being fetched
> and subtract -4 from the delta (new_addr - old_addr).
> 
>  3) Allows the test-cases to be built under ARM 32.
> The "livepatch: tests: Make them compile under ARM64"
> put in the right infrastructure for it and we piggyback on it.
> 
> Signed-off-by: Konrad Rzeszutek Wilk 
> ---
> Cc: Julien Grall 
> Cc: Stefano Stabellini 
> 
> v2: First submission.
> ---
>  xen/arch/arm/arm32/livepatch.c | 252 
> -
>  xen/arch/arm/arm64/livepatch.c |   7 ++
>  xen/arch/arm/livepatch.c   |   7 --
>  xen/common/Kconfig |   2 +-
>  xen/include/xen/elfstructs.h   |  24 +++-
>  xen/test/Makefile  |   2 -
>  xen/test/livepatch/Makefile|   3 +
>  7 files changed, 284 insertions(+), 13 deletions(-)
> 
> diff --git a/xen/arch/arm/arm32/livepatch.c b/xen/arch/arm/arm32/livepatch.c
> index c33b68d..63e450b 100644
> --- a/xen/arch/arm/arm32/livepatch.c
> +++ b/xen/arch/arm/arm32/livepatch.c
> @@ -3,28 +3,276 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  
> +#include 
> +#include 
> +
>  void arch_livepatch_apply_jmp(struct livepatch_func *func)
>  {
> +uint32_t insn;
> +uint32_t *old_ptr;

This is now removed.
> +uint32_t *new_ptr;
> +
> +BUILD_BUG_ON(PATCH_INSN_SIZE > sizeof(func->opaque));
> +BUILD_BUG_ON(PATCH_INSN_SIZE != sizeof(insn));
> +
> +ASSERT(vmap_of_xen_text);
> +
> +/* Save old one. */
> +old_ptr = func->old_addr;
> +memcpy(func->opaque, old_ptr, PATCH_INSN_SIZE);

Which makes this smaller.
> +
> +if ( func->new_addr )
> +{
> +s32 delta;
> +
> +/*
> + * The -4 is to account for the b  instruction placed at
> + * the start of the func->old_addr.
> + */
> +delta = (s32)(func->new_addr - func->old_addr - 4);

And I made this a bit simpler:

delta = (s32)func->new_addr - (s32)func->old_addr - PATCH_INSN_SIZE;

Along with a comment refering to the ARM DDI 0406C.c  A8.8.18

Anyhow, when I posted this patch I was excited that everything "worked". 

But a more diligent test showed that in fact the SP is being corrupted.

That is if I call 'xl info' before patching (with this inline patch):

diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index d0edb13..793e219 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -240,6 +240,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
 xen_extraversion_t extraversion;
 
 memset(extraversion, 0, sizeof(extraversion));
+printk("%s: %p %p\n", &extraversion, xen_extra_version());
 safe_strcpy(extraversion, deny ? xen_deny() : xen_extra_version());
 if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) )
 return -EFAULT;

I get:
(XEN) do_xen_version: dst=43fd7ad8 src=0028b020

OK, good.

With the hypervisor being patched I get:

(XEN) do_xen_version: dst=ffe8 src=00805038

The src is correct - it points to the payload .rodata section.

But the SP is all messed up! And that ends in tears with the hypervisor:
 Assertion 'diff < STACK_SIZE' failed at traps.c:864


Decoding the instructions that are being called (the new xen_extra_version())
yields:

   0:   e52db004    push{fp}
   4:   e28db000add fp, sp, #0
   8:   e3050038movwr0, #20536  ; 0x5038
   c:   e3400080movtr0, #128; 0x80
  10:   e24bd000sub sp, fp, #0
  14:   e12fff1ebx  lr

(This is after the relocation has been done).
And the unconditional branch that is put in the old xen_extra_version
is: 

 0:   ea1710d0b   0x5c4348

Which is correct too - to check for correctness I added two brkp.
One (71 00 20 e1) at the start of the new 'xen_extra_version'.
And then another four bytes in front of it (72 02 20 e1). The signature
of them is different and the exception we hit was the first Prefetch 
Abort
(71 00 20 e1).

Anyhow something is amiss here. I am not sure if there are some hidden
semantics in regard to a B condition.

I am going to try to patc

Re: [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation

2016-09-08 Thread Wei Liu
On Thu, Sep 08, 2016 at 01:30:03PM +0800, Dongli Zhang wrote:
> This patch implemented parts of TODO left in commit id
> a902c12ee45fc9389eb8fe54eeddaf267a555c58. It moved TLB-flush filtering out
> into populate_physmap. Because of TLB-flush in alloc_heap_pages, it's very
> slow to create a guest with memory size of more than 100GB on host with
> 100+ cpus.
> 
> This patch introduced a "MEMF_no_tlbflush" bit to memflags to indicate
> whether TLB-flush should be done in alloc_heap_pages or its caller
> populate_physmap. Once this bit is set in memflags, alloc_heap_pages will
> ignore TLB-flush. To use this bit after vm is created might lead to
> security issue, that is, this would make pages accessible to the guest B,
> when guest A may still have a cached mapping to them.
> 
> Therefore, this patch also introduced a "already_scheduled" field to struct
> domain to indicate whether this domain has ever got scheduled by
> hypervisor.  MEMF_no_tlbflush can be set only during vm creation phase when
> already_scheduled is still 0 before this domain gets scheduled for the
> first time.
> 
> TODO: ballooning very huge amount of memory cannot benefit from this patch
> and might still be slow.
> 
> Signed-off-by: Dongli Zhang 
> 
> ---
> Changed since v2:
>   * Limit this optimization to domain creation time.
> 
> ---
>  xen/common/domain.c |  2 ++
>  xen/common/memory.c | 33 +
>  xen/common/page_alloc.c |  3 ++-
>  xen/common/schedule.c   |  5 +
>  xen/include/xen/mm.h|  2 ++
>  xen/include/xen/sched.h |  3 +++
>  6 files changed, 47 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index a8804e4..611a471 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -303,6 +303,8 @@ struct domain *domain_create(domid_t domid, unsigned int 
> domcr_flags,
>  if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) )
>  goto fail;
>  
> +d->already_scheduled = 0;
> +

Use false please -- this is a bool_t.

[...]
> diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> index 32a300f..593541a 100644
> --- a/xen/common/schedule.c
> +++ b/xen/common/schedule.c
> @@ -1376,6 +1376,11 @@ static void schedule(void)
>  
>  next = next_slice.task;
>  
> +/* Set already_scheduled to 1 when this domain gets scheduled for the
> + * first time */
> +if ( next->domain->already_scheduled == 0 )
> +next->domain->already_scheduled = 1;
> +

Can be simplified by omitting the "if" altogether.  And use "true" here.

Wei.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation

2016-09-08 Thread Dario Faggioli
On Thu, 2016-09-08 at 11:50 +0100, Wei Liu wrote:
> On Thu, Sep 08, 2016 at 01:30:03PM +0800, Dongli Zhang wrote:
> > 
> > diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> > index 32a300f..593541a 100644
> > --- a/xen/common/schedule.c
> > +++ b/xen/common/schedule.c
> > @@ -1376,6 +1376,11 @@ static void schedule(void)
> >  
> >  next = next_slice.task;
> >  
> > +/* Set already_scheduled to 1 when this domain gets scheduled
> > for the
> > + * first time */
> > +if ( next->domain->already_scheduled == 0 )
> > +next->domain->already_scheduled = 1;
> > +
> Can be simplified by omitting the "if" altogether.  
>
Are you sure? I mean looking at the cases when the flag is already true
(which means, during the life of a domain, basically **always** except
a handful of instances after creation), what costs less, a check that
is always false, or a write that is always updating a value with its
current value?

And I'm not being ironic or anything, I honestly am not sure and this
is a genuine question.

> And use "true" here.
> 
Yeah, or just:

 if ( unlikely(!next->domain->already_scheduled) )
     ...

> Wei.
-- 
<> (Raistlin Majere)
-
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)



signature.asc
Description: This is a digitally signed message part
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation

2016-09-08 Thread Wei Liu
On Thu, Sep 08, 2016 at 01:01:40PM +0200, Dario Faggioli wrote:
> On Thu, 2016-09-08 at 11:50 +0100, Wei Liu wrote:
> > On Thu, Sep 08, 2016 at 01:30:03PM +0800, Dongli Zhang wrote:
> > > 
> > > diff --git a/xen/common/schedule.c b/xen/common/schedule.c
> > > index 32a300f..593541a 100644
> > > --- a/xen/common/schedule.c
> > > +++ b/xen/common/schedule.c
> > > @@ -1376,6 +1376,11 @@ static void schedule(void)
> > >  
> > >  next = next_slice.task;
> > >  
> > > +/* Set already_scheduled to 1 when this domain gets scheduled
> > > for the
> > > + * first time */
> > > +if ( next->domain->already_scheduled == 0 )
> > > +next->domain->already_scheduled = 1;
> > > +
> > Can be simplified by omitting the "if" altogether.  
> >
> Are you sure? I mean looking at the cases when the flag is already true
> (which means, during the life of a domain, basically **always** except
> a handful of instances after creation), what costs less, a check that
> is always false, or a write that is always updating a value with its
> current value?

Omitting the check certainly results in fewer instructions. And it would
probably eliminate misses in instruction cache and branch prediction
logic in the processor.

In the grand scheme of things, this is a rather minor optimisation, so I
wouldn't argue strongly for this.

Wei.

> 
> And I'm not being ironic or anything, I honestly am not sure and this
> is a genuine question.
> 
> > And use "true" here.
> > 
> Yeah, or just:
> 
>  if ( unlikely(!next->domain->already_scheduled) )
>      ...
> 
> > Wei.
> -- 
> <> (Raistlin Majere)
> -
> Dario Faggioli, Ph.D, http://about.me/dario.faggioli
> Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
> 



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Impact of HW vulnerabilities & Implications on Security Vulnerability Process

2016-09-08 Thread Ian Jackson
George Dunlap writes ("Re: Impact of HW vulnerabilities & Implications on 
Security Vulnerability Process"):
> What's the conclusion here -- are you inclined to say that we shouldn't
> issue an XSA, but perhaps do some other sort of announcement?

I would like us to _either_ issue an XSA or some other sort of
announcement.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation

2016-09-08 Thread Juergen Gross
On 08/09/16 13:11, Wei Liu wrote:
> On Thu, Sep 08, 2016 at 01:01:40PM +0200, Dario Faggioli wrote:
>> On Thu, 2016-09-08 at 11:50 +0100, Wei Liu wrote:
>>> On Thu, Sep 08, 2016 at 01:30:03PM +0800, Dongli Zhang wrote:
  
 diff --git a/xen/common/schedule.c b/xen/common/schedule.c
 index 32a300f..593541a 100644
 --- a/xen/common/schedule.c
 +++ b/xen/common/schedule.c
 @@ -1376,6 +1376,11 @@ static void schedule(void)
  
  next = next_slice.task;
  
 +/* Set already_scheduled to 1 when this domain gets scheduled
 for the
 + * first time */
 +if ( next->domain->already_scheduled == 0 )
 +next->domain->already_scheduled = 1;
 +
>>> Can be simplified by omitting the "if" altogether.  
>>>
>> Are you sure? I mean looking at the cases when the flag is already true
>> (which means, during the life of a domain, basically **always** except
>> a handful of instances after creation), what costs less, a check that
>> is always false, or a write that is always updating a value with its
>> current value?
> 
> Omitting the check certain results in less instructions. And it would
> probably eliminate misses in instruction cache and branch prediction
> logic in the processor.

The first scheduling is done via unpausing the domain. Why not setting
the flag to true in that path?

Juergen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Impact of HW vulnerabilities & Implications on Security Vulnerability Process

2016-09-08 Thread Wei Liu
On Thu, Sep 08, 2016 at 12:12:22PM +0100, Ian Jackson wrote:
> George Dunlap writes ("Re: Impact of HW vulnerabilities & Implications on 
> Security Vulnerability Process"):
> > What's the conclusion here -- are you inclined to say that we shouldn't
> > issue an XSA, but perhaps do some other sort of announcement?
> 
> I would like us to _either_ issue an XSA or some other sort of
> announcement.
> 

+1 for some other sort of announcement.

Wei.

> Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [distros-debian-wheezy test] 67673: all pass

2016-09-08 Thread Platform Team regression test user
flight 67673 distros-debian-wheezy real [real]
http://osstest.xs.citrite.net/~osstest/testlogs/logs/67673/

Perfect :-)
All tests in this flight passed as required
baseline version:
 flight   67618

jobs:
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-pvopspass
 build-armhf-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-amd64-wheezy-netboot-pvgrub pass
 test-amd64-i386-i386-wheezy-netboot-pvgrub   pass
 test-amd64-i386-amd64-wheezy-netboot-pygrub  pass
 test-amd64-amd64-i386-wheezy-netboot-pygrub  pass



sg-report-flight on osstest.xs.citrite.net
logs: /home/osstest/logs
images: /home/osstest/images

Logs, config files, etc. are available at
http://osstest.xs.citrite.net/~osstest/testlogs/logs

Test harness code can be found at
http://xenbits.xensource.com/gitweb?p=osstest.git;a=summary


Push not applicable.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] Impact of HW vulnerabilities & Implications on Security Vulnerability Process

2016-09-08 Thread Lars Kurth

> On 8 Sep 2016, at 12:12, Ian Jackson  wrote:
> 
> George Dunlap writes ("Re: Impact of HW vulnerabilities & Implications on 
> Security Vulnerability Process"):
>> What's the conclusion here -- are you inclined to say that we shouldn't
>> issue an XSA, but perhaps do some other sort of announcement?
> 
> I would like us to _either_ issue an XSA or some other sort of
> announcement.

xen-announce@ and XSA's go to the same group of people: with the exception that 
xen-announce@  may not
cover all people on the pre-disclosure list and we may not hit the people who 
poll http://xenbits.xen.org/xsa/

I would prefer not to use an XSA, as I laid out before. 
It seems that Ian has a slight preference not to be constrained by the XSA 
format. 

Using xen-announce@ allows us to set up more context (e.g. including to some of 
the 
related studies covering other hypervisors, ...). Secondly xen-announce@ is 
less formal 
and thus the risk that the media will pick it up is significantly lower. 

But I also think that this should contain some practical and useful advice.

Regards
Lars


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [ovmf baseline-only test] 67671: regressions - FAIL

2016-09-08 Thread Platform Team regression test user
This run is configured for baseline tests only.

flight 67671 ovmf real [real]
http://osstest.xs.citrite.net/~osstest/testlogs/logs/67671/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-xl-qemuu-ovmf-amd64 14 guest-saverestore.2 fail REGR. vs. 
67668

version targeted for testing:
 ovmf 960d0de80b288c7cd9cbccfde7a12a48935055b4
baseline version:
 ovmf ec68dc28557925e0708d5676288ad140651a3851

Last test of basis67668  2016-09-07 18:16:43 Z0 days
Testing same since67671  2016-09-08 03:19:26 Z0 days1 attempts


People who touched revisions under test:
  Ard Biesheuvel 
  Hegde Nagaraj P 
  Jiaxin Wu 
  Jiewen Yao 
  Leif Lindholm 
  Michael Zimmermann 
  Ryan Harkin 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 fail
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.xs.citrite.net
logs: /home/osstest/logs
images: /home/osstest/images

Logs, config files, etc. are available at
http://osstest.xs.citrite.net/~osstest/testlogs/logs

Test harness code can be found at
http://xenbits.xensource.com/gitweb?p=osstest.git;a=summary


Push not applicable.


commit 960d0de80b288c7cd9cbccfde7a12a48935055b4
Author: Ard Biesheuvel 
Date:   Wed Sep 7 09:12:29 2016 +0100

ArmPkg/DefaultExceptionHandlerLib AARCH64: add minimal backtrace to crash 
dump

When dumping the CPU state after an unhandled fault, walk the stack
frames and decode the return addresses so we can show a minimal
backtrace. Unfortunately, we do not have sufficient information to
show the function names, but at least we can see the modules and the
return addresses inside the modules.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Leif Lindholm 
Tested-by: Leif Lindholm 

commit 8f0b62a5dac0830698d6cf4b1c25ce2612f93dd8
Author: Ard Biesheuvel 
Date:   Wed Sep 7 09:19:37 2016 +0100

BaseTools/tools_def AARCH64: enable frame pointers for DEBUG builds

Enable frame pointers on DEBUG builds so we can support backtraces in
crash dumps.

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel 
Reviewed-by: Leif Lindholm 
Tested-by: Leif Lindholm 

commit 570e7cd4a42118ba9f20a616c0869503337fbc1c
Author: Michael Zimmermann 
Date:   Wed Sep 7 13:16:18 2016 +0100

ArmPlatformPkg/PrePi: fix secondary stack base

this bug was introduced by:
d2fa09a ArmPlatformPkg/PrePi: switch to ASM_FUNC() asm macro

Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Michael Zimmermann 
Tested-by: Ryan Harkin 

commit 2f4f6489d8f70f1b83851234637e4534e95bf663
Author: Jiewen Yao 
Date:   Tue Sep 6 16:21:15 2016 +0800

Vlv2TbltDevicePkg/dec: Correct wrong default value.

The default PcdPeiIchEhciControllerMemoryBaseAddress value 0xFD00
conflict with the default TXE SECUMA MMIO address.
So we update to 0xFC00 to avoid conflict.

Cc: David Wei 
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Jiewen Yao 
Reviewed-by: David Wei 

commit a27bdc0556d5e87df0e81287f541739a4b77c1f6
Author: Jiewen Yao 
Date:   Tue Sep 6 16:24:31 2016 +0800

Vlv2TbltDevicePkg/PlatformInitPei: Support USB init

In order to support recovery in PEI phase, a platform need initialize
USB controller. This logic is missing in current PchInitPeim.

We removed MultiPlatformInfoInit() because it is already done in
PlatformEarlyInitEntry().

We also initialize XhciMemBaseAddr to 0, or it is garbage value.

Cc: David Wei 
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Jiewen Yao 
Reviewed-by: David Wei 

commit 3093f45c44eaea11e2f2552af7ff5c5b370a93d7
Author: Jiaxin Wu 
Date:   Tue Sep 6 11:23:38 2016 +0800

NetworkPkg/DnsDxe: Handle CNAME type responded from the name server

v2:
* Code refine.
* For DnsCache, the minimum value of TTL is selected between CNAME and 
A/ record.

According RFC 103

[Xen-devel] Xen Security Advisory 186 (CVE-2016-7093) - x86: Mishandling of instruction pointer truncation during emulation

2016-09-08 Thread Xen . org security team
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Xen Security Advisory CVE-2016-7093 / XSA-186
  version 4

  x86: Mishandling of instruction pointer truncation during emulation

UPDATES IN VERSION 4


Public release.

ISSUE DESCRIPTION
=

When emulating HVM instructions, Xen uses a small i-cache for fetches
from guest memory.  The code that handles cache misses does not check
if the address from which it fetched lies within the cache before
blindly writing to it.  As such it is possible for the guest to
overwrite hypervisor memory.

It is currently believed that the only way to trigger this bug is to
use the way that Xen currently incorrectly wraps CS:IP in 16 bit
modes.  The included patch prevents such wrapping.

IMPACT
==

A malicious HVM guest administrator can escalate their privilege to that
of the host.

VULNERABLE SYSTEMS
==

Xen versions 4.7.0 and later are vulnerable.
Xen releases 4.6.3 and 4.5.3 are vulnerable.

Xen releases 4.6.0 to 4.6.2 inclusive are NOT vulnerable.
Xen releases 4.5.2 and earlier are NOT vulnerable.

The vulnerability is only exposed to HVM guests on x86 hardware.

The vulnerability is not exposed to x86 PV guests, or ARM guests.

MITIGATION
==

Running only PV guests will avoid this vulnerability.

CREDITS
===

This issue was discovered by Brian Marcotte.

RESOLUTION
==

Applying the first patch will resolve the issue.

Users wishing to independently verify the correctness of the fix may
find the second patch helpful.  The second patch makes it easier to
use the "fep" (Force Emulation Prefix) feature to reproduce the
erroneous condition in a test environment.  The "fep" feature requires
explicit enablement on the hypervisor command line, and is unsuitable
for production systems.  Accordingly, applying the second patch does
not affect production systems and does not improve security.

Xen version First patch   Second patch
 xen-unstable:   xsa186-0001-*.patch   xsa186-0002-*.patch
 Xen 4.7.x:  xsa186-0001-*.patch   xsa186-4.7-0002-*.patch
 Xen 4.6.3:  xsa186-0001-*.patch   xsa186-4.6-0002-*.patch
 Xen 4.5.3:  xsa186-0001-*.patch   xsa186-4.6-0002-*.patch

$ sha256sum xsa186*
f2082a36d968a47e477bb5082d0e0aaa58e6cb3dc20b26389f043a9b7b595fa6  
xsa186-0001-x86-emulate-Correct-boundary-interactions-of-emulate.patch
412fa58edcbd1c7fdbfec7e28898cf98585593e6a24ccfb088dc0b84715286a5  
xsa186-0002-hvm-fep-Allow-testing-of-instructions-crossing-the-1.patch
7482a823c3443e26deec4904162845eaa9f826aa7bf8348007406d91bddd  
xsa186-4.6-0002-hvm-fep-Allow-testing-of-instructions-crossing-the.patch
5a826a32763d82ac83c924f8c89d12aae5f069a4cbc7d5193aa8413a02b6dc05  
xsa186-4.7-0002-hvm-fep-Allow-testing-of-instructions-crossing-the.patch
$

DEPLOYMENT DURING EMBARGO
=

Deployment of the patches and/or mitigations described above (or
others which are substantially similar) is permitted during the
embargo, even on public-facing systems with untrusted guest users and
administrators.

But: Distribution of updated software is prohibited (except to other
members of the predisclosure list).

Predisclosure list members who wish to deploy significantly different
patches and/or mitigations, please contact the Xen Project Security
Team.

(Note: this during-embargo deployment notice is retained in
post-embargo publicly released Xen Project advisories, even though it
is then no longer applicable.  This is to enable the community to have
oversight of the Xen Project Security Team's decisionmaking.)

For more information about permissible uses of embargoed information,
consult the Xen Project community's agreed Security Policy:
  http://www.xenproject.org/security-policy.html
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAEBAgAGBQJX0VLsAAoJEIP+FMlX6CvZoUoIAMvgdMZRYdK5MaaRUAA1hDG3
UFSxZCH8zja6wZG6WPNj7VqvEkQ2350oqb05BGB8jTFCmqtNDDIyHK68WaMpwDMv
EEeetosujnlHTtVV7N8e0HO7F497PzZtzfniTyZc/h2Lna552ohMy/UcADtA7xxP
IK6qwvxpkx1aLzsDFpHIdrVcttDD/oZcVbBFwcCAqK33eGNC3S6BJvIibCAKfO8h
YKiAtvWUNsX/o4L9Zs4M50/pK3TzWsaDjfK3IX5LJPtsrcrKklrALVnDUOpTz1WA
07UIk0BcrzicEuTvuATWSQ3nVxUXAH95io23PCniHHntBtYJHjGA5rIqX+tiN6w=
=HT+K
-----END PGP SIGNATURE-----


xsa186-0001-x86-emulate-Correct-boundary-interactions-of-emulate.patch
Description: Binary data


xsa186-0002-hvm-fep-Allow-testing-of-instructions-crossing-the-1.patch
Description: Binary data


xsa186-4.6-0002-hvm-fep-Allow-testing-of-instructions-crossing-the.patch
Description: Binary data


xsa186-4.7-0002-hvm-fep-Allow-testing-of-instructions-crossing-the.patch
Description: Binary data
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Xen Security Advisory 185 (CVE-2016-7092) - x86: Disallow L3 recursive pagetable for 32-bit PV guests

2016-09-08 Thread Xen . org security team
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Xen Security Advisory CVE-2016-7092 / XSA-185
  version 3

x86: Disallow L3 recursive pagetable for 32-bit PV guests

UPDATES IN VERSION 3


Public release.

ISSUE DESCRIPTION
=

On real hardware, a 32-bit PAE guest must leave the USER and RW bit
clear in L3 pagetable entries, but the pagetable walk behaves as if
they were set.  (The L3 entries are cached in processor registers, and
don't actually form part of the pagewalk.)

When running a 32-bit PV guest on a 64-bit Xen, Xen must always OR in
the USER and RW bits for L3 updates for the guest to observe
architectural behaviour.  This is unsafe in combination with recursive
pagetables.

As there is no way to construct an L3 recursive pagetable in native
32-bit PAE mode, disallow this option in 32-bit PV guests.

IMPACT
==

A malicious 32-bit PV guest administrator can escalate their privilege
to that of the host.

VULNERABLE SYSTEMS
==

All versions of Xen are vulnerable.

Only 64-bit builds of the hypervisor are vulnerable.  For Xen 4.3 and
earlier, 32-bit builds of the hypervisor are not vulnerable.

The vulnerability is only exposed to 32-bit PV guests on x86 hardware.

The vulnerability is not exposed to 64-bit PV guests, x86 HVM guests,
or ARM guests.

MITIGATION
==

Running only 64-bit PV or HVM guests will avoid this vulnerability.

CREDITS
===

This issue was found in parallel by multiple discoverers, who each
disclosed it to the Xen Project Security Team.

The first report to us was made by Jérémie Boutoille of Quarkslab.
The second report, one working day later, by Shangcong Luan of Alibaba
Cloud.

RESOLUTION
==

Applying the attached patch resolves this issue.

xsa185.patch   xen-unstable - Xen 4.4

$ sha256sum xsa185*
3328a1953ecdf4de35462ea8396b0927171d718e95f73a87a7f651427bd8f8b4  xsa185.patch
$

DEPLOYMENT DURING EMBARGO
=

Deployment of the patches and/or mitigations described above (or
others which are substantially similar) is permitted during the
embargo, even on public-facing systems with untrusted guest users and
administrators.

But: Distribution of updated software is prohibited (except to other
members of the predisclosure list).

Predisclosure list members who wish to deploy significantly different
patches and/or mitigations, please contact the Xen Project Security
Team.

(Note: this during-embargo deployment notice is retained in
post-embargo publicly released Xen Project advisories, even though it
is then no longer applicable.  This is to enable the community to have
oversight of the Xen Project Security Team's decisionmaking.)

For more information about permissible uses of embargoed information,
consult the Xen Project community's agreed Security Policy:
  http://www.xenproject.org/security-policy.html
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAEBAgAGBQJX0VLpAAoJEIP+FMlX6CvZ/koH/0hN8oXOpBPVgsr5d+ylYFBU
We948VVN/0uthy9IgI1DBnjM2tjoGgy0w7c7dKWUD3ACTvdIq4hWZywA+6uMIwb5
aneB7hgZZ1i/ie1kAwMl96hdWgPGaXjL1r19WxslgOnr2TkH/9zlAaBvhFkbL+/c
cw2lI+AOmhB/VOtNfXYd81qxdSUBUPz2DfiOEjgVx8e8E+q/S5dJO1L41kqRt1bM
ENG8NtaxBrXAtZzilxOPVPmQmvSSegTjZMshGhx29wIgUy4R/HnsoYW7OklZQDhU
6DV7WUSlrUU5vlIhwQVIZidXpyhzLBLnR5GS0R4CKcYSb6pRQ8FO3TG81TmO/6Q=
=NDX0
-----END PGP SIGNATURE-----


xsa185.patch
Description: Binary data
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Xen Security Advisory 188 (CVE-2016-7154) - use after free in FIFO event channel code

2016-09-08 Thread Xen . org security team
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Xen Security Advisory CVE-2016-7154 / XSA-188
  version 3

   use after free in FIFO event channel code

UPDATES IN VERSION 3


Public release.

ISSUE DESCRIPTION
=

When the EVTCHNOP_init_control operation is called with a bad guest
frame number, it takes an error path which frees a control structure
without also clearing the corresponding pointer.  Certain subsequent
operations (EVTCHNOP_expand_array or another EVTCHNOP_init_control),
upon finding the non-NULL pointer, continue operation assuming it
points to allocated memory.

IMPACT
==

A malicious guest administrator can crash the host, leading to a DoS.
Arbitrary code execution (and therefore privilege escalation), and
information leaks, cannot be excluded.

VULNERABLE SYSTEMS
==

Only Xen 4.4 is vulnerable.  Xen versions 4.5 and later as well as Xen
versions 4.3 and earlier are not vulnerable.

MITIGATION
==

There is no mitigation available.

CREDITS
===

This issue was discovered by Mikhail Gorobets of Advanced Threat
Research, Intel Security.

RESOLUTION
==

Applying the attached patch resolves this issue.

xsa188.patch   Xen 4.4.x

$ sha256sum xsa188*
9f374c2e1437ad71369f41275e7b333e7b7691a783ba693ee567c899bd78c722  xsa188.patch
$

DEPLOYMENT DURING EMBARGO
=

Deployment of the patches and/or mitigations described above (or
others which are substantially similar) is permitted during the
embargo, even on public-facing systems with untrusted guest users and
administrators.

But: Distribution of updated software is prohibited (except to other
members of the predisclosure list).

Predisclosure list members who wish to deploy significantly different
patches and/or mitigations, please contact the Xen Project Security
Team.
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAEBAgAGBQJX0VLuAAoJEIP+FMlX6CvZNjYH/RVxqYegZpfj0aiT5pai/a0i
PgPSoMccGoSSVTXzivXUTZS3fTIqfTpd4SQHu2Q2dUqbb6zcPqd3NzF7Jl9IMwLk
JHZwPYXOsZ0D6thFAMYFpjHOWXv7+1Mw7Np82PaA2yAUad+kxUORiJeL1RAE6zG/
xsAR7PTl2mK1Ae9lqDtKLijn0cnicAYoKiSlta8M0T5Sp79CT3xsfHiBbaWUBCcI
gmOW76RUbfOwn2kmhFJ4X5bwSzEhM93pQu7hJCmuwAADc8ezEEFv2lsUm5W8hkmW
a8V2nuqM+prbxY8JI3XbKJm5YrmHQpnX4FiBn13DZeUsaukT4Q1EltP1z/XvJto=
=jzF5
-----END PGP SIGNATURE-----


xsa188.patch
Description: Binary data
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Xen Security Advisory 187 (CVE-2016-7094) - x86 HVM: Overflow of sh_ctxt->seg_reg[]

2016-09-08 Thread Xen . org security team
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Xen Security Advisory CVE-2016-7094 / XSA-187
  version 3

x86 HVM: Overflow of sh_ctxt->seg_reg[]

UPDATES IN VERSION 3


Fix the backports xsa187-4.6-0002-*.patch and xsa187-4.4-0002-*.patch.
In v1 and v2 these did not compile in debug builds.  (Debug builds
should not be used in production.)

Public release.

ISSUE DESCRIPTION
=

x86 HVM guests running with shadow paging use a subset of the x86 emulator to
handle the guest writing to its own pagetables.  There are situations a guest
can provoke which result in exceeding the space allocated for internal state.


IMPACT
==

A malicious HVM guest administrator can cause Xen to fail a bug check,
causing a denial of service to the host.


VULNERABLE SYSTEMS
==

All versions of Xen are vulnerable.

The vulnerability is only exposed to HVM guests on x86 hardware, which are
configured to run with shadow paging.

The vulnerability is not exposed to x86 PV guests, x86 HVM guests running with
hardware assisted paging, or ARM guests.


x86 HVM guests run in HAP mode by default on modern CPUs.

To discover whether your HVM guests are using HAP, or shadow page
tables: request debug key `q' (from the Xen console, or with
`xl debug-keys q').  This will print (to the console, and visible in
`xl dmesg'), debug information for every domain, containing something
like this:

  (XEN) General information for domain 2:
  (XEN) refcnt=1 dying=2 pause_count=2
  (XEN) nr_pages=2 xenheap_pages=0 shared_pages=0 paged_pages=0 
dirty_cpus={} max_pages=262400
  (XEN) handle=ef58ef1a-784d-4e59-8079-42bdee87f219 vm_assist=
  (XEN) paging assistance: hap refcounts translate external
   ^^^
The presence of `hap' here indicates that the host is not
vulnerable to this domain.  For an HVM domain the presence of `shadow'
indicates that the domain can exploit the vulnerability.


MITIGATION
==

Running only PV guests will avoid this vulnerability.

On hardware which supports Hardware Assisted Paging, configuring the
guests to not run with shadow paging will avoid this vulnerability.


CREDITS
===

This issue was discovered by Andrew Cooper of Citrix.

RESOLUTION
==

Applying the first patch will resolve this issue.

The second patch provides additional assurance that the vulnerability
is truly eliminated and that there are no related problems.

If hotpatching, applying only the first patch is recommended since the
second patch is awkward for hotpatching.  If deploying new builds,
applying both patches is recommended.

Xen version First patch   Second patch
 xen-unstable:   xsa187-0001-*.patch   xsa187-0002-*.patch
 Xen 4.7.x:  xsa187-4.7-0001-*.patch   xsa187-4.7-0002-*.patch
 Xen 4.6.x:  xsa187-4.7-0001-*.patch   xsa187-4.6-0002-*.patch
 Xen 4.5.x:  xsa187-4.7-0001-*.patch   xsa187-4.6-0002-*.patch
 Xen 4.4.x:  xsa187-4.7-0001-*.patch   xsa187-4.4-0002-*.patch

$ sha256sum xsa187*
65205ee195699d65884af04083ffb86c6ddbc96cbca3141c87f6b2d671de45a3  
xsa187-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg_reg.patch
f90e6d13385fb9219e1e26e3a148d1670aefc7130e0639415d08bbb6a1d9efee  
xsa187-0002-x86-segment-Bounds-check-accesses-to-emulation-ctxt-.patch
727b18ae83001f7ea04613aa7199ada3e6a84939aa44516f7c426e609d383b2a  
xsa187-4.4-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch
b96731379ea77d4931d969f4742dde985ef7a86af9422dcac8327c2a1916  
xsa187-4.6-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch
be9fe85d36c2c1fbca246c1f4d834c3ef11b6ab3d5467da0ac8c079aa5a68de9  
xsa187-4.7-0001-x86-shadow-Avoid-overflowing-sh_ctxt-seg.patch
36b22d6a168be39f31a1c1304f708269a2a10fe5105f7da4a06877d6059f1cd6  
xsa187-4.7-0002-x86-segment-Bounds-check-accesses-to-emulation-ctx.patch
$


DEPLOYMENT DURING EMBARGO
=

Deployment of the "reconfigure to use HAP" MITIGATION is NOT permitted
(except where all the affected systems and VMs are administered and
used only by organisations which are members of the Xen Project
Security Issues Predisclosure List).  Specifically, deployment on
public cloud systems is NOT permitted.

This is because the mitigation results in guest-visible changes.

Deployment of this mitigation is permitted only AFTER the embargo
ends.


Deployment of the PATCHES described above (or others which are
substantially similar) is permitted during the embargo, even on
public-facing systems with untrusted guest users and administrators.

But: Distribution of updated software is prohibited (except to other
members of the predisclosure list).


Predisclosure list members who wish to deploy significantly different
patches and/or mitigations, please contact the Xen Project Security
Team.


(Note: this during-embargo deployment notice is retained in
post-embargo publicly released Xen Project advisories

[Xen-devel] [PATCH] fix EFI part of "symbols: Generate an xen-sym.map"

2016-09-08 Thread Jan Beulich
Commit 6ea24e53f1 introduced two problems: It left out a semicolon and
typo-ed the source file name of the EFI map file install command.

Signed-off-by: Jan Beulich 

--- a/xen/Makefile
+++ b/xen/Makefile
@@ -67,7 +67,7 @@ _install: $(TARGET)$(CONFIG_XEN_INSTALL_
if [ -r $(TARGET).efi -a -n '$(EFI_DIR)' ]; then \
[ -d $(D)$(EFI_DIR) ] || $(INSTALL_DIR) $(D)$(EFI_DIR); \
$(INSTALL_DATA) $(TARGET).efi 
$(D)$(EFI_DIR)/$(T)-$(XEN_FULLVERSION).efi; \
-   $(INSTALL_DATA) $(TARGET)-efi.map 
$(D)$(DEBUG_DIR)/$(T)-$(XEN_FULLVERSION).efi.map \
+   $(INSTALL_DATA) $(TARGET).efi.map 
$(D)$(DEBUG_DIR)/$(T)-$(XEN_FULLVERSION).efi.map; \
ln -sf $(T)-$(XEN_FULLVERSION).efi 
$(D)$(EFI_DIR)/$(T)-$(XEN_VERSION).$(XEN_SUBVERSION).efi; \
ln -sf $(T)-$(XEN_FULLVERSION).efi 
$(D)$(EFI_DIR)/$(T)-$(XEN_VERSION).efi; \
ln -sf $(T)-$(XEN_FULLVERSION).efi $(D)$(EFI_DIR)/$(T).efi; \



fix EFI part of "symbols: Generate an xen-sym.map"

Commit 6ea24e53f1 introduced two problems: It left out a semicolon and
typo-ed the source file name of the EFI map file install command.

Signed-off-by: Jan Beulich 

--- a/xen/Makefile
+++ b/xen/Makefile
@@ -67,7 +67,7 @@ _install: $(TARGET)$(CONFIG_XEN_INSTALL_
if [ -r $(TARGET).efi -a -n '$(EFI_DIR)' ]; then \
[ -d $(D)$(EFI_DIR) ] || $(INSTALL_DIR) $(D)$(EFI_DIR); \
$(INSTALL_DATA) $(TARGET).efi 
$(D)$(EFI_DIR)/$(T)-$(XEN_FULLVERSION).efi; \
-   $(INSTALL_DATA) $(TARGET)-efi.map 
$(D)$(DEBUG_DIR)/$(T)-$(XEN_FULLVERSION).efi.map \
+   $(INSTALL_DATA) $(TARGET).efi.map 
$(D)$(DEBUG_DIR)/$(T)-$(XEN_FULLVERSION).efi.map; \
ln -sf $(T)-$(XEN_FULLVERSION).efi 
$(D)$(EFI_DIR)/$(T)-$(XEN_VERSION).$(XEN_SUBVERSION).efi; \
ln -sf $(T)-$(XEN_FULLVERSION).efi 
$(D)$(EFI_DIR)/$(T)-$(XEN_VERSION).efi; \
ln -sf $(T)-$(XEN_FULLVERSION).efi $(D)$(EFI_DIR)/$(T).efi; \
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [xen-unstable-smoke test] 100812: regressions - trouble: blocked/broken/pass

2016-09-08 Thread osstest service owner
flight 100812 xen-unstable-smoke real [real]
http://logs.test-lab.xenproject.org/osstest/logs/100812/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 build-armhf   4 host-build-prep  fail REGR. vs. 100800

Tests which did not succeed, but are not blocking:
 test-armhf-armhf-xl   1 build-check(1)   blocked  n/a
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass

version targeted for testing:
 xen  f8b4d961d5661d2edfaccadf66988596bfbc44c6
baseline version:
 xen  3d20a6f4faf1c6a18b51b80d99d23daa7762dda2

Last test of basis   100800  2016-09-08 02:02:05 Z0 days
Testing same since   100812  2016-09-08 11:01:37 Z0 days1 attempts


People who touched revisions under test:
  Andrew Cooper 
  George Dunlap 
  Tim Deegan 

jobs:
 build-amd64  pass
 build-armhf  broken  
 build-amd64-libvirt  pass
 test-armhf-armhf-xl  blocked 
 test-amd64-amd64-xl-qemuu-debianhvm-i386 pass
 test-amd64-amd64-libvirt pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Not pushing.


commit f8b4d961d5661d2edfaccadf66988596bfbc44c6
Author: Andrew Cooper 
Date:   Tue Jun 14 12:45:56 2016 +0100

x86/paging: Make paging_mode_*() predicates behave like predicates

Signed-off-by: Andrew Cooper 
Acked-by: Tim Deegan 
Acked-by: George Dunlap 
(qemu changes not included)

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH] fix EFI part of "symbols: Generate an xen-sym.map"

2016-09-08 Thread Wei Liu
On Thu, Sep 08, 2016 at 06:45:36AM -0600, Jan Beulich wrote:
> Commit 6ea24e53f1 introduced two problems: It left out a semicolon and
> typo-ed the source file name of the EFI map file install command.
> 
> Signed-off-by: Jan Beulich 
> 

Acked-by: Wei Liu 

> --- a/xen/Makefile
> +++ b/xen/Makefile
> @@ -67,7 +67,7 @@ _install: $(TARGET)$(CONFIG_XEN_INSTALL_
>   if [ -r $(TARGET).efi -a -n '$(EFI_DIR)' ]; then \
>   [ -d $(D)$(EFI_DIR) ] || $(INSTALL_DIR) $(D)$(EFI_DIR); \
>   $(INSTALL_DATA) $(TARGET).efi 
> $(D)$(EFI_DIR)/$(T)-$(XEN_FULLVERSION).efi; \
> - $(INSTALL_DATA) $(TARGET)-efi.map 
> $(D)$(DEBUG_DIR)/$(T)-$(XEN_FULLVERSION).efi.map \
> + $(INSTALL_DATA) $(TARGET).efi.map 
> $(D)$(DEBUG_DIR)/$(T)-$(XEN_FULLVERSION).efi.map; \
>   ln -sf $(T)-$(XEN_FULLVERSION).efi 
> $(D)$(EFI_DIR)/$(T)-$(XEN_VERSION).$(XEN_SUBVERSION).efi; \
>   ln -sf $(T)-$(XEN_FULLVERSION).efi 
> $(D)$(EFI_DIR)/$(T)-$(XEN_VERSION).efi; \
>   ln -sf $(T)-$(XEN_FULLVERSION).efi $(D)$(EFI_DIR)/$(T).efi; \
> 
> 
> 

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 00/17] x86: split insn emulator decode and execution

2016-09-08 Thread Jan Beulich
..., complete the decoder, leverage decoding for SVM instruction
sizing and PV 32-bit call gate emulation, and use the emulator for
PV priv-op handling.

01: x86emul: split instruction decoding from execution
02: x86emul: fetch all insn bytes during the decode phase
03: x86emul: track only rIP in emulator state
04: x86emul: complete decoding of two-byte instructions
05: x86emul: add XOP decoding
06: x86emul: add EVEX decoding
07: x86emul: move x86_execute() common epilogue code
08: x86emul: generate and make use of canonical opcode representation
09: SVM: use generic instruction decoding
10: x86/32on64: use generic instruction decoding
11: x86/PV: split out dealing with CRn from privileged instruction handling
12: x86/PV: split out dealing with DRn from privileged instruction handling
13: x86/PV: split out dealing with MSRs from privileged instruction handling
14: x86emul: support XSETBV
15: x86emul: sort opcode 0f01 special case switch() statement
16: x86/PV: use generic emulator for privileged instruction handling
17: x86emul: don't assume a memory operand

Signed-off-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [xen-unstable test] 100789: regressions - FAIL

2016-09-08 Thread Wei Liu
On Thu, Sep 08, 2016 at 10:43:59AM +0100, Wei Liu wrote:
> On Thu, Sep 08, 2016 at 05:32:00AM +, osstest service owner wrote:
> > flight 100789 xen-unstable real [real]
> > http://logs.test-lab.xenproject.org/osstest/logs/100789/
> > 
> > Regressions :-(
> > 
> > Tests which did not succeed and are blocking,
> > including tests which could not be run:
> >  test-amd64-amd64-libvirt-vhd  9 debian-di-installfail REGR. vs. 
> > 100773
> [...]
> >  test-armhf-armhf-xl-vhd   9 debian-di-installfail REGR. vs. 
> > 100773
> > 
> 
> Andrew pointed out IRL that these two regressions are unfortunate side
> effect of deleting blktap2. In short, the vhd-util used in these tests
> comes from Xen's blktap2. :-/
> 
> I see three ways to move this forward.
> 
> 1. Resurrect vhd-util from blktap2.
> 2. Install blktap-utils shipped in Debian (available from Wheezy
>onwards), the main difficulty would be the package depends on a dkms
>package that seems to require building with kernel header when
>installing.
> 3. Retire these two tests.
> 

4. Provide a pre-made vhd image.

vhd-util create disk.vhd -s 1 -> 24K in actual size.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [xen-unstable test] 100803: regressions - FAIL

2016-09-08 Thread osstest service owner
flight 100803 xen-unstable real [real]
http://logs.test-lab.xenproject.org/osstest/logs/100803/

Regressions :-(

Tests which did not succeed and are blocking,
including tests which could not be run:
 test-amd64-amd64-libvirt-vhd  9 debian-di-installfail REGR. vs. 100773
 test-armhf-armhf-xl-credit2 15 guest-start/debian.repeat fail REGR. vs. 100773
 test-armhf-armhf-xl-vhd   9 debian-di-installfail REGR. vs. 100773

Regressions which are regarded as allowable (not blocking):
 test-armhf-armhf-xl-rtds  6 xen-boot fail  like 100766
 test-amd64-i386-xl-qemuu-win7-amd64 16 guest-stop fail like 100773
 test-amd64-i386-xl-qemut-win7-amd64 16 guest-stop fail like 100773
 test-amd64-amd64-xl-qemuu-win7-amd64 16 guest-stopfail like 100773
 test-amd64-amd64-xl-rtds  9 debian-install   fail  like 100773
 test-amd64-amd64-xl-qemut-win7-amd64 16 guest-stopfail like 100773

Tests which did not succeed, but are not blocking:
 test-amd64-amd64-rumprun-amd64  1 build-check(1)   blocked  n/a
 test-amd64-i386-rumprun-i386  1 build-check(1)   blocked  n/a
 build-amd64-rumprun   5 rumprun-buildfail   never pass
 build-i386-rumprun5 rumprun-buildfail   never pass
 test-amd64-amd64-xl-pvh-amd  11 guest-start  fail   never pass
 test-amd64-amd64-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt-xsm  12 migrate-support-checkfail   never pass
 test-amd64-i386-libvirt  12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt 12 migrate-support-checkfail   never pass
 test-amd64-amd64-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-armhf-armhf-libvirt-xsm 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-xsm 14 guest-saverestorefail   never pass
 test-armhf-armhf-xl-arndale  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-arndale  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-xsm  13 saverestore-support-checkfail   never pass
 test-amd64-amd64-qemuu-nested-amd 16 debian-hvm-install/l1/l2  fail never pass
 test-armhf-armhf-xl-cubietruck 12 migrate-support-checkfail never pass
 test-armhf-armhf-xl-credit2  12 migrate-support-checkfail   never pass
 test-armhf-armhf-xl-cubietruck 13 saverestore-support-checkfail never pass
 test-armhf-armhf-xl-credit2  13 saverestore-support-checkfail   never pass
 test-armhf-armhf-xl-multivcpu 12 migrate-support-checkfail  never pass
 test-armhf-armhf-xl-multivcpu 13 saverestore-support-checkfail  never pass
 test-amd64-i386-libvirt-qemuu-debianhvm-amd64-xsm 10 migrate-support-check 
fail never pass
 test-amd64-amd64-xl-pvh-intel 11 guest-start  fail  never pass
 test-armhf-armhf-libvirt 12 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt 14 guest-saverestorefail   never pass
 test-armhf-armhf-libvirt-qcow2 11 migrate-support-checkfail never pass
 test-armhf-armhf-libvirt-qcow2 13 guest-saverestorefail never pass
 test-armhf-armhf-libvirt-raw 11 migrate-support-checkfail   never pass
 test-armhf-armhf-libvirt-raw 13 guest-saverestorefail   never pass

version targeted for testing:
 xen  3d20a6f4faf1c6a18b51b80d99d23daa7762dda2
baseline version:
 xen  343f84be135e6f9e681960a9e235296eae159fc8

Last test of basis   100773  2016-09-06 13:13:28 Z1 days
Failing since100789  2016-09-07 09:36:36 Z1 days2 attempts
Testing same since   100803  2016-09-08 05:36:29 Z0 days1 attempts


People who touched revisions under test:
  "Rockosov, Dmitry" 
  Andrew Cooper 
  George Dunlap 
  George Dunlap 
  Ian Jackson 
  Jan Beulich 
  Juergen Gross 
  Julien Grall 
  Konrad Rzeszutek Wilk 
  Olaf Hering 
  Razvan Cojocaru 
  Stefano Stabellini 
  Tamas K Lengyel 
  Wei Liu 

jobs:
 build-amd64-xsm  pass
 build-armhf-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-armhf  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-armhf-libvirt  pass

[Xen-devel] [PATCH 01/17] x86emul: split instruction decoding from execution

2016-09-08 Thread Jan Beulich
This is only the mechanical part, a subsequent patch will make non-
mechanical adjustments to actually do all decoding in this new
function.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -48,7 +48,9 @@
 /* All operands are implicit in the opcode. */
 #define ImplicitOps (DstImplicit|SrcImplicit)
 
-static uint8_t opcode_table[256] = {
+typedef uint8_t opcode_desc_t;
+
+static const opcode_desc_t opcode_table[256] = {
 /* 0x00 - 0x07 */
 ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
 ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
@@ -178,7 +180,7 @@ static uint8_t opcode_table[256] = {
 ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
 };
 
-static uint8_t twobyte_table[256] = {
+static const opcode_desc_t twobyte_table[256] = {
 /* 0x00 - 0x07 */
 SrcMem16|ModRM, ImplicitOps|ModRM, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
 /* 0x08 - 0x0F */
@@ -607,7 +609,7 @@ do{ asm volatile (
 })
 #define truncate_ea(ea) truncate_word((ea), ad_bytes)
 
-#define mode_64bit() (def_ad_bytes == 8)
+#define mode_64bit() (ctxt->addr_size == 64)
 
 #define fail_if(p)  \
 do {\
@@ -1558,32 +1560,63 @@ int x86emul_unhandleable_rw(
 return X86EMUL_UNHANDLEABLE;
 }
 
-int
-x86_emulate(
-struct x86_emulate_ctxt *ctxt,
-const struct x86_emulate_ops  *ops)
-{
-/* Shadow copy of register state. Committed on successful emulation. */
-struct cpu_user_regs _regs = *ctxt->regs;
+struct x86_emulate_state {
+unsigned int op_bytes, ad_bytes;
+
+enum { ext_none, ext_0f, ext_0f38 } ext;
+uint8_t opcode;
+uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
+uint8_t rex_prefix;
+bool lock_prefix;
+opcode_desc_t desc;
+union vex vex;
+int override_seg;
 
-uint8_t b, d, sib, sib_index, sib_base, rex_prefix = 0;
-uint8_t modrm = 0, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-enum { ext_none, ext_0f, ext_0f38 } ext = ext_none;
-union vex vex = {};
-unsigned int op_bytes, def_op_bytes, ad_bytes, def_ad_bytes;
-bool_t lock_prefix = 0;
-int override_seg = -1, rc = X86EMUL_OKAY;
-struct operand src = { .reg = REG_POISON };
-struct operand dst = { .reg = REG_POISON };
-enum x86_swint_type swint_type;
-struct x86_emulate_stub stub = {};
-DECLARE_ALIGNED(mmval_t, mmval);
 /*
  * Data operand effective address (usually computed from ModRM).
  * Default is a memory operand relative to segment DS.
  */
-struct operand ea = { .type = OP_MEM, .reg = REG_POISON };
-ea.mem.seg = x86_seg_ds; /* gcc may reject anon union initializer */
+struct operand ea;
+
+/* Immediate operand values, if any. Use otherwise unused fields. */
+#define imm1 ea.val
+#define imm2 ea.orig_val
+
+/* Shadow copy of register state. Committed on successful emulation. */
+struct cpu_user_regs regs;
+};
+
+/* Helper definitions. */
+#define op_bytes (state->op_bytes)
+#define ad_bytes (state->ad_bytes)
+#define ext (state->ext)
+#define modrm (state->modrm)
+#define modrm_mod (state->modrm_mod)
+#define modrm_reg (state->modrm_reg)
+#define modrm_rm (state->modrm_rm)
+#define rex_prefix (state->rex_prefix)
+#define lock_prefix (state->lock_prefix)
+#define vex (state->vex)
+#define override_seg (state->override_seg)
+#define ea (state->ea)
+#define _regs (state->regs)
+
+static int
+x86_decode(
+struct x86_emulate_state *state,
+struct x86_emulate_ctxt *ctxt,
+const struct x86_emulate_ops  *ops)
+{
+uint8_t b, d, sib, sib_index, sib_base;
+unsigned int def_op_bytes, def_ad_bytes;
+int rc = X86EMUL_OKAY;
+
+memset(state, 0, sizeof(*state));
+override_seg = -1;
+ea.type = OP_MEM;
+ea.mem.seg = x86_seg_ds;
+ea.reg = REG_POISON;
+_regs = *ctxt->regs;
 
 ctxt->retire.byte = 0;
 
@@ -1800,7 +1833,7 @@ x86_emulate(
 d = (d & ~(DstMask | SrcMask)) | DstMem | SrcReg | Mov;
 break;
 default: /* Until it is worth making this table based ... */
-goto cannot_emulate;
+return X86EMUL_UNHANDLEABLE;
 }
 break;
 
@@ -1932,6 +1965,61 @@ x86_emulate(
 if ( override_seg != -1 && ea.type == OP_MEM )
 ea.mem.seg = override_seg;
 
+/* Fetch the immediate operand, if present. */
+switch ( d & SrcMask )
+{
+unsigned int bytes;
+
+case SrcImm:
+if ( !(d & ByteOp) )
+bytes = op_bytes != 8 ? op_bytes : 4;
+else
+{
+case SrcImmByte:
+bytes = 1;
+}
+/* NB. Immediates are sign-extended as necessary. */
+switch ( bytes )
+{
+case 1: imm1 = insn_fetch_type(int8_t);  break;
+case 2: imm1 = insn_fetch_type(int16_t); break;
+case 4: imm1 = insn_fetch_type(int32_t); break;
+

[Xen-devel] [PATCH 02/17] x86emul: fetch all insn bytes during the decode phase

2016-09-08 Thread Jan Beulich
This way we can offer to callers the service of just sizing
instructions, and we also can better guarantee not to raise the wrong
fault due to not having read all relevant bytes.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -129,8 +129,8 @@ static const opcode_desc_t opcode_table[
 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps, ImplicitOps,
 /* 0xA0 - 0xA7 */
-ByteOp|DstEax|SrcImplicit|Mov, DstEax|SrcImplicit|Mov,
-ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+ByteOp|DstEax|SrcMem|Mov, DstEax|SrcMem|Mov,
+ByteOp|DstMem|SrcEax|Mov, DstMem|SrcEax|Mov,
 ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
 ByteOp|ImplicitOps, ImplicitOps,
 /* 0xA8 - 0xAF */
@@ -1602,6 +1602,45 @@ struct x86_emulate_state {
 #define _regs (state->regs)
 
 static int
+x86_decode_base(
+struct x86_emulate_state *state,
+struct x86_emulate_ctxt *ctxt,
+const struct x86_emulate_ops *ops)
+{
+int rc = X86EMUL_OKAY;
+
+switch ( state->opcode )
+{
+case 0x9a: /* call (far, absolute) */
+case 0xea: /* jmp (far, absolute) */
+generate_exception_if(mode_64bit(), EXC_UD, -1);
+
+imm1 = insn_fetch_bytes(op_bytes);
+imm2 = insn_fetch_type(uint16_t);
+break;
+
+case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
+case 0xa2: case 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
+/* Source EA is not encoded via ModRM. */
+ea.mem.off = insn_fetch_bytes(ad_bytes);
+break;
+
+case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
+if ( op_bytes == 8 ) /* Fetch more bytes to obtain imm64. */
+imm1 = ((uint32_t)imm1 |
+((uint64_t)insn_fetch_type(uint32_t) << 32));
+break;
+
+case 0xc8: /* enter imm16,imm8 */
+imm2 = insn_fetch_type(uint8_t);
+break;
+}
+
+ done:
+return rc;
+}
+
+static int
 x86_decode(
 struct x86_emulate_state *state,
 struct x86_emulate_ctxt *ctxt,
@@ -1994,10 +2033,29 @@ x86_decode(
 state->opcode = b;
 state->desc = d;
 
+switch ( ext )
+{
+case ext_none:
+rc = x86_decode_base(state, ctxt, ops);
+break;
+
+case ext_0f:
+case ext_0f38:
+break;
+
+default:
+ASSERT_UNREACHABLE();
+return X86EMUL_UNHANDLEABLE;
+}
+
  done:
 return rc;
 }
 
+/* No insn fetching past this point. */
+#undef insn_fetch_bytes
+#undef insn_fetch_type
+
 int
 x86_emulate(
 struct x86_emulate_ctxt *ctxt,
@@ -2560,6 +2618,8 @@ x86_emulate(
 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
 generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
 case 0x88 ... 0x8b: /* mov */
+case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
+case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
 dst.val = src.val;
 break;
 
@@ -2644,18 +2704,13 @@ x86_emulate(
 
 case 0x9a: /* call (far, absolute) */ {
 struct segment_register reg;
-uint16_t sel;
-uint32_t eip;
 
-generate_exception_if(mode_64bit(), EXC_UD, -1);
+ASSERT(!mode_64bit());
 fail_if(ops->read_segment == NULL);
 
-eip = insn_fetch_bytes(op_bytes);
-sel = insn_fetch_type(uint16_t);
-
 if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
- (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) ||
- (validate_far_branch(&cs, eip),
+ (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
+ (validate_far_branch(&cs, imm1),
   rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
   &reg.sel, op_bytes, ctxt)) ||
  (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
@@ -2663,7 +2718,7 @@ x86_emulate(
  (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
 goto done;
 
-_regs.eip = eip;
+_regs.eip = imm1;
 break;
 }
 
@@ -2706,23 +2761,6 @@ x86_emulate(
 ((uint8_t *)&_regs.eax)[1] = (_regs.eflags & 0xd7) | 0x02;
 break;
 
-case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
-/* Source EA is not encoded via ModRM. */
-dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-if ( (rc = read_ulong(ea.mem.seg, insn_fetch_bytes(ad_bytes),
-  &dst.val, dst.bytes, ctxt, ops)) != 0 )
-goto done;
-break;
-
-case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
-/* Destination EA is not encoded via ModRM. */
-dst.type  = OP_MEM;
-dst.mem.seg = ea.mem.seg;
-dst.mem.off = insn_fetch_bytes(ad_bytes);
-dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-dst.val   = (unsigned long)_regs.eax;
-break;
-
 case 0xa4 ... 0xa5: /* movs */ {
 unsigned long nr_reps = get_rep_prefix();
 dst.byt

[Xen-devel] [PATCH 04/17] x86emul: track only rIP in emulator state

2016-09-08 Thread Jan Beulich
Now that all decoding happens in x86_decode() there's no need to keep
the local registers copy in struct x86_emulate_state. Only rIP gets
updated in the decode phase, so only that register needs tracking
there. All other (read-only) registers can be read from the original
structure (but sadly, due to it getting passed to decode_register(),
the pointer can't be made point to "const" to make the compiler help
ensure no modification happens).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -590,9 +590,9 @@ do{ asm volatile (
 
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch_bytes(_size) \
-({ unsigned long _x = 0, _eip = _regs.eip;  \
-   _regs.eip += (_size); /* real hardware doesn't truncate */   \
-   generate_exception_if((uint8_t)(_regs.eip -  \
+({ unsigned long _x = 0, _eip = state->eip; \
+   state->eip += (_size); /* real hardware doesn't truncate */  \
+   generate_exception_if((uint8_t)(state->eip - \
ctxt->regs->eip) > MAX_INST_LEN, \
  EXC_GP, 0);\
rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt);  \
@@ -1582,8 +1582,8 @@ struct x86_emulate_state {
 #define imm1 ea.val
 #define imm2 ea.orig_val
 
-/* Shadow copy of register state. Committed on successful emulation. */
-struct cpu_user_regs regs;
+unsigned long eip;
+struct cpu_user_regs *regs;
 };
 
 /* Helper definitions. */
@@ -1599,7 +1599,6 @@ struct x86_emulate_state {
 #define vex (state->vex)
 #define override_seg (state->override_seg)
 #define ea (state->ea)
-#define _regs (state->regs)
 
 static int
 x86_decode_base(
@@ -1655,7 +1654,8 @@ x86_decode(
 ea.type = OP_MEM;
 ea.mem.seg = x86_seg_ds;
 ea.reg = REG_POISON;
-_regs = *ctxt->regs;
+state->regs = ctxt->regs;
+state->eip = ctxt->regs->eip;
 
 ctxt->retire.byte = 0;
 
@@ -1759,7 +1759,7 @@ x86_decode(
 default:
 BUG();
 case 2:
-if ( in_realmode(ctxt, ops) || (_regs.eflags & EFLG_VM) )
+if ( in_realmode(ctxt, ops) || (state->regs->eflags & EFLG_VM) 
)
 break;
 /* fall through */
 case 4:
@@ -1885,7 +1885,7 @@ x86_decode(
 modrm_rm |= (rex_prefix & 1) << 3;
 ea.type = OP_REG;
 ea.reg  = decode_register(
-modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0));
+modrm_rm, state->regs, (d & ByteOp) && (rex_prefix == 0));
 }
 else if ( ad_bytes == 2 )
 {
@@ -1893,33 +1893,33 @@ x86_decode(
 switch ( modrm_rm )
 {
 case 0:
-ea.mem.off = _regs.ebx + _regs.esi;
+ea.mem.off = state->regs->ebx + state->regs->esi;
 break;
 case 1:
-ea.mem.off = _regs.ebx + _regs.edi;
+ea.mem.off = state->regs->ebx + state->regs->edi;
 break;
 case 2:
 ea.mem.seg = x86_seg_ss;
-ea.mem.off = _regs.ebp + _regs.esi;
+ea.mem.off = state->regs->ebp + state->regs->esi;
 break;
 case 3:
 ea.mem.seg = x86_seg_ss;
-ea.mem.off = _regs.ebp + _regs.edi;
+ea.mem.off = state->regs->ebp + state->regs->edi;
 break;
 case 4:
-ea.mem.off = _regs.esi;
+ea.mem.off = state->regs->esi;
 break;
 case 5:
-ea.mem.off = _regs.edi;
+ea.mem.off = state->regs->edi;
 break;
 case 6:
 if ( modrm_mod == 0 )
 break;
 ea.mem.seg = x86_seg_ss;
-ea.mem.off = _regs.ebp;
+ea.mem.off = state->regs->ebp;
 break;
 case 7:
-ea.mem.off = _regs.ebx;
+ea.mem.off = state->regs->ebx;
 break;
 }
 switch ( modrm_mod )
@@ -1946,14 +1946,15 @@ x86_decode(
 sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
 sib_base  = (sib & 7) | ((rex_prefix << 3) & 8);
 if ( sib_index != 4 )
-ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0);
+ea.mem.off = *(long *)decode_register(sib_index,
+  state->regs, 0);
 ea.mem.off <<= (sib >> 6) & 3;
 if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
 ea.mem.off += insn_fetch_type(int32_t);
 

[Xen-devel] [PATCH 03/17] x86emul: track only rIP in emulator state

2016-09-08 Thread Jan Beulich
Now that all decoding happens in x86_decode() there's no need to keep
the local registers copy in struct x86_emulate_state. Only rIP gets
updated in the decode phase, so only that register needs tracking
there. All other (read-only) registers can be read from the original
structure (but sadly, due to it getting passed to decode_register(),
the pointer can't be made point to "const" to make the compiler help
ensure no modification happens).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -590,9 +590,9 @@ do{ asm volatile (
 
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch_bytes(_size) \
-({ unsigned long _x = 0, _eip = _regs.eip;  \
-   _regs.eip += (_size); /* real hardware doesn't truncate */   \
-   generate_exception_if((uint8_t)(_regs.eip -  \
+({ unsigned long _x = 0, _eip = state->eip; \
+   state->eip += (_size); /* real hardware doesn't truncate */  \
+   generate_exception_if((uint8_t)(state->eip - \
ctxt->regs->eip) > MAX_INST_LEN, \
  EXC_GP, 0);\
rc = ops->insn_fetch(x86_seg_cs, _eip, &_x, (_size), ctxt);  \
@@ -1582,8 +1582,8 @@ struct x86_emulate_state {
 #define imm1 ea.val
 #define imm2 ea.orig_val
 
-/* Shadow copy of register state. Committed on successful emulation. */
-struct cpu_user_regs regs;
+unsigned long eip;
+struct cpu_user_regs *regs;
 };
 
 /* Helper definitions. */
@@ -1599,7 +1599,6 @@ struct x86_emulate_state {
 #define vex (state->vex)
 #define override_seg (state->override_seg)
 #define ea (state->ea)
-#define _regs (state->regs)
 
 static int
 x86_decode_base(
@@ -1655,7 +1654,8 @@ x86_decode(
 ea.type = OP_MEM;
 ea.mem.seg = x86_seg_ds;
 ea.reg = REG_POISON;
-_regs = *ctxt->regs;
+state->regs = ctxt->regs;
+state->eip = ctxt->regs->eip;
 
 ctxt->retire.byte = 0;
 
@@ -1759,7 +1759,7 @@ x86_decode(
 default:
 BUG();
 case 2:
-if ( in_realmode(ctxt, ops) || (_regs.eflags & EFLG_VM) )
+if ( in_realmode(ctxt, ops) || (state->regs->eflags & EFLG_VM) )
 break;
 /* fall through */
 case 4:
@@ -1885,7 +1885,7 @@ x86_decode(
 modrm_rm |= (rex_prefix & 1) << 3;
 ea.type = OP_REG;
 ea.reg  = decode_register(
-modrm_rm, &_regs, (d & ByteOp) && (rex_prefix == 0));
+modrm_rm, state->regs, (d & ByteOp) && (rex_prefix == 0));
 }
 else if ( ad_bytes == 2 )
 {
@@ -1893,33 +1893,33 @@ x86_decode(
 switch ( modrm_rm )
 {
 case 0:
-ea.mem.off = _regs.ebx + _regs.esi;
+ea.mem.off = state->regs->ebx + state->regs->esi;
 break;
 case 1:
-ea.mem.off = _regs.ebx + _regs.edi;
+ea.mem.off = state->regs->ebx + state->regs->edi;
 break;
 case 2:
 ea.mem.seg = x86_seg_ss;
-ea.mem.off = _regs.ebp + _regs.esi;
+ea.mem.off = state->regs->ebp + state->regs->esi;
 break;
 case 3:
 ea.mem.seg = x86_seg_ss;
-ea.mem.off = _regs.ebp + _regs.edi;
+ea.mem.off = state->regs->ebp + state->regs->edi;
 break;
 case 4:
-ea.mem.off = _regs.esi;
+ea.mem.off = state->regs->esi;
 break;
 case 5:
-ea.mem.off = _regs.edi;
+ea.mem.off = state->regs->edi;
 break;
 case 6:
 if ( modrm_mod == 0 )
 break;
 ea.mem.seg = x86_seg_ss;
-ea.mem.off = _regs.ebp;
+ea.mem.off = state->regs->ebp;
 break;
 case 7:
-ea.mem.off = _regs.ebx;
+ea.mem.off = state->regs->ebx;
 break;
 }
 switch ( modrm_mod )
@@ -1946,14 +1946,15 @@ x86_decode(
 sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
 sib_base  = (sib & 7) | ((rex_prefix << 3) & 8);
 if ( sib_index != 4 )
-ea.mem.off = *(long*)decode_register(sib_index, &_regs, 0);
+ea.mem.off = *(long *)decode_register(sib_index,
+  state->regs, 0);
 ea.mem.off <<= (sib >> 6) & 3;
 if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
 ea.mem.off += insn_fetch_type(int32_t);
 

[Xen-devel] [PATCH 05/17] x86emul: add XOP decoding

2016-09-08 Thread Jan Beulich
This way we can at least size (and e.g. skip) them if needed, and we
also won't raise the wrong fault due to not having read all relevant
bytes.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -279,6 +279,12 @@ static const opcode_desc_t twobyte_table
 ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM
 };
 
+static const opcode_desc_t xop_table[] = {
+DstReg|SrcImmByte|ModRM,
+DstReg|SrcMem|ModRM,
+DstReg|SrcImm|ModRM,
+};
+
 #define REX_PREFIX 0x40
 #define REX_B 0x01
 #define REX_X 0x02
@@ -1580,6 +1586,9 @@ struct x86_emulate_state {
 ext_0f   = vex_0f,
 ext_0f38 = vex_0f38,
 ext_0f3a = vex_0f3a,
+ext_8f08 = 8,
+ext_8f09,
+ext_8f0a,
 } ext;
 uint8_t opcode;
 uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
@@ -1802,7 +1811,7 @@ x86_decode(
 modrm = insn_fetch_type(uint8_t);
 modrm_mod = (modrm & 0xc0) >> 6;
 
-if ( !ext && ((b & ~1) == 0xc4) )
+if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18))) )
 switch ( def_ad_bytes )
 {
 default:
@@ -1816,11 +1825,11 @@ x86_decode(
 break;
 /* fall through */
 case 8:
-/* VEX */
+/* VEX / XOP */
 generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
 
 vex.raw[0] = modrm;
-if ( b & 1 )
+if ( b == 0xc5 )
 {
 vex.raw[1] = modrm;
 vex.opcx = vex_0f;
@@ -1848,18 +1857,30 @@ x86_decode(
 rex_prefix |= REX_R;
 
 b = insn_fetch_type(uint8_t);
-switch ( ext = vex.opcx )
+ext = vex.opcx;
+if ( b != 0x8f )
+{
+switch ( ext )
+{
+case vex_0f:
+d = twobyte_table[b];
+break;
+case vex_0f38:
+d = twobyte_table[0x38];
+break;
+case vex_0f3a:
+d = twobyte_table[0x3a];
+break;
+default:
+rc = X86EMUL_UNHANDLEABLE;
+goto done;
+}
+}
+else if ( ext < ext_8f08 +
+sizeof(xop_table) / sizeof(*xop_table) )
+d = xop_table[ext - ext_8f08];
+else
 {
-case vex_0f:
-d = twobyte_table[b];
-break;
-case vex_0f38:
-d = twobyte_table[0x38];
-break;
-case vex_0f3a:
-d = twobyte_table[0x3a];
-break;
-default:
 rc = X86EMUL_UNHANDLEABLE;
 goto done;
 }
@@ -1921,6 +1942,9 @@ x86_decode(
 
 case ext_0f:
 case ext_0f3a:
+case ext_8f08:
+case ext_8f09:
+case ext_8f0a:
 break;
 
 case ext_0f38:
@@ -2112,6 +2136,9 @@ x86_decode(
 
 case ext_0f38:
 case ext_0f3a:
+case ext_8f08:
+case ext_8f09:
+case ext_8f0a:
 break;
 
 default:
@@ -2332,6 +2359,9 @@ x86_emulate(
 default:
 ASSERT_UNREACHABLE();
 case ext_0f3a:
+case ext_8f08:
+case ext_8f09:
+case ext_8f0a:
 goto cannot_emulate;
 }
 



x86emul: add XOP decoding

This way we can at least size (and e.g. skip) them if needed, and we
also won't raise the wrong fault due to not having read all relevant
bytes.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -279,6 +279,12 @@ static const opcode_desc_t twobyte_table
 ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM
 };
 
+static const opcode_desc_t xop_table[] = {
+DstReg|SrcImmByte|ModRM,
+DstReg|SrcMem|ModRM,
+DstReg|SrcImm|ModRM,
+};
+
 #define REX_PREFIX 0x40
 #define REX_B 0x01
 #define REX_X 0x02
@@ -1580,6 +1586,9 @@ struct x86_emulate_state {
 ext_0f   = vex_0f,
 ext_0f38 = vex_0f38,
 ext_0f3a = vex_0f3a,
+ext_8f08 = 8,
+ext_8f09,
+ext_8f0a,
 } ext;
 uint8_t opcode;
 uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
@@ -1802,7 +1811,7 @@ x86_decode(
 modrm = insn_fetch_type(uint8_t);
 modrm_mod = (modrm & 0xc0) >> 6;
 
-if ( !ext && ((b & ~1) == 0xc4) )
+if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18))) )
 switch ( def_ad_bytes )
 {
 default:
@@ -1816,11 +1825,11 @@ x86_decode(
 break;
 /* fall through

[Xen-devel] [PATCH 04/17] x86emul: complete decoding of two-byte instructions

2016-09-08 Thread Jan Beulich
This way we can at least size (and e.g. skip) them if needed, and we
also won't raise the wrong fault due to not having read all relevant
bytes.

This at once adds correct raising of #UD for the three "ud" flavors
(Intel names only "ud2", but AMD names all three of them in their
opcode maps), as that may make a difference to callers compared to
getting back X86EMUL_UNHANDLEABLE.

Note on opcodes 0FA6 and 0FA7: These are VIA's PadLock instructions,
which have a ModRM like byte where only register forms are valid. I.e.
we could also use SrcImmByte there, but ModRM is more likely to be
correct for a hypothetical extension allowing non-register operations.

Note on opcode 0FB8: I think we're safe to ignore JMPE (which doesn't
take a ModRM byte, but an immediate).

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -182,11 +182,14 @@ static const opcode_desc_t opcode_table[
 
 static const opcode_desc_t twobyte_table[256] = {
 /* 0x00 - 0x07 */
-SrcMem16|ModRM, ImplicitOps|ModRM, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
+SrcMem16|ModRM, ImplicitOps|ModRM, ModRM, ModRM,
+0, ImplicitOps, ImplicitOps, ImplicitOps,
 /* 0x08 - 0x0F */
-ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
+ImplicitOps, ImplicitOps, 0, ImplicitOps,
+0, ImplicitOps|ModRM, ImplicitOps, ModRM|SrcImmByte,
 /* 0x10 - 0x17 */
-ImplicitOps|ModRM, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0,
+ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
+ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
 /* 0x18 - 0x1F */
 ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
 ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
@@ -194,12 +197,13 @@ static const opcode_desc_t twobyte_table
 ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
 0, 0, 0, 0,
 /* 0x28 - 0x2F */
-ImplicitOps|ModRM, ImplicitOps|ModRM, 0, ImplicitOps|ModRM, 0, 0, 0, 0,
+ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
+ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM, ImplicitOps|ModRM,
 /* 0x30 - 0x37 */
-ImplicitOps, ImplicitOps, ImplicitOps, 0,
-ImplicitOps, ImplicitOps, 0, 0,
+ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+ImplicitOps, ImplicitOps, 0, ImplicitOps,
 /* 0x38 - 0x3F */
-DstReg|SrcMem|ModRM, 0, 0, 0, 0, 0, 0, 0,
+DstReg|SrcMem|ModRM, 0, DstReg|SrcImmByte|ModRM, 0, 0, 0, 0, 0,
 /* 0x40 - 0x47 */
 DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
 DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
@@ -211,11 +215,15 @@ static const opcode_desc_t twobyte_table
 DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
 DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
 /* 0x50 - 0x5F */
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
+ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
 /* 0x60 - 0x6F */
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
+ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
+ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
 /* 0x70 - 0x7F */
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
+SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM, SrcImmByte|ModRM,
+ModRM, ModRM, ModRM, ImplicitOps,
+ModRM, ModRM, 0, 0, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
 /* 0x80 - 0x87 */
 DstImplicit|SrcImm, DstImplicit|SrcImm,
 DstImplicit|SrcImm, DstImplicit|SrcImm,
@@ -238,9 +246,9 @@ static const opcode_desc_t twobyte_table
 ByteOp|DstMem|SrcNone|ModRM|Mov, ByteOp|DstMem|SrcNone|ModRM|Mov,
 /* 0xA0 - 0xA7 */
 ImplicitOps, ImplicitOps, ImplicitOps, DstBitBase|SrcReg|ModRM,
-DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, 0, 0,
+DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, ModRM, ModRM,
 /* 0xA8 - 0xAF */
-ImplicitOps, ImplicitOps, 0, DstBitBase|SrcReg|ModRM,
+ImplicitOps, ImplicitOps, ImplicitOps, DstBitBase|SrcReg|ModRM,
 DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
 ImplicitOps|ModRM, DstReg|SrcMem|ModRM,
 /* 0xB0 - 0xB7 */
@@ -249,22 +257,26 @@ static const opcode_desc_t twobyte_table
 DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
 ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
 /* 0xB8 - 0xBF */
-0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM,
+DstReg|SrcMem|ModRM, ModRM,
+DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM,
 DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
 ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
 /* 0xC0 - 0xC7 */
 ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
-0, DstMem|SrcReg|ModRM|Mov,
-0, 0, 0, ImplicitOps|ModRM,
+SrcImmByte|ModRM, DstMem|SrcReg|ModRM|Mov

[Xen-devel] [PATCH 06/17] x86emul: add EVEX decoding

2016-09-08 Thread Jan Beulich
This way we can at least size (and e.g. skip) them if needed, and we
also won't raise the wrong fault due to not having read all relevant
bytes.

Signed-off-by: Jan Beulich 
---
TBD: I'm kind of undecided whether to right away propagate evex.R into
 modrm_reg (and then also deal with the new meaning of evex.x for
 modrm_rm). Since that doesn't affect GPRs (and the extra bits
 would need masking off when accessing GPRs) I've left this out for
 now.

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -336,6 +336,27 @@ union vex {
 ptr[1] = rex | REX_PREFIX; \
 } while (0)
 
+union evex {
+uint8_t raw[3];
+struct {
+uint8_t opcx:2;
+uint8_t :2;
+uint8_t R:1;
+uint8_t b:1;
+uint8_t x:1;
+uint8_t r:1;
+uint8_t pfx:2;
+uint8_t evex:1;
+uint8_t reg:4;
+uint8_t w:1;
+uint8_t opmsk:3;
+uint8_t RX:1;
+uint8_t bcst:1;
+uint8_t lr:2;
+uint8_t z:1;
+};
+};
+
 #define rep_prefix()   (vex.pfx >= vex_f3)
 #define repe_prefix()  (vex.pfx == vex_f3)
 #define repne_prefix() (vex.pfx == vex_f2)
@@ -1596,6 +1617,7 @@ struct x86_emulate_state {
 bool lock_prefix;
 opcode_desc_t desc;
 union vex vex;
+union evex evex;
 int override_seg;
 
 /*
@@ -1623,6 +1645,7 @@ struct x86_emulate_state {
 #define rex_prefix (state->rex_prefix)
 #define lock_prefix (state->lock_prefix)
 #define vex (state->vex)
+#define evex (state->evex)
 #define override_seg (state->override_seg)
 #define ea (state->ea)
 
@@ -1811,7 +1834,8 @@ x86_decode(
 modrm = insn_fetch_type(uint8_t);
 modrm_mod = (modrm & 0xc0) >> 6;
 
-if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18))) )
+if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18)) ||
+  b == 0x62) )
 switch ( def_ad_bytes )
 {
 default:
@@ -1825,7 +1849,7 @@ x86_decode(
 break;
 /* fall through */
 case 8:
-/* VEX / XOP */
+/* VEX / XOP / EVEX */
 generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
 
 vex.raw[0] = modrm;
@@ -1852,6 +1876,14 @@ x86_decode(
 op_bytes = 8;
 }
 }
+if ( b == 0x62 )
+{
+evex.raw[0] = vex.raw[0];
+evex.raw[1] = vex.raw[1];
+evex.raw[2] = insn_fetch_type(uint8_t);
+
+vex.opcx = evex.opcx;
+}
 }
 if ( mode_64bit() && !vex.r )
 rex_prefix |= REX_R;



x86emul: add EVEX decoding

This way we can at least size (and e.g. skip) them if needed, and we
also won't raise the wrong fault due to not having read all relevant
bytes.

Signed-off-by: Jan Beulich 
---
TBD: I'm kind of undecided whether to right away propagate evex.R into
 modrm_reg (and then also deal with the new meaning of evex.x for
 modrm_rm). Since that doesn't affect GPRs (and the extra bits
 would need masking off when accessing GPRs) I've left this out for
 now.

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -336,6 +336,27 @@ union vex {
 ptr[1] = rex | REX_PREFIX; \
 } while (0)
 
+union evex {
+uint8_t raw[3];
+struct {
+uint8_t opcx:2;
+uint8_t :2;
+uint8_t R:1;
+uint8_t b:1;
+uint8_t x:1;
+uint8_t r:1;
+uint8_t pfx:2;
+uint8_t evex:1;
+uint8_t reg:4;
+uint8_t w:1;
+uint8_t opmsk:3;
+uint8_t RX:1;
+uint8_t bcst:1;
+uint8_t lr:2;
+uint8_t z:1;
+};
+};
+
 #define rep_prefix()   (vex.pfx >= vex_f3)
 #define repe_prefix()  (vex.pfx == vex_f3)
 #define repne_prefix() (vex.pfx == vex_f2)
@@ -1596,6 +1617,7 @@ struct x86_emulate_state {
 bool lock_prefix;
 opcode_desc_t desc;
 union vex vex;
+union evex evex;
 int override_seg;
 
 /*
@@ -1623,6 +1645,7 @@ struct x86_emulate_state {
 #define rex_prefix (state->rex_prefix)
 #define lock_prefix (state->lock_prefix)
 #define vex (state->vex)
+#define evex (state->evex)
 #define override_seg (state->override_seg)
 #define ea (state->ea)
 
@@ -1811,7 +1834,8 @@ x86_decode(
 modrm = insn_fetch_type(uint8_t);
 modrm_mod = (modrm & 0xc0) >> 6;
 
-if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18))) )
+if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18)) ||
+  b == 0x62) )
 switch ( def_ad_bytes )
 {
 default:
@@ -1825,7 +1849,7 @@ x86_decode(
 break;
 /* fall through */
 case 8:
-  

[Xen-devel] [PATCH 07/17] x86emul: move x86_execute() common epilogue code

2016-09-08 Thread Jan Beulich
Only code movement, no functional change.

Signed-off-by: Jan Beulich 
---
This is just to ease review of a later patch.

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4111,56 +4111,7 @@ x86_emulate(
 default:
 goto cannot_emulate;
 }
-
- writeback:
-switch ( dst.type )
-{
-case OP_REG:
-/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-switch ( dst.bytes )
-{
-case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
-case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
-case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
-case 8: *dst.reg = dst.val; break;
-}
-break;
-case OP_MEM:
-if ( !(d & Mov) && (dst.orig_val == dst.val) &&
- !ctxt->force_writeback )
-/* nothing to do */;
-else if ( lock_prefix )
-rc = ops->cmpxchg(
-dst.mem.seg, dst.mem.off, &dst.orig_val,
-&dst.val, dst.bytes, ctxt);
-else
-rc = ops->write(
-dst.mem.seg, dst.mem.off, &dst.val, dst.bytes, ctxt);
-if ( rc != 0 )
-goto done;
-default:
-break;
-}
-
- no_writeback:
-/* Inject #DB if single-step tracing was enabled at instruction start. */
-if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
- (ops->inject_hw_exception != NULL) )
-rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
-
-/* Commit shadow register state. */
-_regs.eflags &= ~EFLG_RF;
-
-/* Zero the upper 32 bits of %rip if not in 64-bit mode. */
-if ( !mode_64bit() )
-_regs.eip = (uint32_t)_regs.eip;
-
-*ctxt->regs = _regs;
-
- done:
-_put_fpu();
-put_stub(stub);
-return rc;
+goto writeback;
 
  ext_0f_insn:
 switch ( b )
@@ -5134,7 +5085,56 @@ x86_emulate(
 default:
 goto cannot_emulate;
 }
-goto writeback;
+
+ writeback:
+switch ( dst.type )
+{
+case OP_REG:
+/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+switch ( dst.bytes )
+{
+case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
+case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
+case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
+case 8: *dst.reg = dst.val; break;
+}
+break;
+case OP_MEM:
+if ( !(d & Mov) && (dst.orig_val == dst.val) &&
+ !ctxt->force_writeback )
+/* nothing to do */;
+else if ( lock_prefix )
+rc = ops->cmpxchg(
+dst.mem.seg, dst.mem.off, &dst.orig_val,
+&dst.val, dst.bytes, ctxt);
+else
+rc = ops->write(
+dst.mem.seg, dst.mem.off, &dst.val, dst.bytes, ctxt);
+if ( rc != 0 )
+goto done;
+default:
+break;
+}
+
+ no_writeback:
+/* Inject #DB if single-step tracing was enabled at instruction start. */
+if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
+ (ops->inject_hw_exception != NULL) )
+rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
+
+/* Commit shadow register state. */
+_regs.eflags &= ~EFLG_RF;
+
+/* Zero the upper 32 bits of %rip if not in 64-bit mode. */
+if ( !mode_64bit() )
+_regs.eip = (uint32_t)_regs.eip;
+
+*ctxt->regs = _regs;
+
+ done:
+_put_fpu();
+put_stub(stub);
+return rc;
 
  cannot_emulate:
 _put_fpu();



x86emul: move x86_execute() common epilogue code

Only code movement, no functional change.

Signed-off-by: Jan Beulich 
---
This is just to ease review of a later patch.

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4111,56 +4111,7 @@ x86_emulate(
 default:
 goto cannot_emulate;
 }
-
- writeback:
-switch ( dst.type )
-{
-case OP_REG:
-/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-switch ( dst.bytes )
-{
-case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
-case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
-case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
-case 8: *dst.reg = dst.val; break;
-}
-break;
-case OP_MEM:
-if ( !(d & Mov) && (dst.orig_val == dst.val) &&
- !ctxt->force_writeback )
-/* nothing to do */;
-else if ( lock_prefix )
-rc = ops->cmpxchg(
-dst.mem.seg, dst.mem.off, &dst.orig_val,
-&dst.val, dst.bytes, ctxt);
-else
-rc = ops->write(
-dst.mem.seg, dst.mem.off, &dst.val, dst.bytes, ctxt);
-if ( rc != 0 )
-goto done;
-default:
-break;
-}
-
- no_writeback:
-/* Inject #DB if

[Xen-devel] [PATCH 08/17] x86emul: generate and make use of canonical opcode representation

2016-09-08 Thread Jan Beulich
This representation is then being made available to interested callers,
to facilitate replacing their custom decoding.

This entails combining the three main switch statements into one.

Signed-off-by: Jan Beulich 

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -14,6 +14,9 @@ typedef bool bool_t;
 #define ASSERT assert
 #define ASSERT_UNREACHABLE() assert(!__LINE__)
 
+#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))
+#define MASK_INSR(v, m) (((v) * ((m) & -(m))) & (m))
+
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
 
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1611,7 +1611,6 @@ struct x86_emulate_state {
 ext_8f09,
 ext_8f0a,
 } ext;
-uint8_t opcode;
 uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
 uint8_t rex_prefix;
 bool lock_prefix;
@@ -1657,7 +1656,7 @@ x86_decode_base(
 {
 int rc = X86EMUL_OKAY;
 
-switch ( state->opcode )
+switch ( ctxt->opcode )
 {
 case 0x9a: /* call (far, absolute) */
 case 0xea: /* jmp (far, absolute) */
@@ -1696,11 +1695,9 @@ x86_decode_twobyte(
 {
 int rc = X86EMUL_OKAY;
 
-switch ( state->opcode )
+switch ( ctxt->opcode & X86EMUL_OPC_MASK )
 {
 case 0x78:
-if ( vex.opcx )
-break;
 switch ( vex.pfx )
 {
 case vex_66: /* extrq $imm8, $imm8, xmm */
@@ -1709,7 +1706,23 @@ x86_decode_twobyte(
 imm2 = insn_fetch_type(uint8_t);
 break;
 }
-break;
+/* fall through */
+case 0x10 ... 0x18:
+case 0x28 ... 0x2f:
+case 0x50 ... 0x77:
+case 0x79 ... 0x7f:
+case 0xae:
+case 0xc2:
+case 0xc4 ... 0xc7:
+case 0xd0 ... 0xfe:
+ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+break;
+/* Intentionally not handling here despite being modified by F3:
+case 0xb8: jmpe / popcnt
+case 0xbc: bsf / tzcnt
+case 0xbd: bsr / lzcnt
+ * They're being dealt with in the execution phase (if at all).
+ */
 }
 
  done:
@@ -1717,13 +1730,35 @@ x86_decode_twobyte(
 }
 
 static int
+x86_decode_0f38(
+struct x86_emulate_state *state,
+struct x86_emulate_ctxt *ctxt,
+const struct x86_emulate_ops *ops)
+{
+switch ( ctxt->opcode & X86EMUL_OPC_MASK )
+{
+case 0x00 ... 0xef:
+case 0xf2 ... 0xff:
+ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+break;
+
+case 0xf0: case 0xf1: /* movbe / crc32 */
+if ( rep_prefix() )
+ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+break;
+}
+
+return X86EMUL_OKAY;
+}
+
+static int
 x86_decode(
 struct x86_emulate_state *state,
 struct x86_emulate_ctxt *ctxt,
 const struct x86_emulate_ops  *ops)
 {
 uint8_t b, d, sib, sib_index, sib_base;
-unsigned int def_op_bytes, def_ad_bytes;
+unsigned int def_op_bytes, def_ad_bytes, opcode;
 int rc = X86EMUL_OKAY;
 
 memset(state, 0, sizeof(*state));
@@ -1804,29 +1839,31 @@ x86_decode(
 
 /* Opcode byte(s). */
 d = opcode_table[b];
-if ( d == 0 )
+if ( d == 0 && b == 0x0f)
 {
-/* Two-byte opcode? */
-if ( b == 0x0f )
+/* Two-byte opcode. */
+b = insn_fetch_type(uint8_t);
+d = twobyte_table[b];
+switch ( b )
 {
+default:
+opcode = b | MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
+ext = ext_0f;
+break;
+case 0x38:
 b = insn_fetch_type(uint8_t);
-d = twobyte_table[b];
-switch ( b )
-{
-default:
-ext = ext_0f;
-break;
-case 0x38:
-b = insn_fetch_type(uint8_t);
-ext = ext_0f38;
-break;
-case 0x3a:
-b = insn_fetch_type(uint8_t);
-ext = ext_0f3a;
-break;
-}
+opcode = b | MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
+ext = ext_0f38;
+break;
+case 0x3a:
+b = insn_fetch_type(uint8_t);
+opcode = b | MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
+ext = ext_0f3a;
+break;
 }
 }
+else
+opcode = b;
 
 /* ModRM and SIB bytes. */
 if ( d & ModRM )
@@ -1855,6 +1892,7 @@ x86_decode(
 vex.raw[0] = modrm;
 if ( b == 0xc5 )
 {
+opcode = X86EMUL_OPC_VEX_;
 vex.raw[1] = modrm;
 vex.opcx = vex_0f;
 vex.x = 1;
@@ -1876,31 +1914,44 @@ x86_decode(
 op_bytes = 8;
 }
 }
-if ( b == 0x62 )
+switch ( b )
 {
+case 0x62:
+  

[Xen-devel] [PATCH 09/17] SVM: use generic instruction decoding

2016-09-08 Thread Jan Beulich
... instead of custom handling. To facilitate this break out init code
from _hvm_emulate_one() into the new hvm_emulate_init(), and make
hvmemul_insn_fetch() globally available.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -835,7 +835,7 @@ static int hvmemul_read(
 container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
 }
 
-static int hvmemul_insn_fetch(
+int hvmemul_insn_fetch(
 enum x86_segment seg,
 unsigned long offset,
 void *p_data,
@@ -1765,15 +1765,14 @@ static const struct x86_emulate_ops hvm_
 .vmfunc= hvmemul_vmfunc,
 };
 
-static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
-const struct x86_emulate_ops *ops)
+void hvm_emulate_init(
+struct hvm_emulate_ctxt *hvmemul_ctxt,
+const unsigned char *insn_buf,
+unsigned int insn_bytes)
 {
-struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
 struct vcpu *curr = current;
-uint32_t new_intr_shadow, pfec = PFEC_page_present;
-struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+unsigned int pfec = PFEC_page_present;
 unsigned long addr;
-int rc;
 
 if ( hvm_long_mode_enabled(curr) &&
  hvmemul_ctxt->seg_reg[x86_seg_cs].attr.fields.l )
@@ -1791,14 +1790,14 @@ static int _hvm_emulate_one(struct hvm_e
 if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.dpl == 3 )
 pfec |= PFEC_user_mode;
 
-hvmemul_ctxt->insn_buf_eip = regs->eip;
-if ( !vio->mmio_insn_bytes )
+hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->eip;
+if ( !insn_bytes )
 {
 hvmemul_ctxt->insn_buf_bytes =
 hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
 (hvm_virtual_to_linear_addr(x86_seg_cs,
 &hvmemul_ctxt->seg_reg[x86_seg_cs],
-regs->eip,
+hvmemul_ctxt->insn_buf_eip,
 sizeof(hvmemul_ctxt->insn_buf),
 hvm_access_insn_fetch,
 hvmemul_ctxt->ctxt.addr_size,
@@ -1810,11 +1809,24 @@ static int _hvm_emulate_one(struct hvm_e
 }
 else
 {
-hvmemul_ctxt->insn_buf_bytes = vio->mmio_insn_bytes;
-memcpy(hvmemul_ctxt->insn_buf, vio->mmio_insn, vio->mmio_insn_bytes);
+hvmemul_ctxt->insn_buf_bytes = insn_bytes;
+memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
 }
 
 hvmemul_ctxt->exn_pending = 0;
+}
+
+static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
+const struct x86_emulate_ops *ops)
+{
+const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
+struct vcpu *curr = current;
+uint32_t new_intr_shadow;
+struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
+int rc;
+
+hvm_emulate_init(hvmemul_ctxt, vio->mmio_insn, vio->mmio_insn_bytes);
+
 vio->mmio_retry = 0;
 
 if ( cpu_has_vmx )
--- a/xen/arch/x86/hvm/svm/emulate.c
+++ b/xen/arch/x86/hvm/svm/emulate.c
@@ -15,7 +15,7 @@
  * this program; If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -26,41 +26,6 @@
 #include 
 #include 
 
-static unsigned int is_prefix(u8 opc)
-{
-switch ( opc )
-{
-case 0x66:
-case 0x67:
-case 0x2E:
-case 0x3E:
-case 0x26:
-case 0x64:
-case 0x65:
-case 0x36:
-case 0xF0:
-case 0xF3:
-case 0xF2:
-case 0x40 ... 0x4f:
-return 1;
-}
-return 0;
-}
-
-static unsigned long svm_rip2pointer(struct vcpu *v, unsigned long *limit)
-{
-struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
-unsigned long p = vmcb->cs.base + vmcb->rip;
-
-if ( !(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) )
-{
-*limit = vmcb->cs.limit;
-return (u32)p; /* mask to 32 bits */
-}
-*limit = ~0UL;
-return p;
-}
-
 static unsigned long svm_nextrip_insn_length(struct vcpu *v)
 {
 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -89,141 +54,96 @@ static unsigned long svm_nextrip_insn_le
 return vmcb->nextrip - vmcb->rip;
 }
 
-/* First byte: Length. Following bytes: Opcode bytes. */
-#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
-MAKE_INSTR(INVD,   2, 0x0f, 0x08);
-MAKE_INSTR(WBINVD, 2, 0x0f, 0x09);
-MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
-MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
-MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
-MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
-MAKE_INSTR(HLT,1, 0xf4);
-MAKE_INSTR(INT3,   1, 0xcc);
-MAKE_INSTR(RDTSC,  2, 0x0f, 0x31);
-MAKE_INSTR(PAUSE,  1, 0x90);
-MAKE_INSTR(XSETBV, 3, 0x0f, 0x01, 0xd1);
-MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
-MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
-MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
-MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
-MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
-MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
-
-static const u8 *const opc

[Xen-devel] [PATCH 10/17] x86/32on64: use generic instruction decoding for call gate emulation

2016-09-08 Thread Jan Beulich
... instead of custom handling. Note that we can't use generic
emulation, as the emulator's far branch support is rather rudimentary
at this point in time.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3138,13 +3139,92 @@ static inline int check_stack_limit(unsi
 (!(ar & _SEGMENT_EC) ? (esp - 1) <= limit : (esp - decr) > limit));
 }
 
+struct gate_op_ctxt {
+struct x86_emulate_ctxt ctxt;
+struct {
+unsigned long base, limit;
+} cs;
+bool insn_fetch;
+};
+
+static int gate_op_read(
+enum x86_segment seg,
+unsigned long offset,
+void *p_data,
+unsigned int bytes,
+struct x86_emulate_ctxt *ctxt)
+{
+const struct gate_op_ctxt *goc =
+container_of(ctxt, struct gate_op_ctxt, ctxt);
+unsigned int rc = bytes, sel = 0;
+unsigned long addr = offset, limit = 0;
+
+switch ( seg )
+{
+case x86_seg_cs:
+addr += goc->cs.base;
+limit = goc->cs.limit;
+break;
+case x86_seg_ds:
+sel = read_sreg(ds);
+break;
+case x86_seg_es:
+sel = read_sreg(es);
+break;
+case x86_seg_fs:
+sel = read_sreg(fs);
+break;
+case x86_seg_gs:
+sel = read_sreg(gs);
+break;
+case x86_seg_ss:
+sel = ctxt->regs->ss;
+break;
+default:
+return X86EMUL_UNHANDLEABLE;
+}
+if ( sel )
+{
+unsigned int ar;
+
+ASSERT(!goc->insn_fetch);
+if ( !read_descriptor(sel, current, &addr, &limit, &ar, 0) ||
+ !(ar & _SEGMENT_S) ||
+ !(ar & _SEGMENT_P) ||
+ ((ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR)) )
+return X86EMUL_UNHANDLEABLE;
+addr += offset;
+}
+else if ( seg != x86_seg_cs )
+return X86EMUL_UNHANDLEABLE;
+
+if ( limit < bytes - 1 || offset > limit - bytes + 1 )
+return X86EMUL_UNHANDLEABLE;
+
+if ( is_pv_32bit_vcpu(current) )
+addr = (uint32_t)addr;
+
+if ( !__addr_ok(addr) ||
+ (rc = __copy_from_user(p_data, (void *)addr, bytes)) )
+{
+propagate_page_fault(addr + bytes - rc,
+ goc->insn_fetch && cpu_has_nx
+ ? PFEC_insn_fetch : 0 );
+return X86EMUL_EXCEPTION;
+}
+
+return X86EMUL_OKAY;
+}
+
 static void emulate_gate_op(struct cpu_user_regs *regs)
 {
 struct vcpu *v = current;
-unsigned int sel, ar, dpl, nparm, opnd_sel;
-unsigned int op_default, op_bytes, ad_default, ad_bytes;
-unsigned long off, eip, opnd_off, base, limit;
-int jump;
+unsigned int sel, ar, dpl, nparm, insn_len;
+struct gate_op_ctxt ctxt = { .ctxt.regs = regs, .insn_fetch = true };
+struct x86_emulate_state *state;
+unsigned long off, base, limit;
+uint16_t opnd_sel = 0;
+int jump = -1, rc = X86EMUL_OKAY;
 
 /* Check whether this fault is due to the use of a call gate. */
 if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
@@ -3166,7 +3246,8 @@ static void emulate_gate_op(struct cpu_u
  * Decode instruction (and perhaps operand) to determine RPL,
  * whether this is a jump or a call, and the call return offset.
  */
-if ( !read_descriptor(regs->cs, v, &base, &limit, &ar, 0) ||
+if ( !read_descriptor(regs->cs, v, &ctxt.cs.base, &ctxt.cs.limit,
+  &ar, 0) ||
  !(ar & _SEGMENT_S) ||
  !(ar & _SEGMENT_P) ||
  !(ar & _SEGMENT_CODE) )
@@ -3175,179 +3256,59 @@ static void emulate_gate_op(struct cpu_u
 return;
 }
 
-op_bytes = op_default = ar & _SEGMENT_DB ? 4 : 2;
-ad_default = ad_bytes = op_default;
-opnd_sel = opnd_off = 0;
-jump = -1;
-for ( eip = regs->eip; eip - regs->_eip < 10; )
+ctxt.ctxt.addr_size = ar & _SEGMENT_DB ? 32 : 16;
+/* Leave zero in ctxt.ctxt.sp_size, as it's not needed for decoding. */
+state = x86_decode_insn(&ctxt.ctxt, gate_op_read);
+ctxt.insn_fetch = false;
+if ( IS_ERR_OR_NULL(state) )
+{
+if ( PTR_ERR(state) != -X86EMUL_EXCEPTION )
+do_guest_trap(TRAP_gp_fault, regs);
+return;
+}
+
+switch ( ctxt.ctxt.opcode )
 {
-switch ( insn_fetch(u8, base, eip, limit) )
+unsigned int modrm_345;
+
+case 0xea:
+++jump;
+/* fall through */
+case 0x9a:
+++jump;
+opnd_sel = x86_insn_immediate(state, 1);
+break;
+case 0xff:
+if ( x86_insn_modrm(state, NULL, &modrm_345) >= 3 )
+break;
+switch ( modrm_345 & 7 )
 {
-case 0x66: /* operand-size override */
-op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
-continue;
-case 0x67: /* address-size override */
-ad_bytes = ad_default != 4 ? 4 : 2; /* switch to 2/4 bytes */
-  

[Xen-devel] [PATCH 11/17] x86/PV: split out dealing with CRn from privileged instruction handling

2016-09-08 Thread Jan Beulich
This is in preparation for using the generic emulator here.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2242,6 +2242,107 @@ unsigned long guest_to_host_gpr_switch(u
 
 void (*pv_post_outb_hook)(unsigned int port, u8 value);
 
+static int priv_op_read_cr(unsigned int reg, unsigned long *val,
+   struct x86_emulate_ctxt *ctxt)
+{
+const struct vcpu *curr = current;
+
+switch ( reg )
+{
+case 0: /* Read CR0 */
+*val = (read_cr0() & ~X86_CR0_TS) | curr->arch.pv_vcpu.ctrlreg[0];
+return X86EMUL_OKAY;
+
+case 2: /* Read CR2 */
+case 4: /* Read CR4 */
+*val = curr->arch.pv_vcpu.ctrlreg[reg];
+return X86EMUL_OKAY;
+
+case 3: /* Read CR3 */
+{
+const struct domain *currd = curr->domain;
+unsigned long mfn;
+
+if ( !is_pv_32bit_domain(currd) )
+{
+mfn = pagetable_get_pfn(curr->arch.guest_table);
+*val = xen_pfn_to_cr3(mfn_to_gmfn(currd, mfn));
+}
+else
+{
+l4_pgentry_t *pl4e =
+
map_domain_page(_mfn(pagetable_get_pfn(curr->arch.guest_table)));
+
+mfn = l4e_get_pfn(*pl4e);
+unmap_domain_page(pl4e);
+*val = compat_pfn_to_cr3(mfn_to_gmfn(currd, mfn));
+}
+/* PTs should not be shared */
+BUG_ON(page_get_owner(mfn_to_page(mfn)) == dom_cow);
+return X86EMUL_OKAY;
+}
+}
+
+return X86EMUL_UNHANDLEABLE;
+}
+
+static int priv_op_write_cr(unsigned int reg, unsigned long val,
+struct x86_emulate_ctxt *ctxt)
+{
+struct vcpu *curr = current;
+
+switch ( reg )
+{
+case 0: /* Write CR0 */
+if ( (val ^ read_cr0()) & ~X86_CR0_TS )
+{
+gdprintk(XENLOG_WARNING,
+"Attempt to change unmodifiable CR0 flags\n");
+break;
+}
+do_fpu_taskswitch(!!(val & X86_CR0_TS));
+return X86EMUL_OKAY;
+
+case 2: /* Write CR2 */
+curr->arch.pv_vcpu.ctrlreg[2] = val;
+arch_set_cr2(curr, val);
+return X86EMUL_OKAY;
+
+case 3: /* Write CR3 */
+{
+struct domain *currd = curr->domain;
+unsigned long gfn;
+struct page_info *page;
+int rc;
+
+gfn = !is_pv_32bit_domain(currd)
+  ? xen_cr3_to_pfn(val) : compat_cr3_to_pfn(val);
+page = get_page_from_gfn(currd, gfn, NULL, P2M_ALLOC);
+if ( !page )
+break;
+rc = new_guest_cr3(page_to_mfn(page));
+put_page(page);
+
+switch ( rc )
+{
+case 0:
+return X86EMUL_OKAY;
+case -ERESTART: /* retry after preemption */
+return X86EMUL_RETRY;
+}
+break;
+}
+
+case 4: /* Write CR4 */
+curr->arch.pv_vcpu.ctrlreg[4] = pv_guest_cr4_fixup(curr, val);
+write_cr4(pv_guest_cr4_to_real_cr4(curr));
+ctxt_switch_levelling(curr);
+return X86EMUL_OKAY;
+}
+
+return X86EMUL_UNHANDLEABLE;
+}
+
 static inline uint64_t guest_misc_enable(uint64_t val)
 {
 val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
@@ -2654,48 +2755,9 @@ static int emulate_privileged_op(struct
 goto fail;
 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
 modrm_rm  |= (opcode >> 0) & 7;
-reg = decode_register(modrm_rm, regs, 0);
-switch ( modrm_reg )
-{
-case 0: /* Read CR0 */
-*reg = (read_cr0() & ~X86_CR0_TS) |
-v->arch.pv_vcpu.ctrlreg[0];
-break;
-
-case 2: /* Read CR2 */
-*reg = v->arch.pv_vcpu.ctrlreg[2];
-break;
-
-case 3: /* Read CR3 */
-{
-unsigned long mfn;
-
-if ( !is_pv_32bit_domain(currd) )
-{
-mfn = pagetable_get_pfn(v->arch.guest_table);
-*reg = xen_pfn_to_cr3(mfn_to_gmfn(currd, mfn));
-}
-else
-{
-l4_pgentry_t *pl4e =
-
map_domain_page(_mfn(pagetable_get_pfn(v->arch.guest_table)));
-
-mfn = l4e_get_pfn(*pl4e);
-unmap_domain_page(pl4e);
-*reg = compat_pfn_to_cr3(mfn_to_gmfn(currd, mfn));
-}
-/* PTs should not be shared */
-BUG_ON(page_get_owner(mfn_to_page(mfn)) == dom_cow);
-}
-break;
-
-case 4: /* Read CR4 */
-*reg = v->arch.pv_vcpu.ctrlreg[4];
-break;
-
-default:
+if ( priv_op_read_cr(modrm_reg, decode_register(modrm_rm, regs, 0),
+ NULL) != X86EMUL_OKAY )
 goto fail;
-}
 break;
 
 case 0x21: /* MOV DR?, */ {
@@ -2719,56 +2781,12 @@ static int emulate_privileged_op(struct
 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
 modrm_rm  |= (opcode >> 0) & 7

[Xen-devel] [PATCH 12/17] x86/PV: split out dealing with DRn from privileged instruction handling

2016-09-08 Thread Jan Beulich
This is in preparation for using the generic emulator here.

Some care is needed temporarily to not unduly alter guest register
state: The local variable "res" can only go away once this code got
fully switched over to using x86_emulate().

Also switch to IS_ERR_VALUE() instead of (incorrectly) open coding it.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2343,6 +2343,26 @@ static int priv_op_write_cr(unsigned int
 return X86EMUL_UNHANDLEABLE;
 }
 
+static int priv_op_read_dr(unsigned int reg, unsigned long *val,
+   struct x86_emulate_ctxt *ctxt)
+{
+unsigned long res = do_get_debugreg(reg);
+
+if ( IS_ERR_VALUE(res) )
+return X86EMUL_UNHANDLEABLE;
+
+*val = res;
+
+return X86EMUL_OKAY;
+}
+
+static int priv_op_write_dr(unsigned int reg, unsigned long val,
+struct x86_emulate_ctxt *ctxt)
+{
+return do_set_debugreg(reg, val) == 0
+   ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
+}
+
 static inline uint64_t guest_misc_enable(uint64_t val)
 {
 val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
@@ -2761,16 +2781,14 @@ static int emulate_privileged_op(struct
 break;
 
 case 0x21: /* MOV DR?, */ {
-unsigned long res;
 opcode = insn_fetch(u8, code_base, eip, code_limit);
 if ( opcode < 0xc0 )
 goto fail;
 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
 modrm_rm  |= (opcode >> 0) & 7;
-reg = decode_register(modrm_rm, regs, 0);
-if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
+if ( priv_op_read_dr(modrm_reg, decode_register(modrm_rm, regs, 0),
+ NULL) != X86EMUL_OKAY )
 goto fail;
-*reg = res;
 break;
 }
 
@@ -2799,7 +2817,7 @@ static int emulate_privileged_op(struct
 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
 modrm_rm  |= (opcode >> 0) & 7;
 reg = decode_register(modrm_rm, regs, 0);
-if ( do_set_debugreg(modrm_reg, *reg) != 0 )
+if ( priv_op_write_dr(modrm_reg, *reg, NULL) != X86EMUL_OKAY )
 goto fail;
 break;
 



x86/PV: split out dealing with DRn from privileged instruction handling

This is in preparation for using the generic emulator here.

Some care is needed temporarily to not unduly alter guest register
state: The local variable "res" can only go away once this code got
fully switched over to using x86_emulate().

Also switch to IS_ERR_VALUE() instead of (incorrectly) open coding it.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2343,6 +2343,26 @@ static int priv_op_write_cr(unsigned int
 return X86EMUL_UNHANDLEABLE;
 }
 
+static int priv_op_read_dr(unsigned int reg, unsigned long *val,
+   struct x86_emulate_ctxt *ctxt)
+{
+unsigned long res = do_get_debugreg(reg);
+
+if ( IS_ERR_VALUE(res) )
+return X86EMUL_UNHANDLEABLE;
+
+*val = res;
+
+return X86EMUL_OKAY;
+}
+
+static int priv_op_write_dr(unsigned int reg, unsigned long val,
+struct x86_emulate_ctxt *ctxt)
+{
+return do_set_debugreg(reg, val) == 0
+   ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
+}
+
 static inline uint64_t guest_misc_enable(uint64_t val)
 {
 val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
@@ -2761,16 +2781,14 @@ static int emulate_privileged_op(struct
 break;
 
 case 0x21: /* MOV DR?, */ {
-unsigned long res;
 opcode = insn_fetch(u8, code_base, eip, code_limit);
 if ( opcode < 0xc0 )
 goto fail;
 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
 modrm_rm  |= (opcode >> 0) & 7;
-reg = decode_register(modrm_rm, regs, 0);
-if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
+if ( priv_op_read_dr(modrm_reg, decode_register(modrm_rm, regs, 0),
+ NULL) != X86EMUL_OKAY )
 goto fail;
-*reg = res;
 break;
 }
 
@@ -2799,7 +2817,7 @@ static int emulate_privileged_op(struct
 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
 modrm_rm  |= (opcode >> 0) & 7;
 reg = decode_register(modrm_rm, regs, 0);
-if ( do_set_debugreg(modrm_reg, *reg) != 0 )
+if ( priv_op_write_dr(modrm_reg, *reg, NULL) != X86EMUL_OKAY )
 goto fail;
 break;
 
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 13/17] x86/PV: split out dealing with MSRs from privileged instruction handling

2016-09-08 Thread Jan Beulich
This is in preparation for using the generic emulator here.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2373,6 +2373,332 @@ static inline uint64_t guest_misc_enable
 return val;
 }
 
+static inline bool is_cpufreq_controller(const struct domain *d)
+{
+return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
+is_hardware_domain(d));
+}
+
+static int priv_op_read_msr(unsigned int reg, uint64_t *val,
+struct x86_emulate_ctxt *ctxt)
+{
+const struct vcpu *curr = current;
+const struct domain *currd = curr->domain;
+bool vpmu_msr = false;
+
+switch ( reg )
+{
+int rc;
+
+case MSR_FS_BASE:
+if ( is_pv_32bit_domain(currd) )
+break;
+*val = cpu_has_fsgsbase ? __rdfsbase() : curr->arch.pv_vcpu.fs_base;
+return X86EMUL_OKAY;
+
+case MSR_GS_BASE:
+if ( is_pv_32bit_domain(currd) )
+break;
+*val = cpu_has_fsgsbase ? __rdgsbase()
+: curr->arch.pv_vcpu.gs_base_kernel;
+return X86EMUL_OKAY;
+
+case MSR_SHADOW_GS_BASE:
+if ( is_pv_32bit_domain(currd) )
+break;
+*val = curr->arch.pv_vcpu.gs_base_user;
+return X86EMUL_OKAY;
+
+case MSR_K7_FID_VID_CTL:
+case MSR_K7_FID_VID_STATUS:
+case MSR_K8_PSTATE_LIMIT:
+case MSR_K8_PSTATE_CTRL:
+case MSR_K8_PSTATE_STATUS:
+case MSR_K8_PSTATE0:
+case MSR_K8_PSTATE1:
+case MSR_K8_PSTATE2:
+case MSR_K8_PSTATE3:
+case MSR_K8_PSTATE4:
+case MSR_K8_PSTATE5:
+case MSR_K8_PSTATE6:
+case MSR_K8_PSTATE7:
+if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
+break;
+if ( unlikely(is_cpufreq_controller(currd)) )
+goto normal;
+*val = 0;
+return X86EMUL_OKAY;
+
+case MSR_IA32_UCODE_REV:
+BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
+if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+{
+if ( wrmsr_safe(MSR_IA32_UCODE_REV, 0) )
+break;
+sync_core();
+}
+goto normal;
+
+case MSR_IA32_MISC_ENABLE:
+if ( rdmsr_safe(reg, *val) )
+break;
+*val = guest_misc_enable(*val);
+return X86EMUL_OKAY;
+
+case MSR_AMD64_DR0_ADDRESS_MASK:
+if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+break;
+*val = curr->arch.pv_vcpu.dr_mask[0];
+return X86EMUL_OKAY;
+
+case MSR_AMD64_DR1_ADDRESS_MASK ... MSR_AMD64_DR3_ADDRESS_MASK:
+if ( !boot_cpu_has(X86_FEATURE_DBEXT) )
+break;
+*val = curr->arch.pv_vcpu.dr_mask[reg - MSR_AMD64_DR1_ADDRESS_MASK + 
1];
+return X86EMUL_OKAY;
+
+case MSR_IA32_PERF_CAPABILITIES:
+/* No extra capabilities are supported. */
+*val = 0;
+return X86EMUL_OKAY;
+
+case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
+case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
+case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+{
+vpmu_msr = true;
+/* fall through */
+case MSR_AMD_FAM15H_EVNTSEL0...MSR_AMD_FAM15H_PERFCTR5:
+case MSR_K7_EVNTSEL0...MSR_K7_PERFCTR3:
+if ( vpmu_msr || (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
+{
+/* Don't leak PMU MSRs to unprivileged domains. */
+if ( (vpmu_mode & XENPMU_MODE_ALL) &&
+ !is_hardware_domain(currd) )
+*val = 0;
+else if ( vpmu_do_rdmsr(reg, val) )
+break;
+return X86EMUL_OKAY;
+}
+}
+/* fall through */
+default:
+if ( rdmsr_hypervisor_regs(reg, val) )
+return X86EMUL_OKAY;
+
+rc = vmce_rdmsr(reg, val);
+if ( rc < 0 )
+break;
+if ( rc )
+return X86EMUL_OKAY;
+/* fall through */
+case MSR_EFER:
+normal:
+/* Everyone can read the MSR space. */
+/* gdprintk(XENLOG_WARNING, "Domain attempted RDMSR %08x\n", reg); */
+if ( rdmsr_safe(reg, *val) )
+break;
+return X86EMUL_OKAY;
+}
+
+return X86EMUL_UNHANDLEABLE;
+}
+
+#include "x86_64/mmconfig.h"
+
+static int priv_op_write_msr(unsigned int reg, uint64_t val,
+ struct x86_emulate_ctxt *ctxt)
+{
+struct vcpu *curr = current;
+const struct domain *currd = curr->domain;
+bool vpmu_msr = false;
+
+switch ( reg )
+{
+uint64_t temp;
+int rc;
+
+case MSR_FS_BASE:
+if ( is_pv_32bit_domain(currd) )
+break;
+wrfsbase(val);
+curr->arch.pv_vcpu.fs_base = val;
+return X86EMUL_OKAY;
+
+case MSR_GS_BASE:
+if ( is_pv_32bit_domain(curr

[Xen-devel] [PATCH 14/17] x86emul: support XSETBV

2016-09-08 Thread Jan Beulich
This is a prereq for switching PV privileged op emulation to the
generic instruction emulator. Since handle_xsetbv() is already capable
of dealing with all guest kinds, avoid introducing another hook here.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4174,6 +4174,24 @@ x86_emulate(
 
 switch( modrm )
 {
+#ifdef __XEN__
+case 0xd1: /* xsetbv */
+{
+unsigned long cr4;
+
+if ( vex.pfx )
+break;
+if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+cr4 = 0;
+generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD, -1);
+generate_exception_if(!mode_ring0() ||
+  handle_xsetbv(_regs._ecx,
+_regs._eax | (_regs.rdx << 
32)),
+  EXC_GP, 0);
+goto no_writeback;
+}
+#endif
+
 case 0xdf: /* invlpga */
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 generate_exception_if(!mode_ring0(), EXC_GP, 0);



x86emul: support XSETBV

This is a prereq for switching PV privileged op emulation to the
generic instruction emulator. Since handle_xsetbv() is already capable
of dealing with all guest kinds, avoid introducing another hook here.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4174,6 +4174,24 @@ x86_emulate(
 
 switch( modrm )
 {
+#ifdef __XEN__
+case 0xd1: /* xsetbv */
+{
+unsigned long cr4;
+
+if ( vex.pfx )
+break;
+if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
+cr4 = 0;
+generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD, -1);
+generate_exception_if(!mode_ring0() ||
+  handle_xsetbv(_regs._ecx,
+_regs._eax | (_regs.rdx << 
32)),
+  EXC_GP, 0);
+goto no_writeback;
+}
+#endif
+
 case 0xdf: /* invlpga */
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 generate_exception_if(!mode_ring0(), EXC_GP, 0);
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 04/17] x86emul: track only rIP in emulator state

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 15:08,  wrote:

Please disregard this one - it got sent out with the wrong number in the 
subject.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 15/17] x86emul: sort opcode 0f01 special case switch() statement

2016-09-08 Thread Jan Beulich
Sort the special case opcode 0f01 entries numerically, insert blank
lines between each of the cases, and properly place opening braces.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4192,6 +4192,14 @@ x86_emulate(
 }
 #endif
 
+case 0xd4: /* vmfunc */
+generate_exception_if(lock_prefix | rep_prefix() | (vex.pfx == 
vex_66),
+  EXC_UD, -1);
+fail_if(ops->vmfunc == NULL);
+if ( (rc = ops->vmfunc(ctxt) != X86EMUL_OKAY) )
+goto done;
+goto no_writeback;
+
 case 0xdf: /* invlpga */
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 generate_exception_if(!mode_ring0(), EXC_GP, 0);
@@ -4200,7 +4208,9 @@ x86_emulate(
ctxt)) )
 goto done;
 goto no_writeback;
-case 0xf9: /* rdtscp */ {
+
+case 0xf9: /* rdtscp */
+{
 uint64_t tsc_aux;
 fail_if(ops->read_msr == NULL);
 if ( (rc = ops->read_msr(MSR_TSC_AUX, &tsc_aux, ctxt)) != 0 )
@@ -4208,14 +4218,9 @@ x86_emulate(
 _regs.ecx = (uint32_t)tsc_aux;
 goto rdtsc;
 }
-case 0xd4: /* vmfunc */
-generate_exception_if(lock_prefix | rep_prefix() | (vex.pfx == 
vex_66),
-  EXC_UD, -1);
-fail_if(ops->vmfunc == NULL);
-if ( (rc = ops->vmfunc(ctxt) != X86EMUL_OKAY) )
-goto done;
-goto no_writeback;
-   case 0xfc: /* clzero */ {
+
+case 0xfc: /* clzero */
+{
 unsigned int eax = 1, ebx = 0, dummy = 0;
 unsigned long zero = 0;
 



x86emul: sort opcode 0f01 special case switch() statement

Sort the special case opcode 0f01 entries numerically, insert blank
lines between each of the cases, and properly place opening braces.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -4192,6 +4192,14 @@ x86_emulate(
 }
 #endif
 
+case 0xd4: /* vmfunc */
+generate_exception_if(lock_prefix | rep_prefix() | (vex.pfx == 
vex_66),
+  EXC_UD, -1);
+fail_if(ops->vmfunc == NULL);
+if ( (rc = ops->vmfunc(ctxt) != X86EMUL_OKAY) )
+goto done;
+goto no_writeback;
+
 case 0xdf: /* invlpga */
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 generate_exception_if(!mode_ring0(), EXC_GP, 0);
@@ -4200,7 +4208,9 @@ x86_emulate(
ctxt)) )
 goto done;
 goto no_writeback;
-case 0xf9: /* rdtscp */ {
+
+case 0xf9: /* rdtscp */
+{
 uint64_t tsc_aux;
 fail_if(ops->read_msr == NULL);
 if ( (rc = ops->read_msr(MSR_TSC_AUX, &tsc_aux, ctxt)) != 0 )
@@ -4208,14 +4218,9 @@ x86_emulate(
 _regs.ecx = (uint32_t)tsc_aux;
 goto rdtsc;
 }
-case 0xd4: /* vmfunc */
-generate_exception_if(lock_prefix | rep_prefix() | (vex.pfx == 
vex_66),
-  EXC_UD, -1);
-fail_if(ops->vmfunc == NULL);
-if ( (rc = ops->vmfunc(ctxt) != X86EMUL_OKAY) )
-goto done;
-goto no_writeback;
-   case 0xfc: /* clzero */ {
+
+case 0xfc: /* clzero */
+{
 unsigned int eax = 1, ebx = 0, dummy = 0;
 unsigned long zero = 0;
 
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 16/17] x86/PV: use generic emulator for privileged instruction handling

2016-09-08 Thread Jan Beulich
There's a new emulator return code being added to allow bypassing
certain operations (see the code comment). Its handling in the epilogue
code involves moving the raising of the single step trap until after
registers were updated. This should probably have been that way from
the beginning, to allow the inject_hw_exception() hook to see updated
register state (in case it cares) - it's a trap, after all.

The other small tweak to the emulator is to single iteration handling
of INS and OUTS: Since we don't want to handle any other memory access
instructions, we want these to be handled by the rep_ins() / rep_outs()
hooks here too. The read() / write() hook pointers get checked for that
purpose.

And finally handling of exceptions gets changed for REP INS / REP OUTS:
If the hook return X86EMUL_EXCEPTION, register state will still get
updated if some iterations have been performed (but the rIP update will
get suppressed if not all of them did get handled). While on the HVM side
the VA -> LA -> PA translation process clips the number of repetitions,
doing so would unduly complicate the PV side code being added here.

Signed-off-by: Jan Beulich 
---
One thing to be considered is that despite avoiding the handling of
memory reads and writes (other than for INS and OUTS) the set of insns
now getting potentially handled by the emulator is much larger than
before. A possible solution to this would be a new hook to be called
between decode and execution stages, allowing further restrictions to
be enforced. Of course this could easily be a follow-up patch, as the
one here is quite big already.

Another thing to consider is to the extend the X86EMUL_EXCEPTION
handling change mentioned above to other string instructions. In that
case this should probably be broken out into a prereq patch.

--- a/tools/tests/x86_emulator/x86_emulate.c
+++ b/tools/tests/x86_emulator/x86_emulate.c
@@ -20,6 +20,9 @@ typedef bool bool_t;
 #define cpu_has_amd_erratum(nr) 0
 #define mark_regs_dirty(r) ((void)(r))
 
+#define likely(x)   __builtin_expect(!!(x), true)
+#define unlikely(x) __builtin_expect(!!(x), false)
+
 #define __packed __attribute__((packed))
 
 /* For generic assembly code: use macros to define operation/operand sizes. */
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -459,6 +459,7 @@ static int hvmemul_linear_to_phys(
 {
 if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
 return X86EMUL_RETRY;
+*reps = 0;
 hvm_inject_page_fault(pfec, addr);
 return X86EMUL_EXCEPTION;
 }
@@ -478,6 +479,7 @@ static int hvmemul_linear_to_phys(
 if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
 return X86EMUL_RETRY;
 done /= bytes_per_rep;
+*reps = done;
 if ( done == 0 )
 {
 ASSERT(!reverse);
@@ -486,7 +488,6 @@ static int hvmemul_linear_to_phys(
 hvm_inject_page_fault(pfec, addr & PAGE_MASK);
 return X86EMUL_EXCEPTION;
 }
-*reps = done;
 break;
 }
 
@@ -568,6 +569,7 @@ static int hvmemul_virtual_to_linear(
 return X86EMUL_UNHANDLEABLE;
 
 /* This is a singleton operation: fail it with an exception. */
+*reps = 0;
 hvmemul_ctxt->exn_pending = 1;
 hvmemul_ctxt->trap.vector =
 (seg == x86_seg_ss) ? TRAP_stack_error : TRAP_gp_fault;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -662,16 +662,13 @@ static void do_guest_trap(unsigned int t
 trapstr(trapnr), trapnr, regs->error_code);
 }
 
-static void instruction_done(
-struct cpu_user_regs *regs, unsigned long eip, unsigned int bpmatch)
+static void instruction_done(struct cpu_user_regs *regs, unsigned long eip)
 {
 regs->eip = eip;
 regs->eflags &= ~X86_EFLAGS_RF;
-if ( bpmatch || (regs->eflags & X86_EFLAGS_TF) )
+if ( regs->eflags & X86_EFLAGS_TF )
 {
-current->arch.debugreg[6] |= bpmatch | DR_STATUS_RESERVED_ONE;
-if ( regs->eflags & X86_EFLAGS_TF )
-current->arch.debugreg[6] |= DR_STEP;
+current->arch.debugreg[6] |= DR_STEP | DR_STATUS_RESERVED_ONE;
 do_guest_trap(TRAP_debug, regs);
 }
 }
@@ -1272,7 +1269,7 @@ static int emulate_invalid_rdtscp(struct
 return 0;
 eip += sizeof(opcode);
 pv_soft_rdtsc(v, regs, 1);
-instruction_done(regs, eip, 0);
+instruction_done(regs, eip);
 return EXCRET_fault_fixed;
 }
 
@@ -1305,7 +1302,7 @@ static int emulate_forced_invalid_op(str
 
 pv_cpuid(regs);
 
-instruction_done(regs, eip, 0);
+instruction_done(regs, eip);
 
 trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip);
 
@@ -1989,6 +1986,154 @@ static int read_gate_descriptor(unsigned
 return 1;
 }
 
+struct priv_op_ctxt {
+struct x86_emulate_ctxt ctxt;
+struct {
+unsigned long base, limit;
+} cs;
+char *io_emul_stub;
+unsigned int bpmatch;
+unsigned int tsc

[Xen-devel] [PATCH 17/17] x86emul: don't assume a memory operand

2016-09-08 Thread Jan Beulich
Especially for x86_insn_operand_ea() to return dependable segment
information even when the caller didn't consider applicability we
shouldn't have ea.type start out as OP_MEM. Make it OP_NONE instead,
and set it to OP_MEM when we actually encounter memory like operands.

This requires to eliminate the XSA-123 fix, which has been no longer
necessary since the elimination of the union in commit dd766684e7. That
in turn allows restricting the scope of override_seg to x86_decode().
At this occasion also make it have a proper type, instead of plain int.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1632,7 +1632,6 @@ struct x86_emulate_state {
 opcode_desc_t desc;
 union vex vex;
 union evex evex;
-int override_seg;
 
 /*
  * Data operand effective address (usually computed from ModRM).
@@ -1664,7 +1663,6 @@ struct x86_emulate_state {
 #define lock_prefix (state->lock_prefix)
 #define vex (state->vex)
 #define evex (state->evex)
-#define override_seg (state->override_seg)
 #define ea (state->ea)
 
 static int
@@ -1693,6 +1691,7 @@ x86_decode_base(
 case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
 case 0xa2: case 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
 /* Source EA is not encoded via ModRM. */
+ea.type = OP_MEM;
 ea.mem.off = insn_fetch_bytes(ad_bytes);
 break;
 
@@ -1783,11 +1782,11 @@ x86_decode(
 {
 uint8_t b, d, sib, sib_index, sib_base;
 unsigned int def_op_bytes, def_ad_bytes, opcode;
+enum x86_segment override_seg = x86_seg_none;
 int rc = X86EMUL_OKAY;
 
 memset(state, 0, sizeof(*state));
-override_seg = -1;
-ea.type = OP_MEM;
+ea.type = OP_NONE;
 ea.mem.seg = x86_seg_ds;
 ea.reg = REG_POISON;
 state->regs = ctxt->regs;
@@ -2085,6 +2084,7 @@ x86_decode(
 else if ( ad_bytes == 2 )
 {
 /* 16-bit ModR/M decode. */
+ea.type = OP_MEM;
 switch ( modrm_rm )
 {
 case 0:
@@ -2135,6 +2135,7 @@ x86_decode(
 else
 {
 /* 32/64-bit ModR/M decode. */
+ea.type = OP_MEM;
 if ( modrm_rm == 4 )
 {
 sib = insn_fetch_type(uint8_t);
@@ -2199,7 +2200,7 @@ x86_decode(
 }
 }
 
-if ( override_seg != -1 && ea.type == OP_MEM )
+if ( override_seg != x86_seg_none )
 ea.mem.seg = override_seg;
 
 /* Fetch the immediate operand, if present. */
@@ -4250,13 +4251,11 @@ x86_emulate(
 generate_exception_if(limit < sizeof(long) ||
   (limit & (limit - 1)), EXC_UD, -1);
 base &= ~(limit - 1);
-if ( override_seg == -1 )
-override_seg = x86_seg_ds;
 if ( ops->rep_stos )
 {
 unsigned long nr_reps = limit / sizeof(zero);
 
-rc = ops->rep_stos(&zero, override_seg, base, sizeof(zero),
+rc = ops->rep_stos(&zero, ea.mem.seg, base, sizeof(zero),
&nr_reps, ctxt);
 if ( rc == X86EMUL_OKAY )
 {
@@ -4268,7 +4267,7 @@ x86_emulate(
 }
 while ( limit )
 {
-rc = ops->write(override_seg, base, &zero, sizeof(zero), ctxt);
+rc = ops->write(ea.mem.seg, base, &zero, sizeof(zero), ctxt);
 if ( rc != X86EMUL_OKAY )
 goto done;
 base += sizeof(zero);
@@ -5254,7 +5253,6 @@ x86_emulate(
 #undef rex_prefix
 #undef lock_prefix
 #undef vex
-#undef override_seg
 #undef ea
 
 #ifdef __XEN__



x86emul: don't assume a memory operand

Especially for x86_insn_operand_ea() to return dependable segment
information even when the caller didn't consider applicability we
shouldn't have ea.type start out as OP_MEM. Make it OP_NONE instead,
and set it to OP_MEM when we actually encounter memory like operands.

This requires to eliminate the XSA-123 fix, which has been no longer
necessary since the elimination of the union in commit dd766684e7. That
in turn allows restricting the scope of override_seg to x86_decode().
At this occasion also make it have a proper type, instead of plain int.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1632,7 +1632,6 @@ struct x86_emulate_state {
 opcode_desc_t desc;
 union vex vex;
 union evex evex;
-int override_seg;
 
 /*
  * Data operand effective address (usually computed from ModRM).
@@ -1664,7 +1663,6 @@ struct x86_emulate_state {
 #define lock_prefix (state->lock_prefix)
 #define vex (state->vex)
 #define evex (state->evex)
-#define override_seg (state->override_seg)
 #define ea (state->ea)
 
 static int
@@ -1693,6 +1691,7 @@ x86_decode_base(
 case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
 case 0xa2: 

Re: [Xen-devel] [PATCH 07/17] x86emul: move x86_execute() common epilogue code

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 15:13,  wrote:
> Only code movement, no functional change.
> 
> Signed-off-by: Jan Beulich 

Just noticed the title was left stale - should really be "x86emul:
move x86_emulate() common epilogue code".

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [ovmf test] 100810: all pass - PUSHED

2016-09-08 Thread osstest service owner
flight 100810 ovmf real [real]
http://logs.test-lab.xenproject.org/osstest/logs/100810/

Perfect :-)
All tests in this flight passed as required
version targeted for testing:
 ovmf d74135cd0f8d00d2126df0b4db54938c96456db6
baseline version:
 ovmf 4ac14ceae076439dcea926bc47cda4e1d2779cae

Last test of basis   100805  2016-09-08 05:50:03 Z0 days
Testing same since   100810  2016-09-08 10:13:28 Z0 days1 attempts


People who touched revisions under test:
  Ard Biesheuvel 
  Dennis Chen 
  Laszlo Ersek 

jobs:
 build-amd64-xsm  pass
 build-i386-xsm   pass
 build-amd64  pass
 build-i386   pass
 build-amd64-libvirt  pass
 build-i386-libvirt   pass
 build-amd64-pvopspass
 build-i386-pvops pass
 test-amd64-amd64-xl-qemuu-ovmf-amd64 pass
 test-amd64-i386-xl-qemuu-ovmf-amd64  pass



sg-report-flight on osstest.test-lab.xenproject.org
logs: /home/logs/logs
images: /home/logs/images

Logs, config files, etc. are available at
http://logs.test-lab.xenproject.org/osstest/logs

Explanation of these reports, and of osstest in general, is at
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README.email;hb=master
http://xenbits.xen.org/gitweb/?p=osstest.git;a=blob;f=README;hb=master

Test harness code can be found at
http://xenbits.xen.org/gitweb?p=osstest.git;a=summary


Pushing revision :

+ branch=ovmf
+ revision=d74135cd0f8d00d2126df0b4db54938c96456db6
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x '!=' x/home/osstest/repos/lock ']'
++ OSSTEST_REPOS_LOCK_LOCKED=/home/osstest/repos/lock
++ exec with-lock-ex -w /home/osstest/repos/lock ./ap-push ovmf 
d74135cd0f8d00d2126df0b4db54938c96456db6
+ branch=ovmf
+ revision=d74135cd0f8d00d2126df0b4db54938c96456db6
+ . ./cri-lock-repos
++ . ./cri-common
+++ . ./cri-getconfig
+++ umask 002
+++ getrepos
 getconfig Repos
 perl -e '
use Osstest;
readglobalconfig();
print $c{"Repos"} or die $!;
'
+++ local repos=/home/osstest/repos
+++ '[' -z /home/osstest/repos ']'
+++ '[' '!' -d /home/osstest/repos ']'
+++ echo /home/osstest/repos
++ repos=/home/osstest/repos
++ repos_lock=/home/osstest/repos/lock
++ '[' x/home/osstest/repos/lock '!=' x/home/osstest/repos/lock ']'
+ . ./cri-common
++ . ./cri-getconfig
++ umask 002
+ select_xenbranch
+ case "$branch" in
+ tree=ovmf
+ xenbranch=xen-unstable
+ '[' xovmf = xlinux ']'
+ linuxbranch=
+ '[' x = x ']'
+ qemuubranch=qemu-upstream-unstable
+ select_prevxenbranch
++ ./cri-getprevxenbranch xen-unstable
+ prevxenbranch=xen-4.7-testing
+ '[' xd74135cd0f8d00d2126df0b4db54938c96456db6 = x ']'
+ : tested/2.6.39.x
+ . ./ap-common
++ : osst...@xenbits.xen.org
+++ getconfig OsstestUpstream
+++ perl -e '
use Osstest;
readglobalconfig();
print $c{"OsstestUpstream"} or die $!;
'
++ :
++ : git://xenbits.xen.org/xen.git
++ : osst...@xenbits.xen.org:/home/xen/git/xen.git
++ : git://xenbits.xen.org/qemu-xen-traditional.git
++ : git://git.kernel.org
++ : git://git.kernel.org/pub/scm/linux/kernel/git
++ : git
++ : git://xenbits.xen.org/libvirt.git
++ : osst...@xenbits.xen.org:/home/xen/git/libvirt.git
++ : git://xenbits.xen.org/libvirt.git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : git
++ : git://xenbits.xen.org/osstest/rumprun.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/rumprun.git
++ : git://git.seabios.org/seabios.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/seabios.git
++ : git://xenbits.xen.org/osstest/seabios.git
++ : https://github.com/tianocore/edk2.git
++ : osst...@xenbits.xen.org:/home/xen/git/osstest/ovmf.git
++ : git://xenbits.xen.org/osstest/ovmf.git
++ : git://xenbits.xen.org/osstest/linux-firmware.git
++ : osst...@xenbits.xen.org:/home/osstest/ext/linux-firmware.git
++ : git://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
++ : osst...@xenbits.xen.org:/home/xen/git/linux-pvops.git
++ : git://xenbits.xen.org/linux-pvops.git
++ : tested/linux-3.14
++ : tested/linux

[Xen-devel] [PATCH 0/5] x86: further insn emulator improvements

2016-09-08 Thread Jan Beulich
These are really independent of

and I prefer them to be a separate series, but won't apply without that
one in place. The final two I decided to pick up from Mihai, as it seemed
natural for me to do the rebasing on top of the major earlier changes,
and as I'd like to get the original issue (certain Windows drivers using
these insns) dealt with in 4.8.

1: support UMIP
2: consolidate segment register handling
3: support RTM instructions
4: add support for {,v}movq xmm,xmm/m64
5: add support for {,v}movd {,x}mm,r/m32 and {,v}movq {,x}mm,r/m64

Signed-off-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 02/19] acpi/hvmloader: Collect processor and NUMA info in hvmloader

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> Changes in v3:
> * Constified acpi_numa's pointers
> * Constified acpi_config call parameter where possible

Thanks, but how about ...

> --- a/tools/firmware/hvmloader/acpi/build.c
> +++ b/tools/firmware/hvmloader/acpi/build.c
> @@ -70,18 +70,20 @@ static void set_checksum(
>  p[checksum_offset] = -sum;
>  }
>  
> -static struct acpi_20_madt *construct_madt(struct acpi_info *info)
> +static struct acpi_20_madt *construct_madt(const struct acpi_config *config,
> +   struct acpi_info *info)
>  {
>  struct acpi_20_madt   *madt;
>  struct acpi_20_madt_intsrcovr *intsrcovr;
>  struct acpi_20_madt_ioapic*io_apic;
>  struct acpi_20_madt_lapic *lapic;
> +struct hvm_info_table *hvminfo = config->hvminfo;

... this?

> --- a/tools/firmware/hvmloader/acpi/libacpi.h
> +++ b/tools/firmware/hvmloader/acpi/libacpi.h
> @@ -20,6 +20,8 @@
>  #ifndef __LIBACPI_H__
>  #define __LIBACPI_H__
>  
> +#include 

Why? struct xen_vmemrange doesn't get instantiated anywhere in
this header.

> @@ -49,6 +59,9 @@ struct acpi_config {
>  uint32_t length;
>  } pt;
>  
> +struct acpi_numa numa;
> +struct hvm_info_table *hvminfo;

And this cannot be const?

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 1/5] x86emul: support UMIP

2016-09-08 Thread Jan Beulich
To make this complete, also add support for SLDT and STR. Note that by
just looking at the guest CR4 bit, this is independent of actually
making available the UMIP feature to guests.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -182,7 +182,7 @@ static const opcode_desc_t opcode_table[
 
 static const opcode_desc_t twobyte_table[256] = {
 /* 0x00 - 0x07 */
-SrcMem16|ModRM, ImplicitOps|ModRM, ModRM, ModRM,
+ModRM, ImplicitOps|ModRM, ModRM, ModRM,
 0, ImplicitOps, ImplicitOps, ImplicitOps,
 /* 0x08 - 0x0F */
 ImplicitOps, ImplicitOps, 0, ImplicitOps,
@@ -421,6 +421,7 @@ typedef union {
 /* Control register flags. */
 #define CR0_PE(1<<0)
 #define CR4_TSD   (1<<2)
+#define CR4_UMIP  (1<<11)
 
 /* EFLAGS bit definitions. */
 #define EFLG_VIP  (1<<20)
@@ -1484,6 +1485,17 @@ static bool is_aligned(enum x86_segment
 return !((reg.base + offs) & (size - 1));
 }
 
+static bool is_umip(struct x86_emulate_ctxt *ctxt,
+const struct x86_emulate_ops *ops)
+{
+unsigned long cr4;
+
+/* Intentionally not using mode_ring0() here to avoid its fail_if(). */
+return get_cpl(ctxt, ops) > 0 &&
+   ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
+   (cr4 & CR4_UMIP);
+}
+
 /* Inject a software interrupt/exception, emulating if needed. */
 static int inject_swint(enum x86_swint_type type,
 uint8_t vector, uint8_t insn_len,
@@ -2051,10 +2063,20 @@ x86_decode(
 break;
 
 case ext_0f:
-case ext_0f3a:
-case ext_8f08:
-case ext_8f09:
-case ext_8f0a:
+switch ( b )
+{
+case 0x00: /* Grp6 */
+switch ( modrm_reg & 6 )
+{
+case 0:
+d |= DstMem | SrcImplicit | Mov;
+break;
+case 2: case 4:
+d |= SrcMem16;
+break;
+}
+break;
+}
 break;
 
 case ext_0f38:
@@ -2070,6 +2092,12 @@ x86_decode(
 }
 break;
 
+case ext_0f3a:
+case ext_8f08:
+case ext_8f09:
+case ext_8f0a:
+break;
+
 default:
 ASSERT_UNREACHABLE();
 }
@@ -4177,14 +4205,31 @@ x86_emulate(
 }
 break;
 
-case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
-fail_if((modrm_reg & 6) != 2);
+case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ {
+enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
+
+fail_if(modrm_reg & 4);
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
-generate_exception_if(!mode_ring0(), EXC_GP, 0);
-if ( (rc = load_seg((modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr,
-src.val, 0, NULL, ctxt, ops)) != 0 )
-goto done;
+if ( modrm_reg & 2 )
+{
+generate_exception_if(!mode_ring0(), EXC_GP, 0);
+if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
+goto done;
+}
+else
+{
+struct segment_register reg;
+
+generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
+fail_if(!ops->read_segment);
+if ( (rc = ops->read_segment(seg, ®, ctxt)) != 0 )
+goto done;
+dst.val = reg.sel;
+if ( dst.type == OP_MEM )
+dst.bytes = 2;
+}
 break;
+}
 
 case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ {
 struct segment_register reg;
@@ -4282,6 +4327,7 @@ x86_emulate(
 case 0: /* sgdt */
 case 1: /* sidt */
 generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
+generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
 fail_if(ops->read_segment == NULL);
 if ( (rc = ops->read_segment((modrm_reg & 1) ?
  x86_seg_idtr : x86_seg_gdtr,
@@ -4316,6 +4362,7 @@ x86_emulate(
 goto done;
 break;
 case 4: /* smsw */
+generate_exception_if(is_umip(ctxt, ops), EXC_GP, 0);
 ea.bytes = (ea.type == OP_MEM) ? 2 : op_bytes;
 dst = ea;
 fail_if(ops->read_cr == NULL);


x86emul: support UMIP

To make this complete, also add support for SLDT and STR. Note that by
just looking at the guest CR4 bit, this is independent of actually
making available the UMIP feature to guests.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -182,7 +182,7 @@ static const opcode_desc_t opcode_table[
 
 static const opcode_desc_t twobyte_table[256] = {
 /* 0x00 - 0x07 */
-SrcMem16|ModRM, ImplicitOps|ModRM, ModRM, ModRM,
+ModRM, ImplicitOps|ModRM, ModRM, ModRM,
 0, ImplicitOps

[Xen-devel] [PATCH 2/5] x86emul: consolidate segment register handling

2016-09-08 Thread Jan Beulich
Use a single set of variables throughout the huge switch() statement,
allowing to funnel SLDT/STR into the mov-from-sreg code path.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -2494,7 +2494,8 @@ x86_emulate(
 
 switch ( ctxt->opcode )
 {
-struct segment_register cs;
+enum x86_segment seg;
+struct segment_register cs, sreg;
 
 case 0x00 ... 0x05: add: /* add */
 emulate_2op_SrcV("add", src, dst, _regs.eflags);
@@ -2530,22 +2531,20 @@ x86_emulate(
 dst.type = OP_NONE;
 break;
 
-case 0x06: /* push %%es */ {
-struct segment_register reg;
+case 0x06: /* push %%es */
 src.val = x86_seg_es;
 push_seg:
 generate_exception_if(mode_64bit() && !ext, EXC_UD, -1);
 fail_if(ops->read_segment == NULL);
-if ( (rc = ops->read_segment(src.val, ®, ctxt)) != 0 )
+if ( (rc = ops->read_segment(src.val, &sreg, ctxt)) != 0 )
 goto done;
 /* 64-bit mode: PUSH defaults to a 64-bit operand. */
 if ( mode_64bit() && (op_bytes == 4) )
 op_bytes = 8;
 if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-  ®.sel, op_bytes, ctxt)) != 0 )
+  &sreg.sel, op_bytes, ctxt)) != 0 )
 goto done;
 break;
-}
 
 case 0x07: /* pop %%es */
 src.val = x86_seg_es;
@@ -2861,21 +2860,20 @@ x86_emulate(
 dst.val = src.val;
 break;
 
-case 0x8c: /* mov Sreg,r/m */ {
-struct segment_register reg;
-enum x86_segment seg = decode_segment(modrm_reg);
+case 0x8c: /* mov Sreg,r/m */
+seg = decode_segment(modrm_reg);
 generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
+store_seg:
 fail_if(ops->read_segment == NULL);
-if ( (rc = ops->read_segment(seg, ®, ctxt)) != 0 )
+if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != 0 )
 goto done;
-dst.val = reg.sel;
+dst.val = sreg.sel;
 if ( dst.type == OP_MEM )
 dst.bytes = 2;
 break;
-}
 
-case 0x8e: /* mov r/m,Sreg */ {
-enum x86_segment seg = decode_segment(modrm_reg);
+case 0x8e: /* mov r/m,Sreg */
+seg = decode_segment(modrm_reg);
 generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
 generate_exception_if(seg == x86_seg_cs, EXC_UD, -1);
 if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
@@ -2884,7 +2882,6 @@ x86_emulate(
 ctxt->retire.flags.mov_ss = 1;
 dst.type = OP_NONE;
 break;
-}
 
 case 0x8d: /* lea */
 generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
@@ -2941,17 +2938,15 @@ x86_emulate(
 }
 break;
 
-case 0x9a: /* call (far, absolute) */ {
-struct segment_register reg;
-
+case 0x9a: /* call (far, absolute) */
 ASSERT(!mode_64bit());
 fail_if(ops->read_segment == NULL);
 
-if ( (rc = ops->read_segment(x86_seg_cs, ®, ctxt)) ||
+if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
  (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
  (validate_far_branch(&cs, imm1),
   rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-  ®.sel, op_bytes, ctxt)) ||
+  &sreg.sel, op_bytes, ctxt)) ||
  (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
   &_regs.eip, op_bytes, ctxt)) ||
  (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
@@ -2959,7 +2954,6 @@ x86_emulate(
 
 _regs.eip = imm1;
 break;
-}
 
 case 0x9b:  /* wait/fwait */
 host_and_vcpu_must_have(fpu);
@@ -4178,13 +4172,12 @@ x86_emulate(
 
 if ( (modrm_reg & 7) == 3 ) /* call */
 {
-struct segment_register reg;
 fail_if(ops->read_segment == NULL);
-if ( (rc = ops->read_segment(x86_seg_cs, ®, ctxt)) ||
+if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
  (rc = load_seg(x86_seg_cs, sel, 0, &cs, ctxt, ops)) ||
  (validate_far_branch(&cs, src.val),
   rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
-  ®.sel, op_bytes, ctxt)) ||
+  &sreg.sel, op_bytes, ctxt)) ||
  (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
   &_regs.eip, op_bytes, ctxt)) ||
  (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
@@ -4205,34 +4198,24 @@ x86_emulate(
 }
 break;
 
-case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */ {
-enum x86_segment seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
-
+case X86EMUL_OPC(0x0f, 0x

[Xen-devel] [PATCH 3/5] x86emul: support RTM instructions

2016-09-08 Thread Jan Beulich
Minimal emulation: XBEGIN aborts right away, hence
- XABORT is just a no-op,
- XEND always raises #GP,
- XTEST always signals neither RTM nor HLE are active.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1172,6 +1172,8 @@ static bool_t vcpu_has(
 #define vcpu_has_clflush() vcpu_has(   1, EDX, 19, ctxt, ops)
 #define vcpu_has_lzcnt() vcpu_has(0x8001, ECX,  5, ctxt, ops)
 #define vcpu_has_bmi1()  vcpu_has(0x0007, EBX,  3, ctxt, ops)
+#define vcpu_has_hle()   vcpu_has(0x0007, EBX,  4, ctxt, ops)
+#define vcpu_has_rtm()   vcpu_has(0x0007, EBX, 11, ctxt, ops)
 
 #define vcpu_must_have(leaf, reg, bit) \
 generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
@@ -2852,7 +2854,18 @@ x86_emulate(
 lock_prefix = 1;
 break;
 
-case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
+case 0xc6: /* Grp11: mov / xabort */
+case 0xc7: /* Grp11: mov / xbegin */
+if ( modrm == 0xf8 && vcpu_has_rtm() )
+{
+if ( b & 1 )
+{
+jmp_rel((int32_t)src.val);
+_regs.eax = 0;
+}
+dst.type = OP_NONE;
+break;
+}
 generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
 case 0x88 ... 0x8b: /* mov */
 case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
@@ -4246,6 +4259,17 @@ x86_emulate(
 goto done;
 goto no_writeback;
 
+case 0xd5: /* xend */
+generate_exception_if(vcpu_has_rtm() && !vex.pfx, EXC_GP, 0);
+break;
+
+case 0xd6: /* xtest */
+if ( (!vcpu_has_rtm() && !vcpu_has_hle()) || vex.pfx )
+break;
+/* Neither HLE nor RTM can be active when we get here. */
+_regs.eflags |= EFLG_ZF;
+goto no_writeback;
+
 case 0xdf: /* invlpga */
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 generate_exception_if(!mode_ring0(), EXC_GP, 0);



x86emul: support RTM instructions

Minimal emulation: XBEGIN aborts right away, hence
- XABORT is just a no-op,
- XEND always raises #GP,
- XTEST always signals neither RTM nor HLE are active.

Signed-off-by: Jan Beulich 

--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1172,6 +1172,8 @@ static bool_t vcpu_has(
 #define vcpu_has_clflush() vcpu_has(   1, EDX, 19, ctxt, ops)
 #define vcpu_has_lzcnt() vcpu_has(0x8001, ECX,  5, ctxt, ops)
 #define vcpu_has_bmi1()  vcpu_has(0x0007, EBX,  3, ctxt, ops)
+#define vcpu_has_hle()   vcpu_has(0x0007, EBX,  4, ctxt, ops)
+#define vcpu_has_rtm()   vcpu_has(0x0007, EBX, 11, ctxt, ops)
 
 #define vcpu_must_have(leaf, reg, bit) \
 generate_exception_if(!vcpu_has(leaf, reg, bit, ctxt, ops), EXC_UD, -1)
@@ -2852,7 +2854,18 @@ x86_emulate(
 lock_prefix = 1;
 break;
 
-case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
+case 0xc6: /* Grp11: mov / xabort */
+case 0xc7: /* Grp11: mov / xbegin */
+if ( modrm == 0xf8 && vcpu_has_rtm() )
+{
+if ( b & 1 )
+{
+jmp_rel((int32_t)src.val);
+_regs.eax = 0;
+}
+dst.type = OP_NONE;
+break;
+}
 generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
 case 0x88 ... 0x8b: /* mov */
 case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
@@ -4246,6 +4259,17 @@ x86_emulate(
 goto done;
 goto no_writeback;
 
+case 0xd5: /* xend */
+generate_exception_if(vcpu_has_rtm() && !vex.pfx, EXC_GP, 0);
+break;
+
+case 0xd6: /* xtest */
+if ( (!vcpu_has_rtm() && !vcpu_has_hle()) || vex.pfx )
+break;
+/* Neither HLE nor RTM can be active when we get here. */
+_regs.eflags |= EFLG_ZF;
+goto no_writeback;
+
 case 0xdf: /* invlpga */
 generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
 generate_exception_if(!mode_ring0(), EXC_GP, 0);
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 5/5] x86/emulate: add support for {, v}movd {, x}mm, r/m32 and {, v}movq {, x}mm, r/m64

2016-09-08 Thread Jan Beulich
From: Zhi Wang 

Found that Windows driver was using a SSE2 instruction MOVD.

Signed-off-by: Zhi Wang 
Signed-off-by: Mihai Donțu 
Signed-off-by: Jan Beulich 
---
v4: Re-base on decoding changes. Address Andrew's and my own review
comments (where still applicable). #UD when vex.l is set. Various
adjustments to the test tool change.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -973,6 +973,296 @@ int main(int argc, char **argv)
 else
 printf("skipped\n");
 
+printf("%-40s", "Testing movd %%mm3,32(%%ecx)...");
+if ( stack_exec && cpu_has_mmx )
+{
+decl_insn(movd_to_mem);
+
+asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+   put_insn(movd_to_mem, "movd %%mm3, 32(%0)")
+   :: "c" (NULL) );
+
+memset(res, 0xbd, 64);
+set_insn(movd_to_mem);
+regs.ecx = (unsigned long)res;
+regs.edx = 0;
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem) ||
+ res[8] + 1 ||
+ memcmp(res, res + 9, 28) ||
+ memcmp(res, res + 6, 8) )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
+printf("%-40s", "Testing movd %%xmm2,32(%%edx)...");
+if ( stack_exec && cpu_has_sse2 )
+{
+decl_insn(movd_to_mem2);
+
+asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+   put_insn(movd_to_mem2, "movd %%xmm2, 32(%0)")
+   :: "d" (NULL) );
+
+memset(res, 0xdb, 64);
+set_insn(movd_to_mem2);
+regs.ecx = 0;
+regs.edx = (unsigned long)res;
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(movd_to_mem2) ||
+ res[8] + 1 ||
+ memcmp(res, res + 9, 28) ||
+ memcmp(res, res + 6, 8) )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
+printf("%-40s", "Testing vmovd %%xmm1,32(%%ecx)...");
+if ( stack_exec && cpu_has_avx )
+{
+decl_insn(vmovd_to_mem);
+
+asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+   put_insn(vmovd_to_mem, "vmovd %%xmm1, 32(%0)")
+   :: "c" (NULL) );
+
+memset(res, 0xbd, 64);
+set_insn(vmovd_to_mem);
+regs.ecx = (unsigned long)res;
+regs.edx = 0;
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(vmovd_to_mem) ||
+ res[8] + 1 ||
+ memcmp(res, res + 9, 28) ||
+ memcmp(res, res + 6, 8) )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
+printf("%-40s", "Testing movd %%mm3,%%ebx...");
+if ( stack_exec && cpu_has_mmx )
+{
+decl_insn(movd_to_reg);
+
+/*
+ * Intentionally not specifying "b" as an input (or even output) here
+ * to not keep the compiler from using the variable, which in turn
+ * allows noticing whether the emulator touches the actual register
+ * instead of the regs field.
+ */
+asm volatile ( "pcmpeqb %%mm3, %%mm3\n"
+   put_insn(movd_to_reg, "movd %%mm3, %%ebx")
+   :: );
+
+set_insn(movd_to_reg);
+#ifdef __x86_64__
+regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+regs.ebx = 0xbdbdbdbdUL;
+#endif
+rc = x86_emulate(&ctxt, &emulops);
+if ( (rc != X86EMUL_OKAY) || !check_eip(movd_to_reg) ||
+ regs.ebx != 0xffffffff )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
+printf("%-40s", "Testing movd %%xmm2,%%ebx...");
+if ( stack_exec && cpu_has_sse2 )
+{
+decl_insn(movd_to_reg2);
+
+/* See comment next to movd above. */
+asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n"
+   put_insn(movd_to_reg2, "movd %%xmm2, %%ebx")
+   :: );
+
+set_insn(movd_to_reg2);
+#ifdef __x86_64__
+regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+regs.ebx = 0xbdbdbdbdUL;
+#endif
+rc = x86_emulate(&ctxt, &emulops);
+if ( (rc != X86EMUL_OKAY) || !check_eip(movd_to_reg2) ||
+ regs.ebx != 0xffffffff )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
+printf("%-40s", "Testing vmovd %%xmm1,%%ebx...");
+if ( stack_exec && cpu_has_avx )
+{
+decl_insn(vmovd_to_reg);
+
+/* See comment next to movd above. */
+asm volatile ( "pcmpeqb %%xmm1, %%xmm1\n"
+   put_insn(vmovd_to_reg, "vmovd %%xmm1, %%ebx")
+   :: );
+
+set_insn(vmovd_to_reg);
+#ifdef __x86_64__
+regs.rbx = 0xbdbdbdbdbdbdbdbdUL;
+#else
+regs.ebx = 0xbdbdbdbdUL;
+#endif
+rc = x86_emulate(&ctxt, &emulops)

[Xen-devel] [PATCH 4/5] x86/emulate: add support for {, v}movq xmm, xmm/m64

2016-09-08 Thread Jan Beulich
From: Mihai Donțu 

Signed-off-by: Mihai Donțu 
Signed-off-by: Jan Beulich 
---
v4: Re-base on decoding changes. Address my own review comments (where
still applicable). #UD when vex.l is set. Various adjustments to
the test tool change.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -713,6 +713,54 @@ int main(int argc, char **argv)
 else
 printf("skipped\n");
 
+printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
+if ( stack_exec && cpu_has_sse2 )
+{
+decl_insn(movq_to_mem2);
+
+asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
+   put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
+   :: "c" (NULL) );
+
+memset(res, 0xbd, 64);
+set_insn(movq_to_mem2);
+regs.ecx = (unsigned long)res;
+regs.edx = 0;
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
+ *((uint64_t *)res + 4) ||
+ memcmp(res, res + 10, 24) ||
+ memcmp(res, res + 6, 8) )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
+printf("%-40s", "Testing vmovq %%xmm1,32(%%edx)...");
+if ( stack_exec && cpu_has_avx )
+{
+decl_insn(vmovq_to_mem);
+
+asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
+   put_insn(vmovq_to_mem, "vmovq %%xmm1, 32(%0)")
+   :: "d" (NULL) );
+
+memset(res, 0xdb, 64);
+set_insn(vmovq_to_mem);
+regs.ecx = 0;
+regs.edx = (unsigned long)res;
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem) ||
+ *((uint64_t *)res + 4) ||
+ memcmp(res, res + 10, 24) ||
+ memcmp(res, res + 6, 8) )
+goto fail;
+printf("okay\n");
+}
+else
+printf("skipped\n");
+
 printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
 if ( stack_exec && cpu_has_sse2 )
 {
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -269,7 +269,7 @@ static const opcode_desc_t twobyte_table
 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 /* 0xD0 - 0xDF */
-ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
+ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM, ModRM,
 ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
 /* 0xE0 - 0xEF */
 ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
@@ -4779,6 +4779,8 @@ x86_emulate(
 case X86EMUL_OPC_F3(0x0f, 0x7f): /* movdqu xmm,xmm/m128 */
 case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */
  /* vmovdqu ymm,ymm/m256 */
+case X86EMUL_OPC_66(0x0f, 0xd6): /* movq xmm,xmm/m64 */
+case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
 {
 uint8_t *buf = get_stub(stub);
 struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
@@ -4796,7 +4798,8 @@ x86_emulate(
 case vex_66:
 case vex_f3:
 host_and_vcpu_must_have(sse2);
-buf[0] = 0x66; /* movdqa */
+/* Converting movdqu to movdqa here: Our buffer is aligned. */
+buf[0] = 0x66;
 get_fpu(X86EMUL_FPU_xmm, &fic);
 ea.bytes = 16;
 break;
@@ -4819,6 +4822,11 @@ x86_emulate(
 get_fpu(X86EMUL_FPU_ymm, &fic);
 ea.bytes = 16 << vex.l;
 }
+if ( b == 0xd6 )
+{
+generate_exception_if(vex.l, EXC_UD, -1);
+ea.bytes = 8;
+}
 if ( ea.type == OP_MEM )
 {
 generate_exception_if((vex.pfx == vex_66) &&


x86/emulate: add support for {,v}movq xmm,xmm/m64

From: Mihai Donțu 

Signed-off-by: Mihai Donțu 
Signed-off-by: Jan Beulich 
---
v4: Re-base on decoding changes. Address my own review comments (where
still applicable). #UD when vex.l is set. Various adjustments to
the test tool change.

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -713,6 +713,54 @@ int main(int argc, char **argv)
 else
 printf("skipped\n");
 
+printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
+if ( stack_exec && cpu_has_sse2 )
+{
+decl_insn(movq_to_mem2);
+
+asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
+   put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
+   :: "c" (NULL) );
+
+memset(res, 0xbd, 64);
+set_insn(movq_to_mem2);
+regs.ecx = (unsigned long)res;
+regs.edx = 0;
+rc = x86_emulate(&ctxt, &emulops);
+if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
+ *((uint64_t *)res + 4) ||
+ memcmp(res, res + 1

Re: [Xen-devel] [PATCH v3 06/19] acpi/hvmloader: Replace mem_alloc() and virt_to_phys() with memory ops

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> Components that wish to use ACPI builder will need to provide their own
> mem_alloc() and virt_to_phys() routines. Pointers to these routines will
> be passed to the builder as memory ops.
> 
> Signed-off-by: Boris Ostrovsky 

Acked-by: Jan Beulich 

Albeit I'd prefer if ...

> --- a/tools/firmware/hvmloader/util.c
> +++ b/tools/firmware/hvmloader/util.c
> @@ -866,10 +866,27 @@ static uint8_t battery_port_exists(void)
>  return (inb(0x88) == 0x1F);
>  }
>  
> +static unsigned long acpi_v2p(struct acpi_ctxt *ctxt, void *v)
> +{
> +return virt_to_phys(v);
> +}
> +
> +static void *acpi_mem_alloc(struct acpi_ctxt *ctxt,
> +uint32_t size, uint32_t align)
> +{
> +return mem_alloc(size, align);
> +}
> +
> +static void acpi_mem_free(struct acpi_ctxt *ctxt,
> +  void *v, uint32_t size)
> +{
> +}

... the body of this function was actually a brief comment,
clarifying why this does nothing.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 4/5] x86/emulate: add support for {, v}movq xmm, xmm/m64

2016-09-08 Thread Mihai Donțu
On Thursday 08 September 2016 07:45:19 Jan Beulich wrote:
> From: Mihai Donțu 
> 
> Signed-off-by: Mihai Donțu 
> Signed-off-by: Jan Beulich 
> ---
> v4: Re-base on decoding changes. Address my own review comments (where
> still applicable). #UD when vex.l is set. Various adjustments to
> the test tool change.

Thank you! They were in my queue for too long and I was struggling to
find a window of time to get them in shape.

> --- a/tools/tests/x86_emulator/test_x86_emulator.c
> +++ b/tools/tests/x86_emulator/test_x86_emulator.c
> @@ -713,6 +713,54 @@ int main(int argc, char **argv)
>  else
>  printf("skipped\n");
>  
> +printf("%-40s", "Testing movq %%xmm0,32(%%ecx)...");
> +if ( stack_exec && cpu_has_sse2 )
> +{
> +decl_insn(movq_to_mem2);
> +
> +asm volatile ( "pcmpgtb %%xmm0, %%xmm0\n"
> +   put_insn(movq_to_mem2, "movq %%xmm0, 32(%0)")
> +   :: "c" (NULL) );
> +
> +memset(res, 0xbd, 64);
> +set_insn(movq_to_mem2);
> +regs.ecx = (unsigned long)res;
> +regs.edx = 0;
> +rc = x86_emulate(&ctxt, &emulops);
> +if ( rc != X86EMUL_OKAY || !check_eip(movq_to_mem2) ||
> + *((uint64_t *)res + 4) ||
> + memcmp(res, res + 10, 24) ||
> + memcmp(res, res + 6, 8) )
> +goto fail;
> +printf("okay\n");
> +}
> +else
> +printf("skipped\n");
> +
> +printf("%-40s", "Testing vmovq %%xmm1,32(%%edx)...");
> +if ( stack_exec && cpu_has_avx )
> +{
> +decl_insn(vmovq_to_mem);
> +
> +asm volatile ( "pcmpgtb %%xmm1, %%xmm1\n"
> +   put_insn(vmovq_to_mem, "vmovq %%xmm1, 32(%0)")
> +   :: "d" (NULL) );
> +
> +memset(res, 0xdb, 64);
> +set_insn(vmovq_to_mem);
> +regs.ecx = 0;
> +regs.edx = (unsigned long)res;
> +rc = x86_emulate(&ctxt, &emulops);
> +if ( rc != X86EMUL_OKAY || !check_eip(vmovq_to_mem) ||
> + *((uint64_t *)res + 4) ||
> + memcmp(res, res + 10, 24) ||
> + memcmp(res, res + 6, 8) )
> +goto fail;
> +printf("okay\n");
> +}
> +else
> +printf("skipped\n");
> +
>  printf("%-40s", "Testing movdqu %xmm2,(%ecx)...");
>  if ( stack_exec && cpu_has_sse2 )
>  {
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -269,7 +269,7 @@ static const opcode_desc_t twobyte_table
>  ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
>  ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
>  /* 0xD0 - 0xDF */
> -ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
> +ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM, ModRM,
>  ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM,
>  /* 0xE0 - 0xEF */
>  ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ModRM, ImplicitOps|ModRM,
> @@ -4779,6 +4779,8 @@ x86_emulate(
>  case X86EMUL_OPC_F3(0x0f, 0x7f): /* movdqu xmm,xmm/m128 */
>  case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu xmm,xmm/m128 */
>   /* vmovdqu ymm,ymm/m256 */
> +case X86EMUL_OPC_66(0x0f, 0xd6): /* movq xmm,xmm/m64 */
> +case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
>  {
>  uint8_t *buf = get_stub(stub);
>  struct fpu_insn_ctxt fic = { .insn_bytes = 5 };
> @@ -4796,7 +4798,8 @@ x86_emulate(
>  case vex_66:
>  case vex_f3:
>  host_and_vcpu_must_have(sse2);
> -buf[0] = 0x66; /* movdqa */
> +/* Converting movdqu to movdqa here: Our buffer is aligned. 
> */
> +buf[0] = 0x66;
>  get_fpu(X86EMUL_FPU_xmm, &fic);
>  ea.bytes = 16;
>  break;
> @@ -4819,6 +4822,11 @@ x86_emulate(
>  get_fpu(X86EMUL_FPU_ymm, &fic);
>  ea.bytes = 16 << vex.l;
>  }
> +if ( b == 0xd6 )
> +{
> +generate_exception_if(vex.l, EXC_UD, -1);
> +ea.bytes = 8;
> +}
>  if ( ea.type == OP_MEM )
>  {
>  generate_exception_if((vex.pfx == vex_66) &&
> 

-- 
Mihai DONȚU


pgpGMsA4XM4H2.pgp
Description: OpenPGP digital signature
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 08/19] acpi/hvmloader: Link ACPI object files directly

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> ACPI sources will be available to various component which will build
> them according to their own rules. ACPI's Makefile will only generate
> necessary source files.
> 
> Signed-off-by: Boris Ostrovsky 

Acked-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 09/19] acpi/hvmloader: Include file/paths adjustments

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> In preparation for moving acpi sources into generally available
> libacpi:
> 
> 1. Pass IOAPIC/LAPIC/PCI mask values via struct acpi_config
> 2. Modify include files search paths to point to acpi directory
> 3. Macro-ise include file for build.c that defines various
>utilities used by that file. Users of libacpi will be expected
>to define this macro when compiling build.c
> 
> Signed-off-by: Boris Ostrovsky 
> ---
> Changes in v3:
> * Instead of adding x86.h pass APIC/IOAPIC info via acpi_config parameter.
> * Use <> instead of "" for include directive
> 
> 
>  tools/firmware/hvmloader/Makefile |  3 ++-
>  tools/firmware/hvmloader/acpi/README  | 16 
>  tools/firmware/hvmloader/acpi/build.c | 19 ++-
>  tools/firmware/hvmloader/acpi/libacpi.h   |  7 +++
>  tools/firmware/hvmloader/hvmloader.c  |  2 +-
>  tools/firmware/hvmloader/rombios.c|  2 +-
>  tools/firmware/hvmloader/seabios.c|  5 +++--
>  tools/firmware/hvmloader/util.c   | 15 +--
>  tools/firmware/rombios/32bit/Makefile |  2 +-
>  tools/firmware/rombios/32bit/tcgbios/Makefile |  2 +-
>  tools/firmware/rombios/32bit/util.h   |  2 +-
>  11 files changed, 52 insertions(+), 23 deletions(-)
> 
> diff --git a/tools/firmware/hvmloader/Makefile 
> b/tools/firmware/hvmloader/Makefile
> index b6c5b83..77e95f1 100644
> --- a/tools/firmware/hvmloader/Makefile
> +++ b/tools/firmware/hvmloader/Makefile
> @@ -76,7 +76,8 @@ smbios.o: CFLAGS += 
> -D__SMBIOS_DATE__="\"$(SMBIOS_REL_DATE)\""
>  ACPI_PATH = acpi
>  ACPI_FILES = dsdt_anycpu.c dsdt_15cpu.c dsdt_anycpu_qemu_xen.c
>  ACPI_OBJS = $(patsubst %.c,%.o,$(ACPI_FILES)) build.o static_tables.o
> -$(ACPI_OBJS): CFLAGS += -I$(ACPI_PATH) -I.
> +$(ACPI_OBJS): CFLAGS += -I. -DLIBACPI_STDUTILS=\"../util.h\"
> +CFLAGS += -I$(ACPI_PATH)
>  vpath build.c $(ACPI_PATH)
>  vpath static_tables.c $(ACPI_PATH)
>  OBJS += $(ACPI_OBJS)
> diff --git a/tools/firmware/hvmloader/acpi/README 
> b/tools/firmware/hvmloader/acpi/README
> index 210d5ba..2b9d6e1 100644
> --- a/tools/firmware/hvmloader/acpi/README
> +++ b/tools/firmware/hvmloader/acpi/README
> @@ -1,11 +1,19 @@
> -ACPI Table for domain firmware
> +ACPI builder for domain firmware
>  
>  
> -INSTALL
> +BUILDING ACPI
>  -
> -Simply make is OK.
> -# make 
> +Users of ACPI builder are expected to provide an include file that makes 
> available
> +the following:
> +* strncpy
> +* printf
> +* NULL
> +* test_bit
> +* offsetof
>  
> +When compiling build.c, the name of this include file should be given to
> +compiler as -DLIBACPI_STDUTILS=\"\". See 
> tools/firmware/hvmloader/Makefile
> +for an example.
>  
>  Note on DSDT Table
>  --
> diff --git a/tools/firmware/hvmloader/acpi/build.c 
> b/tools/firmware/hvmloader/acpi/build.c
> index 2098920..1cd640d 100644
> --- a/tools/firmware/hvmloader/acpi/build.c
> +++ b/tools/firmware/hvmloader/acpi/build.c
> @@ -13,15 +13,13 @@
>   * GNU Lesser General Public License for more details.
>   */
>  
> +#include LIBACPI_STDUTILS
>  #include "acpi2_0.h"
>  #include "libacpi.h"
>  #include "ssdt_s3.h"
>  #include "ssdt_s4.h"
>  #include "ssdt_tpm.h"
>  #include "ssdt_pm.h"
> -#include "../config.h"
> -#include "../util.h"
> -#include "../vnuma.h"
>  #include 
>  #include 
>  
> @@ -81,6 +79,9 @@ static struct acpi_20_madt *construct_madt(struct acpi_ctxt 
> *ctxt,
>  struct hvm_info_table *hvminfo = config->hvminfo;
>  int i, sz;
>  
> +if ( config->lapic_id == NULL )
> +return NULL;
> +
>  sz  = sizeof(struct acpi_20_madt);
>  sz += sizeof(struct acpi_20_madt_intsrcovr) * 16;
>  sz += sizeof(struct acpi_20_madt_ioapic);
> @@ -97,7 +98,7 @@ static struct acpi_20_madt *construct_madt(struct acpi_ctxt 
> *ctxt,
>  madt->header.oem_revision = ACPI_OEM_REVISION;
>  madt->header.creator_id   = ACPI_CREATOR_ID;
>  madt->header.creator_revision = ACPI_CREATOR_REVISION;
> -madt->lapic_addr = LAPIC_BASE_ADDRESS;
> +madt->lapic_addr = config->lapic_base_address;
>  madt->flags  = ACPI_PCAT_COMPAT;
>  
>  if ( config->table_flags & ACPI_HAS_IOAPIC )
> @@ -116,7 +117,7 @@ static struct acpi_20_madt *construct_madt(struct 
> acpi_ctxt *ctxt,
>  intsrcovr->gsi= 2;
>  intsrcovr->flags  = 0x0;
>  }
> -else if ( PCI_ISA_IRQ_MASK & (1U << i) )
> +else if ( config->pci_isa_irq_mask & (1U << i) )
>  {
>  /* PCI: active-low level-triggered. */
>  intsrcovr->gsi= i;
> @@ -135,8 +136,8 @@ static struct acpi_20_madt *construct_madt(struct 
> acpi_ctxt *ctxt,
>  memset(io_apic, 0, sizeof(*io_apic));
>  io_apic->type= ACPI_IO_APIC;
>  io_apic->length  = sizeof(*io_apic);
> -io_apic->ioapic_id   = IOAPIC_ID;
> -io_apic->

Re: [Xen-devel] [PATCH v2] xen/pciback: support driver_override

2016-09-08 Thread Boris Ostrovsky
On 09/02/2016 08:30 AM, Juergen Gross wrote:
> Support the driver_override scheme introduced with commit 782a985d7af2
> ("PCI: Introduce new device binding path using pci_dev.driver_override")
>
> As pcistub_probe() is called for all devices (it has to check for a
> match based on the slot address rather than device type) it has to
> check for driver_override set to "pciback" itself.
>
> Signed-off-by: Juergen Gross 
> ---
> V2: removed now unused label
> ---
>  drivers/xen/xen-pciback/pci_stub.c | 16 ++--
>  1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/xen/xen-pciback/pci_stub.c 
> b/drivers/xen/xen-pciback/pci_stub.c
> index 258b7c3..85c28f7 100644
> --- a/drivers/xen/xen-pciback/pci_stub.c
> +++ b/drivers/xen/xen-pciback/pci_stub.c
> @@ -25,6 +25,8 @@
>  #include "conf_space.h"
>  #include "conf_space_quirks.h"
>  
> +#define PCISTUB_DRIVER_NAME "pciback"
> +
>  static char *pci_devs_to_hide;
>  wait_queue_head_t xen_pcibk_aer_wait_queue;
>  /*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
> @@ -529,16 +531,18 @@ static int pcistub_probe(struct pci_dev *dev, const 
> struct pci_device_id *id)
>   "don't have a normal (0) or bridge (1) "
>   "header type!\n");
>   err = -ENODEV;
> - goto out;
>   }
>  
> + } else if (!dev->driver_override ||
> +strcmp(dev->driver_override, PCISTUB_DRIVER_NAME))
> + /* Didn't find the device */
> + err = -ENODEV;
> +
> + if (!err) {
>   dev_info(&dev->dev, "seizing device\n");
>   err = pcistub_seize(dev);
> - } else
> - /* Didn't find the device */
> - err = -ENODEV;
> + }

Should devices with pciback override be displayed in
/sys/bus/pci/drivers/pciback/slots? If they should then they need to be
either added to pcistub_device_ids or kept on some other list.

Also, do you think checking override might better be done first, before
testing for ID match?

-boris


>  
> -out:
>   return err;
>  }
>  
> @@ -945,7 +949,7 @@ static const struct pci_error_handlers 
> xen_pcibk_error_handler = {
>  static struct pci_driver xen_pcibk_pci_driver = {
>   /* The name should be xen_pciback, but until the tools are updated
>* we will keep it as pciback. */
> - .name = "pciback",
> + .name = PCISTUB_DRIVER_NAME,
>   .id_table = pcistub_ids,
>   .probe = pcistub_probe,
>   .remove = pcistub_remove,



___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 13/19] acpi: Makefile should better tolerate interrupts

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> Intermediate stages of building a target should be made with
> temporary files that are copied to final target in the end.
> 
> Signed-off-by: Boris Ostrovsky 
> ---
> New in v3

Ah, here we go.

> --- a/tools/libacpi/Makefile
> +++ b/tools/libacpi/Makefile
> @@ -21,38 +21,45 @@ MK_DSDT = $(ACPI_BUILD_DIR)/mk_dsdt
>  C_SRC = $(addprefix $(ACPI_BUILD_DIR)/, dsdt_anycpu.c dsdt_15cpu.c  
> dsdt_anycpu_qemu_xen.c dsdt_pvh.c)
>  H_SRC = $(addprefix $(ACPI_BUILD_DIR)/, ssdt_s3.h ssdt_s4.h ssdt_pm.h 
> ssdt_tpm.h)
>  
> +ifeq ($(subst all,,$(MAKECMDGOALS)),)
> +TDIR := $(shell mktemp -d --tmpdir=$(TMPDIR) tmp_XX)
> +endif

How is this (or really the rules using this directory) supposed to work
when something other than "all" gets built?

>  vpath iasl $(PATH)
>  all: $(C_SRC) $(H_SRC)
> + rm -fr $(TDIR)

And how is the temporary directory going to get cleaned up when
interrupting make? I think you really should use a subdirectory
underneath the build directory, which then can stay there until
"make clean". And then you can also use mv instead of cp below,
or even move-if-changed.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [xen-unstable test] 100789: regressions - FAIL [and 2 more messages]

2016-09-08 Thread Ian Jackson
Wei Liu writes ("Re: [Xen-devel] [xen-unstable test] 100789: regressions - 
FAIL"):
> I see three ways to move this forward.
>
> 3. Retire these two tests.

Do we expect users still to want VHD support?  We still allegedly
support VHD for guests.  So we shouldn't retire these tests unless we
are dropping VHD support entirely.

AFAICT users who want VHD support need to be able to create images
etc.

> 2. Install blktap-utils shipped in Debian (available from Wheezy
>onwards), the main difficulty would be the package depends on a dkms
>package that seems to require building with kernel header when
>installing.

According to Debian, we (Xen) are upstream for this package.  It makes
no sense for osstest to install something from Debian which we have
deleted upstream!

> 1. Resurrect vhd-util from blktap2.

What is wrong with this plan ?

> In the meantime, if we want to avoid blocking xen-unstable for too long,
> we might want to force push.

If that's a consideration, we should be considering a revert, not a
force push.

Andrew Cooper writes ("Re: [Xen-devel] [xen-unstable test] 100789: regressions 
- FAIL"):
> +1 to a force push for now.  There are quite a few changes currently
> blocked.

IMO that is not a good reason for a force push.

Wei Liu writes ("Re: [Xen-devel] [xen-unstable test] 100789: regressions - 
FAIL"):
> 4. Provide a pre-made vhd image.
> 
> vhd-util create disk.vhd -s 1 -> 24K in actual size.

This is no good because

1. disk.vhd is a file for which we would have deleted the source code!

2. Users need the ability to create images.

Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 2/4] x86/segment: Bounds check accesses to emulation ctxt->seg_reg[]

2016-09-08 Thread Andrew Cooper
HVM HAP codepaths have space for all segment registers in the seg_reg[]
cache (with x86_seg_none still risking an array overrun), while the shadow
codepaths only have space for the user segments.

Range check the input segment of *_get_seg_reg() against the size of the array
used to cache the results, to avoid overruns in the case that the callers
don't filter their input suitably.

Subsume the is_x86_user_segment(seg) checks from the shadow code, which were
an incomplete attempt at range checking, and are now superseded.  Make
hvm_get_seg_reg() static, as it is not used outside of shadow/common.c

No functional change, but far easier to reason that no overflow is possible.

Reported-by: Andrew Cooper 
Signed-off-by: Andrew Cooper 
Acked-by: Tim Deegan 
Acked-by: Jan Beulich 
---
 xen/arch/x86/hvm/emulate.c| 16 
 xen/arch/x86/mm/shadow/common.c   | 27 ++-
 xen/arch/x86/mm/shadow/private.h  |  2 --
 xen/include/asm-x86/hvm/emulate.h |  1 +
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index c55ad7b..0eb7a4d 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -535,6 +535,8 @@ static int hvmemul_virtual_to_linear(
 *reps = min_t(unsigned long, *reps, max_reps);
 
 reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+if ( IS_ERR(reg) )
+return -PTR_ERR(reg);
 
 if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
 {
@@ -1430,6 +1432,10 @@ static int hvmemul_read_segment(
 struct hvm_emulate_ctxt *hvmemul_ctxt =
 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
 struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
+
+if ( IS_ERR(sreg) )
+ return -PTR_ERR(sreg);
+
 memcpy(reg, sreg, sizeof(struct segment_register));
 return X86EMUL_OKAY;
 }
@@ -1443,6 +1449,9 @@ static int hvmemul_write_segment(
 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
 struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
 
+if ( IS_ERR(sreg) )
+ return -PTR_ERR(sreg);
+
 memcpy(sreg, reg, sizeof(struct segment_register));
 __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
 
@@ -1995,10 +2004,17 @@ void hvm_emulate_writeback(
 }
 }
 
+/*
+ * Callers which pass a known in-range x86_segment can rely on the return
+ * pointer being valid.  Other callers must explicitly check for errors.
+ */
 struct segment_register *hvmemul_get_seg_reg(
 enum x86_segment seg,
 struct hvm_emulate_ctxt *hvmemul_ctxt)
 {
+if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
+return ERR_PTR(-X86EMUL_UNHANDLEABLE);
+
 if ( !__test_and_set_bit(seg, &hvmemul_ctxt->seg_reg_accessed) )
 hvm_get_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
 return &hvmemul_ctxt->seg_reg[seg];
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 7032869..8d6661c 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -123,10 +123,19 @@ __initcall(shadow_audit_key_init);
 /* x86 emulator support for the shadow code
  */
 
-struct segment_register *hvm_get_seg_reg(
+/*
+ * Callers which pass a known in-range x86_segment can rely on the return
+ * pointer being valid.  Other callers must explicitly check for errors.
+ */
+static struct segment_register *hvm_get_seg_reg(
 enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
 {
-struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
+struct segment_register *seg_reg;
+
+if ( seg < 0 || seg >= ARRAY_SIZE(sh_ctxt->seg_reg) )
+return ERR_PTR(-X86EMUL_UNHANDLEABLE);
+
+seg_reg = &sh_ctxt->seg_reg[seg];
 if ( !__test_and_set_bit(seg, &sh_ctxt->valid_seg_regs) )
 hvm_get_segment_register(current, seg, seg_reg);
 return seg_reg;
@@ -143,14 +152,9 @@ static int hvm_translate_linear_addr(
 const struct segment_register *reg;
 int okay;
 
-/*
- * Can arrive here with non-user segments.  However, no such cirucmstance
- * is part of a legitimate pagetable update, so fail the emulation.
- */
-if ( !is_x86_user_segment(seg) )
-return X86EMUL_UNHANDLEABLE;
-
 reg = hvm_get_seg_reg(seg, sh_ctxt);
+if ( IS_ERR(reg) )
+return -PTR_ERR(reg);
 
 okay = hvm_virtual_to_linear_addr(
 seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);
@@ -253,9 +257,6 @@ hvm_emulate_write(enum x86_segment seg,
 unsigned long addr;
 int rc;
 
-if ( !is_x86_user_segment(seg) )
-return X86EMUL_UNHANDLEABLE;
-
 /* How many emulations could we save if we unshadowed on stack writes? */
 if ( seg == x86_seg_ss )
 perfc_incr(shadow_fault_emulate_stack);
@@ -283,7 +284,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg,
 unsigned long addr, old, new;
 int rc;
 
-if ( !is_x86_user_segment(seg) || bytes

[Xen-devel] [PATCH 1/4] hvm/fep: Allow testing of instructions crossing the -1 -> 0 virtual boundary

2016-09-08 Thread Andrew Cooper
The Force Emulation Prefix is named to follow its PV counterpart for cpuid or
rdtsc, but isn't really an instruction prefix.  It behaves as a break-out into
Xen, with the purpose of emulating the next instruction in the current state.

It is important to be able to test legal situations which occur in real
hardware, including instruction which cross certain boundaries, and
instructions starting at 0.

Signed-off-by: Andrew Cooper 
Reviewed-by: Jan Beulich 
---
 xen/arch/x86/hvm/hvm.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 787f055..596a903 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3981,15 +3981,8 @@ void hvm_ud_intercept(struct cpu_user_regs *regs)
 unsigned long addr;
 char sig[5]; /* ud2; .ascii "xen" */
 
-/*
- * Note that in the call below we pass 1 more than the signature
- * size, to guard against the overall code sequence wrapping between
- * "prefix" and actual instruction. There's necessarily at least one
- * actual instruction byte required, so this won't cause failure on
- * legitimate uses.
- */
 if ( hvm_virtual_to_linear_addr(x86_seg_cs, cs, regs->eip,
-sizeof(sig) + 1, hvm_access_insn_fetch,
+sizeof(sig), hvm_access_insn_fetch,
 (hvm_long_mode_enabled(cur) &&
  cs->attr.fields.l) ? 64 :
 cs->attr.fields.db ? 32 : 16, &addr) &&
@@ -3999,6 +3992,11 @@ void hvm_ud_intercept(struct cpu_user_regs *regs)
 {
 regs->eip += sizeof(sig);
 regs->eflags &= ~X86_EFLAGS_RF;
+
+/* Zero the upper 32 bits of %rip if not in long mode. */
+if ( !(hvm_long_mode_enabled(cur) && cs->attr.fields.l) )
+regs->eip = regs->_eip;
+
 add_taint(TAINT_HVM_FEP);
 }
 }
-- 
2.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 18/19] libxl/acpi: Build ACPI tables for HVMlite guests

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> Signed-off-by: Boris Ostrovsky 
> ---
> Changes in v3:
> * Some constification of call parameters
> * Format adjustments
> * New acpi_mem_free hook (a nop)
> * Changes in init_acpi_config() to deal with constified acpi_numa's
>   pointers (initialize pointers as temp variabales)
> * Add '-include acpi' directive in Makefile to make sure acpi
>   target is built before build.o dependencies are processed
>   (specifically, ssdt_*.h files need to exist)
> 
> 
>  .gitignore   |  12 ++-
>  tools/libacpi/build.c|   7 +-
>  tools/libacpi/libacpi.h  |  15 ++-
>  tools/libxl/Makefile |  18 +++-
>  tools/libxl/libxl_arch.h |   3 +
>  tools/libxl/libxl_x86.c  |  30 --
>  tools/libxl/libxl_x86_acpi.c | 218 
> +++
>  tools/libxl/libxl_x86_acpi.h |  35 +++
>  8 files changed, 318 insertions(+), 20 deletions(-)
>  create mode 100644 tools/libxl/libxl_x86_acpi.c
>  create mode 100644 tools/libxl/libxl_x86_acpi.h
> 
> diff --git a/.gitignore b/.gitignore
> index 9b2c405..9f5bd8c 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -173,15 +173,19 @@ tools/include/xen/*
>  tools/include/xen-xsm/*
>  tools/include/xen-foreign/*.(c|h|size)
>  tools/include/xen-foreign/checker
> -tools/libxl/libxlu_cfg_y.output
> +tools/libxl/_libxl.api-for-check
> +tools/libxl/*.api-ok
>  tools/libxl/*.pc
>  tools/libxl/*.pc.in
> -tools/libxl/xl
> +tools/libxl/dsdt*.c
> +tools/libxl/dsdt_*.asl
> +tools/libxl/libxlu_cfg_y.output
> +tools/libxl/mk_dsdt
> +tools/libxl/ssdt_*.h
>  tools/libxl/testenum
>  tools/libxl/testenum.c
>  tools/libxl/tmp.*
> -tools/libxl/_libxl.api-for-check
> -tools/libxl/*.api-ok
> +tools/libxl/xl
>  tools/misc/cpuperf/cpuperf-perfcntr
>  tools/misc/cpuperf/cpuperf-xen
>  tools/misc/xc_shadow
> diff --git a/tools/libacpi/build.c b/tools/libacpi/build.c
> index 1cd640d..ee5f779 100644
> --- a/tools/libacpi/build.c
> +++ b/tools/libacpi/build.c
> @@ -20,6 +20,7 @@
>  #include "ssdt_s4.h"
>  #include "ssdt_tpm.h"
>  #include "ssdt_pm.h"
> +#include 
>  #include 
>  #include 
>  
> @@ -495,7 +496,7 @@ static int new_vm_gid(struct acpi_ctxt *ctxt,
>  return 1;
>  }
>  
> -void acpi_build_tables(struct acpi_ctxt *ctxt, struct acpi_config *config)
> +int acpi_build_tables(struct acpi_ctxt *ctxt, struct acpi_config *config)
>  {
>  struct acpi_info *acpi_info;
>  struct acpi_20_rsdp *rsdp;
> @@ -630,11 +631,11 @@ void acpi_build_tables(struct acpi_ctxt *ctxt, struct 
> acpi_config *config)
>  if ( !new_vm_gid(ctxt, config, acpi_info) )
>  goto oom;
>  
> -return;
> +return 0;
>  
>  oom:
>  printf("unable to build ACPI tables: out of memory\n");
> -
> +return -1;
>  }
>  
>  /*
> diff --git a/tools/libacpi/libacpi.h b/tools/libacpi/libacpi.h
> index d803139..b0ff5aa 100644
> --- a/tools/libacpi/libacpi.h
> +++ b/tools/libacpi/libacpi.h
> @@ -48,6 +48,15 @@ struct acpi_ctxt {
>  void (*free)(struct acpi_ctxt *ctxt, void *v, uint32_t size);
>  unsigned long (*v2p)(struct acpi_ctxt *ctxt, void *v);
>  } mem_ops;
> +
> +unsigned int page_size;
> +unsigned int page_shift;
> +
> +/* Memory allocator */
> +unsigned long alloc_base_paddr;
> +unsigned long alloc_base_vaddr;
> +unsigned long alloc_currp;
> +unsigned long alloc_end;
>  };

There not being (or getting added) any users of these in libacpi/, I
wonder how this is related to the subject of the patch. If this is
data that only libxl needs for its own purposes, then surely this
shouldn't get added to struct acpi_ctxt, but should be a libxl
private extension of that structure.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 3/4] x86/hvm: Optimise segment accesses in hvmemul_write_segment()

2016-09-08 Thread Andrew Cooper
There is no need to read the segment information from VMCS/VMCB and cache it,
just to clobber the cached content immediately afterwards.

Write straight into the cache and set the accessed/dirty bits.

Signed-off-by: Andrew Cooper 
---
CC: Jan Beulich 
CC: Paul Durrant 
---
 xen/arch/x86/hvm/emulate.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 0eb7a4d..e3bfda5 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -1447,12 +1447,12 @@ static int hvmemul_write_segment(
 {
 struct hvm_emulate_ctxt *hvmemul_ctxt =
 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
-struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
 
-if ( IS_ERR(sreg) )
- return -PTR_ERR(sreg);
+if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
+return X86EMUL_UNHANDLEABLE;
 
-memcpy(sreg, reg, sizeof(struct segment_register));
+hvmemul_ctxt->seg_reg[seg] = *reg;
+__set_bit(seg, &hvmemul_ctxt->seg_reg_accessed);
 __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
 
 return X86EMUL_OKAY;
-- 
2.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] [PATCH 4/4] x86/hvm: Perform a user instruction fetch for a FEP in userspace

2016-09-08 Thread Andrew Cooper
This matches hardware behaviour, and prevents erroneous failures when a guest
has SMEP/SMAP active and issues a FEP from userspace.

Signed-off-by: Andrew Cooper 
Reviewed-by: Jan Beulich 
---
 xen/arch/x86/hvm/hvm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 596a903..159671e 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3978,6 +3978,8 @@ void hvm_ud_intercept(struct cpu_user_regs *regs)
 {
 struct vcpu *cur = current;
 const struct segment_register *cs = &ctxt.seg_reg[x86_seg_cs];
+uint32_t walk = (ctxt.seg_reg[x86_seg_ss].attr.fields.dpl == 3)
+? PFEC_user_mode : 0;
 unsigned long addr;
 char sig[5]; /* ud2; .ascii "xen" */
 
@@ -3987,7 +3989,7 @@ void hvm_ud_intercept(struct cpu_user_regs *regs)
  cs->attr.fields.l) ? 64 :
 cs->attr.fields.db ? 32 : 16, &addr) &&
  (hvm_fetch_from_guest_virt_nofault(sig, addr, sizeof(sig),
-0) == HVMCOPY_okay) &&
+walk) == HVMCOPY_okay) &&
  (memcmp(sig, "\xf\xbxen", sizeof(sig)) == 0) )
 {
 regs->eip += sizeof(sig);
-- 
2.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 12/19] libacpi: Build DSDT for PVH guests

2016-09-08 Thread Jan Beulich
>>> On 07.09.16 at 20:59,  wrote:
> @@ -32,15 +32,22 @@ $(H_SRC): $(ACPI_BUILD_DIR)/%.h: %.asl iasl
>  $(MK_DSDT): mk_dsdt.c
>   $(HOSTCC) $(HOSTCFLAGS) $(CFLAGS_xeninclude) -o $@ mk_dsdt.c
>  
> -$(ACPI_BUILD_DIR)/dsdt_anycpu_qemu_xen.asl: dsdt.asl $(MK_DSDT)
> +$(ACPI_BUILD_DIR)/dsdt_anycpu_qemu_xen.asl: dsdt.asl dsdt_acpi_info.asl 
> $(MK_DSDT)
>   awk 'NR > 1 {print s} {s=$$0}' $< > $@
> + cat dsdt_acpi_info.asl >> $@
>   $(MK_DSDT) --debug=$(debug) --dm-version qemu-xen >> $@
>  
>  # NB. awk invocation is a portable alternative to 'head -n -1'
> -$(ACPI_BUILD_DIR)/dsdt_%cpu.asl: dsdt.asl $(MK_DSDT)
> +$(ACPI_BUILD_DIR)/dsdt_%cpu.asl: dsdt.asl dsdt_acpi_info.asl $(MK_DSDT)
>   awk 'NR > 1 {print s} {s=$$0}' $< > $@
> + cat dsdt_acpi_info.asl >> $@
>   $(MK_DSDT) --debug=$(debug) --maxcpu $*  >> $@
>  
> +$(ACPI_BUILD_DIR)/dsdt_pvh.asl: dsdt_acpi_info.asl $(MK_DSDT)
> + printf "DefinitionBlock (\"DSDT.aml\", \"DSDT\", 5, \"Xen\", \"HVM\", 
> 0)\n{" > $@
> + cat dsdt_acpi_info.asl >> $@
> + $(MK_DSDT) --debug=$(debug) --maxcpu any --dm-version none >> $@

Hadn't I seen you switch to use intermediate files with all this output
redirection in v2? Did that get lost, or do I misremember?

Everything else looks fine.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 3/4] x86/hvm: Optimise segment accesses in hvmemul_write_segment()

2016-09-08 Thread Paul Durrant
> -Original Message-
> From: Andrew Cooper [mailto:andrew.coop...@citrix.com]
> Sent: 08 September 2016 15:12
> To: Xen-devel 
> Cc: Andrew Cooper ; Jan Beulich
> ; Paul Durrant 
> Subject: [PATCH 3/4] x86/hvm: Optimise segment accesses in
> hvmemul_write_segment()
> 
> There is no need to read the segment information from VMCS/VMCB and
> cache it, just to clobber the cached content immediately afterwards.
> 
> Write straight into the cache and set the accessed/dirty bits.
> 

Yes, the way the code is now does look somewhat silly.

> Signed-off-by: Andrew Cooper 
> ---
> CC: Jan Beulich 
> CC: Paul Durrant 

Reviewed-by: Paul Durrant 

> ---
>  xen/arch/x86/hvm/emulate.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
> index 0eb7a4d..e3bfda5 100644
> --- a/xen/arch/x86/hvm/emulate.c
> +++ b/xen/arch/x86/hvm/emulate.c
> @@ -1447,12 +1447,12 @@ static int hvmemul_write_segment(  {
>  struct hvm_emulate_ctxt *hvmemul_ctxt =
>  container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
> -struct segment_register *sreg = hvmemul_get_seg_reg(seg,
> hvmemul_ctxt);
> 
> -if ( IS_ERR(sreg) )
> - return -PTR_ERR(sreg);
> +if ( seg < 0 || seg >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
> +return X86EMUL_UNHANDLEABLE;
> 
> -memcpy(sreg, reg, sizeof(struct segment_register));
> +hvmemul_ctxt->seg_reg[seg] = *reg;
> +__set_bit(seg, &hvmemul_ctxt->seg_reg_accessed);
>  __set_bit(seg, &hvmemul_ctxt->seg_reg_dirty);
> 
>  return X86EMUL_OKAY;
> --
> 2.1.4


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v3 1/1] xen: move TLB-flush filtering out into populate_physmap during vm creation

2016-09-08 Thread Ian Jackson
Wei Liu writes ("Re: [PATCH v3 1/1] xen: move TLB-flush filtering out into 
populate_physmap during vm creation"):
> On Thu, Sep 08, 2016 at 01:01:40PM +0200, Dario Faggioli wrote:
> > On Thu, 2016-09-08 at 11:50 +0100, Wei Liu wrote:
> > > On Thu, Sep 08, 2016 at 01:30:03PM +0800, Dongli Zhang wrote:
> > > > +if ( next->domain->already_scheduled == 0 )
> > > > +next->domain->already_scheduled = 1;
> > > > +
> > > Can be simplified by omitting the "if" altogether.  
> > >
> > Are you sure? I mean looking at the cases when the flag is already true
> > (which means, during the life of a domain, basically **always** except
> > a handful of instances after creation), what costs less, a check that
> > is always false, or a write that is always updating a value with its
> > current value?
> 
> Omitting the check certainly results in fewer instructions. And it would
> probably eliminate misses in instruction cache and branch prediction
> logic in the processor.
> 
> In the grand scheme of things, this is a rather minor optimisation, so I
> wouldn't argue strongly for this.

Are we sure we ought to be discussing this in terms of optimisation ?
I doubt it makes any significant difference either way.

But there is a difference in clarity.  I would not normally expect to
see this:

   bool x;

   ...

   if (!x)
   x = 1;

If I saw that I would wonder if the programmer was confused, or
whether I was missing something.

Looking at it without the benefit of the definition of x, it looks
more like x might be a non-boolean type.

Thanks,
Ian.

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/4] hvm/fep: Allow testing of instructions crossing the -1 -> 0 virtual boundary

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 16:11,  wrote:
> The Force Emulation Prefix is named to follow its PV counterpart for cpuid or
> rdtsc, but isn't really an instruction prefix.  It behaves as a break-out into
> Xen, with the purpose of emulating the next instruction in the current state.
> 
> It is important to be able to test legal situations which occur in real
> hardware, including instructions which cross certain boundaries, and
> instructions starting at 0.
> 
> Signed-off-by: Andrew Cooper 
> Reviewed-by: Jan Beulich 

While you did mostly convince me at that time, I've got some more
concerns here: What if the instruction to be emulated causes a
fault that then needs to be propagated to and handled by the
guest, before it can be restarted? Such a fault would be raised
with rIP pointing past the forced emulation prefix, and hence the
restarted instruction then wouldn't get emulated.

Along those line, if you don't want to treat this as an instruction
prefix, there ought to be two #DB due to instruction breakpoint
match (if set for both places, of course), yet that's impossible to
implement together with the desire to emulate the insn.

Jan


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 3/4] x86/hvm: Optimise segment accesses in hvmemul_write_segment()

2016-09-08 Thread Jan Beulich
>>> On 08.09.16 at 16:11,  wrote:
> There is no need to read the segment information from VMCS/VMCB and cache it,
> just to clobber the cached content immediately afterwards.
> 
> Write straight into the cache and set the accessed/dirty bits.
> 
> Signed-off-by: Andrew Cooper 

Reviewed-by: Jan Beulich 


___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH v2] Remove ambiguities in the COPYING file; add CONTRIBUTING file

2016-09-08 Thread Lars Kurth

> On 16 Aug 2016, at 09:19, George Dunlap  wrote:
> 
> On Mon, Aug 15, 2016 at 11:24 AM, Andrew Cooper
>  wrote:
>> On 12/08/16 10:37, Lars Kurth wrote:
>>> COPYING file:
>>> The motivation of this change is to make it easier for new
>>> contributors to conduct a license and patent review, WITHOUT
>>> changing any licenses.
>>> - Remove references to BSD-style licenses as we have more
>>>  common license exceptions and replace with "other license
>>>  stanzas"
>>> - List the most common situations under which code is licensed
>>>  under licenses other than GPLv2 (section "Licensing Exceptions")
>>> - List the most common non-GPLv2 licenses that are in use in
>>>  this repository based on a recent FOSSology scan (section
>>>  "Licensing Exceptions")
>>> - List other license related conventions within the project
>>>  to make it easier to conduct a license review.
>>> - Clarify the incoming license as its omission has confused
>>>  past contributors (section "Contributions")
>>> 
>>> CONTRIBUTION file:
>>> The motivation of this file is to make it easier for contributors
>>> to find contribution related resources. Add information on existing
>>> license related conventions to avoid unintentional future licensing
>>> issues. Provide templates for copyright headers for the most commonly
>>> used licenses in this repository.
>>> 
>>> Signed-off-by: Lars Kurth 
>> 
>> Reviewed-by: Andrew Cooper , with one style
>> correction.
>> 
>>> diff --git a/CONTRIBUTING b/CONTRIBUTING
>>> new file mode 100644
>>> index 000..67ecdb7
>>> --- /dev/null
>>> +++ b/CONTRIBUTING
>>> @@ -0,0 +1,210 @@
>>> 
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
>>> + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
>>> + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>>> + */
>>> \ No newline at end of file
>> 
>> Newline at the end.
> 
> That can presumably be fixed up on check-in -- no need to resend.
> 
> -George

Thank you. 

Otherwise: Ping? Who else needs to ACK to check this in?
Lars
___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


[Xen-devel] Unable to build with gcc 6 because of etherboot

2016-09-08 Thread Daniel E. Shub
This is a follow on to a message I sent to xen-users:
https://lists.xen.org/archives/html/xen-devel/2015-08/msg01924.html

I am trying to compile Xen 4.7.0 with gcc 6.1.1, but I get an error
related to etherboot. It was suggested to update the etherboot
Makefile to the head of the etherboot repository. Another possibility
would be to just pull in the gcc 6 patches from upstream (e.g.,
https://git.ipxe.org/ipxe.git?a=search&h=refs%2Fheads%2Fmaster&st=commit&s=gcc+6).

For 4.6.0 and gcc 5 the recommendation was to just pull in the patches
(cf. https://lists.xen.org/archives/html/xen-devel/2015-08/msg01924.html)
but to update the etherboot version at some point. Is the recommended
course still to just patch as needed or has there been work to update
the etherboot version?

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 1/4] hvm/fep: Allow testing of instructions crossing the -1 -> 0 virtual boundary

2016-09-08 Thread Andrew Cooper
On 08/09/16 15:28, Jan Beulich wrote:
 On 08.09.16 at 16:11,  wrote:
>> The Force Emulation Prefix is named to follow its PV counterpart for cpuid or
>> rdtsc, but isn't really an instruction prefix.  It behaves as a break-out 
>> into
>> Xen, with the purpose of emulating the next instruction in the current state.
>>
>> It is important to be able to test legal situations which occur in real
>> hardware, including instructions which cross certain boundaries, and
>> instructions starting at 0.
>>
>> Signed-off-by: Andrew Cooper 
>> Reviewed-by: Jan Beulich 
> While you did mostly convince me at that time, I've got some more
> concerns here: What if the instruction to be emulated causes a
> fault that then needs to be propagated to and handled by the
> guest, before it can be restarted? Such a fault would be raised
> with rIP pointing past the forced emulation prefix, and hence the
> restarted instruction then wouldn't get emulated.

The current behaviour is to report a fault at the start of the real
instruction.

Furthermore, this is the useful behaviour for it to have.  If a guest is
explicitly probing the Xen x86 emulator with FEP, it can take
responsibility of rewinding %rip by 5 if it needs to replay.

Having said that, I haven't yet encountered a case where replaying a
faulting instruction in a test is useful.  All tests thusfar check that
in specific situations, faults occur architecturally whether run on real
hardware, or via the xen emulator.

> Along those line, if you don't want to treat this as an instruction
> prefix, there ought to be two #DB due to instruction breakpoint
> match (if set for both places, of course), yet that's impossible to
> implement together with the desire to emulate the insn.

True, but I don't see this is a problem.

The *only* code using FEP is test code deliberately trying to elicit
behaviour from the Xen emulator and check that it matches real
hardware.  It is perfectly fine for test code to know its special when
it is using a special backdoor to perform said tests.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel


  1   2   >