---
docs/man/xl.cfg.pod.5 | 21 ++++
tools/libxc/xc_hvm_build_x86.c | 5 +-
tools/libxl/libxl.h | 6 +
tools/libxl/libxl_create.c | 6 +-
tools/libxl/libxl_dm.c | 255 +++++++++++++++++++++++++++++++++++++++++
tools/libxl/libxl_dom.c | 11 +-
tools/libxl/libxl_internal.h | 11 +-
tools/libxl/libxl_types.idl | 8 ++
tools/libxl/xl_cmdimpl.c | 3 +
9 files changed, 322 insertions(+), 4 deletions(-)
diff --git a/docs/man/xl.cfg.pod.5 b/docs/man/xl.cfg.pod.5
index 638b350..6fd2370 100644
--- a/docs/man/xl.cfg.pod.5
+++ b/docs/man/xl.cfg.pod.5
@@ -767,6 +767,27 @@ to a given device, and "strict" is default here.
Note this would override global B<rdm> option.
+=item B<rdm_mem_boundary=MBYTES>
+
+Number of megabytes to set a boundary for checking rdm conflict.
+
+When RDM conflicts with RAM, RDM probably scatters the whole RAM space.
+Especially multiple RMRR entries would worsen this to lead to a complicated
+memory layout. So here we're trying to figure out a simple solution to
+avoid breaking existing layout. So when a conflict occurs,
+
+ #1. Above a predefined boundary
+ - move lowmem_end below reserved region to solve conflict;
+
+ #2. Below a predefined boundary
+ - Check strict/relaxed policy.
+ "strict" policy leads to fail libxl. Note when both policies
+ are specified on a given region, 'strict' is always preferred.
+ "relaxed" policy issue a warning message and also mask this entry
+   INVALID to indicate we shouldn't expose this entry to hvmloader.
+
+Here the default is 2G.
+
=back
=back
diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
index 0e98c84..5142578 100644
--- a/tools/libxc/xc_hvm_build_x86.c
+++ b/tools/libxc/xc_hvm_build_x86.c
@@ -21,6 +21,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
+#include <assert.h>
#include "xg_private.h"
#include "xc_private.h"
@@ -270,7 +271,7 @@ static int setup_guest(xc_interface *xch,
elf_parse_binary(&elf);
v_start = 0;
- v_end = args->mem_size;
+ v_end = args->lowmem_end;
if ( nr_pages > target_pages )
memflags |= XENMEMF_populate_on_demand;
@@ -754,6 +755,8 @@ int xc_hvm_build_target_mem(xc_interface *xch,
args.mem_size = (uint64_t)memsize << 20;
args.mem_target = (uint64_t)target << 20;
args.image_file_name = image_name;
+ if ( args.mmio_size == 0 )
+ args.mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
return xc_hvm_build(xch, domid, &args);
}
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 0a7913b..a6212fb 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -858,6 +858,12 @@ const char *libxl_defbool_to_string(libxl_defbool b);
#define LIBXL_TIMER_MODE_DEFAULT -1
#define LIBXL_MEMKB_DEFAULT ~0ULL
+/*
+ * We'd like to set a memory boundary to determine if we need to check
+ * any overlap with reserved device memory.
+ */
+#define LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT (2048 * 1024)
+
#define LIBXL_MS_VM_GENID_LEN 16
typedef struct {
uint8_t bytes[LIBXL_MS_VM_GENID_LEN];
diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c
index 6c8ec63..0438731 100644
--- a/tools/libxl/libxl_create.c
+++ b/tools/libxl/libxl_create.c
@@ -109,6 +109,10 @@ void libxl__rdm_setdefault(libxl__gc *gc, libxl_domain_build_info *b_info)
{
if (b_info->rdm.reserve == LIBXL_RDM_RESERVE_FLAG_INVALID)
b_info->rdm.reserve = LIBXL_RDM_RESERVE_FLAG_RELAXED;
+
+ if (b_info->rdm_mem_boundary_memkb == LIBXL_MEMKB_DEFAULT)
+ b_info->rdm_mem_boundary_memkb =
+ LIBXL_RDM_MEM_BOUNDARY_MEMKB_DEFAULT;
}
int libxl__domain_build_info_setdefault(libxl__gc *gc,
@@ -460,7 +464,7 @@ int libxl__domain_build(libxl__gc *gc,
switch (info->type) {
case LIBXL_DOMAIN_TYPE_HVM:
- ret = libxl__build_hvm(gc, domid, info, state);
+ ret = libxl__build_hvm(gc, domid, d_config, state);
if (ret)
goto out;
diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c
index 33f9ce6..d908350 100644
--- a/tools/libxl/libxl_dm.c
+++ b/tools/libxl/libxl_dm.c
@@ -90,6 +90,261 @@ const char *libxl__domain_device_model(libxl__gc *gc,
return dm;
}
+static struct xen_reserved_device_memory
+*xc_device_get_rdm(libxl__gc *gc,
+ uint32_t flag,
+ uint16_t seg,
+ uint8_t bus,
+ uint8_t devfn,
+ unsigned int *nr_entries)
+{
+ struct xen_reserved_device_memory *xrdm;
+ int rc;
+
+ rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
+ NULL, nr_entries);
+ assert(rc <= 0);
+ /* "0" means we have no any rdm entry. */
+ if (!rc)
+ goto out;
+
+ if (errno == ENOBUFS) {
+ xrdm = malloc(*nr_entries * sizeof(xen_reserved_device_memory_t));
+ if (!xrdm) {
+ LOG(ERROR, "Could not allocate RDM buffer!\n");
+ goto out;
+ }
+ rc = xc_reserved_device_memory_map(CTX->xch, flag, seg, bus, devfn,
+ xrdm, nr_entries);
+ if (rc) {
+ LOG(ERROR, "Could not get reserved device memory maps.\n");
+ *nr_entries = 0;
+ free(xrdm);
+ xrdm = NULL;
+ }
+ } else
+ LOG(ERROR, "Could not get reserved device memory maps.\n");
+
+ out:
+ return xrdm;
+}
+
+/*
+ * Check whether there exists rdm hole in the specified memory range.
+ * Returns true if exists, else returns false.
+ */
+static bool overlaps_rdm(uint64_t start, uint64_t memsize,
+ uint64_t rdm_start, uint64_t rdm_size)
+{
+ return (start + memsize > rdm_start) && (start < rdm_start + rdm_size);
+}
+
+/*
+ * Check reported RDM regions and handle potential gfn conflicts according
+ * to user preferred policy.
+ *
+ * RMRR can reside in address space beyond 4G theoretically, but we never
+ * see this in real world. So in order to avoid breaking highmem layout
+ * we don't solve highmem conflict. Note this means highmem rmrr could still
+ * be supported if no conflict.
+ *
+ * But in the case of lowmem, RMRR probably scatters the whole RAM space.
+ * Especially multiple RMRR entries would worsen this to lead to a complicated
+ * memory layout. And then it's hard to extend hvm_info_table{} to work
+ * hvmloader out. So here we're trying to figure out a simple solution to
+ * avoid breaking existing layout. So when a conflict occurs,
+ *
+ * #1. Above a predefined boundary (default 2G)
+ * - Move lowmem_end below reserved region to solve conflict;
+ *
+ * #2. Below a predefined boundary (default 2G)
+ * - Check strict/relaxed policy.
+ * "strict" policy leads to fail libxl. Note when both policies
+ * are specified on a given region, 'strict' is always preferred.
+ * "relaxed" policy issue a warning message and also mask this entry
+ * INVALID to indicate we shouldn't expose this entry to hvmloader.
+ */
+int libxl__domain_device_construct_rdm(libxl__gc *gc,
+ libxl_domain_config *d_config,
+ uint64_t rdm_mem_boundary,
+ struct xc_hvm_build_args *args)
+{
+ int i, j, conflict;
+ struct xen_reserved_device_memory *xrdm = NULL;
+ uint32_t type = d_config->b_info.rdm.type;
+ uint16_t seg;
+ uint8_t bus, devfn;
+ uint64_t rdm_start, rdm_size;
+ uint64_t highmem_end = args->highmem_end ? args->highmem_end : (1ull<<32);
+
+ /* Might not expose rdm. */
+ if (type == LIBXL_RDM_RESERVE_TYPE_NONE && !d_config->num_pcidevs)
+ return 0;
+
+ /* Query all RDM entries in this platform */
+ if (type == LIBXL_RDM_RESERVE_TYPE_HOST) {
+ unsigned int nr_entries;
+
+ /* Collect all rdm info if exist. */
+ xrdm = xc_device_get_rdm(gc, PCI_DEV_RDM_ALL,
+ 0, 0, 0, &nr_entries);
+ if (!nr_entries)
+ return 0;
+
+ assert(xrdm);
+
+ d_config->num_rdms = nr_entries;
+ d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
+ d_config->num_rdms * sizeof(libxl_device_rdm));
+
+ for (i = 0; i < d_config->num_rdms; i++) {
+ d_config->rdms[i].start =
+ (uint64_t)xrdm[i].start_pfn << XC_PAGE_SHIFT;
+ d_config->rdms[i].size =
+ (uint64_t)xrdm[i].nr_pages << XC_PAGE_SHIFT;
+ d_config->rdms[i].flag = d_config->b_info.rdm.reserve;
+ }
+
+ free(xrdm);
+ } else
+ d_config->num_rdms = 0;
+
+ /* Query RDM entries per-device */
+ for (i = 0; i < d_config->num_pcidevs; i++) {
+ unsigned int nr_entries;
+ bool new = true;
+
+ seg = d_config->pcidevs[i].domain;
+ bus = d_config->pcidevs[i].bus;
+ devfn = PCI_DEVFN(d_config->pcidevs[i].dev, d_config->pcidevs[i].func);
+ nr_entries = 0;
+ xrdm = xc_device_get_rdm(gc, ~PCI_DEV_RDM_ALL,
+ seg, bus, devfn, &nr_entries);
+ /* No RDM to associated with this device. */
+ if (!nr_entries)
+ continue;
+
+ assert(xrdm);
+
+ /*
+ * Need to check whether this entry is already saved in the array.
+ * This could come from two cases:
+ *
+ * - user may configure to get all RMRRs in this platform, which
+ * is already queried before this point
+ * - or two assigned devices may share one RMRR entry
+ *
+ * different policies may be configured on the same RMRR due to above
+ * two cases. We choose a simple policy to always favor stricter policy
+ */
+ for (j = 0; j < d_config->num_rdms; j++) {
+ if (d_config->rdms[j].start ==
+ (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT)
+ {
+ if (d_config->rdms[j].flag != LIBXL_RDM_RESERVE_FLAG_STRICT)
+ d_config->rdms[j].flag = d_config->pcidevs[i].rdm_reserve;
+ new = false;
+ break;
+ }
+ }
+
+ if (new) {
+ d_config->num_rdms++;
+ d_config->rdms = libxl__realloc(NOGC, d_config->rdms,
+ d_config->num_rdms * sizeof(libxl_device_rdm));
+
+ d_config->rdms[d_config->num_rdms - 1].start =
+ (uint64_t)xrdm[0].start_pfn << XC_PAGE_SHIFT;
+ d_config->rdms[d_config->num_rdms - 1].size =
+ (uint64_t)xrdm[0].nr_pages << XC_PAGE_SHIFT;
+ d_config->rdms[d_config->num_rdms - 1].flag =
+ d_config->pcidevs[i].rdm_reserve;
+ }
+ free(xrdm);
+ }
+
+ /*
+ * Next step is to check and avoid potential conflict between RDM entries
+ * and guest RAM. To avoid intrusive impact to existing memory layout
+ * {lowmem, mmio, highmem} which is passed around various function blocks,
+ * below conflicts are not handled which are rare and handling them would
+ * lead to a more scattered layout:
+ * - RMRR in highmem area (>4G)
+ * - RMRR lower than a defined memory boundary (e.g. 2G)
+ * Otherwise for conflicts between boundary and 4G, we'll simply move lowmem
+ * end below reserved region to solve conflict.
+ *
+ * If a conflict is detected on a given RMRR entry, an error will be
+ * returned if 'strict' policy is specified. Instead, if 'relaxed' policy
+ * specified, this conflict is treated just as a warning, but we mark this
+ * RMRR entry as INVALID to indicate that this entry shouldn't be exposed
+ * to hvmloader.
+ *
+ * Firstly we should check the case of rdm < 4G because we may need to
+ * expand highmem_end.
+ */
+ for (i = 0; i < d_config->num_rdms; i++) {
+ rdm_start = d_config->rdms[i].start;
+ rdm_size = d_config->rdms[i].size;
+ conflict = overlaps_rdm(0, args->lowmem_end, rdm_start, rdm_size);
+
+ if (!conflict)
+ continue;
+
+ /* Just check if RDM > our memory boundary. */
+ if (rdm_start > rdm_mem_boundary) {
+ /*
+ * We will move downwards lowmem_end so we have to expand
+ * highmem_end.
+ */
+ highmem_end += (args->lowmem_end - rdm_start);
+ /* Now move downwards lowmem_end. */
+ args->lowmem_end = rdm_start;
+ }
+ }
+
+ /* Sync highmem_end. */
+ args->highmem_end = highmem_end;
+
+ /*
+ * Finally we can take same policy to check lowmem(< 2G) and
+ * highmem adjusted above.
+ */
+ for (i = 0; i < d_config->num_rdms; i++) {
+ rdm_start = d_config->rdms[i].start;
+ rdm_size = d_config->rdms[i].size;
+ /* Does this entry conflict with lowmem? */
+ conflict = overlaps_rdm(0, args->lowmem_end,
+ rdm_start, rdm_size);
+ /* Does this entry conflict with highmem? */
+ conflict |= overlaps_rdm((1ULL<<32),
+ args->highmem_end - (1ULL<<32),
+ rdm_start, rdm_size);
+
+ if (!conflict)
+ continue;
+
+ if(d_config->rdms[i].flag == LIBXL_RDM_RESERVE_FLAG_STRICT) {
+ LOG(ERROR, "RDM conflict at 0x%lx.\n", d_config->rdms[i].start);
+ goto out;
+ } else {
+ LOG(WARN, "Ignoring RDM conflict at 0x%lx.\n",
+ d_config->rdms[i].start);
+
+ /*
+ * Then mask this INVALID to indicate we shouldn't expose this
+ * to hvmloader.
+ */
+ d_config->rdms[i].flag = LIBXL_RDM_RESERVE_FLAG_INVALID;
+ }
+ }
+
+ return 0;
+
+ out:
+ return ERROR_FAIL;
+}
+
const libxl_vnc_info *libxl__dm_vnc(const libxl_domain_config *guest_config)
{
const libxl_vnc_info *vnc = NULL;
diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 867172a..1777b32 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -914,13 +914,14 @@ out:
}
int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
- libxl_domain_build_info *info,
+ libxl_domain_config *d_config,
libxl__domain_build_state *state)
{
libxl_ctx *ctx = libxl__gc_owner(gc);
struct xc_hvm_build_args args = {};
int ret, rc = ERROR_FAIL;
uint64_t mmio_start, lowmem_end, highmem_end;
+ libxl_domain_build_info *const info = &d_config->b_info;
memset(&args, 0, sizeof(struct xc_hvm_build_args));
/* The params from the configuration file are in Mb, which are then
@@ -958,6 +959,14 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
args.highmem_end = highmem_end;
args.mmio_start = mmio_start;
+ ret = libxl__domain_device_construct_rdm(gc, d_config,
+ info->rdm_mem_boundary_memkb*1024,
+ &args);
+ if (ret) {
+ LOG(ERROR, "checking reserved device memory failed");
+ goto out;
+ }
+
if (info->num_vnuma_nodes != 0) {
int i;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index e9ac886..52f3831 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1011,7 +1011,7 @@ _hidden int libxl__build_post(libxl__gc *gc, uint32_t domid,
_hidden int libxl__build_pv(libxl__gc *gc, uint32_t domid,
libxl_domain_build_info *info, libxl__domain_build_state *state);
_hidden int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
- libxl_domain_build_info *info,
+ libxl_domain_config *d_config,
libxl__domain_build_state *state);
_hidden int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
@@ -1519,6 +1519,15 @@ _hidden int libxl__need_xenpv_qemu(libxl__gc *gc,
int nr_channels, libxl_device_channel *channels);
/*
+ * This function will fix reserved device memory conflict
+ * according to user's configuration.
+ */
+_hidden int libxl__domain_device_construct_rdm(libxl__gc *gc,
+ libxl_domain_config *d_config,
+ uint64_t rdm_mem_guard,
+ struct xc_hvm_build_args *args);
+
+/*
* This function will cause the whole libxl process to hang
* if the device model does not respond. It is deprecated.
*
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 4dfcaf7..b4282a0 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -395,6 +395,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
("target_memkb", MemKB),
("video_memkb", MemKB),
("shadow_memkb", MemKB),
+ ("rdm_mem_boundary_memkb", MemKB),
("rtc_timeoffset", uint32),
("exec_ssidref", uint32),
("exec_ssid_label", string),
@@ -559,6 +560,12 @@ libxl_device_pci = Struct("device_pci", [
("rdm_reserve", libxl_rdm_reserve_flag),
])
+libxl_device_rdm = Struct("device_rdm", [
+ ("start", uint64),
+ ("size", uint64),
+ ("flag", libxl_rdm_reserve_flag),
+ ])
+
libxl_device_dtdev = Struct("device_dtdev", [
("path", string),
])
@@ -589,6 +596,7 @@ libxl_domain_config = Struct("domain_config", [
("disks", Array(libxl_device_disk, "num_disks")),
("nics", Array(libxl_device_nic, "num_nics")),
("pcidevs", Array(libxl_device_pci, "num_pcidevs")),
+ ("rdms", Array(libxl_device_rdm, "num_rdms")),
("dtdevs", Array(libxl_device_dtdev, "num_dtdevs")),
("vfbs", Array(libxl_device_vfb, "num_vfbs")),
("vkbs", Array(libxl_device_vkb, "num_vkbs")),
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 4364ba4..85d74fd 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -1374,6 +1374,9 @@ static void parse_config_data(const char *config_source,
if (!xlu_cfg_get_long (config, "videoram", &l, 0))
b_info->video_memkb = l * 1024;
+ if (!xlu_cfg_get_long (config, "rdm_mem_boundary", &l, 0))
+ b_info->rdm_mem_boundary_memkb = l * 1024;
+
if (!xlu_cfg_get_long(config, "max_event_channels", &l, 0))
b_info->event_channels = l;
--
1.9.1