Bhyve supports NUMA domain configuration using the '-n'
command line argument:

  -n id,size,cpus[,domain_policy]

Here, "id" is a numeric NUMA domain id, "size" is the memory
size of that domain in the same units format as the "-m" switch,
"cpus" is a cpuset, and "domain_policy" is an optional
domainset(9) memory allocation policy. The "domain_policy"
is currently not used by the libvirt driver.

This argument is repeated for every NUMA domain to be configured, e.g.:

  bhyve \
  ... \
  -n id=0,size=107,cpus=0-3 \
  -n id=1,size=107,cpus=4-7

To support that:

 * Add a corresponding capability; it is considered supported
   if the bhyve binary has the '-n' command line switch.

 * Generate command line arguments for NUMA from
   <cpu><numa>..</numa></cpu> domain configuration.

Additionally, validate that:

 * NUMA domains can only be configured with UEFI loaders.
 * No more than 8 domains are configured per VM, as limited by bhyve.
 * Every NUMA domain has a non-empty cpuset.

Signed-off-by: Roman Bogorodskiy <[email protected]>
---
 src/bhyve/bhyve_capabilities.c                |  3 ++
 src/bhyve/bhyve_capabilities.h                |  1 +
 src/bhyve/bhyve_command.c                     | 26 ++++++++++++
 src/bhyve/bhyve_domain.c                      | 19 +++++++++
 .../bhyvexml2argv-numa-empty-cpuset.xml       | 29 +++++++++++++
 .../bhyvexml2argv-numa-too-many-domains.xml   | 36 ++++++++++++++++
 .../x86_64/bhyvexml2argv-numa.args            | 14 +++++++
 .../x86_64/bhyvexml2argv-numa.ldargs          |  1 +
 .../x86_64/bhyvexml2argv-numa.xml             | 29 +++++++++++++
 tests/bhyvexml2argvtest.c                     | 10 ++++-
 .../x86_64/bhyvexml2xmlout-numa.xml           | 42 +++++++++++++++++++
 tests/bhyvexml2xmltest.c                      |  1 +
 12 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml
 create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml
 create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args
 create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs
 create mode 100644 tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml
 create mode 100644 tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml

diff --git a/src/bhyve/bhyve_capabilities.c b/src/bhyve/bhyve_capabilities.c
index d2a48ed30c..c3fb88fe9f 100644
--- a/src/bhyve/bhyve_capabilities.c
+++ b/src/bhyve/bhyve_capabilities.c
@@ -258,6 +258,9 @@ bhyveProbeCapsFromHelp(unsigned int *caps, char *binary)
     if (strstr(help, "-A:") != NULL)
         *caps |= BHYVE_CAP_ACPI;
 
+    if (strstr(help, "-n:") != NULL)
+        *caps |= BHYVE_CAP_NUMA;
+
     return 0;
 }
 
diff --git a/src/bhyve/bhyve_capabilities.h b/src/bhyve/bhyve_capabilities.h
index d5346df7ba..31fd9ab86a 100644
--- a/src/bhyve/bhyve_capabilities.h
+++ b/src/bhyve/bhyve_capabilities.h
@@ -56,6 +56,7 @@ typedef enum {
     BHYVE_CAP_VIRTIO_RND = 1 << 10,
     BHYVE_CAP_NVME = 1 << 11,
     BHYVE_CAP_ACPI = 1 << 12,
+    BHYVE_CAP_NUMA = 1 << 13,
 } virBhyveCapsFlags;
 
 int virBhyveProbeGrubCaps(virBhyveGrubCapsFlags *caps);
diff --git a/src/bhyve/bhyve_command.c b/src/bhyve/bhyve_command.c
index 37618812bc..931d7dd551 100644
--- a/src/bhyve/bhyve_command.c
+++ b/src/bhyve/bhyve_command.c
@@ -905,6 +905,7 @@ virBhyveProcessBuildBhyveCmd(struct _bhyveConn *driver, 
virDomainDef *def,
     unsigned nusbcontrollers = 0;
     unsigned nisacontrollers = 0;
     unsigned nvcpus = virDomainDefGetVcpus(def);
+    size_t ncells = virDomainNumaGetNodeCount(def->numa);
 
     /* CPUs */
     virCommandAddArg(cmd, "-c");
@@ -955,6 +956,31 @@ virBhyveProcessBuildBhyveCmd(struct _bhyveConn *driver, 
virDomainDef *def,
         }
     }
 
+    /* NUMA */
+    if (ncells) {
+        if (!(bhyveDriverGetBhyveCaps(driver) & BHYVE_CAP_NUMA)) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("Installed bhyve binary does not support NUMA 
configuration"));
+            return NULL;
+        }
+
+        if (def->os.bootloader || !def->os.loader) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("NUMA configuration is only supported when 
booting using UEFI"));
+            return NULL;
+        }
+
+        for (i = 0; i < ncells; i++) {
+            unsigned long long memSize = 
virDomainNumaGetNodeMemorySize(def->numa, i);
+            virBitmap *cpus = virDomainNumaGetNodeCpumask(def->numa, i);
+            g_autofree char *cpumask = virBitmapFormat(cpus);
+
+            virCommandAddArg(cmd, "-n");
+            virCommandAddArgFormat(cmd, "id=%zu,size=%llu,cpus=%s", i, 
VIR_DIV_UP(memSize, 1024),
+                                   cpumask);
+        }
+    }
+
     /* Memory */
     virCommandAddArg(cmd, "-m");
     virCommandAddArgFormat(cmd, "%llu",
diff --git a/src/bhyve/bhyve_domain.c b/src/bhyve/bhyve_domain.c
index 85960c6e12..4594d7673f 100644
--- a/src/bhyve/bhyve_domain.c
+++ b/src/bhyve/bhyve_domain.c
@@ -411,6 +411,7 @@ bhyveDomainDefValidate(const virDomainDef *def,
                        void *parseOpaque G_GNUC_UNUSED)
 {
     size_t i;
+    size_t ncells;
     virStorageSource *src = NULL;
     g_autoptr(GHashTable) nvme_controllers = g_hash_table_new(g_direct_hash,
                                                               g_direct_equal);
@@ -445,6 +446,24 @@ bhyveDomainDefValidate(const virDomainDef *def,
         return -1;
     }
 
+    ncells = virDomainNumaGetNodeCount(def->numa);
+    if (ncells) {
+        if (ncells > 8) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("Only up to 8 NUMA domains are supported"));
+            return -1;
+        }
+
+        for (i = 0; i < ncells; i++) {
+            if (!virDomainNumaGetNodeCpumask(def->numa, i)) {
+                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                               _("NUMA domain id %1$zu: empty cpusets are not 
allowed"),
+                               i);
+                return -1;
+            }
+        }
+    }
+
     if (!def->os.loader)
         return 0;
 
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml 
b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml
new file mode 100644
index 0000000000..9a5fc282ba
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-empty-cpuset.xml
@@ -0,0 +1,29 @@
+<domain type='bhyve'>
+  <name>bhyve</name>
+  <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+  <memory>219136</memory>
+  <vcpu>8</vcpu>
+  <os firmware='efi'>
+    <type>hvm</type>
+  </os>
+  <cpu>
+    <numa>
+      <cell id='0' cpus='0-3' memory='109568' unit='KiB'/>
+      <cell id='1' memory='109568' unit='KiB'/>
+    </numa>
+  </cpu>
+  <devices>
+    <disk type='file'>
+      <driver name='file' type='raw'/>
+      <source file='/tmp/freebsd.img'/>
+      <target dev='hda' bus='sata'/>
+      <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+    </disk>
+    <interface type='bridge'>
+      <mac address='52:54:00:b9:94:02'/>
+      <model type='virtio'/>
+      <source bridge="virbr0"/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' 
function='0x0'/>
+    </interface>
+  </devices>
+</domain>
diff --git 
a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml 
b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml
new file mode 100644
index 0000000000..bcabe5cd85
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa-too-many-domains.xml
@@ -0,0 +1,36 @@
+<domain type='bhyve'>
+  <name>bhyve</name>
+  <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+  <memory>876544</memory>
+  <vcpu>9</vcpu>
+  <os firmware='efi'>
+    <type>hvm</type>
+  </os>
+  <cpu>
+    <numa>
+      <cell id='0' cpus='0' memory='109568' unit='KiB'/>
+      <cell id='1' cpus='1' memory='109568' unit='KiB'/>
+      <cell id='2' cpus='2' memory='109568' unit='KiB'/>
+      <cell id='3' cpus='3' memory='109568' unit='KiB'/>
+      <cell id='4' cpus='4' memory='109568' unit='KiB'/>
+      <cell id='5' cpus='5' memory='109568' unit='KiB'/>
+      <cell id='6' cpus='6' memory='109568' unit='KiB'/>
+      <cell id='7' cpus='7' memory='109568' unit='KiB'/>
+      <cell id='8' cpus='8' memory='109568' unit='KiB'/>
+    </numa>
+  </cpu>
+  <devices>
+    <disk type='file'>
+      <driver name='file' type='raw'/>
+      <source file='/tmp/freebsd.img'/>
+      <target dev='hda' bus='sata'/>
+      <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+    </disk>
+    <interface type='bridge'>
+      <mac address='52:54:00:b9:94:02'/>
+      <model type='virtio'/>
+      <source bridge="virbr0"/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' 
function='0x0'/>
+    </interface>
+  </devices>
+</domain>
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args 
b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args
new file mode 100644
index 0000000000..15efd1c357
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.args
@@ -0,0 +1,14 @@
+bhyve \
+-c 8 \
+-n id=0,size=107,cpus=0-3 \
+-n id=1,size=107,cpus=4-7 \
+-m 214 \
+-u \
+-H \
+-P \
+-s 0:0,hostbridge \
+-l bootrom,fakefirmwaredir/BHYVE_UEFI.fd,fakenvramdir/bhyve_VARS.fd \
+-s 1:0,lpc \
+-s 2:0,ahci,hd:/tmp/freebsd.img \
+-s 3:0,virtio-net,faketapdev,mac=52:54:00:b9:94:02 \
+bhyve
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs 
b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs
new file mode 100644
index 0000000000..421376db9e
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.ldargs
@@ -0,0 +1 @@
+dummy
diff --git a/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml 
b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml
new file mode 100644
index 0000000000..8a0da7830d
--- /dev/null
+++ b/tests/bhyvexml2argvdata/x86_64/bhyvexml2argv-numa.xml
@@ -0,0 +1,29 @@
+<domain type='bhyve'>
+  <name>bhyve</name>
+  <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+  <memory>219136</memory>
+  <vcpu>8</vcpu>
+  <os firmware='efi'>
+    <type>hvm</type>
+  </os>
+  <cpu>
+    <numa>
+      <cell id='0' cpus='0-3' memory='109568' unit='KiB'/>
+      <cell id='1' cpus='4-7' memory='109568' unit='KiB'/>
+    </numa>
+  </cpu>
+  <devices>
+    <disk type='file'>
+      <driver name='file' type='raw'/>
+      <source file='/tmp/freebsd.img'/>
+      <target dev='hda' bus='sata'/>
+      <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+    </disk>
+    <interface type='bridge'>
+      <mac address='52:54:00:b9:94:02'/>
+      <model type='virtio'/>
+      <source bridge="virbr0"/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' 
function='0x0'/>
+    </interface>
+  </devices>
+</domain>
diff --git a/tests/bhyvexml2argvtest.c b/tests/bhyvexml2argvtest.c
index 2330e70bbf..b7749fec6f 100644
--- a/tests/bhyvexml2argvtest.c
+++ b/tests/bhyvexml2argvtest.c
@@ -202,7 +202,7 @@ mymain(void)
                        BHYVE_CAP_FBUF | BHYVE_CAP_XHCI | \
                        BHYVE_CAP_CPUTOPOLOGY | BHYVE_CAP_SOUND_HDA | \
                        BHYVE_CAP_VNC_PASSWORD | BHYVE_CAP_VIRTIO_9P | \
-                       BHYVE_CAP_NVME;
+                       BHYVE_CAP_NVME | BHYVE_CAP_NUMA;
 
     DO_TEST("base");
     DO_TEST("wired");
@@ -254,6 +254,11 @@ mymain(void)
     DO_TEST("isa-controller");
     DO_TEST_FAILURE("isa-multiple-controllers");
     DO_TEST("firmware-efi");
+    DO_TEST("numa");
+    DO_TEST_FAILURE("numa-empty-cpuset");
+    DO_TEST_FAILURE("numa-too-many-domains");
+    driver.bhyvecaps &= ~BHYVE_CAP_NUMA;
+    DO_TEST_FAILURE("numa");
     fakefirmwaredir = g_steal_pointer(&driver.config->firmwareDir);
     driver.config->firmwareDir = g_steal_pointer(&fakefirmwareemptydir);
     DO_TEST_PREPARE_ERROR("firmware-efi");
@@ -345,10 +350,13 @@ mymain(void)
     driver.caps = virBhyveCapsBuild();
     /* bhyve does not support UTC clock on ARM */
     driver.bhyvecaps ^= BHYVE_CAP_RTC_UTC;
+    /* bhyve does not support NUMA on ARM */
+    driver.bhyvecaps &= ~BHYVE_CAP_NUMA;
 
     DO_TEST("base");
     DO_TEST("console");
     DO_TEST("bootloader");
+    DO_TEST_FAILURE("numa");
 
     virObjectUnref(driver.caps);
     virObjectUnref(driver.xmlopt);
diff --git a/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml 
b/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml
new file mode 100644
index 0000000000..ecc147db78
--- /dev/null
+++ b/tests/bhyvexml2xmloutdata/x86_64/bhyvexml2xmlout-numa.xml
@@ -0,0 +1,42 @@
+<domain type='bhyve'>
+  <name>bhyve</name>
+  <uuid>df3be7e7-a104-11e3-aeb0-50e5492bd3dc</uuid>
+  <memory unit='KiB'>219136</memory>
+  <currentMemory unit='KiB'>219136</currentMemory>
+  <vcpu placement='static'>8</vcpu>
+  <os firmware='efi'>
+    <type arch='x86_64'>hvm</type>
+    <boot dev='hd'/>
+  </os>
+  <cpu>
+    <numa>
+      <cell id='0' cpus='0-3' memory='109568' unit='KiB'/>
+      <cell id='1' cpus='4-7' memory='109568' unit='KiB'/>
+    </numa>
+  </cpu>
+  <clock offset='utc'/>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <disk type='file' device='disk'>
+      <driver name='file' type='raw'/>
+      <source file='/tmp/freebsd.img'/>
+      <target dev='hda' bus='sata'/>
+      <address type='drive' controller='0' bus='0' target='2' unit='0'/>
+    </disk>
+    <controller type='pci' index='0' model='pci-root'/>
+    <controller type='isa' index='0'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' 
function='0x0'/>
+    </controller>
+    <controller type='sata' index='0'>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' 
function='0x0'/>
+    </controller>
+    <interface type='bridge'>
+      <mac address='52:54:00:b9:94:02'/>
+      <source bridge='virbr0'/>
+      <model type='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' 
function='0x0'/>
+    </interface>
+  </devices>
+</domain>
diff --git a/tests/bhyvexml2xmltest.c b/tests/bhyvexml2xmltest.c
index 7f9de2bc36..950aaea672 100644
--- a/tests/bhyvexml2xmltest.c
+++ b/tests/bhyvexml2xmltest.c
@@ -132,6 +132,7 @@ mymain(void)
     DO_TEST_DIFFERENT("passthru-multiple-devs");
     DO_TEST_DIFFERENT("slirp");
     DO_TEST_DIFFERENT("virtio-scsi");
+    DO_TEST_DIFFERENT("numa");
 
     /* Address allocation tests */
     DO_TEST_DIFFERENT("addr-single-sata-disk");
-- 
2.52.0

Reply via email to