This adds NUMA topology support.

numa[0-7]: memory=<mb>[,policy=<default|preferred|bind|interleave>]

example:
-------
sockets:4
cores:2
memory:4096

numa0: memory=1024,policy=bind
numa1: memory=1024,policy=bind
numa2: memory=1024,policy=bind
numa3: memory=1024,policy=bind

- the total NUMA memory must be equal to the VM memory
- one NUMA node is assigned per socket (numaN maps to socket N and host node N)

qemu command line:
------------------
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0

-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1

-object memory-backend-ram,size=1024M,policy=bind,host-nodes=2,id=ram-node2
-numa node,nodeid=2,cpus=4-5,memdev=ram-node2

-object memory-backend-ram,size=1024M,policy=bind,host-nodes=3,id=ram-node3
-numa node,nodeid=3,cpus=6-7,memdev=ram-node3

Signed-off-by: Alexandre Derumier <aderum...@odiso.com>
---
 PVE/QemuServer.pm |   75 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 75ddcdd..51d9045 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -483,6 +483,19 @@ my $MAX_UNUSED_DISKS = 8;
 my $MAX_HOSTPCI_DEVICES = 4;
 my $MAX_SERIAL_PORTS = 4;
 my $MAX_PARALLEL_PORTS = 3;
+my $MAX_NUMA = 8;
+
+# Schema entry shared by every numaN option; values use the 'pve-qm-numa' format.
+my $numadesc = {
+    type => 'string', format => 'pve-qm-numa',
+    optional => 1,
+    description => "numa topology",
+    typetext => "memory=<mb>,[policy=<default|preferred|bind|interleave>]",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-numa", $numadesc);
+
+# Expose numa0 .. numa($MAX_NUMA - 1) as config keys, all backed by the same schema.
+$confdesc->{"numa$_"} = $numadesc for (0 .. $MAX_NUMA - 1);
 
 my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000',  'pcnet',  'virtio',
                      'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3'];
@@ -1272,6 +1285,25 @@ sub drive_is_cdrom {
 
 }
 
+# Parse a numaN config value ("memory=<mb>[,policy=<name>]") into a hash ref
+# holding the keys 'memory' and/or 'policy'. Returns undef as soon as one
+# comma-separated item is unknown or malformed. Memory must be all digits (MB):
+# the value is used in arithmetic and gets an "M" suffix appended for qemu.
+sub parse_numa {
+    my ($data) = @_;
+    my $res = {};
+    foreach my $kvp (split(/,/, $data)) {
+       if ($kvp =~ m/^memory=(\d+)$/) {
+           $res->{memory} = $1;
+       } elsif ($kvp =~ m/^policy=(default|preferred|bind|interleave)$/) {
+           $res->{policy} = $1;
+       } else {
+           return undef;
+       }
+    }
+    return $res;
+}
+
 sub parse_hostpci {
     my ($value) = @_;
 
@@ -1452,6 +1484,17 @@ sub verify_bootdisk {
     die "invalid boot disk '$value'\n";
 }
 
+PVE::JSONSchema::register_format('pve-qm-numa', \&verify_numa);
+# 'pve-qm-numa' check: $value if parse_numa() accepts it, else undef ($noerr set) or die.
+sub verify_numa {
+    my ($value, $noerr) = @_;
+    if (!parse_numa($value)) {
+       return undef if $noerr;
+       die "unable to parse numa options\n";
+    }
+    return $value;
+}
+
 PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
 sub verify_net {
     my ($value, $noerr) = @_;
@@ -2686,6 +2729,36 @@ sub config_to_command {
     # push @$cmd, '-cpu', "$cpu,enforce";
     push @$cmd, '-cpu', $cpu;
 
+    my $memory = $conf->{memory} || $defaults->{memory};
+    push @$cmd, '-m', $memory;
+    # One guest NUMA node per numaN entry: node $i is bound to host node $i and socket $i.
+    my $numa_totalmemory = undef;
+    for (my $i = 0; $i < $MAX_NUMA; $i++) {
+       next if !$conf->{"numa$i"};
+       my $numa = parse_numa($conf->{"numa$i"});
+       next if !$numa;
+
+       die "host numa node$i don't exist" if !(-d "/sys/devices/system/node/node$i/");
+       die "vm socket don't exist for numa node$i" if $i > ($sockets-1);
+       die "missing numa node$i memory value" if !$numa->{memory};
+
+       my $numa_memory = $numa->{memory};
+       $numa_totalmemory += $numa_memory;
+       die "numa node$i memory $numa_memory M can't be bigger than vm memory" if $numa_memory > $memory;
+       # cpus of socket $i; plain ternary because 'my ... if ...' has undefined behaviour (perlsyn)
+       my $cpustart = ($cores * $i);
+       my $cpuend = ($cores && $cores > 1) ? ($cpustart + $cores - 1) : undef;
+       my $cpus = $cpustart;
+       $cpus .= "-$cpuend" if defined($cpuend);
+
+       my $policy = $numa->{policy} ? $numa->{policy} : "default";
+       $numa_memory = $numa_memory."M";
+
+       push @$cmd, '-object', "memory-backend-ram,size=$numa_memory,policy=$policy,host-nodes=$i,id=ram-node$i";
+       push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+    }
+    die "total memory for NUMA nodes must be equal to vm memory" if $numa_totalmemory && $numa_totalmemory != $memory;
+
     push @$cmd, '-S' if $conf->{freeze};
 
     # set keyboard layout
@@ -2798,8 +2871,6 @@ sub config_to_command {
        push @$devices, '-device', print_drivedevice_full($storecfg, $conf, 
$vmid, $drive, $bridges);
     });
 
-    push @$cmd, '-m', $conf->{memory} || $defaults->{memory};
-
     for (my $i = 0; $i < $MAX_NETS; $i++) {
          next if !$conf->{"net$i"};
          my $d = parse_net($conf->{"net$i"});
-- 
1.7.10.4

_______________________________________________
pve-devel mailing list
pve-devel@pve.proxmox.com
http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to