Add test cases where colocation rules are used with the static utilization scheduler and the rebalance-on-start option enabled. These verify the behavior in the following scenarios:
- 7 services with intertwined colocation rules in a 3 node cluster; 1 node failing
- 3 neg. colocated services in a 3 node cluster, where the rules are stated in an intransitive form; 1 node failing
- 5 neg. colocated services in a 5 node cluster, where the rules are stated in an intransitive form; 2 nodes failing

Signed-off-by: Daniel Kral <d.k...@proxmox.com>
---
 .../test-crs-static-rebalance-coloc1/README | 26 +++
 .../test-crs-static-rebalance-coloc1/cmdlist | 4 +
 .../datacenter.cfg | 6 +
 .../hardware_status | 5 +
 .../log.expect | 120 ++++++++++++++
 .../manager_status | 1 +
 .../rules_config | 24 +++
 .../service_config | 10 ++
 .../static_service_stats | 10 ++
 .../test-crs-static-rebalance-coloc2/README | 16 ++
 .../test-crs-static-rebalance-coloc2/cmdlist | 4 +
 .../datacenter.cfg | 6 +
 .../hardware_status | 5 +
 .../log.expect | 86 ++++++++++
 .../manager_status | 1 +
 .../rules_config | 14 ++
 .../service_config | 5 +
 .../static_service_stats | 5 +
 .../test-crs-static-rebalance-coloc3/README | 14 ++
 .../test-crs-static-rebalance-coloc3/cmdlist | 4 +
 .../datacenter.cfg | 6 +
 .../hardware_status | 7 +
 .../log.expect | 156 ++++++++++++++++
 .../manager_status | 1 +
 .../rules_config | 49 ++++++
 .../service_config | 7 +
 .../static_service_stats | 5 +
 27 files changed, 597 insertions(+)
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/README
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/cmdlist
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/datacenter.cfg
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/hardware_status
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/log.expect
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/manager_status
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/rules_config
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/service_config
 create mode 100644 src/test/test-crs-static-rebalance-coloc1/static_service_stats
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/README
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/cmdlist
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/datacenter.cfg
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/hardware_status
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/log.expect
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/manager_status
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/rules_config
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/service_config
 create mode 100644 src/test/test-crs-static-rebalance-coloc2/static_service_stats
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/README
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/cmdlist
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/datacenter.cfg
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/hardware_status
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/log.expect
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/manager_status
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/rules_config
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/service_config
 create mode 100644 src/test/test-crs-static-rebalance-coloc3/static_service_stats

diff --git a/src/test/test-crs-static-rebalance-coloc1/README b/src/test/test-crs-static-rebalance-coloc1/README
new file mode 100644
index 0000000..c709f45
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc1/README
@@ -0,0 +1,26 @@
+Test whether a mixed set of strict colocation rules, in conjunction with the
+static load scheduler with auto-rebalancing, is applied correctly on service
+start and in case of a subsequent failover.
+
+The test scenario is:
+- vm:101 and vm:102 are non-colocated services
+- Services that must be kept together:
+  - vm:102, and vm:107
+  - vm:104, vm:106, and vm:108
+- Services that must be kept separate:
+  - vm:103, vm:104, and vm:105
+  - vm:103, vm:106, and vm:107
+  - vm:107, and vm:108
+- Therefore, there are consistent interdependencies between the positive and
+  negative colocation rules' service members
+- vm:101 and vm:102 are currently assigned to node1 and node2 respectively
+- vm:103 through vm:108 are currently assigned to node3
+
+Therefore, the expected outcome is:
+- vm:101, vm:102, and vm:103 should be started on node1, node2, and node3
+  respectively, as there's nothing running there yet
+- vm:104, vm:106, and vm:108 should all be assigned to the same node, which
+  will be node1, since it has the most resources left for vm:104
+- vm:105 and vm:107 should both be assigned to the same node, which will be
+  node2, since neither can be assigned to the other nodes because of the
+  colocation constraints
diff --git a/src/test/test-crs-static-rebalance-coloc1/cmdlist b/src/test/test-crs-static-rebalance-coloc1/cmdlist
new file mode 100644
index 0000000..eee0e40
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc1/cmdlist
@@ -0,0 +1,4 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-crs-static-rebalance-coloc1/datacenter.cfg b/src/test/test-crs-static-rebalance-coloc1/datacenter.cfg
new file mode 100644
index 0000000..f2671a5
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc1/datacenter.cfg
@@ -0,0 +1,6 @@
+{
+    "crs": {
+        "ha": "static",
+        "ha-rebalance-on-start": 1
+    }
+}
diff --git a/src/test/test-crs-static-rebalance-coloc1/hardware_status b/src/test/test-crs-static-rebalance-coloc1/hardware_status
new file mode 100644
index 0000000..84484af
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc1/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 },
+  "node2": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 },
+  "node3": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 }
+}
diff --git a/src/test/test-crs-static-rebalance-coloc1/log.expect b/src/test/test-crs-static-rebalance-coloc1/log.expect
new file mode 100644
index 0000000..cdd2497
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc1/log.expect
@@ -0,0 +1,120 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: using scheduler mode 'static'
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20
node1/crm: node 'node3': state changed from 'unknown' => 'online' +info 20 node1/crm: adding new service 'vm:101' on node 'node1' +info 20 node1/crm: adding new service 'vm:102' on node 'node2' +info 20 node1/crm: adding new service 'vm:103' on node 'node3' +info 20 node1/crm: adding new service 'vm:104' on node 'node3' +info 20 node1/crm: adding new service 'vm:105' on node 'node3' +info 20 node1/crm: adding new service 'vm:106' on node 'node3' +info 20 node1/crm: adding new service 'vm:107' on node 'node3' +info 20 node1/crm: adding new service 'vm:108' on node 'node3' +info 20 node1/crm: service vm:101: re-balance selected current node node1 for startup +info 20 node1/crm: service 'vm:101': state changed from 'request_start' to 'started' (node = node1) +info 20 node1/crm: service vm:102: re-balance selected current node node2 for startup +info 20 node1/crm: service 'vm:102': state changed from 'request_start' to 'started' (node = node2) +info 20 node1/crm: service vm:103: re-balance selected current node node3 for startup +info 20 node1/crm: service 'vm:103': state changed from 'request_start' to 'started' (node = node3) +info 20 node1/crm: service vm:104: re-balance selected new node node1 for startup +info 20 node1/crm: service 'vm:104': state changed from 'request_start' to 'request_start_balance' (node = node3, target = node1) +info 20 node1/crm: service vm:105: re-balance selected new node node2 for startup +info 20 node1/crm: service 'vm:105': state changed from 'request_start' to 'request_start_balance' (node = node3, target = node2) +info 20 node1/crm: service vm:106: re-balance selected new node node1 for startup +info 20 node1/crm: service 'vm:106': state changed from 'request_start' to 'request_start_balance' (node = node3, target = node1) +info 20 node1/crm: service vm:107: re-balance selected new node node2 for startup +info 20 node1/crm: service 'vm:107': state changed from 'request_start' to 'request_start_balance' (node = node3, target = node2) +info 20 node1/crm: service vm:108: re-balance selected new node node1 for startup +info 20 node1/crm: service 'vm:108': state changed from 'request_start' to 'request_start_balance' (node = node3, target = node1) +info 21 node1/lrm: got lock 'ha_agent_node1_lock' +info 21 node1/lrm: status change wait_for_agent_lock => active +info 21 node1/lrm: starting service vm:101 +info 21 node1/lrm: service status vm:101 started +info 22 node2/crm: status change wait_for_quorum => slave +info 23 node2/lrm: got lock 'ha_agent_node2_lock' +info 23 node2/lrm: status change wait_for_agent_lock => active +info 23 node2/lrm: starting service vm:102 +info 23 node2/lrm: service status vm:102 started +info 24 node3/crm: status change wait_for_quorum => slave +info 25 node3/lrm: got lock 'ha_agent_node3_lock' +info 25 node3/lrm: status change wait_for_agent_lock => active +info 25 node3/lrm: starting service vm:103 +info 25 node3/lrm: service status vm:103 started +info 25 node3/lrm: service vm:104 - start relocate to node 'node1' +info 25 node3/lrm: service vm:104 - end relocate to node 'node1' +info 25 node3/lrm: service vm:105 - start relocate to node 'node2' +info 25 node3/lrm: service vm:105 - end relocate to node 'node2' +info 25 node3/lrm: service vm:106 - start relocate to node 'node1' +info 25 node3/lrm: service vm:106 - end relocate to node 'node1' +info 25 node3/lrm: service vm:107 - start relocate to node 'node2' +info 25 node3/lrm: service vm:107 - end relocate to node 'node2' +info 25 node3/lrm: service vm:108 - start relocate to node 
'node1' +info 25 node3/lrm: service vm:108 - end relocate to node 'node1' +info 40 node1/crm: service 'vm:104': state changed from 'request_start_balance' to 'started' (node = node1) +info 40 node1/crm: service 'vm:105': state changed from 'request_start_balance' to 'started' (node = node2) +info 40 node1/crm: service 'vm:106': state changed from 'request_start_balance' to 'started' (node = node1) +info 40 node1/crm: service 'vm:107': state changed from 'request_start_balance' to 'started' (node = node2) +info 40 node1/crm: service 'vm:108': state changed from 'request_start_balance' to 'started' (node = node1) +info 41 node1/lrm: starting service vm:104 +info 41 node1/lrm: service status vm:104 started +info 41 node1/lrm: starting service vm:106 +info 41 node1/lrm: service status vm:106 started +info 41 node1/lrm: starting service vm:108 +info 41 node1/lrm: service status vm:108 started +info 43 node2/lrm: starting service vm:105 +info 43 node2/lrm: service status vm:105 started +info 43 node2/lrm: starting service vm:107 +info 43 node2/lrm: service status vm:107 started +info 120 cmdlist: execute network node3 off +info 120 node1/crm: node 'node3': state changed from 'online' => 'unknown' +info 124 node3/crm: status change slave => wait_for_quorum +info 125 node3/lrm: status change active => lost_agent_lock +info 160 node1/crm: service 'vm:103': state changed from 'started' to 'fence' +info 160 node1/crm: node 'node3': state changed from 'unknown' => 'fence' +emai 160 node1/crm: FENCE: Try to fence node 'node3' +info 166 watchdog: execute power node3 off +info 165 node3/crm: killed by poweroff +info 166 node3/lrm: killed by poweroff +info 166 hardware: server 'node3' stopped by poweroff (watchdog) +info 240 node1/crm: got lock 'ha_agent_node3_lock' +info 240 node1/crm: fencing: acknowledged - got agent lock for node 'node3' +info 240 node1/crm: node 'node3': state changed from 'fence' => 'unknown' +emai 240 node1/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node3' +info 240 node1/crm: service 'vm:103': state changed from 'fence' to 'recovery' +err 240 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 260 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 280 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 300 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 320 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 340 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 360 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 380 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 400 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 420 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 440 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 460 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 480 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 500 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found 
+err 520 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 540 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 560 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 580 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 600 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 620 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 640 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 660 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 680 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 700 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +info 720 hardware: exit simulation - done diff --git a/src/test/test-crs-static-rebalance-coloc1/manager_status b/src/test/test-crs-static-rebalance-coloc1/manager_status new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc1/manager_status @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/test/test-crs-static-rebalance-coloc1/rules_config b/src/test/test-crs-static-rebalance-coloc1/rules_config new file mode 100644 index 0000000..129778f --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc1/rules_config @@ -0,0 +1,24 @@ +colocation: vms-must-stick-together1 + services vm:102,vm:107 + affinity together + strict 1 + +colocation: vms-must-stick-together2 + services vm:104,vm:106,vm:108 + affinity together + strict 1 + +colocation: vms-must-stay-apart1 + services vm:103,vm:104,vm:105 + affinity separate + strict 1 + +colocation: vms-must-stay-apart2 + services vm:103,vm:106,vm:107 + affinity separate + strict 1 + +colocation: vms-must-stay-apart3 + services vm:107,vm:108 + affinity separate + strict 1 diff --git a/src/test/test-crs-static-rebalance-coloc1/service_config b/src/test/test-crs-static-rebalance-coloc1/service_config new file mode 100644 index 0000000..02e4a07 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc1/service_config @@ -0,0 +1,10 @@ +{ + "vm:101": { "node": "node1", "state": "started" }, + "vm:102": { "node": "node2", "state": "started" }, + "vm:103": { "node": "node3", "state": "started" }, + "vm:104": { "node": "node3", "state": "started" }, + "vm:105": { "node": "node3", "state": "started" }, + "vm:106": { "node": "node3", "state": "started" }, + "vm:107": { "node": "node3", "state": "started" }, + "vm:108": { "node": "node3", "state": "started" } +} diff --git a/src/test/test-crs-static-rebalance-coloc1/static_service_stats b/src/test/test-crs-static-rebalance-coloc1/static_service_stats new file mode 100644 index 0000000..c6472ca --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc1/static_service_stats @@ -0,0 +1,10 @@ +{ + "vm:101": { "maxcpu": 8, "maxmem": 16000000000 }, + "vm:102": { "maxcpu": 4, "maxmem": 24000000000 }, + "vm:103": { "maxcpu": 2, "maxmem": 32000000000 }, + "vm:104": { "maxcpu": 4, "maxmem": 48000000000 }, + "vm:105": { "maxcpu": 8, "maxmem": 16000000000 }, + "vm:106": { "maxcpu": 4, "maxmem": 32000000000 }, + "vm:107": { "maxcpu": 2, "maxmem": 64000000000 }, + "vm:108": { "maxcpu": 8, "maxmem": 48000000000 } +} diff --git 
a/src/test/test-crs-static-rebalance-coloc2/README b/src/test/test-crs-static-rebalance-coloc2/README
new file mode 100644
index 0000000..1b788f8
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc2/README
@@ -0,0 +1,16 @@
+Test whether a set of transitive strict negative colocation rules, i.e. there
+are negative colocation relations a->b, b->c and a->c, in conjunction with the
+static load scheduler with auto-rebalancing, is applied correctly on service
+start and in case of a subsequent failover.
+
+The test scenario is:
+- vm:101 and vm:102 must be kept separate
+- vm:102 and vm:103 must be kept separate
+- vm:101 and vm:103 must be kept separate
+- Therefore, vm:101, vm:102, and vm:103 must be kept separate
+
+Therefore, the expected outcome is:
+- vm:101, vm:102, and vm:103 should be started on node1, node2, and node3
+  respectively, just as if the three negative colocation rules had been
+  stated as a single negative colocation rule
+- As node3 fails, vm:103 cannot be recovered
diff --git a/src/test/test-crs-static-rebalance-coloc2/cmdlist b/src/test/test-crs-static-rebalance-coloc2/cmdlist
new file mode 100644
index 0000000..eee0e40
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc2/cmdlist
@@ -0,0 +1,4 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-crs-static-rebalance-coloc2/datacenter.cfg b/src/test/test-crs-static-rebalance-coloc2/datacenter.cfg
new file mode 100644
index 0000000..f2671a5
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc2/datacenter.cfg
@@ -0,0 +1,6 @@
+{
+    "crs": {
+        "ha": "static",
+        "ha-rebalance-on-start": 1
+    }
+}
diff --git a/src/test/test-crs-static-rebalance-coloc2/hardware_status b/src/test/test-crs-static-rebalance-coloc2/hardware_status
new file mode 100644
index 0000000..84484af
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc2/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 },
+  "node2": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 },
+  "node3": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 }
+}
diff --git a/src/test/test-crs-static-rebalance-coloc2/log.expect b/src/test/test-crs-static-rebalance-coloc2/log.expect
new file mode 100644
index 0000000..c59f286
--- /dev/null
+++ b/src/test/test-crs-static-rebalance-coloc2/log.expect
@@ -0,0 +1,86 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: using scheduler mode 'static'
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:101' on node 'node1'
+info 20 node1/crm: adding new service 'vm:102' on node 'node1'
+info 20 node1/crm: adding new service
'vm:103' on node 'node1' +info 20 node1/crm: service vm:101: re-balance selected current node node1 for startup +info 20 node1/crm: service 'vm:101': state changed from 'request_start' to 'started' (node = node1) +info 20 node1/crm: service vm:102: re-balance selected new node node2 for startup +info 20 node1/crm: service 'vm:102': state changed from 'request_start' to 'request_start_balance' (node = node1, target = node2) +info 20 node1/crm: service vm:103: re-balance selected new node node3 for startup +info 20 node1/crm: service 'vm:103': state changed from 'request_start' to 'request_start_balance' (node = node1, target = node3) +info 21 node1/lrm: got lock 'ha_agent_node1_lock' +info 21 node1/lrm: status change wait_for_agent_lock => active +info 21 node1/lrm: starting service vm:101 +info 21 node1/lrm: service status vm:101 started +info 21 node1/lrm: service vm:102 - start relocate to node 'node2' +info 21 node1/lrm: service vm:102 - end relocate to node 'node2' +info 21 node1/lrm: service vm:103 - start relocate to node 'node3' +info 21 node1/lrm: service vm:103 - end relocate to node 'node3' +info 22 node2/crm: status change wait_for_quorum => slave +info 24 node3/crm: status change wait_for_quorum => slave +info 40 node1/crm: service 'vm:102': state changed from 'request_start_balance' to 'started' (node = node2) +info 40 node1/crm: service 'vm:103': state changed from 'request_start_balance' to 'started' (node = node3) +info 43 node2/lrm: got lock 'ha_agent_node2_lock' +info 43 node2/lrm: status change wait_for_agent_lock => active +info 43 node2/lrm: starting service vm:102 +info 43 node2/lrm: service status vm:102 started +info 45 node3/lrm: got lock 'ha_agent_node3_lock' +info 45 node3/lrm: status change wait_for_agent_lock => active +info 45 node3/lrm: starting service vm:103 +info 45 node3/lrm: service status vm:103 started +info 120 cmdlist: execute network node3 off +info 120 node1/crm: node 'node3': state changed from 'online' => 'unknown' +info 124 node3/crm: status change slave => wait_for_quorum +info 125 node3/lrm: status change active => lost_agent_lock +info 160 node1/crm: service 'vm:103': state changed from 'started' to 'fence' +info 160 node1/crm: node 'node3': state changed from 'unknown' => 'fence' +emai 160 node1/crm: FENCE: Try to fence node 'node3' +info 166 watchdog: execute power node3 off +info 165 node3/crm: killed by poweroff +info 166 node3/lrm: killed by poweroff +info 166 hardware: server 'node3' stopped by poweroff (watchdog) +info 240 node1/crm: got lock 'ha_agent_node3_lock' +info 240 node1/crm: fencing: acknowledged - got agent lock for node 'node3' +info 240 node1/crm: node 'node3': state changed from 'fence' => 'unknown' +emai 240 node1/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node3' +info 240 node1/crm: service 'vm:103': state changed from 'fence' to 'recovery' +err 240 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 260 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 280 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 300 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 320 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 340 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 360 node1/crm: recovering service 'vm:103' 
from fenced node 'node3' failed, no recovery node found +err 380 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 400 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 420 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 440 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 460 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 480 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 500 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 520 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 540 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 560 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 580 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 600 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 620 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 640 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 660 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 680 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +err 700 node1/crm: recovering service 'vm:103' from fenced node 'node3' failed, no recovery node found +info 720 hardware: exit simulation - done diff --git a/src/test/test-crs-static-rebalance-coloc2/manager_status b/src/test/test-crs-static-rebalance-coloc2/manager_status new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc2/manager_status @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/test/test-crs-static-rebalance-coloc2/rules_config b/src/test/test-crs-static-rebalance-coloc2/rules_config new file mode 100644 index 0000000..1545064 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc2/rules_config @@ -0,0 +1,14 @@ +colocation: very-lonely-services1 + services vm:101,vm:102 + affinity separate + strict 1 + +colocation: very-lonely-services2 + services vm:102,vm:103 + affinity separate + strict 1 + +colocation: very-lonely-services3 + services vm:101,vm:103 + affinity separate + strict 1 diff --git a/src/test/test-crs-static-rebalance-coloc2/service_config b/src/test/test-crs-static-rebalance-coloc2/service_config new file mode 100644 index 0000000..57e3579 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc2/service_config @@ -0,0 +1,5 @@ +{ + "vm:101": { "node": "node1", "state": "started" }, + "vm:102": { "node": "node1", "state": "started" }, + "vm:103": { "node": "node1", "state": "started" } +} diff --git a/src/test/test-crs-static-rebalance-coloc2/static_service_stats b/src/test/test-crs-static-rebalance-coloc2/static_service_stats new file mode 100644 index 0000000..d9dc9e7 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc2/static_service_stats @@ -0,0 +1,5 @@ +{ + "vm:101": { "maxcpu": 8, "maxmem": 16000000000 }, + "vm:102": { "maxcpu": 4, "maxmem": 24000000000 }, + "vm:103": { "maxcpu": 2, 
"maxmem": 32000000000 } +} diff --git a/src/test/test-crs-static-rebalance-coloc3/README b/src/test/test-crs-static-rebalance-coloc3/README new file mode 100644 index 0000000..e54a2d4 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/README @@ -0,0 +1,14 @@ +Test whether a more complex set of transitive strict negative colocation rules, +i.e. there's negative colocation relations a->b, b->c and a->c, in conjunction +with the static load scheduler with auto-rebalancing are applied correctly on +service start and in case of a subsequent failover. + +The test scenario is: +- Essentially, all 10 strict negative colocation rules say that, vm:101, + vm:102, vm:103, vm:104, and vm:105 must be kept together + +Therefore, the expected outcome is: +- vm:101, vm:102, and vm:103 should be started on node1, node2, node3, node4, + and node5 respectively, just as if the 10 negative colocation rule would've + been stated in a single negative colocation rule +- As node1 and node5 fails, vm:101 and vm:105 cannot be recovered diff --git a/src/test/test-crs-static-rebalance-coloc3/cmdlist b/src/test/test-crs-static-rebalance-coloc3/cmdlist new file mode 100644 index 0000000..a3d806d --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/cmdlist @@ -0,0 +1,4 @@ +[ + [ "power node1 on", "power node2 on", "power node3 on", "power node4 on", "power node5 on" ], + [ "network node1 off", "network node5 off" ] +] diff --git a/src/test/test-crs-static-rebalance-coloc3/datacenter.cfg b/src/test/test-crs-static-rebalance-coloc3/datacenter.cfg new file mode 100644 index 0000000..f2671a5 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/datacenter.cfg @@ -0,0 +1,6 @@ +{ + "crs": { + "ha": "static", + "ha-rebalance-on-start": 1 + } +} diff --git a/src/test/test-crs-static-rebalance-coloc3/hardware_status b/src/test/test-crs-static-rebalance-coloc3/hardware_status new file mode 100644 index 0000000..511afb9 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/hardware_status @@ -0,0 +1,7 @@ +{ + "node1": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 }, + "node2": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 }, + "node3": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 }, + "node4": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 }, + "node5": { "power": "off", "network": "off", "cpus": 8, "memory": 112000000000 } +} diff --git a/src/test/test-crs-static-rebalance-coloc3/log.expect b/src/test/test-crs-static-rebalance-coloc3/log.expect new file mode 100644 index 0000000..ed36dbe --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/log.expect @@ -0,0 +1,156 @@ +info 0 hardware: starting simulation +info 20 cmdlist: execute power node1 on +info 20 node1/crm: status change startup => wait_for_quorum +info 20 node1/lrm: status change startup => wait_for_agent_lock +info 20 cmdlist: execute power node2 on +info 20 node2/crm: status change startup => wait_for_quorum +info 20 node2/lrm: status change startup => wait_for_agent_lock +info 20 cmdlist: execute power node3 on +info 20 node3/crm: status change startup => wait_for_quorum +info 20 node3/lrm: status change startup => wait_for_agent_lock +info 20 cmdlist: execute power node4 on +info 20 node4/crm: status change startup => wait_for_quorum +info 20 node4/lrm: status change startup => wait_for_agent_lock +info 20 cmdlist: execute power node5 on +info 20 node5/crm: status change startup => wait_for_quorum +info 20 node5/lrm: status change 
startup => wait_for_agent_lock +info 20 node1/crm: got lock 'ha_manager_lock' +info 20 node1/crm: status change wait_for_quorum => master +info 20 node1/crm: using scheduler mode 'static' +info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online' +info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online' +info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online' +info 20 node1/crm: node 'node4': state changed from 'unknown' => 'online' +info 20 node1/crm: node 'node5': state changed from 'unknown' => 'online' +info 20 node1/crm: adding new service 'vm:101' on node 'node1' +info 20 node1/crm: adding new service 'vm:102' on node 'node1' +info 20 node1/crm: adding new service 'vm:103' on node 'node1' +info 20 node1/crm: adding new service 'vm:104' on node 'node1' +info 20 node1/crm: adding new service 'vm:105' on node 'node1' +info 20 node1/crm: service vm:101: re-balance selected current node node1 for startup +info 20 node1/crm: service 'vm:101': state changed from 'request_start' to 'started' (node = node1) +info 20 node1/crm: service vm:102: re-balance selected new node node2 for startup +info 20 node1/crm: service 'vm:102': state changed from 'request_start' to 'request_start_balance' (node = node1, target = node2) +info 20 node1/crm: service vm:103: re-balance selected new node node3 for startup +info 20 node1/crm: service 'vm:103': state changed from 'request_start' to 'request_start_balance' (node = node1, target = node3) +info 20 node1/crm: service vm:104: re-balance selected new node node4 for startup +info 20 node1/crm: service 'vm:104': state changed from 'request_start' to 'request_start_balance' (node = node1, target = node4) +info 20 node1/crm: service vm:105: re-balance selected new node node5 for startup +info 20 node1/crm: service 'vm:105': state changed from 'request_start' to 'request_start_balance' (node = node1, target = node5) +info 21 node1/lrm: got lock 'ha_agent_node1_lock' +info 21 node1/lrm: status change wait_for_agent_lock => active +info 21 node1/lrm: starting service vm:101 +info 21 node1/lrm: service status vm:101 started +info 21 node1/lrm: service vm:102 - start relocate to node 'node2' +info 21 node1/lrm: service vm:102 - end relocate to node 'node2' +info 21 node1/lrm: service vm:103 - start relocate to node 'node3' +info 21 node1/lrm: service vm:103 - end relocate to node 'node3' +info 21 node1/lrm: service vm:104 - start relocate to node 'node4' +info 21 node1/lrm: service vm:104 - end relocate to node 'node4' +info 21 node1/lrm: service vm:105 - start relocate to node 'node5' +info 21 node1/lrm: service vm:105 - end relocate to node 'node5' +info 22 node2/crm: status change wait_for_quorum => slave +info 24 node3/crm: status change wait_for_quorum => slave +info 26 node4/crm: status change wait_for_quorum => slave +info 28 node5/crm: status change wait_for_quorum => slave +info 40 node1/crm: service 'vm:102': state changed from 'request_start_balance' to 'started' (node = node2) +info 40 node1/crm: service 'vm:103': state changed from 'request_start_balance' to 'started' (node = node3) +info 40 node1/crm: service 'vm:104': state changed from 'request_start_balance' to 'started' (node = node4) +info 40 node1/crm: service 'vm:105': state changed from 'request_start_balance' to 'started' (node = node5) +info 43 node2/lrm: got lock 'ha_agent_node2_lock' +info 43 node2/lrm: status change wait_for_agent_lock => active +info 43 node2/lrm: starting service vm:102 +info 43 node2/lrm: service status vm:102 started 
+info 45 node3/lrm: got lock 'ha_agent_node3_lock' +info 45 node3/lrm: status change wait_for_agent_lock => active +info 45 node3/lrm: starting service vm:103 +info 45 node3/lrm: service status vm:103 started +info 47 node4/lrm: got lock 'ha_agent_node4_lock' +info 47 node4/lrm: status change wait_for_agent_lock => active +info 47 node4/lrm: starting service vm:104 +info 47 node4/lrm: service status vm:104 started +info 49 node5/lrm: got lock 'ha_agent_node5_lock' +info 49 node5/lrm: status change wait_for_agent_lock => active +info 49 node5/lrm: starting service vm:105 +info 49 node5/lrm: service status vm:105 started +info 120 cmdlist: execute network node1 off +info 120 cmdlist: execute network node5 off +info 120 node1/crm: status change master => lost_manager_lock +info 120 node1/crm: status change lost_manager_lock => wait_for_quorum +info 121 node1/lrm: status change active => lost_agent_lock +info 128 node5/crm: status change slave => wait_for_quorum +info 129 node5/lrm: status change active => lost_agent_lock +info 162 watchdog: execute power node1 off +info 161 node1/crm: killed by poweroff +info 162 node1/lrm: killed by poweroff +info 162 hardware: server 'node1' stopped by poweroff (watchdog) +info 170 watchdog: execute power node5 off +info 169 node5/crm: killed by poweroff +info 170 node5/lrm: killed by poweroff +info 170 hardware: server 'node5' stopped by poweroff (watchdog) +info 222 node3/crm: got lock 'ha_manager_lock' +info 222 node3/crm: status change slave => master +info 222 node3/crm: using scheduler mode 'static' +info 222 node3/crm: node 'node1': state changed from 'online' => 'unknown' +info 222 node3/crm: node 'node5': state changed from 'online' => 'unknown' +info 282 node3/crm: service 'vm:101': state changed from 'started' to 'fence' +info 282 node3/crm: service 'vm:105': state changed from 'started' to 'fence' +info 282 node3/crm: node 'node1': state changed from 'unknown' => 'fence' +emai 282 node3/crm: FENCE: Try to fence node 'node1' +info 282 node3/crm: got lock 'ha_agent_node1_lock' +info 282 node3/crm: fencing: acknowledged - got agent lock for node 'node1' +info 282 node3/crm: node 'node1': state changed from 'fence' => 'unknown' +emai 282 node3/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node1' +info 282 node3/crm: service 'vm:101': state changed from 'fence' to 'recovery' +info 282 node3/crm: node 'node5': state changed from 'unknown' => 'fence' +emai 282 node3/crm: FENCE: Try to fence node 'node5' +info 282 node3/crm: got lock 'ha_agent_node5_lock' +info 282 node3/crm: fencing: acknowledged - got agent lock for node 'node5' +info 282 node3/crm: node 'node5': state changed from 'fence' => 'unknown' +emai 282 node3/crm: SUCCEED: fencing: acknowledged - got agent lock for node 'node5' +info 282 node3/crm: service 'vm:105': state changed from 'fence' to 'recovery' +err 282 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 282 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 302 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 302 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 322 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 322 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 342 node3/crm: recovering service 'vm:101' from 
fenced node 'node1' failed, no recovery node found +err 342 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 362 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 362 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 382 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 382 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 402 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 402 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 422 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 422 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 442 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 442 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 462 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 462 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 482 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 482 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 502 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 502 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 522 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 522 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 542 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 542 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 562 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 562 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 582 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 582 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 602 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 602 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 622 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 622 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 642 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 642 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 662 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 662 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 682 node3/crm: recovering service 'vm:101' from fenced node 
'node1' failed, no recovery node found +err 682 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +err 702 node3/crm: recovering service 'vm:101' from fenced node 'node1' failed, no recovery node found +err 702 node3/crm: recovering service 'vm:105' from fenced node 'node5' failed, no recovery node found +info 720 hardware: exit simulation - done diff --git a/src/test/test-crs-static-rebalance-coloc3/manager_status b/src/test/test-crs-static-rebalance-coloc3/manager_status new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/manager_status @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/src/test/test-crs-static-rebalance-coloc3/rules_config b/src/test/test-crs-static-rebalance-coloc3/rules_config new file mode 100644 index 0000000..6047eff --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/rules_config @@ -0,0 +1,49 @@ +colocation: very-lonely-service1 + services vm:101,vm:102 + affinity separate + strict 1 + +colocation: very-lonely-service2 + services vm:102,vm:103 + affinity separate + strict 1 + +colocation: very-lonely-service3 + services vm:103,vm:104 + affinity separate + strict 1 + +colocation: very-lonely-service4 + services vm:104,vm:105 + affinity separate + strict 1 + +colocation: very-lonely-service5 + services vm:101,vm:103 + affinity separate + strict 1 + +colocation: very-lonely-service6 + services vm:101,vm:104 + affinity separate + strict 1 + +colocation: very-lonely-service7 + services vm:101,vm:105 + affinity separate + strict 1 + +colocation: very-lonely-service8 + services vm:102,vm:104 + affinity separate + strict 1 + +colocation: very-lonely-service9 + services vm:102,vm:105 + affinity separate + strict 1 + +colocation: very-lonely-service10 + services vm:103,vm:105 + affinity separate + strict 1 diff --git a/src/test/test-crs-static-rebalance-coloc3/service_config b/src/test/test-crs-static-rebalance-coloc3/service_config new file mode 100644 index 0000000..a1d61f5 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/service_config @@ -0,0 +1,7 @@ +{ + "vm:101": { "node": "node1", "state": "started" }, + "vm:102": { "node": "node1", "state": "started" }, + "vm:103": { "node": "node1", "state": "started" }, + "vm:104": { "node": "node1", "state": "started" }, + "vm:105": { "node": "node1", "state": "started" } +} diff --git a/src/test/test-crs-static-rebalance-coloc3/static_service_stats b/src/test/test-crs-static-rebalance-coloc3/static_service_stats new file mode 100644 index 0000000..d9dc9e7 --- /dev/null +++ b/src/test/test-crs-static-rebalance-coloc3/static_service_stats @@ -0,0 +1,5 @@ +{ + "vm:101": { "maxcpu": 8, "maxmem": 16000000000 }, + "vm:102": { "maxcpu": 4, "maxmem": 24000000000 }, + "vm:103": { "maxcpu": 2, "maxmem": 32000000000 } +} -- 2.39.5 _______________________________________________ pve-devel mailing list pve-devel@lists.proxmox.com https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
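As a side note for reviewers who want to sanity-check the expected placements without running the HA simulator: the short Python sketch below models the combination of strict colocation filtering and static utilization scoring that these test cases exercise. It is a rough, hypothetical model, not the Perl implementation in pve-ha-manager, and the memory-before-CPU ranking in particular is a simplification that merely happens to reproduce the coloc1 placements; node sizes, service stats, and rules are copied from the coloc1 fixtures above.

#!/usr/bin/env python3
# Rough, hypothetical model of the placement decisions exercised by the
# coloc1 test above: greedy static-utilization scoring combined with strict
# colocation rules. This is NOT the pve-ha-manager Perl implementation;
# it only illustrates why the README's expected placements are plausible.

nodes = {  # cpus / memory as in hardware_status
    "node1": {"cpus": 8, "mem": 112e9},
    "node2": {"cpus": 8, "mem": 112e9},
    "node3": {"cpus": 8, "mem": 112e9},
}

services = {  # (maxcpu, maxmem) as in static_service_stats
    "vm:101": (8, 16e9), "vm:102": (4, 24e9), "vm:103": (2, 32e9),
    "vm:104": (4, 48e9), "vm:105": (8, 16e9), "vm:106": (4, 32e9),
    "vm:107": (2, 64e9), "vm:108": (8, 48e9),
}

# rules_config of coloc1, expressed as sets
together = [{"vm:102", "vm:107"}, {"vm:104", "vm:106", "vm:108"}]
separate = [{"vm:103", "vm:104", "vm:105"},
            {"vm:103", "vm:106", "vm:107"},
            {"vm:107", "vm:108"}]

placement = {}                            # sid -> chosen node
booked = {n: [0.0, 0.0] for n in nodes}   # node -> [cpu, mem] placed so far


def allowed(sid, node):
    """Strict rules only: reject nodes that would violate a colocation rule
    with respect to the services placed so far."""
    for grp in separate:
        if sid in grp and any(placement.get(o) == node for o in grp - {sid}):
            return False
    for grp in together:
        if sid in grp and any(placement[o] != node
                              for o in grp - {sid} if o in placement):
            return False
    return True


def score(sid, node):
    """Relative (memory, cpu) usage after adding the service; lower is better.
    Ranking memory before CPU is a simplification that happens to reproduce
    the coloc1 outcome; the real scheduler uses a more elaborate scoring."""
    cpu, mem = services[sid]
    return ((booked[node][1] + mem) / nodes[node]["mem"],
            (booked[node][0] + cpu) / nodes[node]["cpus"])


for sid in sorted(services):              # vm:101 .. vm:108
    node = min((n for n in nodes if allowed(sid, n)),
               key=lambda n: score(sid, n))
    placement[sid] = node
    booked[node][0] += services[sid][0]
    booked[node][1] += services[sid][1]

for sid, node in sorted(placement.items()):
    print(f"{sid} -> {node}")
# Matches the coloc1 README/log.expect:
#   vm:101, vm:104, vm:106, vm:108 -> node1
#   vm:102, vm:105, vm:107 -> node2
#   vm:103 -> node3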