Do not execute a manual user migration of an HA resource to a target node on which it is not allowed to run according to the strict node affinity rule it is part of.
This prevents users from moving an HA resource to a node from which it would immediately be migrated back to an allowed member node of the strict node affinity rule, which just wastes time and resources. This new information is only reported to the ha_manager's CLI stdout/stderr and the HA Manager node's syslog, respectively, so other user-facing endpoints need to implement this logic as well to give users adequate feedback on why migrations are not executed. Signed-off-by: Daniel Kral <[email protected]> --- src/PVE/API2/HA/Resources.pm | 4 +-- src/PVE/CLI/ha_manager.pm | 14 +++++----- src/PVE/HA/Helpers.pm | 13 ++++++++- src/PVE/HA/Manager.pm | 7 +++-- .../test-node-affinity-strict1/log.expect | 16 +---------- .../test-node-affinity-strict2/log.expect | 16 +---------- .../test-node-affinity-strict7/log.expect | 28 ++----------------- src/test/test-recovery4/log.expect | 2 +- 8 files changed, 31 insertions(+), 69 deletions(-) diff --git a/src/PVE/API2/HA/Resources.pm b/src/PVE/API2/HA/Resources.pm index b95c0e1f..51784935 100644 --- a/src/PVE/API2/HA/Resources.pm +++ b/src/PVE/API2/HA/Resources.pm @@ -377,7 +377,7 @@ __PACKAGE__->register_method({ type => 'string', description => "The reason why the HA resource is" . " blocking the migration.", - enum => ['resource-affinity'], + enum => ['node-affinity', 'resource-affinity'], }, }, }, @@ -479,7 +479,7 @@ __PACKAGE__->register_method({ type => 'string', description => "The reason why the HA resource is" . 
" blocking the relocation.", - enum => ['resource-affinity'], + enum => ['node-affinity', 'resource-affinity'], }, }, }, diff --git a/src/PVE/CLI/ha_manager.pm b/src/PVE/CLI/ha_manager.pm index bccb4438..5c6cee02 100644 --- a/src/PVE/CLI/ha_manager.pm +++ b/src/PVE/CLI/ha_manager.pm @@ -160,15 +160,15 @@ my $print_resource_motion_output = sub { my $err_msg = "cannot $cmd resource '$sid' to node '$req_node':\n\n"; for my $blocking_resource (@$blocking_resources) { - my ($csid, $cause) = $blocking_resource->@{qw(sid cause)}; + my $cause = $blocking_resource->{cause}; - $err_msg .= "- resource '$csid' on target node '$req_node'"; - - if ($cause eq 'resource-affinity') { - $err_msg .= " in negative affinity with resource '$sid'"; + if ($cause eq 'node-affinity') { + $err_msg .= "- resource '$sid' not allowed on target node '$req_node'\n"; + } elsif ($cause eq 'resource-affinity') { + my $csid = $blocking_resource->{sid}; + $err_msg .= "- resource '$csid' on target node '$req_node'" + . " in negative affinity with resource '$sid'\n"; } - - $err_msg .= "\n"; } die $err_msg; diff --git a/src/PVE/HA/Helpers.pm b/src/PVE/HA/Helpers.pm index 09300cd4..b160c541 100644 --- a/src/PVE/HA/Helpers.pm +++ b/src/PVE/HA/Helpers.pm @@ -2,6 +2,7 @@ package PVE::HA::Helpers; use v5.36; +use PVE::HA::Rules::NodeAffinity qw(get_node_affinity); use PVE::HA::Rules::ResourceAffinity qw(get_affinitive_resources); =head3 get_resource_motion_info @@ -21,7 +22,9 @@ sub get_resource_motion_info($ss, $sid, $online_nodes, $compiled_rules) { my $dependent_resources = []; my $blocking_resources_by_node = {}; - my $resource_affinity = $compiled_rules->{'resource-affinity'}; + my ($node_affinity, $resource_affinity) = + $compiled_rules->@{qw(node-affinity resource-affinity)}; + my ($allowed_nodes) = get_node_affinity($node_affinity, $sid, $online_nodes); my ($together, $separate) = get_affinitive_resources($resource_affinity, $sid); for my $csid (sort keys %$together) { @@ -32,6 +35,14 @@ sub 
get_resource_motion_info($ss, $sid, $online_nodes, $compiled_rules) { } for my $node (keys %$online_nodes) { + if (!$allowed_nodes->{$node}) { + push $blocking_resources_by_node->{$node}->@*, + { + sid => $sid, + cause => 'node-affinity', + }; + } + for my $csid (sort keys %$separate) { next if !defined($ss->{$csid}); next if $ss->{$csid}->{state} eq 'ignored'; diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm index 470df92c..d1ff9615 100644 --- a/src/PVE/HA/Manager.pm +++ b/src/PVE/HA/Manager.pm @@ -398,9 +398,12 @@ sub execute_migration { if (my $blocking_resources = $blocking_resources_by_node->{$target}) { for my $blocking_resource (@$blocking_resources) { my $err_msg = "unknown migration blocker reason"; - my ($csid, $cause) = $blocking_resource->@{qw(sid cause)}; + my $cause = $blocking_resource->{cause}; - if ($cause eq 'resource-affinity') { + if ($cause eq 'node-affinity') { + $err_msg = "service '$sid' is not allowed on node '$target'"; + } elsif ($cause eq 'resource-affinity') { + my $csid = $blocking_resource->{sid}; $err_msg = "service '$csid' on node '$target' in negative" . 
" affinity with service '$sid'"; } diff --git a/src/test/test-node-affinity-strict1/log.expect b/src/test/test-node-affinity-strict1/log.expect index d86c69de..ca2c40b3 100644 --- a/src/test/test-node-affinity-strict1/log.expect +++ b/src/test/test-node-affinity-strict1/log.expect @@ -22,19 +22,5 @@ info 25 node3/lrm: status change wait_for_agent_lock => active info 25 node3/lrm: starting service vm:101 info 25 node3/lrm: service status vm:101 started info 120 cmdlist: execute service vm:101 migrate node2 -info 120 node1/crm: got crm command: migrate vm:101 node2 -info 120 node1/crm: migrate service 'vm:101' to node 'node2' -info 120 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node2) -info 123 node2/lrm: got lock 'ha_agent_node2_lock' -info 123 node2/lrm: status change wait_for_agent_lock => active -info 125 node3/lrm: service vm:101 - start migrate to node 'node2' -info 125 node3/lrm: service vm:101 - end migrate to node 'node2' -info 140 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node2) -info 140 node1/crm: migrate service 'vm:101' to node 'node3' (running) -info 140 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node2, target = node3) -info 143 node2/lrm: service vm:101 - start migrate to node 'node3' -info 143 node2/lrm: service vm:101 - end migrate to node 'node3' -info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3) -info 165 node3/lrm: starting service vm:101 -info 165 node3/lrm: service status vm:101 started +err 120 node1/crm: crm command 'migrate vm:101 node2' error - service 'vm:101' is not allowed on node 'node2' info 720 hardware: exit simulation - done diff --git a/src/test/test-node-affinity-strict2/log.expect b/src/test/test-node-affinity-strict2/log.expect index d86c69de..ca2c40b3 100644 --- a/src/test/test-node-affinity-strict2/log.expect +++ 
b/src/test/test-node-affinity-strict2/log.expect @@ -22,19 +22,5 @@ info 25 node3/lrm: status change wait_for_agent_lock => active info 25 node3/lrm: starting service vm:101 info 25 node3/lrm: service status vm:101 started info 120 cmdlist: execute service vm:101 migrate node2 -info 120 node1/crm: got crm command: migrate vm:101 node2 -info 120 node1/crm: migrate service 'vm:101' to node 'node2' -info 120 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node2) -info 123 node2/lrm: got lock 'ha_agent_node2_lock' -info 123 node2/lrm: status change wait_for_agent_lock => active -info 125 node3/lrm: service vm:101 - start migrate to node 'node2' -info 125 node3/lrm: service vm:101 - end migrate to node 'node2' -info 140 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node2) -info 140 node1/crm: migrate service 'vm:101' to node 'node3' (running) -info 140 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node2, target = node3) -info 143 node2/lrm: service vm:101 - start migrate to node 'node3' -info 143 node2/lrm: service vm:101 - end migrate to node 'node3' -info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3) -info 165 node3/lrm: starting service vm:101 -info 165 node3/lrm: service status vm:101 started +err 120 node1/crm: crm command 'migrate vm:101 node2' error - service 'vm:101' is not allowed on node 'node2' info 720 hardware: exit simulation - done diff --git a/src/test/test-node-affinity-strict7/log.expect b/src/test/test-node-affinity-strict7/log.expect index cbe9f323..9c4e9f0b 100644 --- a/src/test/test-node-affinity-strict7/log.expect +++ b/src/test/test-node-affinity-strict7/log.expect @@ -44,35 +44,11 @@ info 160 node1/crm: service 'vm:101': state changed from 'migrate' to 'sta info 165 node3/lrm: starting service vm:101 info 165 node3/lrm: service status vm:101 started info 220 cmdlist: execute service vm:101 
migrate node2 -info 220 node1/crm: got crm command: migrate vm:101 node2 -info 220 node1/crm: migrate service 'vm:101' to node 'node2' -info 220 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node3, target = node2) -info 225 node3/lrm: service vm:101 - start migrate to node 'node2' -info 225 node3/lrm: service vm:101 - end migrate to node 'node2' -info 240 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node2) -info 240 node1/crm: migrate service 'vm:101' to node 'node3' (running) -info 240 node1/crm: service 'vm:101': state changed from 'started' to 'migrate' (node = node2, target = node3) -info 243 node2/lrm: service vm:101 - start migrate to node 'node3' -info 243 node2/lrm: service vm:101 - end migrate to node 'node3' -info 260 node1/crm: service 'vm:101': state changed from 'migrate' to 'started' (node = node3) -info 265 node3/lrm: starting service vm:101 -info 265 node3/lrm: service status vm:101 started +err 220 node1/crm: crm command 'migrate vm:101 node2' error - service 'vm:101' is not allowed on node 'node2' info 320 cmdlist: execute service vm:101 migrate node3 info 320 node1/crm: ignore crm command - service already on target node: migrate vm:101 node3 info 420 cmdlist: execute service vm:102 migrate node3 -info 420 node1/crm: got crm command: migrate vm:102 node3 -info 420 node1/crm: migrate service 'vm:102' to node 'node3' -info 420 node1/crm: service 'vm:102': state changed from 'started' to 'migrate' (node = node2, target = node3) -info 423 node2/lrm: service vm:102 - start migrate to node 'node3' -info 423 node2/lrm: service vm:102 - end migrate to node 'node3' -info 440 node1/crm: service 'vm:102': state changed from 'migrate' to 'started' (node = node3) -info 440 node1/crm: migrate service 'vm:102' to node 'node2' (running) -info 440 node1/crm: service 'vm:102': state changed from 'started' to 'migrate' (node = node3, target = node2) -info 445 node3/lrm: service vm:102 - start 
migrate to node 'node2' -info 445 node3/lrm: service vm:102 - end migrate to node 'node2' -info 460 node1/crm: service 'vm:102': state changed from 'migrate' to 'started' (node = node2) -info 463 node2/lrm: starting service vm:102 -info 463 node2/lrm: service status vm:102 started +err 420 node1/crm: crm command 'migrate vm:102 node3' error - service 'vm:102' is not allowed on node 'node3' info 520 cmdlist: execute service vm:102 migrate node2 info 520 node1/crm: ignore crm command - service already on target node: migrate vm:102 node2 info 620 cmdlist: execute service vm:102 migrate node1 diff --git a/src/test/test-recovery4/log.expect b/src/test/test-recovery4/log.expect index 12983b5f..684c796b 100644 --- a/src/test/test-recovery4/log.expect +++ b/src/test/test-recovery4/log.expect @@ -43,7 +43,7 @@ err 260 node1/crm: recovering service 'vm:102' from fenced node 'node2' f err 280 node1/crm: recovering service 'vm:102' from fenced node 'node2' failed, no recovery node found err 300 node1/crm: recovering service 'vm:102' from fenced node 'node2' failed, no recovery node found info 320 cmdlist: execute service vm:102 migrate node3 -info 320 node1/crm: got crm command: migrate vm:102 node3 +err 320 node1/crm: crm command 'migrate vm:102 node3' error - service 'vm:102' is not allowed on node 'node3' err 320 node1/crm: recovering service 'vm:102' from fenced node 'node2' failed, no recovery node found err 340 node1/crm: recovering service 'vm:102' from fenced node 'node2' failed, no recovery node found err 360 node1/crm: recovering service 'vm:102' from fenced node 'node2' failed, no recovery node found -- 2.47.3 _______________________________________________ pve-devel mailing list [email protected] https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
