Do not execute a manual user migration of an HA resource to a target
node on which the resource is not allowed according to the strict node
affinity rule it is part of.

This prevents users from moving an HA resource to a node from which it
would immediately be migrated back to an allowed member node of the
strict node affinity rule, which just wastes time and resources.

This new information is only redirected to the ha_manager's CLI
stdout/stderr and the HA Manager node's syslog respectively, so other
user-facing endpoints need to implement this logic as well to give
users adequate feedback on why migrations are not executed.

Signed-off-by: Daniel Kral <[email protected]>
---
 src/PVE/API2/HA/Resources.pm                  |  4 +--
 src/PVE/CLI/ha_manager.pm                     | 14 +++++-----
 src/PVE/HA/Helpers.pm                         | 13 ++++++++-
 src/PVE/HA/Manager.pm                         |  7 +++--
 .../test-node-affinity-strict1/log.expect     | 16 +----------
 .../test-node-affinity-strict2/log.expect     | 16 +----------
 .../test-node-affinity-strict7/log.expect     | 28 ++-----------------
 src/test/test-recovery4/log.expect            |  2 +-
 8 files changed, 31 insertions(+), 69 deletions(-)

diff --git a/src/PVE/API2/HA/Resources.pm b/src/PVE/API2/HA/Resources.pm
index b95c0e1f..51784935 100644
--- a/src/PVE/API2/HA/Resources.pm
+++ b/src/PVE/API2/HA/Resources.pm
@@ -377,7 +377,7 @@ __PACKAGE__->register_method({
                             type => 'string',
                             description => "The reason why the HA resource is"
                                 . " blocking the migration.",
-                            enum => ['resource-affinity'],
+                            enum => ['node-affinity', 'resource-affinity'],
                         },
                     },
                 },
@@ -479,7 +479,7 @@ __PACKAGE__->register_method({
                             type => 'string',
                             description => "The reason why the HA resource is"
                                 . " blocking the relocation.",
-                            enum => ['resource-affinity'],
+                            enum => ['node-affinity', 'resource-affinity'],
                         },
                     },
                 },
diff --git a/src/PVE/CLI/ha_manager.pm b/src/PVE/CLI/ha_manager.pm
index bccb4438..5c6cee02 100644
--- a/src/PVE/CLI/ha_manager.pm
+++ b/src/PVE/CLI/ha_manager.pm
@@ -160,15 +160,15 @@ my $print_resource_motion_output = sub {
             my $err_msg = "cannot $cmd resource '$sid' to node 
'$req_node':\n\n";
 
             for my $blocking_resource (@$blocking_resources) {
-                my ($csid, $cause) = $blocking_resource->@{qw(sid cause)};
+                my $cause = $blocking_resource->{cause};
 
-                $err_msg .= "- resource '$csid' on target node '$req_node'";
-
-                if ($cause eq 'resource-affinity') {
-                    $err_msg .= " in negative affinity with resource '$sid'";
+                if ($cause eq 'node-affinity') {
+                    $err_msg .= "- resource '$sid' not allowed on target node 
'$req_node'\n";
+                } elsif ($cause eq 'resource-affinity') {
+                    my $csid = $blocking_resource->{sid};
+                    $err_msg .= "- resource '$csid' on target node '$req_node'"
+                        . " in negative affinity with resource '$sid'\n";
                 }
-
-                $err_msg .= "\n";
             }
 
             die $err_msg;
diff --git a/src/PVE/HA/Helpers.pm b/src/PVE/HA/Helpers.pm
index 09300cd4..b160c541 100644
--- a/src/PVE/HA/Helpers.pm
+++ b/src/PVE/HA/Helpers.pm
@@ -2,6 +2,7 @@ package PVE::HA::Helpers;
 
 use v5.36;
 
+use PVE::HA::Rules::NodeAffinity qw(get_node_affinity);
 use PVE::HA::Rules::ResourceAffinity qw(get_affinitive_resources);
 
 =head3 get_resource_motion_info
@@ -21,7 +22,9 @@ sub get_resource_motion_info($ss, $sid, $online_nodes, 
$compiled_rules) {
     my $dependent_resources = [];
     my $blocking_resources_by_node = {};
 
-    my $resource_affinity = $compiled_rules->{'resource-affinity'};
+    my ($node_affinity, $resource_affinity) =
+        $compiled_rules->@{qw(node-affinity resource-affinity)};
+    my ($allowed_nodes) = get_node_affinity($node_affinity, $sid, 
$online_nodes);
     my ($together, $separate) = get_affinitive_resources($resource_affinity, 
$sid);
 
     for my $csid (sort keys %$together) {
@@ -32,6 +35,14 @@ sub get_resource_motion_info($ss, $sid, $online_nodes, 
$compiled_rules) {
     }
 
     for my $node (keys %$online_nodes) {
+        if (!$allowed_nodes->{$node}) {
+            push $blocking_resources_by_node->{$node}->@*,
+                {
+                    sid => $sid,
+                    cause => 'node-affinity',
+                };
+        }
+
         for my $csid (sort keys %$separate) {
             next if !defined($ss->{$csid});
             next if $ss->{$csid}->{state} eq 'ignored';
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 470df92c..d1ff9615 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -398,9 +398,12 @@ sub execute_migration {
     if (my $blocking_resources = $blocking_resources_by_node->{$target}) {
         for my $blocking_resource (@$blocking_resources) {
             my $err_msg = "unknown migration blocker reason";
-            my ($csid, $cause) = $blocking_resource->@{qw(sid cause)};
+            my $cause = $blocking_resource->{cause};
 
-            if ($cause eq 'resource-affinity') {
+            if ($cause eq 'node-affinity') {
+                $err_msg = "service '$sid' is not allowed on node '$target'";
+            } elsif ($cause eq 'resource-affinity') {
+                my $csid = $blocking_resource->{sid};
                 $err_msg = "service '$csid' on node '$target' in negative"
                     . " affinity with service '$sid'";
             }
diff --git a/src/test/test-node-affinity-strict1/log.expect 
b/src/test/test-node-affinity-strict1/log.expect
index d86c69de..ca2c40b3 100644
--- a/src/test/test-node-affinity-strict1/log.expect
+++ b/src/test/test-node-affinity-strict1/log.expect
@@ -22,19 +22,5 @@ info     25    node3/lrm: status change wait_for_agent_lock 
=> active
 info     25    node3/lrm: starting service vm:101
 info     25    node3/lrm: service status vm:101 started
 info    120      cmdlist: execute service vm:101 migrate node2
-info    120    node1/crm: got crm command: migrate vm:101 node2
-info    120    node1/crm: migrate service 'vm:101' to node 'node2'
-info    120    node1/crm: service 'vm:101': state changed from 'started' to 
'migrate'  (node = node3, target = node2)
-info    123    node2/lrm: got lock 'ha_agent_node2_lock'
-info    123    node2/lrm: status change wait_for_agent_lock => active
-info    125    node3/lrm: service vm:101 - start migrate to node 'node2'
-info    125    node3/lrm: service vm:101 - end migrate to node 'node2'
-info    140    node1/crm: service 'vm:101': state changed from 'migrate' to 
'started'  (node = node2)
-info    140    node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info    140    node1/crm: service 'vm:101': state changed from 'started' to 
'migrate'  (node = node2, target = node3)
-info    143    node2/lrm: service vm:101 - start migrate to node 'node3'
-info    143    node2/lrm: service vm:101 - end migrate to node 'node3'
-info    160    node1/crm: service 'vm:101': state changed from 'migrate' to 
'started'  (node = node3)
-info    165    node3/lrm: starting service vm:101
-info    165    node3/lrm: service status vm:101 started
+err     120    node1/crm: crm command 'migrate vm:101 node2' error - service 
'vm:101' is not allowed on node 'node2'
 info    720     hardware: exit simulation - done
diff --git a/src/test/test-node-affinity-strict2/log.expect 
b/src/test/test-node-affinity-strict2/log.expect
index d86c69de..ca2c40b3 100644
--- a/src/test/test-node-affinity-strict2/log.expect
+++ b/src/test/test-node-affinity-strict2/log.expect
@@ -22,19 +22,5 @@ info     25    node3/lrm: status change wait_for_agent_lock 
=> active
 info     25    node3/lrm: starting service vm:101
 info     25    node3/lrm: service status vm:101 started
 info    120      cmdlist: execute service vm:101 migrate node2
-info    120    node1/crm: got crm command: migrate vm:101 node2
-info    120    node1/crm: migrate service 'vm:101' to node 'node2'
-info    120    node1/crm: service 'vm:101': state changed from 'started' to 
'migrate'  (node = node3, target = node2)
-info    123    node2/lrm: got lock 'ha_agent_node2_lock'
-info    123    node2/lrm: status change wait_for_agent_lock => active
-info    125    node3/lrm: service vm:101 - start migrate to node 'node2'
-info    125    node3/lrm: service vm:101 - end migrate to node 'node2'
-info    140    node1/crm: service 'vm:101': state changed from 'migrate' to 
'started'  (node = node2)
-info    140    node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info    140    node1/crm: service 'vm:101': state changed from 'started' to 
'migrate'  (node = node2, target = node3)
-info    143    node2/lrm: service vm:101 - start migrate to node 'node3'
-info    143    node2/lrm: service vm:101 - end migrate to node 'node3'
-info    160    node1/crm: service 'vm:101': state changed from 'migrate' to 
'started'  (node = node3)
-info    165    node3/lrm: starting service vm:101
-info    165    node3/lrm: service status vm:101 started
+err     120    node1/crm: crm command 'migrate vm:101 node2' error - service 
'vm:101' is not allowed on node 'node2'
 info    720     hardware: exit simulation - done
diff --git a/src/test/test-node-affinity-strict7/log.expect 
b/src/test/test-node-affinity-strict7/log.expect
index cbe9f323..9c4e9f0b 100644
--- a/src/test/test-node-affinity-strict7/log.expect
+++ b/src/test/test-node-affinity-strict7/log.expect
@@ -44,35 +44,11 @@ info    160    node1/crm: service 'vm:101': state changed 
from 'migrate' to 'sta
 info    165    node3/lrm: starting service vm:101
 info    165    node3/lrm: service status vm:101 started
 info    220      cmdlist: execute service vm:101 migrate node2
-info    220    node1/crm: got crm command: migrate vm:101 node2
-info    220    node1/crm: migrate service 'vm:101' to node 'node2'
-info    220    node1/crm: service 'vm:101': state changed from 'started' to 
'migrate'  (node = node3, target = node2)
-info    225    node3/lrm: service vm:101 - start migrate to node 'node2'
-info    225    node3/lrm: service vm:101 - end migrate to node 'node2'
-info    240    node1/crm: service 'vm:101': state changed from 'migrate' to 
'started'  (node = node2)
-info    240    node1/crm: migrate service 'vm:101' to node 'node3' (running)
-info    240    node1/crm: service 'vm:101': state changed from 'started' to 
'migrate'  (node = node2, target = node3)
-info    243    node2/lrm: service vm:101 - start migrate to node 'node3'
-info    243    node2/lrm: service vm:101 - end migrate to node 'node3'
-info    260    node1/crm: service 'vm:101': state changed from 'migrate' to 
'started'  (node = node3)
-info    265    node3/lrm: starting service vm:101
-info    265    node3/lrm: service status vm:101 started
+err     220    node1/crm: crm command 'migrate vm:101 node2' error - service 
'vm:101' is not allowed on node 'node2'
 info    320      cmdlist: execute service vm:101 migrate node3
 info    320    node1/crm: ignore crm command - service already on target node: 
migrate vm:101 node3
 info    420      cmdlist: execute service vm:102 migrate node3
-info    420    node1/crm: got crm command: migrate vm:102 node3
-info    420    node1/crm: migrate service 'vm:102' to node 'node3'
-info    420    node1/crm: service 'vm:102': state changed from 'started' to 
'migrate'  (node = node2, target = node3)
-info    423    node2/lrm: service vm:102 - start migrate to node 'node3'
-info    423    node2/lrm: service vm:102 - end migrate to node 'node3'
-info    440    node1/crm: service 'vm:102': state changed from 'migrate' to 
'started'  (node = node3)
-info    440    node1/crm: migrate service 'vm:102' to node 'node2' (running)
-info    440    node1/crm: service 'vm:102': state changed from 'started' to 
'migrate'  (node = node3, target = node2)
-info    445    node3/lrm: service vm:102 - start migrate to node 'node2'
-info    445    node3/lrm: service vm:102 - end migrate to node 'node2'
-info    460    node1/crm: service 'vm:102': state changed from 'migrate' to 
'started'  (node = node2)
-info    463    node2/lrm: starting service vm:102
-info    463    node2/lrm: service status vm:102 started
+err     420    node1/crm: crm command 'migrate vm:102 node3' error - service 
'vm:102' is not allowed on node 'node3'
 info    520      cmdlist: execute service vm:102 migrate node2
 info    520    node1/crm: ignore crm command - service already on target node: 
migrate vm:102 node2
 info    620      cmdlist: execute service vm:102 migrate node1
diff --git a/src/test/test-recovery4/log.expect 
b/src/test/test-recovery4/log.expect
index 12983b5f..684c796b 100644
--- a/src/test/test-recovery4/log.expect
+++ b/src/test/test-recovery4/log.expect
@@ -43,7 +43,7 @@ err     260    node1/crm: recovering service 'vm:102' from 
fenced node 'node2' f
 err     280    node1/crm: recovering service 'vm:102' from fenced node 'node2' 
failed, no recovery node found
 err     300    node1/crm: recovering service 'vm:102' from fenced node 'node2' 
failed, no recovery node found
 info    320      cmdlist: execute service vm:102 migrate node3
-info    320    node1/crm: got crm command: migrate vm:102 node3
+err     320    node1/crm: crm command 'migrate vm:102 node3' error - service 
'vm:102' is not allowed on node 'node3'
 err     320    node1/crm: recovering service 'vm:102' from fenced node 'node2' 
failed, no recovery node found
 err     340    node1/crm: recovering service 'vm:102' from fenced node 'node2' 
failed, no recovery node found
 err     360    node1/crm: recovering service 'vm:102' from fenced node 'node2' 
failed, no recovery node found
-- 
2.47.3



_______________________________________________
pve-devel mailing list
[email protected]
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to