On November 20, 2019 5:43 pm, Stefan Reiter wrote: > It does some basic sanity checking, warns the user about encryption > settings and unresolved hostnames, and finally makes sure that all nodes > have the same links configured (as well as comparing the configured > links to specified interfaces, if there are any). > > A corosync.conf that has been created and modified strictly through our > API should *always* be valid. > > verify_conf is called in 'addnode', warnings and errors are returned via > the API to be displayed in the task log of the node asking to join. If a > verification error occurs, it is handled specially via a "raise" outside > of any lock code that strips extra information from an Exception > instance. This ensures that multi-line formatted errors can be returned. > Warnings are always returned as array, to be printed on the caller. > > Includes testing. > > Signed-off-by: Stefan Reiter <s.rei...@proxmox.com>
this would be great to re-use for pve5to6 (and more importantly, a future pve6to7 ;)) > --- > data/PVE/API2/ClusterConfig.pm | 27 +++++++- > data/PVE/Cluster/Setup.pm | 20 +++++- > data/PVE/Corosync.pm | 103 +++++++++++++++++++++++++++++- > data/test/corosync_parser_test.pl | 28 ++++++++ > 4 files changed, 175 insertions(+), 3 deletions(-) > > diff --git a/data/PVE/API2/ClusterConfig.pm b/data/PVE/API2/ClusterConfig.pm > index c426a30..3d778b8 100644 > --- a/data/PVE/API2/ClusterConfig.pm > +++ b/data/PVE/API2/ClusterConfig.pm > @@ -3,6 +3,7 @@ package PVE::API2::ClusterConfig; > use strict; > use warnings; > > +use PVE::Exception; > use PVE::Tools; > use PVE::SafeSyslog; > use PVE::RESTHandler; > @@ -219,7 +220,13 @@ __PACKAGE__->register_method ({ > }, > corosync_conf => { > type => 'string', > - } > + }, > + warnings => { > + type => 'array', > + items => { > + type => 'string', > + }, > + }, > }, > }, > code => sub { > @@ -227,11 +234,17 @@ __PACKAGE__->register_method ({ > > PVE::Cluster::check_cfs_quorum(); > > + my $vc_errors; > + my $vc_warnings; > + > my $code = sub { > my $conf = PVE::Cluster::cfs_read_file("corosync.conf"); > my $nodelist = PVE::Corosync::nodelist($conf); > my $totem_cfg = PVE::Corosync::totem_config($conf); > > + ($vc_errors, $vc_warnings) = PVE::Corosync::verify_conf($conf); > + die if scalar(@$vc_errors); > + > my $name = $param->{node}; > > # ensure we do not reuse an address, that can crash the whole > cluster! > @@ -317,11 +330,23 @@ __PACKAGE__->register_method ({ > }; > > $config_change_lock->($code); > + > + # If vc_errors is set, we died because of verify_conf. > + # Raise this error, since it contains more information than just a > + # single-line string. > + if (defined($vc_errors) && scalar(@$vc_errors)) { > + my @combined = (); > + push @combined, @$vc_errors, @$vc_warnings; > + my %err_hash = map { $_ => $combined[$_] } 0..$#combined; > + PVE::Exception::raise("invalid corosync.conf\n", errors => > \%err_hash); it might make sense to somehow differentiate between errors and warnings here? so that a caller/user knows which parts are actually causing this to fail.. > + } > + > die $@ if $@; > > my $res = { > corosync_authkey => PVE::Tools::file_get_contents($authfile), > corosync_conf => PVE::Tools::file_get_contents($clusterconf), > + warnings => $vc_warnings, > }; > > return $res; > diff --git a/data/PVE/Cluster/Setup.pm b/data/PVE/Cluster/Setup.pm > index 81e3ef8..5569d6c 100644 > --- a/data/PVE/Cluster/Setup.pm > +++ b/data/PVE/Cluster/Setup.pm > @@ -667,7 +667,25 @@ sub join { > $args->{link1} = $param->{link1} if defined($param->{link1}); > > print "Request addition of this node\n"; > - my $res = $conn->post("/cluster/config/nodes/$nodename", $args); > + my $res = eval { $conn->post("/cluster/config/nodes/$nodename", $args); > }; > + if ($@) { if (my $err = $@) { the block below is already big, who knows what happens in the future that might (re)set $@ half-way through ;) > + if ($@->{msg} && $@->{errors}) { would it make sense to check whether $@ is a PVE::Exception here? > + # we received additional info about the error, show the user > + $@->{msg} =~ s/^(\n|\s)*|(\n|\s)*$//g; PVE::Tools::trim ? do we actually care about newlines here (or why do we include one below, just to remove it again here)? > + warn "An error occured on the cluster node: $@->{msg}\n"; > + foreach my $err (keys %{$@->{errors}}) { > + warn "* $@->{errors}->{$err}\n" > + } > + die "Cluster join aborted!\n"; > + } > + die $@; > + } > + > + if (defined($res->{warnings})) { > + foreach my $warn (@{$res->{warnings}}) { > + warn "cluster: $warn\n"; > + } > + } > > print "Join request OK, finishing setup locally\n"; > > diff --git a/data/PVE/Corosync.pm b/data/PVE/Corosync.pm > index f52547b..5973aaf 100644 > --- a/data/PVE/Corosync.pm > +++ b/data/PVE/Corosync.pm > @@ -15,6 +15,7 @@ use PVE::Tools; > use PVE::Tools qw($IPV4RE $IPV6RE); > > my $basedir = "/etc/pve"; > +our $link_addr_re = qw/^(ring|link)(\d+)_addr$/; > > my $conf_array_sections = { > node => 1, > @@ -294,6 +295,106 @@ sub create_conf { > return { main => $conf }; > } > > +# returns (\@errors, \@warnings) to the caller, does *not* 'die' or 'warn' > +# verification was successful if \@errors is empty > +sub verify_conf { > + my ($conf) = @_; > + > + my @errors = (); > + my @warnings = (); > + > + my $nodelist = nodelist($conf); > + if (!$nodelist) { > + push @errors, "no nodes found"; > + return (\@errors, \@warnings); > + } > + > + my $totem = $conf->{main}->{totem}; > + if (!$totem) { > + push @errors, "no totem found"; > + return (\@errors, \@warnings); > + } > + > + push @warnings, "warning: authentication/encryption is not explicitly > enabled" > + . " (secauth / crypto_cipher / crypto_hash)" > + if (!defined($totem->{secauth}) || $totem->{secauth} ne 'on') && > + (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq > 'none'); > + > + my $interfaces = $totem->{interface}; > + > + my $verify_link_ip = sub { > + my ($key, $link, $node) = @_; > + my ($resolved_ip, undef) = resolve_hostname_like_corosync($link, $conf); > + if (defined($resolved_ip)) { > + push @warnings, "warning: $key '$link' for node '$node' resolves to" > + . " '$resolved_ip' - consider replacing it with the currently" > + . " resolved IP address for stability" > + if $resolved_ip ne $link; > + } else { > + push @warnings, "warning: unable to resolve $key '$link' for node > '$node'" > + . " to an IP address according to Corosync's resolve strategy -" > + . " cluster could fail on restart!"; > + } > + }; > + > + my $find_option = sub { could get a better name? it's not about finding an(y) option, it's about - determining ring* vs link* - getting the value of ring/linkX_addr of node Y at least 'find_addr_option' or 'get_addr_option' would clarify a bit what it's about. alternatively, could also do what happens in the API, and first collect all the link/ring options of all nodes into a hash, and then use that directly for checking in the loops below. would also avoid iterating over each nodes config 8 times instead of just once ;) > + my ($node, $num) = @_; > + > + foreach my $opt (keys %$node) { > + next if $opt !~ $link_addr_re; > + return ("${1}${2}_addr", $node->{$opt}) if int($2) == $num; > + } > + > + return undef; > + }; > + > + # sort for output order stability > + my @node_names = sort keys %$nodelist; > + > + if (%$interfaces) { > + # if interfaces are defined, *all* links must have a matching interface > + # definition, and vice versa > + for my $link (0..1) { > + my $have_interface = defined($interfaces->{$link}); > + foreach my $node (@node_names) { > + my ($optname, $addr) = $find_option->($nodelist->{$node}, > $link); > + if (defined($optname)) { > + $verify_link_ip->($optname, $addr, $node); > + push @errors, "node '$node' has '$optname', but there is no" > + . " interface number $link configured" > + if !$have_interface; > + } else { > + push @errors, "node '$node' is missing address for > interface" > + . " number $link" > + if $have_interface; > + } > + } > + } > + } else { > + # without interfaces, only check that links are consistent among nodes > + for my $link (0..1) { > + my $nodes_with_link = {}; > + foreach my $node (@node_names) { > + my ($optname, $addr) = $find_option->($nodelist->{$node}, > $link); > + if (defined($optname)) { > + $verify_link_ip->($optname, $addr, $node); > + $nodes_with_link->{$node} = 1; > + } > + } > + > + if (%$nodes_with_link) { > + foreach my $node (@node_names) { > + push @errors, "node '$node' is missing link $link, which" > + . " is configured on other nodes" > + if !defined($nodes_with_link->{$node}); > + } > + } > + } > + } > + > + return (\@errors, \@warnings); > +} > + > sub for_all_corosync_addresses { > my ($corosync_conf, $ip_version, $func) = @_; > > @@ -304,7 +405,7 @@ sub for_all_corosync_addresses { > foreach my $node_name (sort keys %$nodelist) { > my $node_config = $nodelist->{$node_name}; > foreach my $node_key (sort keys %$node_config) { > - if ($node_key =~ /^(ring|link)\d+_addr$/) { > + if ($node_key =~ $link_addr_re) { > my $node_address = $node_config->{$node_key}; > > my($ip, $version) = > resolve_hostname_like_corosync($node_address, $corosync_conf); > diff --git a/data/test/corosync_parser_test.pl > b/data/test/corosync_parser_test.pl > index 18ee4f7..42c4853 100755 > --- a/data/test/corosync_parser_test.pl > +++ b/data/test/corosync_parser_test.pl > @@ -5,10 +5,35 @@ use lib '..'; > use strict; > use warnings; > > +use Test::MockModule; > use Test::More; > > use PVE::Corosync; > > +my $known_hosts = { > + "prox1" => "127.0.1.1", > + "prox1-ring1" => "127.0.2.1", > + "prox2" => "127.0.1.2", > + "prox2-ring1" => "127.0.2.2", > + "prox3" => "127.0.1.3", > + "prox3-ring1" => "127.0.2.3", > + "prox4" => "127.0.1.4", > + "prox4-ring1" => "127.0.2.4", > +}; > + > +sub mocked_resolve { > + my ($hostname) = @_; > + > + foreach my $host (keys %$known_hosts) { > + return $known_hosts->{$host} if $hostname eq $host; > + } > + > + die "got unknown hostname '$hostname' during mocked > resolve_hostname_like_corosync"; > +} > + > +my $mocked_corosync_module = new Test::MockModule('PVE::Corosync'); > +$mocked_corosync_module->mock('resolve_hostname_like_corosync', > \&mocked_resolve); > + > sub parser_self_check { > my $cfg_fn = shift; > > @@ -20,6 +45,9 @@ sub parser_self_check { > $raw1 = PVE::Tools::file_get_contents($cfg_fn); > $config1 = PVE::Corosync::parse_conf($cfg_fn, $raw1); > > + # test verify_config > + PVE::Corosync::verify_conf($config1); > + > # write config > $raw2 = PVE::Corosync::write_conf(undef, $config1); > # do not actually write cfg, but you can outcomment to do so, e.g. if > -- > 2.20.1 > > > _______________________________________________ > pve-devel mailing list > pve-devel@pve.proxmox.com > https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel > > _______________________________________________ pve-devel mailing list pve-devel@pve.proxmox.com https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel