Re: [pve-devel] [PATCH manager 1/2] ui: qemu: CD edit: add "Installation media" label when in wizard

2022-09-22 Thread Fiona Ebner
On 19.09.22 at 13:00, Thomas Lamprecht wrote:
> Bit torn on this one. On one hand it's small and might help some; on the other
> hand I think it shouldn't be required - there's OS in the tab title already and
> a disk tab two steps ahead. Paired with the fact that there's clearly written
> "CD/DVD drive", it should be pretty easy to understand, and if not, that extra
> hint probably isn't adding that much extra info over the existing cues. Also
> note, and here I'm probably over analysing/nit-picking, but it doesn't
> necessarily need to be an installation media, e.g. for a live distro.
> 

Yes, if somebody actually reads the text, it should be clear what the
storage is for ;)

> The heading fits somewhat with the one from the OS type selector, so from a
> purely layout POV it would be fine to have. Maybe just some other wording,
> like a plain/technical "Virtual CD/DVD Drive".
> 

What about "Installation/Live media"? Should be accurate enough for the
vast majority of scenarios and I think it'd provide a good hint to new
users. It also would avoid repeating "CD/DVD", but I'm fine with your
suggestion too :)






Re: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Fabian Grünbichler
On September 21, 2022 2:49 pm, Dominik Csapak wrote:
> currently, the 'forced_cleanup' (sending SIGKILL to the qemu process)
> is intended to be triggered 5 seconds after sending the initial shutdown
> signal (SIGTERM), which is sadly not enough for some setups.
> 
> Accidentally, it could be triggered earlier than 5 seconds, if a
> SIGALRM triggers in the timespan directly before setting it again.
> 
> Also, this approach means that depending on when machines are shut down,
> their forced cleanup may happen after 5 seconds, or any time after, if
> new vms are shut off in the meantime.
> 
> To improve this situation, rework the way we deal with this cleanup, by
> saving the time incl. timeout in the CleanupData, and trigger a
> forced_cleanup every 10 seconds. If that happens, remove it from
> the forced_cleanup list.
> 
> To improve the shutdown behaviour, increase the default timeout to 60
> seconds, which should be enough, but add a commandline toggle where
> users can set it to a different value.
> 
> Signed-off-by: Dominik Csapak 
> ---
>  qmeventd/qmeventd.c | 75 +
>  qmeventd/qmeventd.h |  2 ++
>  2 files changed, 44 insertions(+), 33 deletions(-)
> 
> diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
> index 8d32827..e9ff5b3 100644
> --- a/qmeventd/qmeventd.c
> +++ b/qmeventd/qmeventd.c
> @@ -29,6 +29,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -36,15 +37,18 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "qmeventd.h"
>  
> +#define DEFAULT_KILL_TIMEOUT 60
> +
>  static int verbose = 0;
> +static int kill_timeout = DEFAULT_KILL_TIMEOUT;
>  static int epoll_fd = 0;
>  static const char *progname;
>  GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
>  GSList *forced_cleanups;
> -volatile sig_atomic_t alarm_triggered = 0;
>  
>  /*
>   * Helper functions
> @@ -54,9 +58,10 @@ static void
>  usage()
>  {
>  fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
> -fprintf(stderr, "  -f   run in foreground (default: false)\n");
> -fprintf(stderr, "  -v   verbose (default: false)\n");
> -fprintf(stderr, "  PATH use PATH for socket\n");
> +fprintf(stderr, "  -f   run in foreground (default: false)\n");
> +fprintf(stderr, "  -v   verbose (default: false)\n");
> +fprintf(stderr, "  -t timeout   kill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT);
> +fprintf(stderr, "  PATH use PATH for socket\n");
>  }
>  
>  static pid_t
> @@ -469,16 +474,16 @@ terminate_client(struct Client *client)
>  int err = kill(client->pid, SIGTERM);
>  log_neg(err, "kill");
>  
> +time_t timeout = time(NULL) + kill_timeout;
> +
>  struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
>  struct CleanupData data = {
>   .pid = client->pid,
> - .pidfd = pidfd
> + .pidfd = pidfd,
> + .timeout = timeout,
>  };
>  *data_ptr = data;
>  forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
> -
> -// resets any other alarms, but will fire eventually and cleanup all
> -alarm(5);
>  }
>  
>  void
> @@ -551,27 +556,16 @@ handle_client(struct Client *client)
>  json_tokener_free(tok);
>  }
>  
> -
> -/*
> - * SIGALRM and cleanup handling
> - *
> - * terminate_client will set an alarm for 5 seconds and add its client's PID to
> - * the forced_cleanups list - when the timer expires, we iterate the list and
> - * attempt to issue SIGKILL to all processes which haven't yet stopped.
> - */
> -
> -static void
> -alarm_handler(__attribute__((unused)) int signum)
> -{
> -alarm_triggered = 1;
> -}
> -

wasn't this intentionally decoupled like this?

alarm_handler just sets the flag
actual force cleanup is conditionalized on the alarm having triggered, 
but the cleanup happens outside of the signal handler..

is there a reason for switching away from this scheme? we don't need 
to do the cleanup in the signal handler (timing is already plenty fuzzy 
anyway ;))

>  static void
>  sigkill(void *ptr, __attribute__((unused)) void *unused)
>  {
>  struct CleanupData data = *((struct CleanupData *)ptr);
>  int err;
>  
> +if (data.timeout > time(NULL)) {

nit: current time / cutoff could be passed in via the currently unused 
user_data parameter..

> + return;
> +}
> +
>  if (data.pidfd > 0) {
>   err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
>   (void)close(data.pidfd);
> @@ -588,21 +582,29 @@ sigkill(void *ptr, __attribute__((unused)) void *unused)
>   fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
>   data.pid);
>  }
> +
> +// remove ourselves from the list
> +forced_cleanups = g_slist_remove(forced_cleanups, ptr);
> +free(ptr);
>  }
>  
> +/*
> + * SIGALRM and cleanup handling
> + *
> + * handles the cleanup on non terminated qe

[pve-devel] [PATCH docs] cloud init: add warning to not install cloud-init on the host

2022-09-22 Thread Fiona Ebner
From time to time, users install the cloud-init package on the host
by accident [0]. Since the other commands are intended to be run on
the host, add a warning about the one command that shouldn't.

Patch suggested by @Neobin on the community forum [1].

[0] https://forum.proxmox.com/threads/115530/post-499610
[1] https://forum.proxmox.com/threads/115530/post-499624

Signed-off-by: Fiona Ebner 
---
 qm-cloud-init.adoc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/qm-cloud-init.adoc b/qm-cloud-init.adoc
index 1cebf14..8fb69a0 100644
--- a/qm-cloud-init.adoc
+++ b/qm-cloud-init.adoc
@@ -48,6 +48,9 @@ prepare. On Debian/Ubuntu based systems this is as simple as:
 apt-get install cloud-init
 
 
+WARNING: This command is *not* intended to be executed on the {pve} host, but
+only inside the VM.
+
 Already many distributions provide ready-to-use Cloud-Init images (provided
 as `.qcow2` files), so alternatively you can simply download and
 import such images. For the following example, we will use the cloud
-- 
2.30.2






[pve-devel] [PATCH docs] cloud init: mention that serial display might not work for all images

2022-09-22 Thread Fiona Ebner
Example from the community forum:
https://forum.proxmox.com/threads/112225/

Signed-off-by: Fiona Ebner 
---
 qm-cloud-init.adoc | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/qm-cloud-init.adoc b/qm-cloud-init.adoc
index 8fb69a0..8363f10 100644
--- a/qm-cloud-init.adoc
+++ b/qm-cloud-init.adoc
@@ -33,8 +33,10 @@ Cloud-Init data.
 
 {pve} generates an ISO image to pass the Cloud-Init data to the VM. For
 that purpose, all Cloud-Init VMs need to have an assigned CD-ROM drive.
-Also many Cloud-Init images assume to have a serial console, so it is
-recommended to add a serial console and use it as display for those VMs.
+Usually, a serial console should be added and used as a display. Many Cloud-Init
+images rely on this, as it is a requirement for OpenStack. However, other images
+might have problems with this configuration. Switch back to the default display
+configuration if using a serial console doesn't work.
 
 
 Preparing Cloud-Init Templates
@@ -93,8 +95,9 @@ a bootable CD-ROM.
 qm set 9000 --boot c --bootdisk scsi0
 
 
-Also configure a serial console and use it as a display. Many Cloud-Init
-images rely on this, as it is an requirement for OpenStack images.
+For many Cloud-Init images, it is required to configure a serial console and use
+it as a display. If the configuration doesn't work for a given image however,
+switch back to the default display instead.
 
 
 qm set 9000 --serial0 socket --vga serial0
-- 
2.30.2






Re: [pve-devel] [PATCH qemu-server 2/3] qmeventd: cancel 'forced cleanup' when normal cleanup succeeds

2022-09-22 Thread Matthias Heiserer

On 21.09.2022 14:49, Dominik Csapak wrote:

instead of always sending a SIGKILL to the target pid.
It was not that much of a problem since the timeout previously was 5
seconds and we used pidfds where possible, thus the chance of killing the
wrong process was rather slim.

Now we increased the timeout to 60s which makes the race a bit more likely
(when not using pidfds), so remove it from the 'forced_cleanups' list when
the normal cleanup succeeds.

Signed-off-by: Dominik Csapak 
---
  qmeventd/qmeventd.c | 20 
  1 file changed, 20 insertions(+)

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index e9ff5b3..de5efd0 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -415,6 +415,25 @@ cleanup_qemu_client(struct Client *client)
  }
  }
  
+static void

+remove_cleanup_data(void *ptr, void *client_ptr) {

Not that it really matters, but is there a reason we don't use
remove_cleanup_data(struct CleanupData *ptr, struct Client *client_ptr)
and let the caller deal with types?

+struct CleanupData *data = (struct CleanupData *)ptr;
+struct Client *client = (struct Client *)client_ptr;
+
+if (data->pid == client->pid) {
+   forced_cleanups = g_slist_remove(forced_cleanups, ptr);
+   free(ptr);
+}
+}
+
+static void
+remove_from_forced_cleanup(struct Client *client) {
+if (g_slist_length(forced_cleanups) > 0) {
+   VERBOSE_PRINT("removing %s from forced cleanups\n", client->qemu.vmid);
+   g_slist_foreach(forced_cleanups, remove_cleanup_data, client);
that is, here `(void (*)(void*, void*)) remove_cleanup_data`. Seems a bit cleaner to me.

+}
+}
+
  void
  cleanup_client(struct Client *client)
  {
@@ -441,6 +460,7 @@ cleanup_client(struct Client *client)
break;
  }
  
+remove_from_forced_cleanup(client);

  free(client);
  }
  







Re: [pve-devel] [PATCH proxmox-backup] fix #4165: SMART: add raw field

2022-09-22 Thread Dominik Csapak

all 3 patches LGTM

the only (minor) thing is that the wt patch could have handled the current
situation (pve raw+value, pbs normalized+value), e.g. by using the field
'convert' or 'calculate' methods of extjs (we could have had a 'real_raw' and
'real_normalized' that use the values depending on which exist), then we would
not need a depends/breaks on the versions (not sure which direction here)

if everybody else is ok with these dependencies, consider these patches:

Reviewed-by: Dominik Csapak 
Tested-by: Dominik Csapak 





[pve-devel] [PATCH guest-common] replication: avoid "expected snapshot missing warning" when irrelevant

2022-09-22 Thread Fiona Ebner
Namely, when there are no snapshots at all on the volume, which also
is the case when the volume doesn't exist. This happens when a fresh
volume is added to an already replicated guest.

Fixes replication tests in pve-manager, which didn't like the additional
output.

Fixes: c0b2948 ("replication: prepare: safeguard against removal if expected snapshot is missing")
Signed-off-by: Fiona Ebner 
---
 src/PVE/Replication.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/PVE/Replication.pm b/src/PVE/Replication.pm
index 8591d0e..680ffe1 100644
--- a/src/PVE/Replication.pm
+++ b/src/PVE/Replication.pm
@@ -178,7 +178,7 @@ sub prepare {
$removal_ok = 0 if $last_sync == 0; # last_sync=0 if the VM was stolen, 
don't remove!
$removal_ok = 1 if $last_sync == 1; # last_sync=1 is a special value 
used to remove all
$logfunc->("expected snapshot $snapname not present for $volid, not 
removing others")
-   if !$removal_ok && $last_sync > 1;
+   if !$removal_ok && $last_sync > 1 && scalar(keys $info->%*) > 0;
 
for my $snap (keys $info->%*) {
if ( # check if it's a stale replication snapshot
-- 
2.30.2






Re: [pve-devel] [PATCH guest-common] replication: avoid "expected snapshot missing warning" when irrelevant

2022-09-22 Thread Fiona Ebner
On 22.09.22 at 13:00, Fiona Ebner wrote:
> Namely, when there are no snapshots at all on the volume, which also
> is the case when the volume doesn't exist. This happens when a fresh
> volume is added to an already replicated guest.

The warning still triggers when some other snapshot is added to the new
disk before the replication runs (by taking a snapshot or via another
replication). I'll send a v2 and print the warning only when there's
actually another replication snapshot that would've been removed without
the safeguard added by c0b2948.

> 
> Fixes replication tests in pve-manager, which didn't like the additional
> output.
> 
> Fixes: c0b2948 ("replication: prepare: safeguard against removal if expected 
> snapshot is missing")





Re: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Dominik Csapak

[snip]

-/*
- * SIGALRM and cleanup handling
- *
- * terminate_client will set an alarm for 5 seconds and add its client's PID to
- * the forced_cleanups list - when the timer expires, we iterate the list and
- * attempt to issue SIGKILL to all processes which haven't yet stopped.
- */
-
-static void
-alarm_handler(__attribute__((unused)) int signum)
-{
-alarm_triggered = 1;
-}
-


wasn't this intentionally decoupled like this?

alarm_handler just sets the flag
actual force cleanup is conditionalized on the alarm having triggered,
but the cleanup happens outside of the signal handler..

is there a reason for switching away from this scheme? we don't need 
to do the cleanup in the signal handler (timing is already plenty fuzzy
anyway ;))


no real reason, i found the code somewhat cleaner, but you're right,
we probably want to keep that, and just trigger it regularly




  static void
  sigkill(void *ptr, __attribute__((unused)) void *unused)
  {
  struct CleanupData data = *((struct CleanupData *)ptr);
  int err;
  
+if (data.timeout > time(NULL)) {


nit: current time / cutoff could be passed in via the currently unused
user_data parameter..


make sense






Re: [pve-devel] [PATCH qemu-server 2/3] qmeventd: cancel 'forced cleanup' when normal cleanup succeeds

2022-09-22 Thread Dominik Csapak

On 9/22/22 12:14, Matthias Heiserer wrote:

On 21.09.2022 14:49, Dominik Csapak wrote:

instead of always sending a SIGKILL to the target pid.
It was not that much of a problem since the timeout previously was 5
seconds and we used pidfds where possible, thus the chance of killing the
wrong process was rather slim.

Now we increased the timeout to 60s which makes the race a bit more likely
(when not using pidfds), so remove it from the 'forced_cleanups' list when
the normal cleanup succeeds.

Signed-off-by: Dominik Csapak 
---
  qmeventd/qmeventd.c | 20 
  1 file changed, 20 insertions(+)

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index e9ff5b3..de5efd0 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -415,6 +415,25 @@ cleanup_qemu_client(struct Client *client)
  }
  }
+static void
+remove_cleanup_data(void *ptr, void *client_ptr) {

Not that it really matters, but is there a reason we don't use
remove_cleanup_data(struct CleanupData *ptr, struct Client *client_ptr)
and let the caller deal with types?

+    struct CleanupData *data = (struct CleanupData *)ptr;
+    struct Client *client = (struct Client *)client_ptr;
+
+    if (data->pid == client->pid) {
+    forced_cleanups = g_slist_remove(forced_cleanups, ptr);
+    free(ptr);
+    }
+}
+
+static void
+remove_from_forced_cleanup(struct Client *client) {
+    if (g_slist_length(forced_cleanups) > 0) {
+    VERBOSE_PRINT("removing %s from forced cleanups\n", client->qemu.vmid);
+    g_slist_foreach(forced_cleanups, remove_cleanup_data, client);

that is, here `(void (*)(void*, void*)) remove_cleanup_data`. Seems a bit 
cleaner to me.

+    }
+}
+
  void
  cleanup_client(struct Client *client)
  {
@@ -441,6 +460,7 @@ cleanup_client(struct Client *client)
  break;
  }
+    remove_from_forced_cleanup(client);
  free(client);
  }




i just kept the style we use for the existing call to *_foreach.

my guess is that the intention was to keep the function close to what glib
defines (although that uses 'gpointer'). doing as you suggested introduces a big
cast that is confusing to read IMHO (for people not that familiar with c at
least ;) )
that could be solved with casting to 'GFunc' (not sure if that's considered
good style?)
but in the end, i don't have a strong feeling either way
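
For readers less familiar with the GLib idiom being discussed, a small self-contained
sketch of the '(GFunc)' call-site cast; the struct and callback below are made-up
stand-ins, not the actual qmeventd code:

#include <glib.h>
#include <stdio.h>

struct Item {
    int id;
};

// typed callback signature: readable body, single cast at the call site
static void
print_if_match(struct Item *item, int *wanted)
{
    if (item->id == *wanted) {
        printf("found item %d\n", item->id);
    }
}

int
main(void)
{
    struct Item a = { .id = 1 }, b = { .id = 2 };
    GSList *list = NULL;
    list = g_slist_prepend(list, &a);
    list = g_slist_prepend(list, &b);

    int wanted = 2;
    // GFunc is 'void (*)(gpointer, gpointer)'; casting a two-pointer callback
    // once here is the style the v2 series ends up using
    g_slist_foreach(list, (GFunc)print_if_match, &wanted);

    g_slist_free(list);
    return 0;
}

The alternative is to keep the glib-style 'void *data, void *user_data' signature
and cast inside the callback instead, which is what the v1 patch did.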




Re: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Thomas Lamprecht
On 21/09/2022 at 14:49, Dominik Csapak wrote:
> +fprintf(stderr, "  -t timeout   kill timeout (default: %ds)\n", 
> DEFAULT_KILL_TIMEOUT);

just a nit, as you probably send a v2 for Fabian's comments anyway:

Please do s/timeout// i.e.:

-t  kill timeo..

That has two advantages: first, the user actually knows the time unit and
second, the context whitespace changes aren't required anymore.

Did not check the rest more closely, but this stuck out when skimming.





[pve-devel] [PATCH pve-guest-common 1/1] add pre/post-snapshot hooks

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---

I have opted to include the snapshot hooks in the abstract class, since this
seemed like the more straightforward way.
The other option would have been duplicating the calls of the hooks into the
respective Backends, but I couldn't see any upsides to this.

This hook runs before the preparation steps, since otherwise in case of a
failing pre-snapshot hook the VM/CT is left in a locked state, which I would
need to clean up, which would add unnecessary complexity imo.

Otoh, there is a case to be made that the hook should only run after we checked
every precondition and are as certain as we can be that the snapshot will be
created successfully. This would be more convenient from a user's pov and can
be particularly convenient as it would avoid errors with user scripts that are
not idempotent - although those would still fail in case of a failure during
the snapshotting process.

What do you think would be the better solution?

 src/PVE/AbstractConfig.pm | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/PVE/AbstractConfig.pm b/src/PVE/AbstractConfig.pm
index a0c0bc6..8052fde 100644
--- a/src/PVE/AbstractConfig.pm
+++ b/src/PVE/AbstractConfig.pm
@@ -799,11 +799,15 @@ sub __snapshot_activate_storages {
 sub snapshot_create {
 my ($class, $vmid, $snapname, $save_vmstate, $comment) = @_;
 
+my $conf = $class->load_config($vmid);
+PVE::GuestHelpers::exec_hookscript($conf, $vmid, "pre-snapshot", 1);
+
 my $snap = $class->__snapshot_prepare($vmid, $snapname, $save_vmstate, 
$comment);
 
 $save_vmstate = 0 if !$snap->{vmstate};
 
-my $conf = $class->load_config($vmid);
+# reload config after changes in snapshot preparation step
+$conf = $class->load_config($vmid);
 
 my ($running, $freezefs) = $class->__snapshot_check_freeze_needed($vmid, 
$conf, $snap->{vmstate});
 
@@ -843,6 +847,8 @@ sub snapshot_create {
 }
 
 $class->__snapshot_commit($vmid, $snapname);
+
+PVE::GuestHelpers::exec_hookscript($conf, $vmid, "post-snapshot");
 }
 
 # Check if the snapshot might still be needed by a replication job.
-- 
2.30.2





[pve-devel] [PATCH pve-guest-common/pve-docs 0/1]

2022-09-22 Thread Stefan Hanreich
This patch adds hooks that run when the user creates a snapshot from the Web UI
/ CLI. I have tested this with both VMs/CTs via Web UI and CLI. Are there any
other places where the hook should get triggered that I missed?

pve-guest-common:

Stefan Hanreich (1):
  add pre/post-snapshot hooks

 src/PVE/AbstractConfig.pm | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

pve-docs:

Stefan Hanreich (1):
  add pre/post snapshot events to example hookscript

 examples/guest-example-hookscript.pl | 14 ++
 1 file changed, 14 insertions(+)

-- 
2.30.2





[pve-devel] [PATCH pve-docs 1/1] add pre/post snapshot events to example hookscript

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---

The example script currently enumerates the different phases (first, second,
...). I have opted to not continue this enumeration as I couldn't see any
particular reason for this and I will add lots of new phases in subsequent patch
series. Am I missing a particular reason for the numbering? I think it might be
smart to create another patch after merging all the different hook patches that
cleans up the comments/ordering in the example script and removes the
enumerations.

 examples/guest-example-hookscript.pl | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/examples/guest-example-hookscript.pl 
b/examples/guest-example-hookscript.pl
index adeed59..e4f032b 100755
--- a/examples/guest-example-hookscript.pl
+++ b/examples/guest-example-hookscript.pl
@@ -54,6 +54,20 @@ if ($phase eq 'pre-start') {
 
 print "$vmid stopped. Doing cleanup.\n";
 
+} elsif ($phase eq 'pre-snapshot') {
+
+# Phase 'pre-snapshot' will be executed before taking a snapshot of
+# the guest (via UI or CLI)
+
+print "$vmid will be snapshotted.\n";
+
+} elsif ($phase eq 'post-snapshot') {
+
+# Phase 'post-snapshot' will be executed after taking a snapshot of
+# the guest (via UI or CLI)
+
+print "$vmid has been successfully snapshotted.\n";
+
 } else {
 die "got unknown phase '$phase'\n";
 }
-- 
2.30.2





Re: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Wolfgang Bumiller
On Thu, Sep 22, 2022 at 01:31:49PM +0200, Dominik Csapak wrote:
> [snip]
> > > -/*
> > > - * SIGALRM and cleanup handling
> > > - *
> > > - * terminate_client will set an alarm for 5 seconds and add its client's 
> > > PID to
> > > - * the forced_cleanups list - when the timer expires, we iterate the 
> > > list and
> > > - * attempt to issue SIGKILL to all processes which haven't yet stopped.
> > > - */
> > > -
> > > -static void
> > > -alarm_handler(__attribute__((unused)) int signum)
> > > -{
> > > -alarm_triggered = 1;
> > > -}
> > > -
> > 
> > wasn't this intentionally decoupled like this?
> > 
> > alarm_handler just sets the flag
> > actual force cleanup is conditionalized on the alarm having triggered,
> > but the cleanup happens outside of the signal handler..
> > 
> > is there a reason from switching away from these scheme? we don't need
> > to do the cleanup in the signal handler (timing is already plenty fuzzy
> > anyway ;))
> 
> no real reason, i found the code somewhat cleaner, but you're right,
> we probably want to keep that, and just trigger it regularly

From what I can tell the only point of this signal is to interrupt
`epoll()` after a while to call the cleanup/kill handler since we only
have a single worker here that needs to do some work after a timeout.

Why not either:
  - set a bool instead of calling `alarm()` which causes the next
`epoll()` call to use a timeout and call the cleanups if epoll turns
up empty
  - or create a timerfd (timerfd_create(2)) in the beginning which we
add to the epoll context and use `timerfd_settime(2)` in place of
`alarm()`, which will also wake up the epoll call without having to add
timeouts to it

`alarm()` is just such a gross interface...
In theory we'd also be able to ditch all of those `EINTR` loops as we
wouldn't be expecting any interrupts anymore... (and if we did expect
them, we could add a `signalfd(2)` to `epoll()` as well ;-)
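
A minimal sketch of the timerfd variant, to make it concrete; the function names
and the NULL data.ptr convention below are illustrative assumptions, not part of
any patch, and error handling is omitted:

#include <stdint.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <time.h>
#include <unistd.h>

static int cleanup_timer_fd = -1;

// created once at startup, next to the existing epoll setup
static void
setup_cleanup_timer(int epoll_fd)
{
    cleanup_timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);

    struct epoll_event ev = {
        .events = EPOLLIN,
        .data.ptr = NULL, // NULL marks "cleanup timer fired" in the event loop
    };
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, cleanup_timer_fd, &ev);
}

// drop-in for the alarm(5) call in terminate_client()
static void
arm_cleanup_timer(time_t seconds)
{
    struct itimerspec its = {
        .it_value.tv_sec = seconds, // one-shot, relative to now
    };
    timerfd_settime(cleanup_timer_fd, 0, &its, NULL);
}

// in the epoll_wait() loop: an event whose data.ptr is NULL means the timer
// expired; read() the uint64_t expiration count from cleanup_timer_fd to clear
// it, then run the forced-cleanup pass outside of any signal handler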





Re: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Dominik Csapak

On 9/22/22 14:01, Wolfgang Bumiller wrote:

On Thu, Sep 22, 2022 at 01:31:49PM +0200, Dominik Csapak wrote:

[snip]

-/*
- * SIGALRM and cleanup handling
- *
- * terminate_client will set an alarm for 5 seconds and add its client's PID to
- * the forced_cleanups list - when the timer expires, we iterate the list and
- * attempt to issue SIGKILL to all processes which haven't yet stopped.
- */
-
-static void
-alarm_handler(__attribute__((unused)) int signum)
-{
-alarm_triggered = 1;
-}
-


wasn't this intentionally decoupled like this?

alarm_handler just sets the flag
actual force cleanup is conditionalized on the alarm having triggered,
but the cleanup happens outside of the signal handler..

is there a reason from switching away from these scheme? we don't need
to do the cleanup in the signal handler (timing is already plenty fuzzy
anyway ;))


no real reason, i found the code somewhat cleaner, but you're right,
we probably want to keep that, and just trigger it regularly


 From what I can tell the only point of this signal is to interrupt
`epoll()` after a while to call the cleanup/kill handler since we only
have a single worker here that needs to do some work after a timeout.

Why not either:
   - set a bool instead of calling `alarm()` which causes the next
 `epoll()` call to use a timeout and call the cleanups if epoll turns
 up empty >- or create a timerfd (timerfd_create(2)) in the beginning 
which we
 add to the epoll context and use `timerfd_settime(2)` in place of
 `alarm()`, which will also wake up the epoll call without having to add
 timeouts to it

`alarm()` is just such a gross interface...
In theory we'd also be able to ditch all of those `EINTR` loops as we
wouldn't be expecting any interrupts anymore... (and if we did expect
them, we could add a `signalfd(2)` to `epoll()` as well ;-)


first one sounds much simpler but the second one sounds much more elegant ;)
i'll see what works/feels better

couldn't we also directly add a new timerfd for each client that
needs such a timeout instead of managing some list?

the cleanupdata could go into the event.data.ptr and we wouldn't
have to do anything periodically, just handle the timeout
when epoll wakes up?

we probably would have to merge the client and cleanupdata structs
so that we can see which is which, but that should not be
that much of a problem?
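
Roughly, that per-client idea could look like the following sketch (hypothetical
names; the open question of telling a Client apart from a cleanup entry in
data.ptr is only noted in a comment):

#include <stdlib.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/types.h>
#include <time.h>

struct CleanupTimer {
    int timer_fd; // per-client timer registered with epoll
    pid_t pid;    // what to SIGKILL if it is still around
    int pidfd;
};

// would replace the forced_cleanups list: one armed timer per terminated client
static struct CleanupTimer *
schedule_forced_cleanup(int epoll_fd, pid_t pid, int pidfd, time_t seconds)
{
    struct CleanupTimer *t = calloc(1, sizeof(*t));
    t->timer_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
    t->pid = pid;
    t->pidfd = pidfd;

    struct itimerspec its = { .it_value.tv_sec = seconds }; // one-shot
    timerfd_settime(t->timer_fd, 0, &its, NULL);

    // the event loop needs a way to tell this apart from a struct Client,
    // e.g. a common tag field at the start of both structs
    struct epoll_event ev = { .events = EPOLLIN, .data.ptr = t };
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, t->timer_fd, &ev);
    return t;
}

// on wakeup for such an entry: send SIGKILL via pidfd/pid, close(timer_fd),
// EPOLL_CTL_DEL it and free() the entry - no periodic scan of a list needed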





Re: [pve-devel] [PATCH qemu-server 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Wolfgang Bumiller
On Thu, Sep 22, 2022 at 02:22:45PM +0200, Dominik Csapak wrote:
> On 9/22/22 14:01, Wolfgang Bumiller wrote:
> > On Thu, Sep 22, 2022 at 01:31:49PM +0200, Dominik Csapak wrote:
> > > [snip]
> > > > > -/*
> > > > > - * SIGALRM and cleanup handling
> > > > > - *
> > > > > - * terminate_client will set an alarm for 5 seconds and add its 
> > > > > client's PID to
> > > > > - * the forced_cleanups list - when the timer expires, we iterate the 
> > > > > list and
> > > > > - * attempt to issue SIGKILL to all processes which haven't yet 
> > > > > stopped.
> > > > > - */
> > > > > -
> > > > > -static void
> > > > > -alarm_handler(__attribute__((unused)) int signum)
> > > > > -{
> > > > > -alarm_triggered = 1;
> > > > > -}
> > > > > -
> > > > 
> > > > wasn't this intentionally decoupled like this?
> > > > 
> > > > alarm_handler just sets the flag
> > > > actual force cleanup is conditionalized on the alarm having triggered,
> > > > but the cleanup happens outside of the signal handler..
> > > > 
> > > > is there a reason from switching away from these scheme? we don't need
> > > > to do the cleanup in the signal handler (timing is already plenty fuzzy
> > > > anyway ;))
> > > 
> > > no real reason, i found the code somewhat cleaner, but you're right,
> > > we probably want to keep that, and just trigger it regularly
> > 
> >  From what I can tell the only point of this signal is to interrupt
> > `epoll()` after a while to call the cleanup/kill handler since we only
> > have a single worker here that needs to do some work after a timeout.
> > 
> > Why not either:
> >- set a bool instead of calling `alarm()` which causes the next
> >  `epoll()` call to use a timeout and call the cleanups if epoll turns
> >  up empty
> >- or create a timerfd (timerfd_create(2)) in the beginning which we
> >  add to the epoll context and use `timerfd_settime(2)` in place of
> >  `alarm()`, which will also wake up the epoll call without having to add
> >  timeouts to it
> > 
> > `alarm()` is just such a gross interface...
> > In theory we'd also be able to ditch all of those `EINTR` loops as we
> > wouldn't be expecting any interrupts anymore... (and if we did expect
> > them, we could add a `signalfd(2)` to `epoll()` as well ;-)
> 
> first one sounds much simpler but the second one sounds much more elegant ;)
> i'll see what works/feels better
> 
> couldn't we also directly add a new timerfd for each client that
> needs such a timeout instead of managing some list ?

I'm not really a fan of abusing kernel-side resources for a user-space
scheduling problem ;-).

If you want a per-client timeout, I'd much rather just have a list of
points in time that epoll() should target.

That list may well be `struct Client` itself if you want to merge the
data as you described below. The `struct Client` could just receive a
`STAILQ_ENTRY(Client) cleanup_entries;` member (queue(7), stailq(3)),
and a cleanup time which is used to generate the timeout for `epoll()`,
and on every wakeup, we can march through the already expired queue
entries.

> the cleanupdata could go into the even.data.ptr and we wouldn't
> have to do anything periodically, just handle the timeout
> when epoll wakes up?
> 
> we probably would have to merge the client and clenaupdata structs
> so that we can see which is which, but that should not be
> that of a problem?
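
For reference, a rough sketch of that STAILQ variant, assuming a single fixed
kill timeout so that tail insertion keeps the queue ordered by deadline; only
the two added Client members are shown:

#include <stddef.h>
#include <sys/queue.h>
#include <sys/types.h>
#include <time.h>

struct Client {
    pid_t pid;                            // other qmeventd members omitted
    time_t cleanup_deadline;              // when to escalate to SIGKILL
    STAILQ_ENTRY(Client) cleanup_entries;
};

static STAILQ_HEAD(, Client) cleanup_queue = STAILQ_HEAD_INITIALIZER(cleanup_queue);

// terminate_client() would append here; with one constant timeout the tail
// insertion keeps the queue sorted by deadline automatically
static void
schedule_cleanup(struct Client *client, int timeout_secs)
{
    client->cleanup_deadline = time(NULL) + timeout_secs;
    STAILQ_INSERT_TAIL(&cleanup_queue, client, cleanup_entries);
}

// timeout argument for epoll_wait(), in milliseconds; -1 means "block forever"
static int
next_epoll_timeout(void)
{
    struct Client *head = STAILQ_FIRST(&cleanup_queue);
    if (head == NULL)
        return -1;
    time_t delta = head->cleanup_deadline - time(NULL);
    return delta > 0 ? (int)delta * 1000 : 0;
}

// after every epoll_wait() return: escalate whatever is already overdue
static void
reap_expired(void)
{
    struct Client *c;
    while ((c = STAILQ_FIRST(&cleanup_queue)) != NULL
           && c->cleanup_deadline <= time(NULL)) {
        STAILQ_REMOVE_HEAD(&cleanup_queue, cleanup_entries);
        // issue SIGKILL via pidfd/pid here
    }
}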





[pve-devel] [PATCH pve-container/pve-docs/qemu-server 0/1] Add pre/post-restore hooks

2022-09-22 Thread Stefan Hanreich
This patch adds hooks that run when the user restores a backup from the Web UI
/ CLI. I have tested this with both VMs/CTs via Web UI and CLI. Are there any
other places where the hook should get triggered that I missed?

pve-container:

Stefan Hanreich (1):
  Add pre/post-restore hooks to CTs

 src/PVE/API2/LXC.pm | 6 ++
 1 file changed, 6 insertions(+)

qemu-server:

Stefan Hanreich (1):
  Add pre/post-restore hooks to VMs

 PVE/API2/Qemu.pm | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

pve-docs:

Stefan Hanreich (1):
  add pre/post-restore events to example hookscript

 examples/guest-example-hookscript.pl | 14 ++
 1 file changed, 14 insertions(+)

-- 
2.30.2





[pve-devel] [PATCH pve-docs 1/1] add pre/post-restore events to example hookscript

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---

Similar to the snapshot patch series, I have refrained from continuing the
numbering. This should be harmonized in a follow-up patch.

 examples/guest-example-hookscript.pl | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/examples/guest-example-hookscript.pl 
b/examples/guest-example-hookscript.pl
index adeed59..19fe213 100755
--- a/examples/guest-example-hookscript.pl
+++ b/examples/guest-example-hookscript.pl
@@ -54,6 +54,20 @@ if ($phase eq 'pre-start') {
 
 print "$vmid stopped. Doing cleanup.\n";
 
+} elsif ($phase eq 'pre-restore') {
+
+# Phase 'pre-restore' will be executed before restoring a backup. (via UI or
+# CLI)
+
+print "Restoring $vmid from backup.\n";
+
+} elsif ($phase eq 'post-restore') {
+
+# Phase 'post-restore' will be executed after restoring a backup. (via UI or
+# CLI)
+
+print "$vmid finished restoring from backup.\n";
+
 } else {
 die "got unknown phase '$phase'\n";
 }
-- 
2.30.2





[pve-devel] [PATCH qemu-server 1/1] Add pre/post-restore hooks to VMs

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---

There might be a better way to differentiate the different errors from
restorefn in the error handling logic, although I think in this case it is
still fine. This might get a bit messy though if in the future someone adds
another source for errors. Maybe add a single if before the restored_data checks
instead?

 PVE/API2/Qemu.pm | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 3ec31c2..fe41634 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -884,9 +884,13 @@ __PACKAGE__->register_method({
die "$emsg $@" if $@;
 
my $restored_data = 0;
+   my $hook_executed = 0;
my $restorefn = sub {
my $conf = PVE::QemuConfig->load_config($vmid);
 
+   PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-restore', 1);
+   $hook_executed = 1;
+
PVE::QemuConfig->check_protection($conf, $emsg);
 
die "$emsg vm is running\n" if 
PVE::QemuServer::check_running($vmid);
@@ -918,6 +922,8 @@ __PACKAGE__->register_method({
eval { PVE::QemuServer::template_create($vmid, 
$restored_conf) };
warn $@ if $@;
}
+
+   PVE::GuestHelpers::exec_hookscript($restored_conf, $vmid, 
'post-restore');
};
 
# ensure no old replication state are exists
@@ -1012,10 +1018,10 @@ __PACKAGE__->register_method({
if (my $err = $@) {
eval { PVE::QemuConfig->remove_lock($vmid, 'create') };
warn $@ if $@;
-   if ($restored_data) {
+   if ($hook_executed && $restored_data) {
warn "error after data was restored, VM disks should be 
OK but config may "
."require adaptions. VM $vmid state is NOT cleaned 
up.\n";
-   } else {
+   } elsif ($hook_executed && !$restored_data) {
warn "error before or during data restore, some or all 
disks were not "
."completely restored. VM $vmid state is NOT 
cleaned up.\n";
}
-- 
2.30.2





[pve-devel] [PATCH pve-container 1/1] Add pre/post-restore hooks to CTs

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---
 src/PVE/API2/LXC.pm | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/PVE/API2/LXC.pm b/src/PVE/API2/LXC.pm
index 589f96f..3ecf5e5 100644
--- a/src/PVE/API2/LXC.pm
+++ b/src/PVE/API2/LXC.pm
@@ -376,6 +376,8 @@ __PACKAGE__->register_method({
eval {
my $orig_mp_param; # only used if $restore
if ($restore) {
+   PVE::GuestHelpers::exec_hookscript($old_conf, $vmid, 
'pre-restore', 1);
+
die "can't overwrite running container\n" if 
PVE::LXC::check_running($vmid);
if ($archive ne '-') {
my $orig_conf;
@@ -502,6 +504,10 @@ __PACKAGE__->register_method({
 
PVE::API2::LXC::Status->vm_start({ vmid => $vmid, node => $node })
if $start_after_create;
+
+   if ($restore) {
+   PVE::GuestHelpers::exec_hookscript($conf, $vmid, 
'post-restore');
+   }
};
 
my $workername = $restore ? 'vzrestore' : 'vzcreate';
-- 
2.30.2





Re: [pve-devel] [PATCH pve-container 1/1] Add pre/post-restore hooks to CTs

2022-09-22 Thread Wolfgang Bumiller
On Thu, Sep 22, 2022 at 03:19:42PM +0200, Stefan Hanreich wrote:
> Signed-off-by: Stefan Hanreich 
> ---
>  src/PVE/API2/LXC.pm | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/src/PVE/API2/LXC.pm b/src/PVE/API2/LXC.pm
> index 589f96f..3ecf5e5 100644
> --- a/src/PVE/API2/LXC.pm
> +++ b/src/PVE/API2/LXC.pm
> @@ -376,6 +376,8 @@ __PACKAGE__->register_method({
>   eval {
>   my $orig_mp_param; # only used if $restore
>   if ($restore) {
> + PVE::GuestHelpers::exec_hookscript($old_conf, $vmid, 
> 'pre-restore', 1);
> +
>   die "can't overwrite running container\n" if 
> PVE::LXC::check_running($vmid);

I think this check should happen before the hook.

>   if ($archive ne '-') {
>   my $orig_conf;
> @@ -502,6 +504,10 @@ __PACKAGE__->register_method({
>  
>   PVE::API2::LXC::Status->vm_start({ vmid => $vmid, node => $node })
>   if $start_after_create;
> +
> + if ($restore) {
> + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 
> 'post-restore');
> + }
>   };
>  
>   my $workername = $restore ? 'vzrestore' : 'vzcreate';
> -- 
> 2.30.2





Re: [pve-devel] [PATCH qemu-server 1/1] Add pre/post-restore hooks to VMs

2022-09-22 Thread Wolfgang Bumiller
On Thu, Sep 22, 2022 at 03:19:43PM +0200, Stefan Hanreich wrote:
> Signed-off-by: Stefan Hanreich 
> ---
> 
> There might be a better way to differentiate the different errors from
> restorefn in the error handling logic, although I think in this case it is
> still fine. This might get a bit messy though if in the future someone adds
> another source for errors. Maybe add a single if before the restored_data 
> checks
> instead?
> 
>  PVE/API2/Qemu.pm | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 3ec31c2..fe41634 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -884,9 +884,13 @@ __PACKAGE__->register_method({
>   die "$emsg $@" if $@;
>  
>   my $restored_data = 0;
> + my $hook_executed = 0;
>   my $restorefn = sub {
>   my $conf = PVE::QemuConfig->load_config($vmid);
>  
> + PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-restore', 1);
> + $hook_executed = 1;
> +
>   PVE::QemuConfig->check_protection($conf, $emsg);

This should probably happen before the hook?

>  
>   die "$emsg vm is running\n" if 
> PVE::QemuServer::check_running($vmid);
> @@ -918,6 +922,8 @@ __PACKAGE__->register_method({
>   eval { PVE::QemuServer::template_create($vmid, 
> $restored_conf) };
>   warn $@ if $@;
>   }
> +
> + PVE::GuestHelpers::exec_hookscript($restored_conf, $vmid, 
> 'post-restore');
>   };
>  
>   # ensure no old replication state are exists
> @@ -1012,10 +1018,10 @@ __PACKAGE__->register_method({
>   if (my $err = $@) {
>   eval { PVE::QemuConfig->remove_lock($vmid, 'create') };
>   warn $@ if $@;
> - if ($restored_data) {
> + if ($hook_executed && $restored_data) {
>   warn "error after data was restored, VM disks should be 
> OK but config may "
>   ."require adaptions. VM $vmid state is NOT cleaned 
> up.\n";
> - } else {
> + } elsif ($hook_executed && !$restored_data) {
>   warn "error before or during data restore, some or all 
> disks were not "
>   ."completely restored. VM $vmid state is NOT 
> cleaned up.\n";
>   }
> -- 
> 2.30.2





[pve-devel] [PATCH pve-container 1/1] Add CT hooks for pre/post-migrate on target/source

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---
 src/PVE/CLI/pct.pm | 51 ++
 src/PVE/LXC/Migrate.pm | 23 +++
 2 files changed, 74 insertions(+)

diff --git a/src/PVE/CLI/pct.pm b/src/PVE/CLI/pct.pm
index 23793ee..e80586d 100755
--- a/src/PVE/CLI/pct.pm
+++ b/src/PVE/CLI/pct.pm
@@ -803,6 +803,54 @@ __PACKAGE__->register_method ({
return undef;
 }});
 
+__PACKAGE__->register_method ({
+name => 'pre_migrate',
+path => 'pre_migrate',
+method => 'POST',
+description => "Run pre-migrate hook for VM  - do not use manually.",
+parameters => {
+   additionalProperties => 0,
+   properties => {
+   vmid => get_standard_option('pve-vmid', { completion => 
\&PVE::QemuServer::complete_vmid }),
+   'source-node' => get_standard_option('pve-node'),
+   },
+},
+returns => { type => 'null'},
+code => sub {
+   my ($param) = @_;
+
+   my $vmid = $param->{vmid};
+   my $source_node = $param->{'source-node'};
+   my $conf = PVE::LXC::Config->load_config($vmid, $source_node);
+
+   PVE::GuestHelpers::exec_hookscript($conf, $vmid, "pre-migrate-target", 
1);
+
+   return;
+}});
+
+__PACKAGE__->register_method ({
+name => 'post_migrate',
+path => 'post_migrate',
+method => 'POST',
+description => "Run post-migrate hook for VM  - do not use 
manually.",
+parameters => {
+   additionalProperties => 0,
+   properties => {
+   vmid => get_standard_option('pve-vmid', { completion => 
\&PVE::QemuServer::complete_vmid }),
+   },
+},
+returns => { type => 'null'},
+code => sub {
+   my ($param) = @_;
+
+   my $vmid = $param->{vmid};
+   my $conf = PVE::LXC::Config->load_config($vmid);
+
+   PVE::GuestHelpers::exec_hookscript($conf, $vmid, "post-migrate-target");
+
+   return;
+}});
+
 our $cmddef = {
 list=> [ 'PVE::API2::LXC', 'vmlist', [], { node => $nodename }, sub {
my $res = shift;
@@ -874,6 +922,9 @@ our $cmddef = {
 rescan  => [ __PACKAGE__, 'rescan', []],
 cpusets => [ __PACKAGE__, 'cpusets', []],
 fstrim => [ __PACKAGE__, 'fstrim', ['vmid']],
+
+'pre-migrate' => [ __PACKAGE__, 'pre_migrate', ['vmid', 'source-node']],
+'post-migrate' => [ __PACKAGE__, 'post_migrate', ['vmid']],
 };
 
 1;
diff --git a/src/PVE/LXC/Migrate.pm b/src/PVE/LXC/Migrate.pm
index 2ef1cce..4089c2c 100644
--- a/src/PVE/LXC/Migrate.pm
+++ b/src/PVE/LXC/Migrate.pm
@@ -416,4 +416,27 @@ sub final_cleanup {
 
 }
 
+sub pre_migration_hooks {
+my ($self, $vmid) = @_;
+
+PVE::GuestHelpers::exec_hookscript($self->{vmconf}, $vmid, 
'pre-migrate-source', 1);
+
+my $node = PVE::INotify::nodename();
+my $cmd = [ @{$self->{rem_ssh}}, "pct", "pre-migrate", $vmid, $node ];
+
+eval { $self->cmd($cmd); };
+die "$@\n" if $@;
+}
+
+sub post_migration_hooks {
+my ($self, $vmid) = @_;
+
+PVE::GuestHelpers::exec_hookscript($self->{vmconf}, $vmid, 
'post-migrate-source');
+
+my $cmd = [ @{$self->{rem_ssh}}, "pct", "post-migrate", $vmid ];
+
+eval { $self->cmd($cmd); };
+die "$@\n" if $@;
+}
+
 1;
-- 
2.30.2





[pve-devel] [PATCH qemu-server 1/1] Add VM hooks for pre/post-migrate on target/source

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---
 PVE/CLI/qm.pm | 50 +++
 PVE/QemuMigrate.pm| 23 
 test/MigrationTest/QemuMigrateMock.pm |  4 +++
 3 files changed, 77 insertions(+)

diff --git a/PVE/CLI/qm.pm b/PVE/CLI/qm.pm
index 6a2e161..4161e6e 100755
--- a/PVE/CLI/qm.pm
+++ b/PVE/CLI/qm.pm
@@ -838,6 +838,54 @@ __PACKAGE__->register_method({
return;
 }});
 
+__PACKAGE__->register_method ({
+name => 'pre_migrate',
+path => 'pre_migrate',
+method => 'POST',
+description => "Run pre-migrate hook for VM  - do not use manually.",
+parameters => {
+   additionalProperties => 0,
+   properties => {
+   vmid => get_standard_option('pve-vmid', { completion => 
\&PVE::QemuServer::complete_vmid }),
+   'source-node' => get_standard_option('pve-node'),
+   },
+},
+returns => { type => 'null'},
+code => sub {
+   my ($param) = @_;
+
+   my $vmid = $param->{vmid};
+   my $source_node = $param->{'source-node'};
+   my $conf = PVE::QemuConfig->load_config($vmid, $source_node);
+
+   PVE::GuestHelpers::exec_hookscript($conf, $vmid, "pre-migrate-target", 
1);
+
+   return;
+}});
+
+__PACKAGE__->register_method ({
+name => 'post_migrate',
+path => 'post_migrate',
+method => 'POST',
+description => "Run post-migrate hook for VM  - do not use 
manually.",
+parameters => {
+   additionalProperties => 0,
+   properties => {
+   vmid => get_standard_option('pve-vmid', { completion => 
\&PVE::QemuServer::complete_vmid }),
+   },
+},
+returns => { type => 'null'},
+code => sub {
+   my ($param) = @_;
+
+   my $vmid = $param->{vmid};
+   my $conf = PVE::QemuConfig->load_config($vmid);
+
+   PVE::GuestHelpers::exec_hookscript($conf, $vmid, "post-migrate-target");
+
+   return;
+}});
+
 my $print_agent_result = sub {
 my ($data) = @_;
 
@@ -988,6 +1036,8 @@ our $cmddef = {
}],
 },
 
+'pre-migrate' => [ __PACKAGE__, 'pre_migrate', ['vmid', 'source-node']],
+'post-migrate' => [ __PACKAGE__, 'post_migrate', ['vmid']],
 };
 
 1;
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index d52dc8d..b113dec 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -1284,4 +1284,27 @@ sub round_powerof2 {
 return 2 << int(log($_[0]-1)/log(2));
 }
 
+sub pre_migration_hooks {
+my ($self, $vmid) = @_;
+
+PVE::GuestHelpers::exec_hookscript($self->{vmconf}, $vmid, 
'pre-migrate-source', 1);
+
+my $node = PVE::INotify::nodename();
+my $cmd = [ @{$self->{rem_ssh}}, "qm", "pre-migrate", $vmid, $node ];
+
+eval { $self->cmd($cmd); };
+die "$@\n" if $@;
+}
+
+sub post_migration_hooks {
+my ($self, $vmid) = @_;
+
+PVE::GuestHelpers::exec_hookscript($self->{vmconf}, $vmid, 
'post-migrate-source');
+
+my $cmd = [ @{$self->{rem_ssh}}, "qm", "post-migrate", $vmid ];
+
+eval { $self->cmd($cmd); };
+die "$@\n" if $@;
+}
+
 1;
diff --git a/test/MigrationTest/QemuMigrateMock.pm 
b/test/MigrationTest/QemuMigrateMock.pm
index f2c0281..461b390 100644
--- a/test/MigrationTest/QemuMigrateMock.pm
+++ b/test/MigrationTest/QemuMigrateMock.pm
@@ -298,6 +298,10 @@ $MigrationTest::Shared::tools_module->mock(
return 0;
} elsif ($cmd eq 'stop') {
return 0;
+   } elsif ($cmd eq 'pre-migrate') {
+   return 0;
+   } elsif ($cmd eq 'post-migrate') {
+   return 0;
}
die "run_command (mocked) ssh qm command - implement me: 
${cmd_msg}";
} elsif ($cmd eq 'pvesm') {
-- 
2.30.2





[pve-devel] [PATCH pve-docs 1/1] Add pre/post-migrate events for target and source to example hookscript

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---
 examples/guest-example-hookscript.pl | 28 
 1 file changed, 28 insertions(+)

diff --git a/examples/guest-example-hookscript.pl 
b/examples/guest-example-hookscript.pl
index adeed59..e4adf6d 100755
--- a/examples/guest-example-hookscript.pl
+++ b/examples/guest-example-hookscript.pl
@@ -54,6 +54,34 @@ if ($phase eq 'pre-start') {
 
 print "$vmid stopped. Doing cleanup.\n";
 
+} elsif ($phase eq 'pre-migrate-source') {
+
+# Phase 'pre-migrate-source' will be run before a migration on the source
+# node of the VM/CT
+
+print "preparing $vmid for migration on source.\n";
+
+} elsif ($phase eq 'post-migrate-source') {
+
+# Phase 'post-migrate-source' will be run after a migration on the source
+# node of the VM/CT
+
+print "$vmid finished migrating on source.\n";
+
+} elsif ($phase eq 'pre-migrate-target') {
+
+# Phase 'pre-migrate-target' will be run before a migration on the target
+# node of the VM/CT
+
+print "preparing $vmid for migration on target.\n";
+
+} elsif ($phase eq 'post-migrate-target') {
+
+# Phase 'post-migrate-target' will be run after a migration on the target
+# node of the VM/CT
+
+print "$vmid finished migrating on target.\n";
+
 } else {
 die "got unknown phase '$phase'\n";
 }
-- 
2.30.2





[pve-devel] [PATCH pve-guest-common 1/1] Add abstract methods for pre/post-migrate hooks

2022-09-22 Thread Stefan Hanreich
Signed-off-by: Stefan Hanreich 
---
 src/PVE/AbstractMigrate.pm | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/PVE/AbstractMigrate.pm b/src/PVE/AbstractMigrate.pm
index d90e5b7..5e03488 100644
--- a/src/PVE/AbstractMigrate.pm
+++ b/src/PVE/AbstractMigrate.pm
@@ -178,6 +178,8 @@ sub migrate {
"public key authentication\n" if $@;
}
 
+   $self->pre_migration_hooks($self->{vmid});
+
&$eval_int($self, sub { $self->phase1($self->{vmid}); });
my $err = $@;
if ($err) {
@@ -228,6 +230,8 @@ sub migrate {
$self->log('err', $err);
$self->{errors} = 1;
}
+
+   $self->post_migration_hooks($self->{vmid});
 })};
 
 my $err = $@;
@@ -368,4 +372,14 @@ sub get_bwlimit {
 return $bwlimit;
 }
 
+sub pre_migration_hooks {
+my ($self, $vmid) = @_;
+die "abstract method - implement me";
+}
+
+sub post_migration_hooks {
+my ($self, $vmid) = @_;
+die "abstract method - implement me";
+}
+
 1;
-- 
2.30.2





[pve-devel] [PATCH pve-container/qemu-server/pve-guest-common/pve-docs 0/1] Add pre/post-migrate hooks

2022-09-22 Thread Stefan Hanreich
This patch adds pre/post-migrate hooks that run when the user migrates a CT/VM
from the Web UI / CLI. I have tested this with both VMs/CTs via Web UI and CLI.
Are there any other places where the hook should get triggered that I missed?

I have decided to create distinct event types for source/target nodes, since
otherwise the same script would run essentially twice on the source/target node.
With distinct event types, the hooks should be more flexible in their usage.

In my first try I simply ssh'ed into the target nodes and ran a perl script
hardcoded as string via perl -e. This seemed very dirty though, which is why I
have opted to move those parts into the respective binaries qm/pct. There are
other subcommands already that fulfill a similar purpose (qm mtunnel) which is
why I figured this might be the better way to implement this. Then I simply
need to call the respective subcommand in the hook methods. Would it be better
to name the subcommands pre/post-migrate-target?

I have added abstract methods to the abstract class, which I implement in the
respective backends. Those methods are virtually the same, only the binary that
gets executed on the target node is different in the backends (pct vs qm). It
might make sense to move both methods to the abstract class and parametrize the
string containing the names of the binaries. I have decided against this for
now, since usually such methods end up diverging anyway. Still, it might make
sense to move the implementation up to the parent class and only have very
simple methods in the specific backends that return the necessary name of the
binary. What do you think?

pve-guest-common:

Stefan Hanreich (1):
  Add abstract methods for pre/post-migrate hooks

 src/PVE/AbstractMigrate.pm | 14 ++
 1 file changed, 14 insertions(+)


pve-container:

Stefan Hanreich (1):
  Add CT hooks for pre/post-migrate on target/source

 src/PVE/CLI/pct.pm | 51 ++
 src/PVE/LXC/Migrate.pm | 23 +++
 2 files changed, 74 insertions(+)


qemu-server:

Stefan Hanreich (1):
  Add VM hooks for pre/post-migrate on target/source

 PVE/CLI/qm.pm | 50 +++
 PVE/QemuMigrate.pm| 23 
 test/MigrationTest/QemuMigrateMock.pm |  4 +++
 3 files changed, 77 insertions(+)


pve-docs:

Stefan Hanreich (1):
  Add pre/post-migrate events for target and source to example
hookscript

 examples/guest-example-hookscript.pl | 28 
 1 file changed, 28 insertions(+)
-- 
2.30.2





[pve-devel] [PATCH qemu-server v2 3/3] qmeventd: send QMP 'quit' command instead of SIGTERM

2022-09-22 Thread Dominik Csapak
this is functionally the same, but sending SIGTERM has the ugly side
effect of printing the following to the log:

> QEMU[]: kvm: terminating on signal 15 from pid  (/usr/sbin/qmeventd)

while sending a QMP quit command does not.

Signed-off-by: Dominik Csapak 
---
 qmeventd/qmeventd.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index eebc19d..75490f4 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -287,8 +287,10 @@ handle_qmp_return(struct Client *client, struct json_object *data, bool error)
VERBOSE_PRINT("%s: QMP handshake complete\n", client->qemu.vmid);
break;
 
-   case STATE_IDLE:
+   // we expect an empty return object after sending quit
case STATE_TERMINATING:
+   break;
+   case STATE_IDLE:
VERBOSE_PRINT("%s: spurious return value received\n",
  client->qemu.vmid);
break;
@@ -489,8 +491,14 @@ terminate_client(struct Client *client)
}
 }
 
-int err = kill(client->pid, SIGTERM);
-log_neg(err, "kill");
+// try to send a 'quit' command first, fallback to SIGTERM of the pid
+static const char qmp_quit_command[] = "{\"execute\":\"quit\"}\n";
+VERBOSE_PRINT("%s: sending 'quit' via QMP\n", client->qemu.vmid);
+if (!must_write(client->fd, qmp_quit_command, sizeof(qmp_quit_command) - 1)) {
+   VERBOSE_PRINT("%s: sending 'SIGTERM' to pid %d\n", client->qemu.vmid, client->pid);
+   int err = kill(client->pid, SIGTERM);
+   log_neg(err, "kill");
+}
 
 time_t timeout = time(NULL) + kill_timeout;
 
-- 
2.30.2






[pve-devel] [PATCH qemu-server v2 0/3] qmeventd: improve shutdown behaviour

2022-09-22 Thread Dominik Csapak
includes the following improvements:
* increases 'force cleanup' timeout to 60s (from 5)
* saves individual timeout for each vm
* don't force cleanup for vms where normal cleanup worked
* sending QMP quit instead of SIGTERM (less log noise)

i opted for variant 1 of wbumiller's suggestions, as it yielded the least
change and still results in clean code

changes from v1:
* remove 'alarm' calls altogether and use epoll_wait's timeout mechanic
   instead
* call 'time()' only once and give it as user data to the function
* change the function signatures and cast on callsite with '(GFunc)'
   for the g_slist_foreach calls
* change to  for the usage output for timeouts

Dominik Csapak (3):
  qmeventd: rework 'forced_cleanup' handling and set timeout to 60s
  qmeventd: cancel 'forced cleanup' when normal cleanup succeeds
  qmeventd: send QMP 'quit' command instead of SIGTERM

 qmeventd/qmeventd.c | 104 +++-
 qmeventd/qmeventd.h |   2 +
 2 files changed, 67 insertions(+), 39 deletions(-)

-- 
2.30.2






[pve-devel] [PATCH qemu-server v2 1/3] qmeventd: rework 'forced_cleanup' handling and set timeout to 60s

2022-09-22 Thread Dominik Csapak
currently, the 'forced_cleanup' (sending SIGKILL to the qemu process)
is intended to be triggered 5 seconds after sending the initial shutdown
signal (SIGTERM), which is sadly not enough for some setups.

Accidentally, it could be triggered earlier than 5 seconds, if a
SIGALRM triggers in the timespan directly before setting it again.

Also, this approach means that depending on when machines are shut down,
their forced cleanup may happen after 5 seconds, or any time after, if
new vms are shut off in the meantime.

Improve this situation by reworking the way we deal with this cleanup.
We save the time incl. timeout in the CleanupData, and set a timeout
to 'epoll_wait' of 10 seconds, which will then trigger a forced_cleanup.
Remove entries from the forced_cleanup list when that entry is killed,
or when the normal cleanup took place.

To improve the shutdown behaviour, increase the default timeout to 60
seconds, which should be enough, but add a commandline toggle where
users can set it to a different value.

Signed-off-by: Dominik Csapak 
---
 qmeventd/qmeventd.c | 73 +++--
 qmeventd/qmeventd.h |  2 ++
 2 files changed, 39 insertions(+), 36 deletions(-)

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index 8d32827..46bc7eb 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -36,15 +37,19 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "qmeventd.h"
 
+#define DEFAULT_KILL_TIMEOUT 60
+
 static int verbose = 0;
+static int kill_timeout = DEFAULT_KILL_TIMEOUT;
 static int epoll_fd = 0;
 static const char *progname;
GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
 GSList *forced_cleanups;
-volatile sig_atomic_t alarm_triggered = 0;
+static int needs_cleanup = 0;
 
 /*
  * Helper functions
@@ -56,6 +61,7 @@ usage()
 fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
 fprintf(stderr, "  -f   run in foreground (default: false)\n");
 fprintf(stderr, "  -v   verbose (default: false)\n");
+fprintf(stderr, "  -tkill timeout (default: %ds)\n", DEFAULT_KILL_TIMEOUT);
 fprintf(stderr, "  PATH use PATH for socket\n");
 }
 
@@ -469,16 +475,17 @@ terminate_client(struct Client *client)
 int err = kill(client->pid, SIGTERM);
 log_neg(err, "kill");
 
+time_t timeout = time(NULL) + kill_timeout;
+
 struct CleanupData *data_ptr = malloc(sizeof(struct CleanupData));
 struct CleanupData data = {
.pid = client->pid,
-   .pidfd = pidfd
+   .pidfd = pidfd,
+   .timeout = timeout,
 };
 *data_ptr = data;
 forced_cleanups = g_slist_prepend(forced_cleanups, (void *)data_ptr);
-
-// resets any other alarms, but will fire eventually and cleanup all
-alarm(5);
+needs_cleanup = 1;
 }
 
 void
@@ -551,27 +558,16 @@ handle_client(struct Client *client)
 json_tokener_free(tok);
 }
 
-
-/*
- * SIGALRM and cleanup handling
- *
- * terminate_client will set an alarm for 5 seconds and add its client's PID to
- * the forced_cleanups list - when the timer expires, we iterate the list and
- * attempt to issue SIGKILL to all processes which haven't yet stopped.
- */
-
-static void
-alarm_handler(__attribute__((unused)) int signum)
-{
-alarm_triggered = 1;
-}
-
 static void
-sigkill(void *ptr, __attribute__((unused)) void *unused)
+sigkill(struct CleanupData *ptr, time_t *cur_time)
 {
-struct CleanupData data = *((struct CleanupData *)ptr);
+struct CleanupData data = *ptr;
 int err;
 
+if (data.timeout > *cur_time) {
+   return;
+}
+
 if (data.pidfd > 0) {
err = pidfd_send_signal(data.pidfd, SIGKILL, NULL, 0);
(void)close(data.pidfd);
@@ -588,21 +584,23 @@ sigkill(void *ptr, __attribute__((unused)) void *unused)
fprintf(stderr, "cleanup failed, terminating pid '%d' with SIGKILL\n",
data.pid);
 }
+
+// remove ourselves from the list
+forced_cleanups = g_slist_remove(forced_cleanups, ptr);
+free(ptr);
 }
 
 static void
 handle_forced_cleanup()
 {
-if (alarm_triggered) {
+if (g_slist_length(forced_cleanups) > 0) {
VERBOSE_PRINT("clearing forced cleanup backlog\n");
-   alarm_triggered = 0;
-   g_slist_foreach(forced_cleanups, sigkill, NULL);
-   g_slist_free_full(forced_cleanups, free);
-   forced_cleanups = NULL;
+   time_t cur_time = time(NULL);
+   g_slist_foreach(forced_cleanups, (GFunc)sigkill, &cur_time);
 }
+needs_cleanup = g_slist_length(forced_cleanups) > 0;
 }
 
-
 int
 main(int argc, char *argv[])
 {
@@ -611,7 +609,7 @@ main(int argc, char *argv[])
 char *socket_path = NULL;
 progname = argv[0];
 
-while ((opt = getopt(argc, argv, "hfv")) != -1) {
+while ((opt = getopt(argc, argv, "hfvt:")) != -1) {
switch (opt) {
case 'f':
daemonize = 0;
@@ -619,6 +617,15

[pve-devel] [PATCH qemu-server v2 2/3] qmeventd: cancel 'forced cleanup' when normal cleanup succeeds

2022-09-22 Thread Dominik Csapak
instead of always sending a SIGKILL to the target pid.
It was not that much of a problem since the timeout previously was 5
seconds and we used pidfds where possible, thus the chance of killing the
wrong process was rather slim.

Now we increased the timeout to 60s which makes the race a bit more likely
(when not using pidfds), so remove it from the 'forced_cleanups' list when
the normal cleanup succeeds.

Signed-off-by: Dominik Csapak 
---
 qmeventd/qmeventd.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index 46bc7eb..eebc19d 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -416,6 +416,22 @@ cleanup_qemu_client(struct Client *client)
 }
 }
 
+static void
+remove_cleanup_data(struct CleanupData *data, struct Client *client) {
+if (data->pid == client->pid) {
+   forced_cleanups = g_slist_remove(forced_cleanups, data);
+   free(data);
+}
+}
+
+static void
+remove_from_forced_cleanup(struct Client *client) {
+if (g_slist_length(forced_cleanups) > 0) {
+   VERBOSE_PRINT("removing %s from forced cleanups\n", client->qemu.vmid);
+   g_slist_foreach(forced_cleanups, (GFunc)remove_cleanup_data, client);
+}
+}
+
 void
 cleanup_client(struct Client *client)
 {
@@ -442,6 +458,7 @@ cleanup_client(struct Client *client)
break;
 }
 
+remove_from_forced_cleanup(client);
 free(client);
 }
 
-- 
2.30.2


