with two small follow-ups:
- re-order conditions for fallback to old path (ideally avoiding one stat())
- drop aliased private sub
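for the first one, i.e. the tail end of get_rbd_dev_path, I'd imagine
something along these lines (just an untested sketch; checking the new
path first means the common case needs only a single stat()):

    my $rbd_path = get_rbd_path($scfg, $volume);
    my $pve_path = "/dev/rbd-pve/${cluster_id}/${rbd_path}";
    my $path = "/dev/rbd/${rbd_path}";

    # prefer the new rbd-pve symlink, only fall back to the old path for
    # images that were mapped before the rbd-pve udev rule was installed
    return $pve_path if -e $pve_path;
    return $path if -e $path;
    return $pve_path; # not mapped yet, will show up under the new path

for the second, $librados_connect could simply become a named private sub
like the other new helpers (it just needs to stay above its first use in
get_rbd_dev_path, and callers change from $librados_connect->(...) to
librados_connect(...)):

    my sub librados_connect {
	my ($scfg, $storeid, $options) = @_;

	my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
	return PVE::RADOS->new(%$librados_config);
    }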
please don't forget to submit the udev change upstream as well ;)

On April 13, 2022 11:43 am, Aaron Lauterer wrote:
> By adding our own customized rbd udev rules and ceph-rbdnamer we can
> create device paths that include the cluster fsid and avoid any
> ambiguity if the same pool and namespace combination is used in
> different clusters we connect to.
> 
> In addition to the '/dev/rbd/<pool>/...' paths we now have
> '/dev/rbd-pve/<cluster fsid>/<pool>/...' paths.
> 
> The other half of the patch makes use of the new device paths in the
> RBD plugin.
> 
> The new 'get_rbd_dev_path' method returns the full device path. In case
> the image was mapped before the rbd-pve udev rule was installed, it
> returns the old path.
> 
> The cluster fsid is read from the 'ceph.conf' file in the case of a
> hyperconverged setup. In the case of an external Ceph cluster we need
> to fetch it via a rados api call.
> 
> Co-authored-by: Thomas Lamprecht <t.lampre...@proxmox.com>
> Signed-off-by: Aaron Lauterer <a.laute...@proxmox.com>
> ---
> 
> Testing the migration phase can be done by starting a VM first, then
> installing the new package version. Don't forget to have the disk image
> on an rbd storage with krbd enabled.
> 
> `qm showcmd VMID --pretty` should still show the old rbd path for that
> disk.
> Add a new disk to the running VM and run showcmd again. It should show
> the new rbd path for the added disk.
> 
> The udev rules seem to get applied automatically after installing the
> package. So we should be fine to assume that once we run this new code,
> newly mapped rbd images will also be exposed under the new /dev/rbd-pve
> path.
> 
> I also realized that the rescan logic (pct rescan, qm rescan) gets
> confused by having two pools with the same name on different clusters.
> Something we need to take a look at as well.
> 
> changes since v1:
> * reverted changes to get_rbd_path
> * added additional get_rbd_dev_path function that returns the full path
> 
>  Makefile                   |  1 +
>  PVE/Storage/RBDPlugin.pm   | 60 ++++++++++++++++++++++++++++----------
>  udev-rbd/50-rbd-pve.rules  |  2 ++
>  udev-rbd/Makefile          | 21 +++++++++++++
>  udev-rbd/ceph-rbdnamer-pve | 24 +++++++++++++++
>  5 files changed, 92 insertions(+), 16 deletions(-)
>  create mode 100644 udev-rbd/50-rbd-pve.rules
>  create mode 100644 udev-rbd/Makefile
>  create mode 100755 udev-rbd/ceph-rbdnamer-pve
> 
> diff --git a/Makefile b/Makefile
> index 431db16..029b586 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -41,6 +41,7 @@ install: PVE pvesm.1 pvesm.bash-completion pvesm.zsh-completion
>  	install -d ${DESTDIR}${SBINDIR}
>  	install -m 0755 pvesm ${DESTDIR}${SBINDIR}
>  	make -C PVE install
> +	make -C udev-rbd install
>  	install -d ${DESTDIR}/usr/share/man/man1
>  	install -m 0644 pvesm.1 ${DESTDIR}/usr/share/man/man1/
>  	gzip -9 -n ${DESTDIR}/usr/share/man/man1/pvesm.1
> diff --git a/PVE/Storage/RBDPlugin.pm b/PVE/Storage/RBDPlugin.pm
> index e287e28..3293565 100644
> --- a/PVE/Storage/RBDPlugin.pm
> +++ b/PVE/Storage/RBDPlugin.pm
> @@ -8,6 +8,7 @@ use JSON;
>  use Net::IP;
>  
>  use PVE::CephConfig;
> +use PVE::Cluster qw(cfs_read_file);
>  use PVE::JSONSchema qw(get_standard_option);
>  use PVE::ProcFSTools;
>  use PVE::RADOS;
> @@ -22,6 +23,16 @@ my $get_parent_image_name = sub {
>      return $parent->{image} . "@" . $parent->{snapshot};
>  };
>  
> +my $librados_connect = sub {
> +    my ($scfg, $storeid, $options) = @_;
> +
> +    my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
> +
> +    my $rados = PVE::RADOS->new(%$librados_config);
> +
> +    return $rados;
> +};
> +
>  my sub get_rbd_path {
>      my ($scfg, $volume) = @_;
>      my $path = $scfg->{pool} ? $scfg->{pool} : 'rbd';
> @@ -30,6 +41,32 @@ my sub get_rbd_path {
>      return $path;
>  };
>  
> +my sub get_rbd_dev_path {
> +    my ($scfg, $storeid, $volume) = @_;
> +
> +    my $cluster_id = '';
> +    if ($scfg->{monhost}) {
> +	my $rados = $librados_connect->($scfg, $storeid);
> +	$cluster_id = $rados->mon_command({ prefix => 'fsid', format => 'json' })->{fsid};
> +    } else {
> +	$cluster_id = cfs_read_file('ceph.conf')->{global}->{fsid};
> +    }
> +
> +    my $uuid_pattern = "([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})";
> +    if ($cluster_id =~ qr/^${uuid_pattern}$/is) {
> +	$cluster_id = $1; # use untainted value
> +    } else {
> +	die "cluster fsid has invalid format\n";
> +    }
> +
> +    my $rbd_path = get_rbd_path($scfg, $volume);
> +    my $pve_path = "/dev/rbd-pve/${cluster_id}/${rbd_path}";
> +    my $path = "/dev/rbd/${rbd_path}";
> +
> +    return $path if -e $path && !-e $pve_path; # mapped before rbd-pve udev rule existed
> +    return $pve_path;
> +}
> +
>  my $build_cmd = sub {
>      my ($binary, $scfg, $storeid, $op, @options) = @_;
>  
> @@ -70,16 +107,6 @@ my $rados_cmd = sub {
>      return $build_cmd->('/usr/bin/rados', $scfg, $storeid, $op, @options);
>  };
>  
> -my $librados_connect = sub {
> -    my ($scfg, $storeid, $options) = @_;
> -
> -    my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
> -
> -    my $rados = PVE::RADOS->new(%$librados_config);
> -
> -    return $rados;
> -};
> -
>  # needed for volumes created using ceph jewel (or higher)
>  my $krbd_feature_update = sub {
>      my ($scfg, $storeid, $name) = @_;
> @@ -380,9 +407,10 @@ sub path {
>      my ($vtype, $name, $vmid) = $class->parse_volname($volname);
>      $name .= '@'.$snapname if $snapname;
>  
> -    my $rbd_path = get_rbd_path($scfg, $name);
> -    return ("/dev/rbd/${rbd_path}", $vmid, $vtype) if $scfg->{krbd};
> +    my $rbd_dev_path = get_rbd_dev_path($scfg, $storeid, $name);
> +    return ($rbd_dev_path, $vmid, $vtype) if $scfg->{krbd};
>  
> +    my $rbd_path = get_rbd_path($scfg, $name);
>      my $path = "rbd:${rbd_path}";
>  
>      $path .= ":conf=$cmd_option->{ceph_conf}" if $cmd_option->{ceph_conf};
> @@ -619,8 +647,8 @@ sub deactivate_storage {
>  }
>  
>  my sub get_kernel_device_path {
> -    my ($scfg, $name) = @_;
> -    return "/dev/rbd/" . get_rbd_path($scfg, $name);
> +    my ($scfg, $storeid, $name) = @_;
> +    return get_rbd_dev_path($scfg, $storeid, $name);
>  };
>  
>  sub map_volume {
> @@ -631,7 +659,7 @@ sub map_volume {
>      my $name = $img_name;
>      $name .= '@'.$snapname if $snapname;
>  
> -    my $kerneldev = get_kernel_device_path($scfg, $name);
> +    my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);
>  
>      return $kerneldev if -b $kerneldev; # already mapped
>  
> @@ -650,7 +678,7 @@ sub unmap_volume {
>      my ($vtype, $name, $vmid) = $class->parse_volname($volname);
>      $name .= '@'.$snapname if $snapname;
>  
> -    my $kerneldev = get_kernel_device_path($scfg, $name);
> +    my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);
>  
>      if (-b $kerneldev) {
>  	my $cmd = $rbd_cmd->($scfg, $storeid, 'unmap', $kerneldev);
> diff --git a/udev-rbd/50-rbd-pve.rules b/udev-rbd/50-rbd-pve.rules
> new file mode 100644
> index 0000000..79432df
> --- /dev/null
> +++ b/udev-rbd/50-rbd-pve.rules
> @@ -0,0 +1,2 @@
> +KERNEL=="rbd[0-9]*", ENV{DEVTYPE}=="disk", PROGRAM="/usr/libexec/ceph-rbdnamer-pve %k", SYMLINK+="rbd-pve/%c"
> +KERNEL=="rbd[0-9]*", ENV{DEVTYPE}=="partition", PROGRAM="/usr/libexec/ceph-rbdnamer-pve %k", SYMLINK+="rbd-pve/%c-part%n"
> diff --git a/udev-rbd/Makefile b/udev-rbd/Makefile
> new file mode 100644
> index 0000000..065933b
> --- /dev/null
> +++ b/udev-rbd/Makefile
> @@ -0,0 +1,21 @@
> +PACKAGE=libpve-storage-perl
> +
> +DESTDIR=
> +PREFIX=/usr
> +LIBEXECDIR=${PREFIX}/libexec
> +LIBDIR=${PREFIX}/lib
> +
> +all:
> +
> +.PHONY: install
> +install: 50-rbd-pve.rules ceph-rbdnamer-pve
> +	install -d ${DESTDIR}${LIBEXECDIR}
> +	install -m 0755 ceph-rbdnamer-pve ${DESTDIR}${LIBEXECDIR}
> +	install -d ${DESTDIR}${LIBDIR}/udev/rules.d
> +	install -m 0644 50-rbd-pve.rules ${DESTDIR}${LIBDIR}/udev/rules.d
> +
> +.PHONY: clean
> +clean:
> +
> +.PHONY: distclean
> +distclean: clean
> diff --git a/udev-rbd/ceph-rbdnamer-pve b/udev-rbd/ceph-rbdnamer-pve
> new file mode 100755
> index 0000000..23dd626
> --- /dev/null
> +++ b/udev-rbd/ceph-rbdnamer-pve
> @@ -0,0 +1,24 @@
> +#!/bin/sh
> +
> +DEV=$1
> +NUM=`echo $DEV | sed 's#p.*##g; s#[a-z]##g'`
> +POOL=`cat /sys/devices/rbd/$NUM/pool`
> +CLUSTER_FSID=`cat /sys/devices/rbd/$NUM/cluster_fsid`
> +
> +if [ -f /sys/devices/rbd/$NUM/pool_ns ]; then
> +	NAMESPACE=`cat /sys/devices/rbd/$NUM/pool_ns`
> +else
> +	NAMESPACE=""
> +fi
> +IMAGE=`cat /sys/devices/rbd/$NUM/name`
> +SNAP=`cat /sys/devices/rbd/$NUM/current_snap`
> +
> +echo -n "/$CLUSTER_FSID/$POOL"
> +
> +if [ -n "$NAMESPACE" ]; then
> +	echo -n "/$NAMESPACE"
> +fi
> +echo -n "/$IMAGE"
> +if [ "$SNAP" != "-" ]; then
> +	echo -n "@$SNAP"
> +fi
> -- 
> 2.30.2
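one more note for testing the udev half in isolation: the namer script
can be exercised directly, without going through udev at all. assuming an
image is currently mapped as rbd0 (device and image names below are made
up, the fsid is a placeholder):

    # print the path suffix the udev rule turns into a symlink below /dev/rbd-pve
    $ /usr/libexec/ceph-rbdnamer-pve rbd0
    /<cluster fsid>/<pool>/vm-100-disk-0

    # and compare with the symlinks the rule actually created
    $ ls -lR /dev/rbd-pve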