Hi, I've been using KVM for some time now, including live migration, with a
Ceph backend. Recently I started running into an issue with only one of my
VMs, which happens to be a Windows Server 2012 guest. When I migrate this
particular VM, it seems that not all of the RAM is transferred. When the
migration completes, the migrated VM simply hangs and I have to force a
shutdown. Not long ago this was working fine; however, I didn't notice
exactly when the issue started.

Note that my diagnosis of the problem may be wrong. The behavior I'm
actually seeing is that my VM hangs after live migration, and I don't see
the problem on other VMs. However, I noticed a difference in the RAM usage
of the qemu process before and after migration, which leads me to believe
that the RAM is not fully transferred.

before migration on node2:
15225 libvirt+  20   0 7128048 4.200g  13592 S  15.0 27.3   1:14.18
qemu-system-x86
libvirt+ 15225 52.7 27.3 7128048 4404244 ?     Sl   12:57   1:14
qemu-system-x86_64

after migration on node1:
16507 libvirt+  20   0 6571864 1.610g  13152 R   7.6 10.5   0:07.63
qemu-system-x86
libvirt+ 16507 15.7 10.4 6571864 1688392 ?     Sl   13:00   0:08
qemu-system-x86_64

libvirtd.log on node2:
2014-12-19 13:01:06.654+0000: 6845: warning :
qemuMigrationCancelDriveMirror:1421 : Unable to stop block job on
drive-virtio-disk0
2014-12-19 13:01:06.656+0000: 6845: warning :
qemuMigrationCancelDriveMirror:1421 : Unable to stop block job on
drive-virtio-disk1

libvirtd.log on node1:
2014-12-19 13:00:52.346+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous
2014-12-19 13:00:52.436+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous
2014-12-19 13:00:52.437+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous
2014-12-19 13:00:52.441+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous
2014-12-19 13:00:52.480+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous
2014-12-19 13:00:52.480+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous
2014-12-19 13:00:52.481+0000: 7258: warning :
qemuDomainObjEnterMonitorInternal:1274 : This thread seems to be the async
job owner; entering monitor without asking for a nested job is dangerous


ceph version 0.87-1trusty
ubuntu 14.04 LTS
Linux compute02 3.13.0-43-generic #72-Ubuntu SMP Mon Dec 8 19:35:06 UTC
2014 x86_64 x86_64 x86_64 GNU/Linux (on both nodes)

virsh cmd to migrate:
virsh migrate --live win12 qemu+ssh://192.x.x.x/system
echo $?
0

qemu cmd line

before on node2:
30308 ?        Sl   449:03 qemu-system-x86_64 -enable-kvm -name win12 -S
-machine pc-i440fx-trusty,accel=kvm,usb=off -cpu Nehalem -m 4096 -realtime
mlock=off -smp 4,sockets=4,cores=1,threads=1 -uuid
be2c1183-de3b-4994-a20b-2b89a9c4b073 -no-user-config -nodefaults -chardev
socket,id=charmonitor,path=/var/lib/libvirt/qemu/win12.monitor,server,nowait
-mon chardev=charmonitor,id=monitor,mode=control -rtc
base=localtime,driftfix=slew -global kvm-pit.lost_tick_policy=discard
-no-hpet -no-shutdown -global PIIX4_PM.disable_s3=1 -global
PIIX4_PM.disable_s4=1 -boot menu=off,strict=on -device
ich9-usb-ehci1,id=usb,bus=pci.0,addr=0x5.0x7 -device
ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pci.0,multifunction=on,addr=0x5
-device ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pci.0,addr=0x5.0x1
-device ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pci.0,addr=0x5.0x2
-device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 -drive
file=rbd:libvirt-pool/win12:id=libvirt:key=AQCKS4ZTQMYDKBAA1Q8zuys/l+OJ/n9GJjlk9g==:auth_supported=cephx\;none:mon_host=compute01\:6789\;compute02\:6789\;mgmt01\:6789,if=none,id=drive-virtio-disk0,cache=writeback
-device
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1
-drive
file=rbd:live_migration/win2012_backups:id=live_migration:key=AQCyRsRRaI6IAxAAPW74dBKlAVUJvkaXadaecw==:auth_supported=cephx\;none:mon_host=compute01\:6789\;compute02\:6789\;mgmt01\:6789,if=none,id=drive-virtio-disk1
-device
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x9,drive=drive-virtio-disk1,id=virtio-disk1
-drive if=none,id=drive-ide0-0-1,readonly=on,format=raw -device
ide-cd,bus=ide.0,unit=1,drive=drive-ide0-0-1,id=ide0-0-1 -netdev
tap,fd=26,id=hostnet0,vhost=on,vhostfd=27 -device
virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:fa:aa:c6,bus=pci.0,addr=0x3
-chardev pty,id=charserial0 -device
isa-serial,chardev=charserial0,id=serial0 -chardev
spicevmc,id=charchannel0,name=vdagent -device
virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=com.redhat.spice.0
-device usb-tablet,id=input0 -vnc 0.0.0.0:0 -device
qxl-vga,id=video0,ram_size=67108864,vram_size=67108864,bus=pci.0,addr=0x2
-device intel-hda,id=sound0,bus=pci.0,addr=0x4 -device
hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -chardev
spicevmc,id=charredir0,name=usbredir -device
usb-redir,chardev=charredir0,id=redir0 -chardev
spicevmc,id=charredir1,name=usbredir -device
usb-redir,chardev=charredir1,id=redir1 -chardev
spicevmc,id=charredir2,name=usbredir -device
usb-redir,chardev=charredir2,id=redir2 -chardev
spicevmc,id=charredir3,name=usbredir -device
usb-redir,chardev=charredir3,id=redir3 -device
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7

after on node1:
1489 ?        Sl     0:47 qemu-system-x86_64 -enable-kvm -name win12 -S
-machine pc-i440fx-trusty,accel=kvm,usb=off -cpu Nehalem -m 4096 -realtime
mlock=off -smp 4,sockets=4,cores=1,threads=1 -uuid
be2c1183-de3b-4994-a20b-2b89a9c4b073 -no-user-config -nodefaults -chardev
socket,id=charmonitor,path=/var/lib/libvirt/qemu/win12.monitor,server,nowait
-mon chardev=charmonitor,id=monitor,mode=control -rtc
base=localtime,driftfix=slew -global kvm-pit.lost_tick_policy=discard
-no-hpet -no-shutdown -global PIIX4_PM.disable_s3=1 -global
PIIX4_PM.disable_s4=1 -boot menu=off,strict=on -device
ich9-usb-ehci1,id=usb,bus=pci.0,addr=0x5.0x7 -device
ich9-usb-uhci1,masterbus=usb.0,firstport=0,bus=pci.0,multifunction=on,addr=0x5
-device ich9-usb-uhci2,masterbus=usb.0,firstport=2,bus=pci.0,addr=0x5.0x1
-device ich9-usb-uhci3,masterbus=usb.0,firstport=4,bus=pci.0,addr=0x5.0x2
-device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x6 -drive
file=rbd:libvirt-pool/win12:id=libvirt:key=AQCKS4ZTQMYDKBAA1Q8zuys/l+OJ/n9GJjlk9g==:auth_supported=cephx\;none:mon_host=compute01\:6789\;compute02\:6789\;mgmt01\:6789,if=none,id=drive-virtio-disk0,cache=writeback
-device
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1
-drive
file=rbd:live_migration/win2012_backups:id=live_migration:key=AQCyRsRRaI6IAxAAPW74dBKlAVUJvkaXadaecw==:auth_supported=cephx\;none:mon_host=compute01\:6789\;compute02\:6789\;mgmt01\:6789,if=none,id=drive-virtio-disk1
-device
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x9,drive=drive-virtio-disk1,id=virtio-disk1
-drive if=none,id=drive-ide0-0-1,readonly=on,format=raw -device
ide-cd,bus=ide.0,unit=1,drive=drive-ide0-0-1,id=ide0-0-1 -netdev
tap,fd=26,id=hostnet0,vhost=on,vhostfd=27 -device
virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:fa:aa:c6,bus=pci.0,addr=0x3
-chardev pty,id=charserial0 -device
isa-serial,chardev=charserial0,id=serial0 -chardev
spicevmc,id=charchannel0,name=vdagent -device
virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=com.redhat.spice.0
-device usb-tablet,id=input0 -vnc 0.0.0.0:1 -device
qxl-vga,id=video0,ram_size=67108864,vram_size=67108864,bus=pci.0,addr=0x2
-device intel-hda,id=sound0,bus=pci.0,addr=0x4 -device
hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -chardev
spicevmc,id=charredir0,name=usbredir -device
usb-redir,chardev=charredir0,id=redir0 -chardev
spicevmc,id=charredir1,name=usbredir -device
usb-redir,chardev=charredir1,id=redir1 -chardev
spicevmc,id=charredir2,name=usbredir -device
usb-redir,chardev=charredir2,id=redir2 -chardev
spicevmc,id=charredir3,name=usbredir -device
usb-redir,chardev=charredir3,id=redir3 -incoming tcp:[::]:49152 -device
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7

I also noticed that when I stopped the VM (after it hung) on the node it
was migrated to (node1), the VM definition wasn't saved there... But it
still exists on node2:
[root@compute01 ~]# virsh list
 Id    Name                           State
----------------------------------------------------
 2     vpn01                          running
 3     win12                          running

[root@compute01 ~]# virsh destroy win12
Domain win12 destroyed

[root@compute01 ~]# virsh list --all
 Id    Name                           State
----------------------------------------------------
 2     vpn01                          running
 -     mon01                          shut off
 -     tools01                        shut off
 -     web01                          shut off

[root@compute02 ~]# virsh list --all
 Id    Name                           State
----------------------------------------------------
 -     mon01                          shut off
 -     testpc                         shut off
 -     tools01                        shut off
 -     web01                          shut off
 -     win12                          shut off

--
Jean-Sébastien Frerot
jsfre...@egliseespoir.com

Reply via email to