Hi,

I have a host server with multiple GPU cards, and I am assigning them to
qemu with VFIO.

I found that when setting up the last free GPU, the qemu process would hang
and take almost 10 minutes to finish starting up. I did some digging with
gdb and found that the slowest part was the
hw/vfio/common.c:vfio_dma_map function call.


static int vfio_dma_map(VFIOContainer *container, hwaddr iova, ram_addr_t
size, void *vaddr, bool readonly)
{
...
    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
        (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 &&
         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
        return 0;
    }
...
}


I was able to reproduce the hang on one of my hosts: I was setting up a VM
with 4GB of memory, while the host still had 16GB free. The GPU's physical
memory is 8GB.

This phenomenon was also observed occasionally on other hosts, and the
common factor is that it always happened on the last free GPU.


The full stack trace file is attached. Looking forward to your help, thanks.


- Bob
#0  vfio_dma_map (container=0x555556b1f880, iova=0, size=655360, 
vaddr=0x7ffecfe00000, readonly=false) at 
/usr/src/debug/qemu-2.6.2/hw/vfio/common.c:227
        map = {argsz = 655359, flags = 0, vaddr = 0, iova = 140737488339248, 
size = 93824994141691}
#1  0x00005555557712dc in vfio_listener_region_add (listener=0x555556b1f890, 
section=0x7fffffffc1f0) at /usr/src/debug/qemu-2.6.2/hw/vfio/common.c:419
        container = 0x555556b1f880
        iova = 0
        end = 655359
        llend = {lo = 655360, hi = 0}
        llsize = {lo = 655360, hi = 0}
        vaddr = 0x7ffecfe00000
        ret = 7
        __func__ = "vfio_listener_region_add"
#2  0x0000555555728465 in listener_add_address_space (listener=0x555556b1f890, 
as=0x5555560823e0 <address_space_memory>)
    at /usr/src/debug/qemu-2.6.2/memory.c:2179
        section = {mr = 0x5555566ec570, address_space = 0x5555560823e0 
<address_space_memory>, offset_within_region = 0, size = {lo = 655360, hi = 0},
          offset_within_address_space = 0, readonly = false}
        view = 0x555557ae3bd0
        fr = 0x5555566f0c00
#3  0x000055555572860d in memory_listener_register (listener=0x555556b1f890, 
filter=0x5555560823e0 <address_space_memory>)
    at /usr/src/debug/qemu-2.6.2/memory.c:2208
        other = 0x5555565b4910
        as = 0x5555560823e0 <address_space_memory>
#4  0x0000555555772811 in vfio_connect_container (group=0x55555784bce0, 
as=0x5555560823e0 <address_space_memory>)
    at /usr/src/debug/qemu-2.6.2/hw/vfio/common.c:900
        container = 0x555556b1f880
        ret = 0
        fd = 35
        space = 0x55555784bd20
#5  0x0000555555772cbc in vfio_get_group (groupid=25, as=0x5555560823e0 
<address_space_memory>) at /usr/src/debug/qemu-2.6.2/hw/vfio/common.c:1008
        group = 0x55555784bce0
        path = 
"/dev/vfio/25\000U\000\000P\303\377\377\377\177\000\000\332Q\224UUU\000"
        status = {argsz = 8, flags = 1}
#6  0x000055555577af5c in vfio_initfn (pdev=0x5555581672b0) at 
/usr/src/debug/qemu-2.6.2/hw/vfio/pci.c:2447
        vdev = 0x5555581672b0
        vbasedev_iter = 0x40b00000000
        group = 0x555555bbc65d
        tmp = 0x555557640b60 ""
        group_path = 
"../../../../../../kernel/iommu_groups/25\000\000\000\000\343\003\000\000\031ĻUUU\000\000\000\000\000\000\000\000\000\000\220\304\377\377\377\177\000\000]ƻU\a\000\000\000\320ɻUUU\000\000\360\304\377\377\v\004\000\000\300\305\377\377\377\177\000\000I\252\260UUU\000\000\360\304\377\377\377\1-
77\000\000\000\000\000\000\000\000\000\000\320\304\377\377\377\177\000\000]ƻUUU\000\000\260ɻUUU\000\000f˲U\343\003\000\000\241:\000\000\000\200\377\377\002",
 '\000' <repeats 23 times>, 
"\060\000\000\000[\000\000\000`\305\377\377\377\177"...
        group_name = 0x7fffffffc466 "25"
        len = 40
        st = {st_dev = 17, st_ino = 39127, st_nlink = 3, st_mode = 16877, 
st_uid = 0, st_gid = 0, __pad0 = 0, st_rdev = 0, st_size = 0, st_blksize = 4096,
          st_blocks = 0, st_atim = {tv_sec = 1513939417, tv_nsec = 943657386}, 
st_mtim = {tv_sec = 1510113186, tv_nsec = 596000001}, st_ctim = {
            tv_sec = 1510113186, tv_nsec = 596000001}, __unused = {0, 0, 0}}
        groupid = 25
        ret = 21845
#7  0x0000555555943b65 in pci_default_realize (dev=0x5555581672b0, 
errp=0x7fffffffd4b8) at hw/pci/pci.c:1895
        pc = 0x555556568e70
        __func__ = "pci_default_realize"
#8  0x0000555555943a08 in pci_qdev_realize (qdev=0x5555581672b0, 
errp=0x7fffffffd520) at hw/pci/pci.c:1867
        pci_dev = 0x5555581672b0
        pc = 0x555556568e70
        __func__ = "pci_qdev_realize"
        local_err = 0x0
        bus = 0x5555569baea0
        is_default_rom = false
#9  0x00005555558af8da in device_set_realized (obj=0x5555581672b0, value=true, 
errp=0x7fffffffd6e0) at hw/core/qdev.c:1066
        dev = 0x5555581672b0
        __func__ = "device_set_realized"
        dc = 0x555556568e70
        hotplug_ctrl = 0x555555af83cf <visit_type_bool+50>
        bus = 0x7fffffffd5c7
        local_err = 0x0
#10 0x0000555555a3754d in property_set_bool (obj=0x5555581672b0, 
v=0x5555565a9140, name=0x555555b494e9 "realized", opaque=0x555556c092f0,
    errp=0x7fffffffd6e0) at qom/object.c:1853
        prop = 0x555556c092f0
        value = true
        local_err = 0x0
#11 0x0000555555a35a33 in object_property_set (obj=0x5555581672b0, 
v=0x5555565a9140, name=0x555555b494e9 "realized", errp=0x7fffffffd6e0)
    at qom/object.c:1081
        prop = 0x555556d2b8e0
        __func__ = "object_property_set"
#12 0x0000555555a388bf in object_property_set_qobject (obj=0x5555581672b0, 
value=0x5555583aceb0, name=0x555555b494e9 "realized", errp=0x7fffffffd6e0)
    at qom/qom-qobject.c:26
        qiv = 0x5555565a9140
#13 0x0000555555a35cc9 in object_property_set_bool (obj=0x5555581672b0, 
value=true, name=0x555555b494e9 "realized", errp=0x7fffffffd6e0)
    at qom/object.c:1150
        qbool = 0x5555583aceb0
#14 0x000055555582cf15 in qdev_device_add (opts=0x5555565647d0, 
errp=0x7fffffffd768) at qdev-monitor.c:618
        dc = 0x555556568e70
        driver = 0x555556564880 "vfio-pci"
        path = 0x0
        id = 0x0
        dev = 0x5555581672b0
        bus = 0x5555569baea0
        err = 0x0
        __func__ = "qdev_device_add"
#15 0x000055555583ee2c in device_init_func (opaque=0x0, opts=0x5555565647d0, 
errp=0x0) at vl.c:2358
        err = 0x0
        dev = 0x555557d439c0
#16 0x0000555555b1958d in qemu_opts_foreach (list=0x555555fba6a0 
<qemu_device_opts>, func=0x55555583edee <device_init_func>, opaque=0x0, 
errp=0x0)
    at util/qemu-option.c:1116
        loc = {kind = LOC_CMDLINE, num = 2, ptr = 0x7fffffffdcf0, prev = 
0x555556511070 <std_loc>}
        opts = 0x5555565647d0
        rc = 0
        __PRETTY_FUNCTION__ = "qemu_opts_foreach"
#17 0x0000555555843fee in main (argc=63, argv=0x7fffffffdbc8, 
envp=0x7fffffffddc8) at vl.c:4546
        i = -9952
        snapshot = 0
        linux_boot = 0
        initrd_filename = 0x0
        kernel_filename = 0x0
        kernel_cmdline = 0x555555b4cd66 ""
        boot_order = 0x555556564640 "cdn"
        boot_once = 0x0
        ds = 0xfffffffe7fffffff
        cyls = 0
        heads = 0
        secs = 0
        translation = 0
        hda_opts = 0x0
        opts = 0x555556564590
        machine_opts = 0x555556563390
        icount_opts = 0x0
        olist = 0x555555fbb080 <qemu_machine_opts>
        optind = 63
        optarg = 0x7fffffffe779 
"virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
        loadvm = 0x0
        machine_class = 0x555556577860
        cpu_model = 0x7fffffffe06b 
"host,hv_relaxed,hv_spinlocks=0x1fff,hv_vapic,hv_time"
        vga_model = 0x7fffffffe245 "std"
        qtest_chrdev = 0x0
        qtest_log = 0x0
        pid_file = 0x7fffffffe69f 
"/opt/cloud/workspace/servers/bb5e4935-f3a9-45db-a248-b7d442613f34/pid"
        incoming = 0x0
        show_vnc_port = 0
        defconfig = false
        userconfig = true
        log_mask = 0x0
        log_file = 0x0
        trace_file = 0x0
        maxram_size = 274877906944
        ram_slots = 4
        vmstate_dump_file = 0x0
        main_loop_err = 0x0
        err = 0x0
        __func__ = "main"

Reply via email to