Hi all.

Hosts: 5x Dell R815, 128 GB RAM each, 25 OSDs + 5 SSDs (journal + system)
Network: 2x 10Gb + LACP
Kernel: 2.6.32
QEMU emulator version 1.4.2, Copyright (c) 2003-2008 Fabrice Bellard
POOLs:

root@kvm05:~# ceph osd dump | grep 'rbd'
pool 5 'rbd' rep size 2 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 1400 pgp_num 1400 last_change 12550 owner 0
---------------------------
root@kvm05:~# ceph osd dump | grep 'test'
pool 32 'test' rep size 2 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 1400 pgp_num 1400 last_change 12655 owner 0
----------------------------
root@kvm01:~# ceph -v
ceph version 0.72.2 (a913ded2ff138aefb8cb84d347d72164099cfd60)
--------------------------
root@kvm01:~# rados bench -p test 120 write --no-cleanup
Total time run:         120.125225
Total writes made:      11519
Write size:             4194304
Bandwidth (MB/sec):     383.566
Stddev Bandwidth:       36.2022
Max bandwidth (MB/sec): 408
Min bandwidth (MB/sec): 0
Average Latency:        0.166819
Stddev Latency:         0.0553357
Max latency:            1.60795
Min latency:            0.044263
--------------------------
root@kvm01:~# rados bench -p test 120 seq
Total time run:        67.271769
Total reads made:      11519
Read size:             4194304
Bandwidth (MB/sec):    684.923
Average Latency:       0.0933579
Max latency:           0.808438
Min latency:           0.018063
---------------------------
[root@cephadmin cluster]# cat ceph.conf
[global]
fsid = 43a571a9-b3e8-4dc9-9200-1f3904e1e12a
initial_members = kvm01, kvm02, kvm03
mon_host = 192.168.100.1, 192.168.100.2, 192.168.100.3
auth_supported = cephx
public network = 192.168.100.0/24
cluster_network = 192.168.101.0/24

[osd]
osd journal size = 12500
osd mkfs type = xfs
osd mkfs options xfs = -f -i size=2048
osd mount options xfs = rw,noatime,inode64,logbsize=256k,delaylog
osd op threads = 10
osd disk threads = 10
osd max backfills = 2
osd recovery max active = 1
filestore op threads = 64
filestore xattr use omap = true

[client]
rbd cache = true
rbd cache size = 134217728
rbd cache max dirty = 0

[mon.kvm01]
host = kvm01
mon addr = 192.168.100.1:6789

[mon.kvm02]
host = kvm02
mon addr = 192.168.100.2:6789

[mon.kvm03]
host = kvm03
mon addr = 192.168.100.3:6789

[osd.0]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.1]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.2]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.3]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.4]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.5]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.6]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.7]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.8]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.9]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.10]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.11]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.12]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.13]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.14]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.15]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.16]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.17]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.18]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.19]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.20]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.21]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.22]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.23]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.24]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
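Side note on the [client] section above: with "rbd cache max dirty = 0", librbd keeps its cache in write-through mode, and the cache is only used if the QEMU process actually enables it (see the remark about cache= after the kvm command line below). To double-check what a running client really ends up with, one option -- only a sketch, the socket path is an example and has to be writable by the qemu process -- is to give clients an admin socket and query it:

[client]
admin socket = /var/run/ceph/$cluster-$type.$id.$pid.asok

# then, after restarting the VM, on the hypervisor:
ceph --admin-daemon /var/run/ceph/ceph-client.admin.<pid>.asok config show | grep rbd_cache
---------------------------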
-----------------------
[root@cephadmin ~]# cat crushd
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19
device 20 osd.20
device 21 osd.21
device 22 osd.22
device 23 osd.23
device 24 osd.24

# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root

# buckets
host kvm01 {
        id -2           # do not change unnecessarily
        # weight 2.750
        alg straw
        hash 0  # rjenkins1
        item osd.0 weight 0.550
        item osd.1 weight 0.550
        item osd.2 weight 0.550
        item osd.3 weight 0.550
        item osd.4 weight 0.550
}
host kvm02 {
        id -3           # do not change unnecessarily
        # weight 2.750
        alg straw
        hash 0  # rjenkins1
        item osd.5 weight 0.550
        item osd.6 weight 0.550
        item osd.7 weight 0.550
        item osd.8 weight 0.550
        item osd.9 weight 0.550
}
host kvm03 {
        id -4           # do not change unnecessarily
        # weight 2.750
        alg straw
        hash 0  # rjenkins1
        item osd.10 weight 0.550
        item osd.11 weight 0.550
        item osd.12 weight 0.550
        item osd.13 weight 0.550
        item osd.14 weight 0.550
}
host kvm04 {
        id -5           # do not change unnecessarily
        # weight 2.750
        alg straw
        hash 0  # rjenkins1
        item osd.15 weight 0.550
        item osd.16 weight 0.550
        item osd.17 weight 0.550
        item osd.18 weight 0.550
        item osd.19 weight 0.550
}
host kvm05 {
        id -6           # do not change unnecessarily
        # weight 2.750
        alg straw
        hash 0  # rjenkins1
        item osd.20 weight 0.550
        item osd.21 weight 0.550
        item osd.22 weight 0.550
        item osd.23 weight 0.550
        item osd.24 weight 0.550
}
root XXXXXX {
        id -1           # do not change unnecessarily
        # weight 13.750
        alg straw
        hash 0  # rjenkins1
        item kvm01 weight 2.750
        item kvm02 weight 2.750
        item kvm03 weight 2.750
        item kvm04 weight 2.750
        item kvm05 weight 2.750
}

# rules
rule data {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take XXXXXX
        step chooseleaf firstn 0 type host
        step emit
}
rule metadata {
        ruleset 1
        type replicated
        min_size 1
        max_size 10
        step take XXXXXX
        step chooseleaf firstn 0 type host
        step emit
}
rule rbd {
        ruleset 2
        type replicated
        min_size 1
        max_size 10
        step take XXXXXX
        step chooseleaf firstn 0 type host
        step emit
}

# end crush map
---------------------------------
[root@cephadmin ~]# ceph osd tree
# id    weight  type name       up/down reweight
-1      13.75   root XXXXXX
-2      2.75            host kvm01
0       0.55                    osd.0   up      1
1       0.55                    osd.1   up      1
2       0.55                    osd.2   up      1
3       0.55                    osd.3   up      1
4       0.55                    osd.4   up      1
-3      2.75            host kvm02
5       0.55                    osd.5   up      1
6       0.55                    osd.6   up      1
7       0.55                    osd.7   up      1
8       0.55                    osd.8   up      1
9       0.55                    osd.9   up      1
-4      2.75            host kvm03
10      0.55                    osd.10  up      1
11      0.55                    osd.11  up      1
12      0.55                    osd.12  up      1
13      0.55                    osd.13  up      1
14      0.55                    osd.14  up      1
-5      2.75            host kvm04
15      0.55                    osd.15  up      1
16      0.55                    osd.16  up      1
17      0.55                    osd.17  up      1
18      0.55                    osd.18  up      1
19      0.55                    osd.19  up      1
-6      2.75            host kvm05
20      0.55                    osd.20  up      1
21      0.55                    osd.21  up      1
22      0.55                    osd.22  up      1
23      0.55                    osd.23  up      1
24      0.55                    osd.24  up      1
----------------------------------------
/usr/bin/kvm -id 101 -chardev socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait -mon chardev=qmp,mode=control -vnc unix:/var/run/qemu-server/101.vnc,x509,password -pidfile /var/run/qemu-server/101.pid -daemonize -name NFS -smp sockets=1,cores=4 -nodefaults -boot menu=on -vga qxl -cpu qemu64,+x2apic -k en-us -spice tls-port=61000,addr=127.0.0.1,tls-ciphers=DES-CBC3-SHA,seamless-migration=on -device virtio-serial,id=spice,bus=pci.0,addr=0x9 -chardev spicevmc,id=vdagent,name=vdagent -device virtserialport,chardev=vdagent,name=com.redhat.spice.0 -m 2048 -device piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 -drive file=rbd:rbd/vm-101-disk-2:mon_host=192.168.100.1\:6789\;192.168.100.2\:6789\;192.168.100.3\:6789:id=admin:auth_supported=cephx:keyring=/etc/pve/priv/ceph/ceph.keyring,if=none,id=drive-virtio1,aio=native,cache=none -device virtio-blk-pci,drive=drive-virtio1,id=virtio1,bus=pci.0,addr=0xb -drive file=rbd:rbd/vm-101-disk-3:mon_host=192.168.100.1\:6789\;192.168.100.2\:6789\;192.168.100.3\:6789:id=admin:auth_supported=cephx:keyring=/etc/pve/priv/ceph/ceph.keyring,if=none,id=drive-virtio2,aio=native,cache=none -device virtio-blk-pci,drive=drive-virtio2,id=virtio2,bus=pci.0,addr=0xc -drive if=none,id=drive-ide2,media=cdrom,aio=native -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200 -drive file=rbd:rbd/vm-101-disk-1:mon_host=192.168.100.1\:6789\;192.168.100.2\:6789\;192.168.100.3\:6789:id=admin:auth_supported=cephx:keyring=/etc/pve/priv/ceph/ceph.keyring,if=none,id=drive-virtio0,aio=native,cache=none -device virtio-blk-pci,drive=drive-virtio0,id=virtio0,bus=pci.0,addr=0xa,bootindex=102 -netdev type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,vhost=on -device virtio-net-pci,mac=9A:43:DC:FE:76:CC,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300 -netdev type=tap,id=net1,ifname=tap101i1,script=/var/lib/qemu-server/pve-bridge,vhost=on -device virtio-net-pci,mac=D2:DA:9B:C5:D4:E4,netdev=net1,bus=pci.0,addr=0x13,id=net1,bootindex=301
-----------------------------------------
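A remark on the command line above: the RBD drives are attached with cache=none, and as far as I understand the QEMU rbd driver (since QEMU 1.2) derives rbd_cache from the drive's cache mode, so cache=none switches the librbd cache off even though ceph.conf says "rbd cache = true". If the (write-through) librbd cache is meant to be active, the drive option would need to look roughly like the sketch below, with everything except cache= unchanged from the real command line:

-drive file=rbd:rbd/vm-101-disk-1:<same rbd options as above>,if=none,id=drive-virtio0,aio=native,cache=writeback
-----------------------------------------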
From the virtual machine:

[root@nfs tmp]# dd if=/dev/zero of=test bs=1000000000 count=20
20+0 records in
20+0 records out
20000000000 bytes (20 GB) copied, 66.4437 s, 301 MB/s
------------------------------------------
[root@nfs tmp]# dd if=test of=/dev/null
39062500+0 records in
39062500+0 records out
20000000000 bytes (20 GB) copied, 280.532 s, 71.3 MB/s
-----------------------------------------
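One detail from the read test above: it ran without a bs= argument, so dd used its default 512-byte blocks (39062500 records x 512 B = 20 GB), i.e. tiny sequential requests issued one at a time. For comparison, the same read could be rerun with a large block size and O_DIRECT, and optionally with a larger guest read-ahead; this is just a sketch, the device name <vdX> and the 4096 value are placeholders:

[root@nfs tmp]# dd if=test of=/dev/null bs=4M iflag=direct
[root@nfs tmp]# echo 4096 > /sys/block/<vdX>/queue/read_ahead_kb    # <vdX> = the virtio disk backing this filesystem
-----------------------------------------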
[root@nfs ~]# cat fio.ini
[test]
blocksize=4k
filename=/dev/vdc
rw=randwrite
direct=1
buffered=0
ioengine=libaio
iodepth=32
-----------------------------------------
[root@nfs ~]# fio fio.ini
test: (g=0): rw=randwrite, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=32
fio-2.1.4
Starting 1 process
Jobs: 1 (f=1): [w] [100.0% done] [0KB/11228KB/0KB /s] [0/4807/0 iops] [eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=1475: Fri Feb 7 11:03:23 2014
  write: io=1024.0MB, bw=12651KB/s, iops=4162, runt= 82888msec
    slat (usec): min=5, max=1806, avg=12.76, stdev=15.14
    clat (msec): min=3, max=5017, avg=10.10, stdev=73.46
     lat (msec): min=3, max=5017, avg=10.11, stdev=73.46
    clat percentiles (msec):
     |  1.00th=[    5],  5.00th=[    5], 10.00th=[    6], 20.00th=[    6],
     | 30.00th=[    7], 40.00th=[    7], 50.00th=[    7], 60.00th=[    7],
     | 70.00th=[    8], 80.00th=[    8], 90.00th=[    9], 95.00th=[   13],
     | 99.00th=[   59], 99.50th=[   92], 99.90th=[  545], 99.95th=[  922],
     | 99.99th=[ 5014]
    bw (KB  /s): min=    5, max=19904, per=100.00%, avg=14387.77, stdev=4752.21
    lat (msec) : 4=0.52%, 10=92.92%, 20=3.55%, 50=1.85%, 100=0.70%
    lat (msec) : 250=0.28%, 500=0.06%, 750=0.05%, 1000=0.02%, 2000=0.01%
    lat (msec) : >=2000=0.04%
  cpu          : usr=2.02%, sys=5.68%, ctx=38260, majf=0, minf=27
  IO depths    : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=100.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.1%, 64=0.0%, >=64=0.0%
     issued    : total=r=0/w=262144/d=0, short=r=0/w=0/d=0

Run status group 0 (all jobs):
  WRITE: io=1024.0MB, aggrb=12650KB/s, minb=12650KB/s, maxb=12650KB/s, mint=82888msec, maxt=82888msec

Disk stats (read/write):
  vdc: ios=0/261915, merge=0/0, ticks=0/2612442, in_queue=2612853, util=100.00%
----------------------------------------------
[root@nfs ~]# fio fio.ini        # rw changed to randread in fio.ini for this run
test: (g=0): rw=randread, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=32
fio-2.1.4
Starting 1 process
Jobs: 1 (f=1): [r] [100.0% done] [22589KB/0KB/0KB /s] [5647/0/0 iops] [eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=1528: Fri Feb 7 11:09:46 2014
  read : io=1024.0MB, bw=22370KB/s, iops=5592, runt= 46874msec
    slat (usec): min=1, max=1505, avg=14.44, stdev=14.79
    clat (msec): min=1, max=167, avg= 5.70, stdev= 1.84
     lat (msec): min=1, max=167, avg= 5.72, stdev= 1.83
    clat percentiles (msec):
     |  1.00th=[    4],  5.00th=[    5], 10.00th=[    5], 20.00th=[    6],
     | 30.00th=[    6], 40.00th=[    6], 50.00th=[    6], 60.00th=[    6],
     | 70.00th=[    6], 80.00th=[    7], 90.00th=[    7], 95.00th=[    7],
     | 99.00th=[    9], 99.50th=[   10], 99.90th=[   16], 99.95th=[   29],
     | 99.99th=[  102]
    bw (KB  /s): min=20624, max=24192, per=100.00%, avg=22396.29, stdev=674.38
    lat (msec) : 2=0.03%, 4=1.07%, 10=98.53%, 20=0.30%, 50=0.04%
    lat (msec) : 100=0.02%, 250=0.01%
  cpu          : usr=3.34%, sys=12.31%, ctx=128361, majf=0, minf=59
  IO depths    : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=100.0%, >=64=0.0%
     submit    : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
     complete  : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.1%, 64=0.0%, >=64=0.0%
     issued    : total=r=262144/w=0/d=0, short=r=0/w=0/d=0

Run status group 0 (all jobs):
   READ: io=1024.0MB, aggrb=22370KB/s, minb=22370KB/s, maxb=22370KB/s, mint=46874msec, maxt=46874msec

Disk stats (read/write):
  vdc: ios=261157/0, merge=0/0, ticks=1482201/0, in_queue=1482212, util=99.90%
------------------------------------------------------

Why is the sequential read so slow (71.3 MB/s with dd inside the VM, vs. ~685 MB/s from rados bench seq)? Any ideas on this issue?

Thanks.

--
Best regards,
Фасихов Ирек Нургаязович
Mob.: +79229045757