1. You have size 3 pools, so I do not know why you set min_size to 1. It is too dangerous.
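For example, once recovery has finished you could raise it back to 2 on each replicated pool. This is only a sketch (standard pool-setting commands, pool names taken from your ceph osd dump output quoted below), please double check before running:

    ceph osd pool set vm min_size 2
    ceph osd pool set cephfs_data min_size 2
    ceph osd pool set cephfs_metadata min_size 2
    ceph osd pool set ssd min_size 2

With size 3 / min_size 2 a PG still serves I/O with one replica missing, but it stops accepting writes when only a single copy is left, so you cannot end up with new writes that exist on only one disk.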
2. You had better use the same disk sizes and the same number of OSDs on each host, so that CRUSH can spread data evenly. For now you can try the ceph osd reweight-by-utilization command when there are no users on your cluster (a rough example of the commands is at the very bottom of this mail, below the quoted thread). I am going home now.

At 2017-07-28 17:57:11, "Nikola Ciprich" <nikola.cipr...@linuxbox.cz> wrote:

>On Fri, Jul 28, 2017 at 05:52:29PM +0800, linghucongsong wrote:
>>
>> You have two crush rule? One is ssd the other is hdd?
>
>yes, exactly..
>
>> Can you show ceph osd dump|grep pool
>
>pool 3 'vm' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 1024 pgp_num 1024 last_change 69955 flags hashpspool min_read_recency_for_promote 1 min_write_recency_for_promote 1 stripe_width 0
>pool 4 'cephfs_data' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 1024 pgp_num 1024 last_change 74682 flags hashpspool crash_replay_interval 45 min_write_recency_for_promote 1 stripe_width 0
>pool 5 'cephfs_metadata' replicated size 3 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 1024 pgp_num 1024 last_change 74667 flags hashpspool min_write_recency_for_promote 1 stripe_width 0
>pool 11 'ssd' replicated size 3 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 128 pgp_num 128 last_change 46119 flags hashpspool min_write_recency_for_promote 1 stripe_width 0
>
>> ceph osd crush dump
>
>{
>    "devices": [
>        { "id": 0,  "name": "osd.0" },
>        { "id": 1,  "name": "osd.1" },
>        { "id": 2,  "name": "osd.2" },
>        { "id": 3,  "name": "osd.3" },
>        { "id": 4,  "name": "osd.4" },
>        { "id": 5,  "name": "osd.5" },
>        { "id": 6,  "name": "osd.6" },
>        { "id": 7,  "name": "device7" },
>        { "id": 8,  "name": "osd.8" },
>        { "id": 9,  "name": "osd.9" },
>        { "id": 10, "name": "osd.10" },
>        { "id": 11, "name": "osd.11" },
>        { "id": 12, "name": "osd.12" },
>        { "id": 13, "name": "osd.13" },
>        { "id": 14, "name": "osd.14" },
>        { "id": 15, "name": "osd.15" },
>        { "id": 16, "name": "osd.16" },
>        { "id": 17, "name": "osd.17" },
>        { "id": 18, "name": "osd.18" },
>        { "id": 19, "name": "osd.19" },
>        { "id": 20, "name": "osd.20" },
>        { "id": 21, "name": "osd.21" },
>        { "id": 22, "name": "osd.22" },
>        { "id": 23, "name": "osd.23" },
>        { "id": 24, "name": "osd.24" },
>        { "id": 25, "name": "osd.25" },
>        { "id": 26, "name": "osd.26" }
>    ],
>    "types": [
>        { "type_id": 0,  "name": "osd" },
>        { "type_id": 1,  "name": "host" },
>        { "type_id": 2,  "name": "chassis" },
>        { "type_id": 3,  "name": "rack" },
>        { "type_id": 4,  "name": "row" },
>        { "type_id": 5,  "name": "pdu" },
>        { "type_id": 6,  "name": "pod" },
>        { "type_id": 7,  "name": "room" },
>        { "type_id": 8,  "name": "datacenter" },
>        { "type_id": 9,  "name": "region" },
>        { "type_id": 10, "name": "root" }
>    ],
>    "buckets": [
>        {
>            "id": -1, "name": "default", "type_id": 10, "type_name": "root",
>            "weight": 2575553, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": -4, "weight": 779875, "pos": 0 },
>                { "id": -5, "weight": 681571, "pos": 1 },
>                { "id": -6, "weight": 511178, "pos": 2 },
>                { "id": -3, "weight": 602929, "pos": 3 }
>            ]
>        },
>        {
>            "id": -2, "name": "ssd", "type_id": 10, "type_name": "root",
>            "weight": 102233, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": -9,  "weight": 26214, "pos": 0 },
>                { "id": -10, "weight": 39320, "pos": 1 },
>                { "id": -11, "weight": 22282, "pos": 2 },
>                { "id": -7,  "weight": 14417, "pos": 3 }
>            ]
>        },
>        {
>            "id": -3, "name": "v1d-sata", "type_id": 1, "type_name": "host",
>            "weight": 602929, "alg": "straw", "hash": "rjenkins1",
>            "items": [
>                { "id": 12, "weight": 91750,  "pos": 0 },
>                { "id": 20, "weight": 91750,  "pos": 1 },
>                { "id": 21, "weight": 235929, "pos": 2 },
>                { "id": 22, "weight": 91750,  "pos": 3 },
>                { "id": 23, "weight": 91750,  "pos": 4 }
>            ]
>        },
>        {
>            "id": -4, "name": "v1a", "type_id": 1, "type_name": "host",
>            "weight": 779875, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": 6,  "weight": 104857, "pos": 0 },
>                { "id": 8,  "weight": 117964, "pos": 1 },
>                { "id": 2,  "weight": 104857, "pos": 2 },
>                { "id": 0,  "weight": 111411, "pos": 3 },
>                { "id": 4,  "weight": 104857, "pos": 4 },
>                { "id": 25, "weight": 235929, "pos": 5 }
>            ]
>        },
>        {
>            "id": -5, "name": "v1b", "type_id": 1, "type_name": "host",
>            "weight": 681571, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": 1,  "weight": 104857, "pos": 0 },
>                { "id": 3,  "weight": 117964, "pos": 1 },
>                { "id": 9,  "weight": 104857, "pos": 2 },
>                { "id": 11, "weight": 117964, "pos": 3 },
>                { "id": 24, "weight": 235929, "pos": 4 }
>            ]
>        },
>        {
>            "id": -6, "name": "v1c", "type_id": 1, "type_name": "host",
>            "weight": 511178, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": 14, "weight": 104857, "pos": 0 },
>                { "id": 15, "weight": 117964, "pos": 1 },
>                { "id": 16, "weight": 91750,  "pos": 2 },
>                { "id": 18, "weight": 91750,  "pos": 3 },
>                { "id": 17, "weight": 104857, "pos": 4 }
>            ]
>        },
>        {
>            "id": -7, "name": "v1d-ssd", "type_id": 1, "type_name": "host",
>            "weight": 14417, "alg": "straw", "hash": "rjenkins1",
>            "items": [
>                { "id": 19, "weight": 14417, "pos": 0 }
>            ]
>        },
>        {
>            "id": -9, "name": "v1c-ssd", "type_id": 1, "type_name": "host",
>            "weight": 26214, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": 10, "weight": 26214, "pos": 0 }
>            ]
>        },
>        {
>            "id": -10, "name": "v1a-ssd", "type_id": 1, "type_name": "host",
>            "weight": 39320, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": 5,  "weight": 19660, "pos": 0 },
>                { "id": 26, "weight": 19660, "pos": 1 }
>            ]
>        },
>        {
>            "id": -11, "name": "v1b-ssd", "type_id": 1, "type_name": "host",
>            "weight": 22282, "alg": "straw2", "hash": "rjenkins1",
>            "items": [
>                { "id": 13, "weight": 22282, "pos": 0 }
>            ]
>        }
>    ],
>    "rules": [
>        {
>            "rule_id": 0, "rule_name": "replicated_ruleset", "ruleset": 0,
>            "type": 1, "min_size": 1, "max_size": 10,
>            "steps": [
>                { "op": "take", "item": -1, "item_name": "default" },
>                { "op": "chooseleaf_firstn", "num": 0, "type": "host" },
>                { "op": "emit" }
>            ]
>        },
>        {
>            "rule_id": 1, "rule_name": "ssd", "ruleset": 1,
>            "type": 1, "min_size": 1, "max_size": 10,
>            "steps": [
>                { "op": "take", "item": -2, "item_name": "ssd" },
>                { "op": "chooseleaf_firstn", "num": 0, "type": "host" },
>                { "op": "emit" }
>            ]
>        }
>    ],
>    "tunables": {
>        "choose_local_tries": 0,
>        "choose_local_fallback_tries": 0,
>        "choose_total_tries": 50,
>        "chooseleaf_descend_once": 1,
>        "chooseleaf_vary_r": 1,
>        "chooseleaf_stable": 0,
>        "straw_calc_version": 1,
>        "allowed_bucket_algs": 54,
>        "profile": "hammer",
>        "optimal_tunables": 0,
>        "legacy_tunables": 0,
>        "minimum_required_version": "hammer",
>        "require_feature_tunables": 1,
>        "require_feature_tunables2": 1,
>        "has_v2_rules": 0,
>        "require_feature_tunables3": 1,
>        "has_v3_rules": 0,
>        "has_v4_buckets": 1,
>        "require_feature_tunables5": 0,
>        "has_v5_rules": 0
>    }
>}
>
>
>> At 2017-07-28 17:47:48, "Nikola Ciprich" <nikola.cipr...@linuxbox.cz> wrote:
>> >
>> >On Fri, Jul 28, 2017 at 05:43:14PM +0800, linghucongsong wrote:
>> >>
>> >> It look like the osd in your cluster is not all the same size.
>> >>
>> >> can you show ceph osd df output?
>> >
>> >you're right, they're not.. here's the output:
>> >
>> >[root@v1b ~]# ceph osd df tree
>> >ID  WEIGHT   REWEIGHT SIZE   USE    AVAIL  %USE  VAR  PGS TYPE NAME
>> > -2  1.55995        -  1706G   883G   805G 51.78 2.55   0 root ssd
>> > -9  0.39999        -   393G   221G   171G 56.30 2.78   0     host v1c-ssd
>> > 10  0.39999  1.00000   393G   221G   171G 56.30 2.78  98         osd.10
>> >-10  0.59998        -   683G   275G   389G 40.39 1.99   0     host v1a-ssd
>> >  5  0.29999  1.00000   338G   151G   187G 44.77 2.21  65         osd.5
>> > 26  0.29999  1.00000   344G   124G   202G 36.07 1.78  52         osd.26
>> >-11  0.34000        -   338G   219G   119G 64.68 3.19   0     host v1b-ssd
>> > 13  0.34000  1.00000   338G   219G   119G 64.68 3.19  96         osd.13
>> > -7  0.21999        -   290G   166G   123G 57.43 2.83   0     host v1d-ssd
>> > 19  0.21999  1.00000   290G   166G   123G 57.43 2.83  73         osd.19
>> > -1 39.29982        - 43658G  8312G 34787G 19.04 0.94   0 root default
>> > -4 11.89995        - 12806G  2422G 10197G 18.92 0.93   0     host v1a
>> >  6  1.59999  1.00000  1833G   358G  1475G 19.53 0.96 366         osd.6
>> >  8  1.79999  1.00000  1833G   313G  1519G 17.11 0.84 370         osd.8
>> >  2  1.59999  1.00000  1833G   320G  1513G 17.46 0.86 331         osd.2
>> >  0  1.70000  1.00000  1804G   431G  1373G 23.90 1.18 359         osd.0
>> >  4  1.59999  1.00000  1833G   294G  1539G 16.07 0.79 360         osd.4
>> > 25  3.59999  1.00000  3667G   704G  2776G 19.22 0.95 745         osd.25
>> > -5 10.39995        - 10914G  2154G  8573G 19.74 0.97   0     host v1b
>> >  1  1.59999  1.00000  1804G   350G  1454G 19.42 0.96 409         osd.1
>> >  3  1.79999  1.00000  1804G   360G  1444G 19.98 0.99 412         osd.3
>> >  9  1.59999  1.00000  1804G   331G  1473G 18.37 0.91 363         osd.9
>> > 11  1.79999  1.00000  1833G   367G  1465G 20.06 0.99 415         osd.11
>> > 24  3.59999  1.00000  3667G   744G  2736G 20.30 1.00 834         osd.24
>> > -6  7.79996        -  9051G  1769G  7282G 19.54 0.96   0     host v1c
>> > 14  1.59999  1.00000  1804G   370G  1433G 20.54 1.01 442         osd.14
>> > 15  1.79999  1.00000  1833G   383G  1450G 20.92 1.03 447         osd.15
>> > 16  1.39999  1.00000  1804G   295G  1508G 16.38 0.81 355         osd.16
>> > 18  1.39999  1.00000  1804G   366G  1438G 20.29 1.00 381         osd.18
>> > 17  1.59999  1.00000  1804G   353G  1451G 19.57 0.97 429         osd.17
>> > -3  9.19997        - 10885G  1965G  8733G 18.06 0.89   0     host v1d-sata
>> > 12  1.39999  1.00000  1804G   348G  1455G 19.32 0.95 365         osd.12
>> > 20  1.39999  1.00000  1804G   335G  1468G 18.60 0.92 371         osd.20
>> > 21  3.59999  1.00000  3667G   695G  2785G 18.97 0.94 871         osd.21
>> > 22  1.39999  1.00000  1804G   281G  1522G 15.63 0.77 326         osd.22
>> > 23  1.39999  1.00000  1804G   303G  1500G 16.83 0.83 321         osd.23
>> >               TOTAL 45365G  9195G 35592G 20.27
>> >MIN/MAX VAR: 0.77/3.19  STDDEV: 14.69
>> >
>> >apart from replacing OSDs, how can I help it?
>> >
>> >>
>> >> At 2017-07-28 17:24:29, "Nikola Ciprich" <nikola.cipr...@linuxbox.cz> wrote:
>> >> >I forgot to add that OSD daemons really seem to be idle, no disk
>> >> >activity, no CPU usage.. it just looks to me like some kind of
>> >> >deadlock, as they were waiting for each other..
>> >> >
>> >> >and so I'm trying to get last 1.5% of misplaced / degraded PGs
>> >> >for almost a week..
>> >> >
>> >> >On Fri, Jul 28, 2017 at 10:56:02AM +0200, Nikola Ciprich wrote:
>> >> >> Hi,
>> >> >>
>> >> >> I'm trying to find reason for strange recovery issues I'm seeing on
>> >> >> our cluster..
>> >> >>
>> >> >> it's mostly idle, 4 node cluster with 26 OSDs evenly distributed
>> >> >> across nodes. jewel 10.2.9
>> >> >>
>> >> >> the problem is that after some disk replaces and data moves, recovery
>> >> >> is progressing extremely slowly.. pgs seem to be stuck in
>> >> >> active+recovering+degraded state:
>> >> >>
>> >> >> [root@v1d ~]# ceph -s
>> >> >>     cluster a5efbc87-3900-4c42-a977-8c93f7aa8c33
>> >> >>      health HEALTH_WARN
>> >> >>             159 pgs backfill_wait
>> >> >>             4 pgs backfilling
>> >> >>             259 pgs degraded
>> >> >>             12 pgs recovering
>> >> >>             113 pgs recovery_wait
>> >> >>             215 pgs stuck degraded
>> >> >>             266 pgs stuck unclean
>> >> >>             140 pgs stuck undersized
>> >> >>             151 pgs undersized
>> >> >>             recovery 37788/2327775 objects degraded (1.623%)
>> >> >>             recovery 23854/2327775 objects misplaced (1.025%)
>> >> >>             noout,noin flag(s) set
>> >> >>      monmap e21: 3 mons at {v1a=10.0.0.1:6789/0,v1b=10.0.0.2:6789/0,v1c=10.0.0.3:6789/0}
>> >> >>             election epoch 6160, quorum 0,1,2 v1a,v1b,v1c
>> >> >>       fsmap e817: 1/1/1 up {0=v1a=up:active}, 1 up:standby
>> >> >>      osdmap e76002: 26 osds: 26 up, 26 in; 185 remapped pgs
>> >> >>             flags noout,noin,sortbitwise,require_jewel_osds
>> >> >>       pgmap v80995844: 3200 pgs, 4 pools, 2876 GB data, 757 kobjects
>> >> >>             9215 GB used, 35572 GB / 45365 GB avail
>> >> >>             37788/2327775 objects degraded (1.623%)
>> >> >>             23854/2327775 objects misplaced (1.025%)
>> >> >>                 2912 active+clean
>> >> >>                  130 active+undersized+degraded+remapped+wait_backfill
>> >> >>                   97 active+recovery_wait+degraded
>> >> >>                   29 active+remapped+wait_backfill
>> >> >>                   12 active+recovery_wait+undersized+degraded+remapped
>> >> >>                    6 active+recovering+degraded
>> >> >>                    5 active+recovering+undersized+degraded+remapped
>> >> >>                    4 active+undersized+degraded+remapped+backfilling
>> >> >>                    4 active+recovery_wait+degraded+remapped
>> >> >>                    1 active+recovering+degraded+remapped
>> >> >>   client io 2026 B/s rd, 146 kB/s wr, 9 op/s rd, 21 op/s wr
>> >> >>
>> >> >>
>> >> >> when I restart affected OSDs, it bumps the recovery, but then another
>> >> >> PGs get stuck.. All OSDs were restarted multiple times, none are even
>> >> >> close to nearfull, I just cant find what I'm doing wrong..
>> >> >>
>> >> >> possibly related OSD options:
>> >> >>
>> >> >> osd max backfills = 4
>> >> >> osd recovery max active = 15
>> >> >> debug osd = 0/0
>> >> >> osd op threads = 4
>> >> >> osd backfill scan min = 4
>> >> >> osd backfill scan max = 16
>> >> >>
>> >> >> Any hints would be greatly appreciated
>> >> >>
>> >> >> thanks
>> >> >>
>> >> >> nik
>> >> >>
>> >> >> --
>> >> >> -------------------------------------
>> >> >> Ing. Nikola CIPRICH
>> >> >> LinuxBox.cz, s.r.o.
>> >> >> 28.rijna 168, 709 00 Ostrava
>> >> >>
>> >> >> tel.:   +420 591 166 214
>> >> >> fax:    +420 596 621 273
>> >> >> mobil:  +420 777 093 799
>> >> >> www.linuxbox.cz
>> >> >>
>> >> >> mobil servis: +420 737 238 656
>> >> >> email servis: ser...@linuxbox.cz
>> >> >> -------------------------------------
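About point 2 above: here is a rough sketch of how I would try the reweighting, once the current recovery has settled and there is no client traffic. The threshold 110 and max change 0.05 are only example values I have not tested on your cluster, and on jewel there should also be a dry-run variant:

    # dry run: report which OSDs would be reweighted and by how much
    ceph osd test-reweight-by-utilization 110 0.05 10
    # apply it, then watch the data move
    ceph osd reweight-by-utilization 110 0.05 10
    ceph -w

This only lowers the override reweight of the most-filled OSDs (such as your osd.13 at 64% in the df output), it does not touch the CRUSH weights, so you can always reset a single OSD with ceph osd reweight <id> 1.0 if the result looks wrong.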
_______________________________________________
ceph-users mailing list
ceph-users@lists.ceph.com
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com