Hi Greg,

Now we can see that the same problem exists with kraken on filestore as well. Attached are the requested osdmap and crushmap.
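For reference, this is roughly how the attached maps can be pulled from the cluster and the crushmap decompiled into the text pasted at the end (output paths here are just placeholders):

    ceph osd getmap -o /tmp/osdmap                        # binary osdmap
    ceph osd getcrushmap -o /tmp/crushmap.bin             # binary crushmap
    crushtool -d /tmp/crushmap.bin -o /tmp/crushmap.txt   # decompile to editable text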
osd.1 was stopped using the following procedure, and the OSD mapping for a sample PG is shown before and after.

ceph osd dump | grep cdvr_ec
2017-01-31 08:39:44.827079 7f323d66c700 -1 WARNING: the following dangerous and experimental features are enabled: bluestore,rocksdb
2017-01-31 08:39:44.848901 7f323d66c700 -1 WARNING: the following dangerous and experimental features are enabled: bluestore,rocksdb
pool 2 'cdvr_ec' erasure size 4 min_size 4 crush_ruleset 1 object_hash rjenkins pg_num 1024 pgp_num 1024 last_change 234 flags hashpspool stripe_width 4128

[root@ca-cn2 ~]# ceph osd getmap -o /tmp/osdmap
[root@ca-cn2 ~]# osdmaptool --pool 2 --test-map-object object1 /tmp/osdmap
osdmaptool: osdmap file '/tmp/osdmap'
 object 'object1' -> 2.2bc -> [20,47,1,36]
[root@ca-cn2 ~]# ceph osd map cdvr_ec object1
osdmap e402 pool 'cdvr_ec' (2) object 'object1' -> pg 2.bac5debc (2.2bc) -> up ([20,47,1,36], p20) acting ([20,47,1,36], p20)

[root@ca-cn2 ~]# systemctl stop ceph-osd@1.service

[root@ca-cn2 ~]# ceph osd getmap -o /tmp/osdmap1
[root@ca-cn2 ~]# osdmaptool --pool 2 --test-map-object object1 /tmp/osdmap1
osdmaptool: osdmap file '/tmp/osdmap1'
 object 'object1' -> 2.2bc -> [20,47,2147483647,36]
[root@ca-cn2 ~]# ceph osd map cdvr_ec object1
osdmap e406 pool 'cdvr_ec' (2) object 'object1' -> pg 2.bac5debc (2.2bc) -> up ([20,47,39,36], p20) acting ([20,47,NONE,36], p20)

[root@ca-cn2 ~]# ceph osd tree
2017-01-31 08:42:19.606876 7f4ed856a700 -1 WARNING: the following dangerous and experimental features are enabled: bluestore,rocksdb
2017-01-31 08:42:19.628358 7f4ed856a700 -1 WARNING: the following dangerous and experimental features are enabled: bluestore,rocksdb
ID  WEIGHT    TYPE NAME        UP/DOWN REWEIGHT PRIMARY-AFFINITY
 -1 327.47314 root default
 -2  65.49463     host ca-cn4
  3   5.45789         osd.3         up  1.00000          1.00000
  5   5.45789         osd.5         up  1.00000          1.00000
 10   5.45789         osd.10        up  1.00000          1.00000
 16   5.45789         osd.16        up  1.00000          1.00000
 21   5.45789         osd.21        up  1.00000          1.00000
 27   5.45789         osd.27        up  1.00000          1.00000
 30   5.45789         osd.30        up  1.00000          1.00000
 35   5.45789         osd.35        up  1.00000          1.00000
 42   5.45789         osd.42        up  1.00000          1.00000
 47   5.45789         osd.47        up  1.00000          1.00000
 51   5.45789         osd.51        up  1.00000          1.00000
 53   5.45789         osd.53        up  1.00000          1.00000
 -3  65.49463     host ca-cn3
  2   5.45789         osd.2         up  1.00000          1.00000
  6   5.45789         osd.6         up  1.00000          1.00000
 11   5.45789         osd.11        up  1.00000          1.00000
 15   5.45789         osd.15        up  1.00000          1.00000
 20   5.45789         osd.20        up  1.00000          1.00000
 25   5.45789         osd.25        up  1.00000          1.00000
 29   5.45789         osd.29        up  1.00000          1.00000
 33   5.45789         osd.33        up  1.00000          1.00000
 38   5.45789         osd.38        up  1.00000          1.00000
 40   5.45789         osd.40        up  1.00000          1.00000
 45   5.45789         osd.45        up  1.00000          1.00000
 49   5.45789         osd.49        up  1.00000          1.00000
 -4  65.49463     host ca-cn5
  0   5.45789         osd.0         up  1.00000          1.00000
  7   5.45789         osd.7         up  1.00000          1.00000
 12   5.45789         osd.12        up  1.00000          1.00000
 17   5.45789         osd.17        up  1.00000          1.00000
 23   5.45789         osd.23        up  1.00000          1.00000
 26   5.45789         osd.26        up  1.00000          1.00000
 32   5.45789         osd.32        up  1.00000          1.00000
 34   5.45789         osd.34        up  1.00000          1.00000
 41   5.45789         osd.41        up  1.00000          1.00000
 46   5.45789         osd.46        up  1.00000          1.00000
 52   5.45789         osd.52        up  1.00000          1.00000
 56   5.45789         osd.56        up  1.00000          1.00000
 -5  65.49463     host ca-cn1
  4   5.45789         osd.4         up  1.00000          1.00000
  9   5.45789         osd.9         up  1.00000          1.00000
 14   5.45789         osd.14        up  1.00000          1.00000
 19   5.45789         osd.19        up  1.00000          1.00000
 24   5.45789         osd.24        up  1.00000          1.00000
 36   5.45789         osd.36        up  1.00000          1.00000
 43   5.45789         osd.43        up  1.00000          1.00000
 50   5.45789         osd.50        up  1.00000          1.00000
 55   5.45789         osd.55        up  1.00000          1.00000
 57   5.45789         osd.57        up  1.00000          1.00000
 58   5.45789         osd.58        up  1.00000          1.00000
 59   5.45789         osd.59        up  1.00000          1.00000
 -6  65.49463     host ca-cn2
  1   5.45789         osd.1       down        0          1.00000
  8   5.45789         osd.8         up  1.00000          1.00000
 13   5.45789         osd.13        up  1.00000          1.00000
 18   5.45789         osd.18        up  1.00000          1.00000
 22   5.45789         osd.22        up  1.00000          1.00000
 28   5.45789         osd.28        up  1.00000          1.00000
 31   5.45789         osd.31        up  1.00000          1.00000
 37   5.45789         osd.37        up  1.00000          1.00000
 39   5.45789         osd.39        up  1.00000          1.00000
 44   5.45789         osd.44        up  1.00000          1.00000
 48   5.45789         osd.48        up  1.00000          1.00000
 54   5.45789         osd.54        up  1.00000          1.00000

     health HEALTH_ERR
            69 pgs are stuck inactive for more than 300 seconds
            69 pgs incomplete
            69 pgs stuck inactive
            69 pgs stuck unclean
            512 requests are blocked > 32 sec
     monmap e2: 5 mons at {ca-cn1=10.50.5.117:6789/0,ca-cn2=10.50.5.118:6789/0,ca-cn3=10.50.5.119:6789/0,ca-cn4=10.50.5.120:6789/0,ca-cn5=10.50.5.121:6789/0}
            election epoch 8, quorum 0,1,2,3,4 ca-cn1,ca-cn2,ca-cn3,ca-cn4,ca-cn5
        mgr active: ca-cn4 standbys: ca-cn2, ca-cn5, ca-cn3, ca-cn1
     osdmap e406: 60 osds: 59 up, 59 in; 69 remapped pgs
            flags sortbitwise,require_jewel_osds,require_kraken_osds
      pgmap v23018: 1024 pgs, 1 pools, 3892 GB data, 7910 kobjects
            6074 GB used, 316 TB / 322 TB avail
                 955 active+clean
                  69 remapped+incomplete

Thanks,
Muthu

On 31 January 2017 at 02:54, Gregory Farnum <gfar...@redhat.com> wrote:
> You might also check out "ceph osd tree" and crush dump and make sure
> they look the way you expect.
>
> On Mon, Jan 30, 2017 at 1:23 PM, Gregory Farnum <gfar...@redhat.com> wrote:
> > On Sun, Jan 29, 2017 at 6:40 AM, Muthusamy Muthiah
> > <muthiah.muthus...@gmail.com> wrote:
> >> Hi All,
> >>
> >> Also tried EC profile 3+1 on a 5 node cluster with bluestore enabled. When
> >> an OSD is down the cluster goes to ERROR state even though the cluster is
> >> n+1. No recovery is happening.
> >>
> >>      health HEALTH_ERR
> >>             75 pgs are stuck inactive for more than 300 seconds
> >>             75 pgs incomplete
> >>             75 pgs stuck inactive
> >>             75 pgs stuck unclean
> >>      monmap e2: 5 mons at
> >> {ca-cn1=10.50.5.117:6789/0,ca-cn2=10.50.5.118:6789/0,ca-cn3=10.50.5.119:6789/0,ca-cn4=10.50.5.120:6789/0,ca-cn5=10.50.5.121:6789/0}
> >>             election epoch 10, quorum 0,1,2,3,4
> >> ca-cn1,ca-cn2,ca-cn3,ca-cn4,ca-cn5
> >>         mgr active: ca-cn1 standbys: ca-cn4, ca-cn3, ca-cn5, ca-cn2
> >>      osdmap e264: 60 osds: 59 up, 59 in; 75 remapped pgs
> >>             flags sortbitwise,require_jewel_osds,require_kraken_osds
> >>       pgmap v119402: 1024 pgs, 1 pools, 28519 GB data, 21548 kobjects
> >>             39976 GB used, 282 TB / 322 TB avail
> >>                  941 active+clean
> >>                   75 remapped+incomplete
> >>                    8 active+clean+scrubbing
> >>
> >> This seems to be an issue with bluestore; recovery is not happening
> >> properly with EC.
> >
> > It's possible but it seems a lot more likely this is some kind of
> > config issue. Can you share your osd map ("ceph osd getmap")?
> > -Greg
> >
> >>
> >> Thanks,
> >> Muthu
> >>
> >> On 24 January 2017 at 12:57, Muthusamy Muthiah <muthiah.muthus...@gmail.com>
> >> wrote:
> >>>
> >>> Hi Greg,
> >>>
> >>> We use EC 4+1 on a 5 node cluster in production deployments with filestore,
> >>> and it does recovery and peering when one OSD goes down. After a few
> >>> minutes, another OSD on the node where the failed OSD resides takes over
> >>> its PGs temporarily and all PGs go to active+clean state. The cluster also
> >>> does not go down during this recovery process.
> >>>
> >>> Only on bluestore do we see the cluster going to error state when one OSD
> >>> is down. We are still validating this and will let you know of additional
> >>> findings.
> >>>
> >>> Thanks,
> >>> Muthu
> >>>
> >>> On 21 January 2017 at 02:06, Shinobu Kinjo <ski...@redhat.com> wrote:
> >>>>
> >>>> `ceph pg dump` should show you something like:
> >>>>
> >>>> * active+undersized+degraded ... [NONE,3,2,4,1] 3 [NONE,3,2,4,1]
> >>>>
> >>>> Sam,
> >>>>
> >>>> Am I wrong? Or is it up to something else?
> >>>>
> >>>>
> >>>> On Sat, Jan 21, 2017 at 4:22 AM, Gregory Farnum <gfar...@redhat.com>
> >>>> wrote:
> >>>> > I'm pretty sure the default configs won't let an EC PG go active with
> >>>> > only "k" OSDs in its PG; it needs at least k+1 (or possibly more? Not
> >>>> > certain). Running an "n+1" EC config is just not a good idea.
> >>>> > For testing you could probably adjust this with the equivalent of
> >>>> > min_size for EC pools, but I don't know the parameters off the top of
> >>>> > my head.
> >>>> > -Greg
> >>>> >
> >>>> > On Fri, Jan 20, 2017 at 2:15 AM, Muthusamy Muthiah
> >>>> > <muthiah.muthus...@gmail.com> wrote:
> >>>> >> Hi,
> >>>> >>
> >>>> >> We are validating kraken 11.2.0 with bluestore on a 5 node cluster
> >>>> >> with EC 4+1.
> >>>> >>
> >>>> >> When an OSD is down, peering is not happening and the ceph health
> >>>> >> status moves to ERR state after a few minutes. This was working in
> >>>> >> previous development releases. Is any additional configuration
> >>>> >> required in v11.2.0?
> >>>> >>
> >>>> >> Following is our ceph configuration:
> >>>> >>
> >>>> >> mon_osd_down_out_interval = 30
> >>>> >> mon_osd_report_timeout = 30
> >>>> >> mon_osd_down_out_subtree_limit = host
> >>>> >> mon_osd_reporter_subtree_level = host
> >>>> >>
> >>>> >> and the recovery parameters are set to default.
> >>>> >>
> >>>> >> [root@ca-cn1 ceph]# ceph osd crush show-tunables
> >>>> >>
> >>>> >> {
> >>>> >>     "choose_local_tries": 0,
> >>>> >>     "choose_local_fallback_tries": 0,
> >>>> >>     "choose_total_tries": 50,
> >>>> >>     "chooseleaf_descend_once": 1,
> >>>> >>     "chooseleaf_vary_r": 1,
> >>>> >>     "chooseleaf_stable": 1,
> >>>> >>     "straw_calc_version": 1,
> >>>> >>     "allowed_bucket_algs": 54,
> >>>> >>     "profile": "jewel",
> >>>> >>     "optimal_tunables": 1,
> >>>> >>     "legacy_tunables": 0,
> >>>> >>     "minimum_required_version": "jewel",
> >>>> >>     "require_feature_tunables": 1,
> >>>> >>     "require_feature_tunables2": 1,
> >>>> >>     "has_v2_rules": 1,
> >>>> >>     "require_feature_tunables3": 1,
> >>>> >>     "has_v3_rules": 0,
> >>>> >>     "has_v4_buckets": 0,
> >>>> >>     "require_feature_tunables5": 1,
> >>>> >>     "has_v5_rules": 0
> >>>> >> }
> >>>> >>
> >>>> >> ceph status:
> >>>> >>
> >>>> >>      health HEALTH_ERR
> >>>> >>             173 pgs are stuck inactive for more than 300 seconds
> >>>> >>             173 pgs incomplete
> >>>> >>             173 pgs stuck inactive
> >>>> >>             173 pgs stuck unclean
> >>>> >>      monmap e2: 5 mons at
> >>>> >> {ca-cn1=10.50.5.117:6789/0,ca-cn2=10.50.5.118:6789/0,ca-cn3=10.50.5.119:6789/0,ca-cn4=10.50.5.120:6789/0,ca-cn5=10.50.5.121:6789/0}
> >>>> >>             election epoch 106, quorum 0,1,2,3,4
> >>>> >> ca-cn1,ca-cn2,ca-cn3,ca-cn4,ca-cn5
> >>>> >>         mgr active: ca-cn1 standbys: ca-cn2, ca-cn4, ca-cn5, ca-cn3
> >>>> >>      osdmap e1128: 60 osds: 59 up, 59 in; 173 remapped pgs
> >>>> >>             flags sortbitwise,require_jewel_osds,require_kraken_osds
> >>>> >>       pgmap v782747: 2048 pgs, 1 pools, 63133 GB data, 46293 kobjects
> >>>> >>             85199 GB used, 238 TB / 322 TB avail
> >>>> >>                 1868 active+clean
> >>>> >>                  173 remapped+incomplete
> >>>> >>                    7 active+clean+scrubbing
> >>>> >>
> >>>> >> MON log:
> >>>> >>
> >>>> >> 2017-01-20 09:25:54.715684 7f55bcafb700  0 log_channel(cluster) log [INF] :
> >>>> >> osd.54 out (down for 31.703786)
> >>>> >> 2017-01-20 09:25:54.725688 7f55bf4d5700  0 mon.ca-cn1@0 (leader).osd e1120
> >>>> >> crush map has features 288250512065953792, adjusting msgr requires
> >>>> >> 2017-01-20 09:25:54.729019 7f55bf4d5700  0 log_channel(cluster) log [INF] :
> >>>> >> osdmap e1120: 60 osds: 59 up, 59 in
> >>>> >> 2017-01-20 09:25:54.735987 7f55bf4d5700  0 log_channel(cluster) log [INF] :
> >>>> >> pgmap v781993: 2048 pgs: 1869 active+clean, 173 incomplete, 6
> >>>> >> active+clean+scrubbing; 63159 GB data, 85201 GB used, 238 TB / 322 TB avail;
> >>>> >> 21825 B/s rd, 163 MB/s wr, 2046 op/s
> >>>> >> 2017-01-20 09:25:55.737749 7f55bf4d5700  0 mon.ca-cn1@0 (leader).osd e1121
> >>>> >> crush map has features 288250512065953792, adjusting msgr requires
> >>>> >> 2017-01-20 09:25:55.744338 7f55bf4d5700  0 log_channel(cluster) log [INF] :
> >>>> >> osdmap e1121: 60 osds: 59 up, 59 in
> >>>> >> 2017-01-20 09:25:55.749616 7f55bf4d5700  0 log_channel(cluster) log [INF] :
> >>>> >> pgmap v781994: 2048 pgs: 29 remapped+incomplete, 1869 active+clean, 144
> >>>> >> incomplete, 6 active+clean+scrubbing; 63159 GB data, 85201 GB used, 238 TB /
> >>>> >> 322 TB avail; 44503 B/s rd, 45681 kB/s wr, 518 op/s
> >>>> >> 2017-01-20 09:25:56.768721 7f55bf4d5700  0 log_channel(cluster) log [INF] :
> >>>> >> pgmap v781995: 2048 pgs: 47 remapped+incomplete, 1869 active+clean, 126
> >>>> >> incomplete, 6 active+clean+scrubbing; 63159 GB data, 85201 GB used, 238 TB /
> >>>> >> 322 TB avail; 20275 B/s rd, 72742 kB/s wr, 665 op/s
> >>>> >>
> >>>> >> Thanks,
> >>>> >> Muthu
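One thing worth double-checking here, following Greg's earlier point about needing k+1 shards: the pool above reports "erasure size 4 min_size 4" (i.e. k=3, m=1), so with one OSD down a PG from that set has only 3 shards available, which is below min_size and would explain the remapped+incomplete state until the missing shard is rebuilt elsewhere. A minimal sketch of the checks, with the profile name as a placeholder and 2.2bc as the example PG from above:

    ceph osd pool get cdvr_ec min_size                # minimum shards a PG needs to serve I/O
    ceph osd pool get cdvr_ec erasure_code_profile    # name of the EC profile backing the pool
    ceph osd erasure-code-profile get <profile-name>  # k, m and the crush failure domain
    ceph pg 2.2bc query                               # peering details for the stuck PG

For testing only, Greg's min_size suggestion would map to something like "ceph osd pool set cdvr_ec min_size 3", though running at min_size = k leaves no redundancy while degraded.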
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1

# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19
device 20 osd.20
device 21 osd.21
device 22 osd.22
device 23 osd.23
device 24 osd.24
device 25 osd.25
device 26 osd.26
device 27 osd.27
device 28 osd.28
device 29 osd.29
device 30 osd.30
device 31 osd.31
device 32 osd.32
device 33 osd.33
device 34 osd.34
device 35 osd.35
device 36 osd.36
device 37 osd.37
device 38 osd.38
device 39 osd.39
device 40 osd.40
device 41 osd.41
device 42 osd.42
device 43 osd.43
device 44 osd.44
device 45 osd.45
device 46 osd.46
device 47 osd.47
device 48 osd.48
device 49 osd.49
device 50 osd.50
device 51 osd.51
device 52 osd.52
device 53 osd.53
device 54 osd.54
device 55 osd.55
device 56 osd.56
device 57 osd.57
device 58 osd.58
device 59 osd.59

# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 region
type 10 root

# buckets
host ca-cn4 {
        id -2           # do not change unnecessarily
        # weight 65.495
        alg straw
        hash 0  # rjenkins1
        item osd.3 weight 5.458
        item osd.5 weight 5.458
        item osd.10 weight 5.458
        item osd.16 weight 5.458
        item osd.21 weight 5.458
        item osd.27 weight 5.458
        item osd.30 weight 5.458
        item osd.35 weight 5.458
        item osd.42 weight 5.458
        item osd.47 weight 5.458
        item osd.51 weight 5.458
        item osd.53 weight 5.458
}
host ca-cn3 {
        id -3           # do not change unnecessarily
        # weight 65.495
        alg straw
        hash 0  # rjenkins1
        item osd.2 weight 5.458
        item osd.6 weight 5.458
        item osd.11 weight 5.458
        item osd.15 weight 5.458
        item osd.20 weight 5.458
        item osd.25 weight 5.458
        item osd.29 weight 5.458
        item osd.33 weight 5.458
        item osd.38 weight 5.458
        item osd.40 weight 5.458
        item osd.45 weight 5.458
        item osd.49 weight 5.458
}
host ca-cn5 {
        id -4           # do not change unnecessarily
        # weight 65.495
        alg straw
        hash 0  # rjenkins1
        item osd.0 weight 5.458
        item osd.7 weight 5.458
        item osd.12 weight 5.458
        item osd.17 weight 5.458
        item osd.23 weight 5.458
        item osd.26 weight 5.458
        item osd.32 weight 5.458
        item osd.34 weight 5.458
        item osd.41 weight 5.458
        item osd.46 weight 5.458
        item osd.52 weight 5.458
        item osd.56 weight 5.458
}
host ca-cn1 {
        id -5           # do not change unnecessarily
        # weight 65.495
        alg straw
        hash 0  # rjenkins1
        item osd.4 weight 5.458
        item osd.9 weight 5.458
        item osd.14 weight 5.458
        item osd.19 weight 5.458
        item osd.24 weight 5.458
        item osd.36 weight 5.458
        item osd.43 weight 5.458
        item osd.50 weight 5.458
        item osd.55 weight 5.458
        item osd.57 weight 5.458
        item osd.58 weight 5.458
        item osd.59 weight 5.458
}
host ca-cn2 {
        id -6           # do not change unnecessarily
        # weight 65.495
        alg straw
        hash 0  # rjenkins1
        item osd.1 weight 5.458
        item osd.8 weight 5.458
        item osd.13 weight 5.458
        item osd.18 weight 5.458
        item osd.22 weight 5.458
        item osd.28 weight 5.458
        item osd.31 weight 5.458
        item osd.37 weight 5.458
        item osd.39 weight 5.458
        item osd.44 weight 5.458
        item osd.48 weight 5.458
        item osd.54 weight 5.458
}
root default {
        id -1           # do not change unnecessarily
        # weight 327.473
        alg straw
        hash 0  # rjenkins1
        item ca-cn4 weight 65.495
        item ca-cn3 weight 65.495
        item ca-cn5 weight 65.495
        item ca-cn1 weight 65.495
        item ca-cn2 weight 65.495
}

# rules
rule replicated_ruleset {
        ruleset 0
        type replicated
        min_size 1
        max_size 10
        step take default
        step chooseleaf firstn 0 type host
        step emit
}
rule cdvr_ec {
        ruleset 1
        type erasure
        min_size 3
        max_size 5
        step set_chooseleaf_tries 5
        step set_choose_tries 100
        step take default
        step chooseleaf indep 0 type host
        step emit
}
# end crush map
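As a quick sanity check of the cdvr_ec rule above, a rough sketch using crushtool's test mode (file names are placeholders; --weight 1 0 simulates osd.1 being out):

    crushtool -c crushmap.txt -o crushmap.bin        # compile the text map back to binary
    crushtool -i crushmap.bin --test --rule 1 --num-rep 4 --weight 1 0 --show-mappings --show-bad-mappings

If CRUSH can still pick 4 OSDs on distinct hosts with osd.1 weighted out, no bad mappings should be reported, which would point back at the pool's min_size rather than at the crush map itself.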
(Attachment: osdmap1, binary data)