Hi, I have two OSD and Mon nodes.
I'm going to add a third OSD and mon to this cluster, but first I want to fix this error: ``` # ceph -s cluster 8461e3b5-abda-4471-98c0-913e56aec890 health HEALTH_WARN 64 pgs degraded 64 pgs stuck unclean 64 pgs undersized recovery 8261/16522 objects degraded (50.000%) monmap e1: 2 mons at {ceph-rbx-1= 172.29.20.10:6789/0,ceph-rbx-2=172.29.20.11:6789/0} election epoch 22, quorum 0,1 ceph-rbx-1,ceph-rbx-2 osdmap e57: 2 osds: 1 up, 1 in; 64 remapped pgs flags sortbitwise,require_jewel_osds pgmap v784695: 64 pgs, 1 pools, 31719 MB data, 8261 objects 31539 MB used, 65692 MB / 97231 MB avail 8261/16522 objects degraded (50.000%) 64 active+undersized+degraded client io 22038 B/s wr, 0 op/s rd, 0 op/s wr ``` I have executed this command: ``` # ceph pg ls degraded | tail -n +2 | awk '{print $1}' | xargs -n 1 ceph pg force_create_pg ``` after which I get: ``` # ceph health HEALTH_ERR 45 pgs are stuck inactive for more than 300 seconds; 19 pgs degraded; 45 pgs stuck inactive; 19 pgs stuck unclean; 19 pgs undersized; recovery 2514/5028 objects degraded (50.000%) ``` If I look at the pg detail as explained here http://docs.ceph.com/docs/infernalis/rados/troubleshooting/troubleshooting-pg/#placement-group-down-peering-failure I get: ``` # ceph pg 0.1 query { "state": "active+undersized+degraded", "snap_trimq": "[]", "epoch": 57, "up": [ 1 ], "acting": [ 1 ], "actingbackfill": [ "1" ], "info": { "pgid": "0.1", "last_update": "57'32353", "last_complete": "57'32353", "log_tail": "42'25917", "last_user_version": 32353, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": "[1~3]", "history": { "epoch_created": 1, "last_epoch_started": 52, "last_epoch_clean": 52, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 51, "same_interval_since": 51, "same_primary_since": 34, "last_scrub": "50'28863", "last_scrub_stamp": "2017-01-14 07:12:27.930427", "last_deep_scrub": "42'23417", "last_deep_scrub_stamp": "2017-01-10 20:31:12.351497", "last_clean_scrub_stamp": 
"2017-01-14 07:12:27.930427" }, "stats": { "version": "57'32353", "reported_seq": "31704", "reported_epoch": "57", "state": "active+undersized+degraded", "last_fresh": "2017-01-16 10:47:07.330850", "last_change": "2017-01-14 13:42:42.104820", "last_active": "2017-01-16 10:47:07.330850", "last_peered": "2017-01-16 10:47:07.330850", "last_clean": "2017-01-14 11:29:21.619183", "last_became_active": "2017-01-14 13:42:42.104820", "last_became_peered": "2017-01-14 13:42:42.104820", "last_unstale": "2017-01-16 10:47:07.330850", "last_undegraded": "2017-01-14 13:42:41.066061", "last_fullsized": "2017-01-14 13:42:41.066061", "mapping_epoch": 37, "log_start": "42'25917", "ondisk_log_start": "42'25917", "created": 1, "last_epoch_clean": 52, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "50'28863", "last_scrub_stamp": "2017-01-14 07:12:27.930427", "last_deep_scrub": "42'23417", "last_deep_scrub_stamp": "2017-01-10 20:31:12.351497", "last_clean_scrub_stamp": "2017-01-14 07:12:27.930427", "log_size": 6436, "ondisk_log_size": 6436, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 567734272, "num_objects": 140, "num_object_clones": 0, "num_object_copies": 280, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 140, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 140, "num_whiteouts": 0, "num_read": 5801, "num_read_kb": 176032, "num_write": 64516, "num_write_kb": 1211660, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 2, "num_bytes_recovered": 8388608, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 
0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [ 1 ], "acting": [ 1 ], "blocked_by": [], "up_primary": 1, "acting_primary": 1 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 52, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [], "recovery_state": [ { "name": "Started\/Primary\/Active", "enter_time": "2017-01-14 13:42:42.084021", "might_have_unfound": [], "recovery_progress": { "backfill_targets": [], "waiting_on_backfill": [], "last_backfill_started": "MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "pull_from_peer": [], "pushing": [] } }, "scrub": { "scrubber.epoch_start": "37", "scrubber.active": 0, "scrubber.state": "INACTIVE", "scrubber.start": "MIN", "scrubber.end": "MIN", "scrubber.subset_last_update": "0'0", "scrubber.deep": false, "scrubber.seed": 0, "scrubber.waiting_on": 0, "scrubber.waiting_on_whom": [] } }, { "name": "Started", "enter_time": "2017-01-14 13:42:41.065959" } ], "agent_state": {} } ``` I don't understand what it means. Now I don't know what I need to do to fix it. Any tips? Best regards, Stéphane -- Stéphane Klein <cont...@stephane-klein.info> blog: http://stephane-klein.info cv : http://cv.stephane-klein.info Twitter: http://twitter.com/klein_stephane
_______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com