Hi,

in case someone hits the same problem, try the following:
stop scrubbing by setting the "noscrub" and "nodeep-scrub" flags
wait until the running scrubs finish
restart the monitors (one by one)
restart the OSD servers (I restarted all three of them because it was a
small cluster, but restarting all of them may not be necessary on
big clusters)
remove the "noscrub" and "nodeep-scrub" flags

As far as I can tell, we ran into this problem after upgrading the cluster
from Infernalis to Jewel. However, it happened on only one of three upgraded
clusters, so it seems it's not a very common problem.

Br,
Arvydas

On Thu, Sep 8, 2016 at 10:26 AM, Arvydas Opulskis <zebedie...@gmail.com>
wrote:

> Hi Goncalo, there it is:
>
> # ceph pg 11.34a query
> {
>     "state": "active+clean+scrubbing",
>     "snap_trimq": "[]",
>     "epoch": 6547,
>     "up": [
>         24,
>         3
>     ],
>     "acting": [
>         24,
>         3
>     ],
>     "actingbackfill": [
>         "3",
>         "24"
>     ],
>     "info": {
>         "pgid": "11.34a",
>         "last_update": "6547'85045",
>         "last_complete": "6547'85045",
>         "log_tail": "6215'81998",
>         "last_user_version": 85045,
>         "last_backfill": "MAX",
>         "last_backfill_bitwise": 0,
>         "purged_snaps": "[]",
>         "history": {
>             "epoch_created": 5178,
>             "last_epoch_started": 5241,
>             "last_epoch_clean": 5241,
>             "last_epoch_split": 0,
>             "last_epoch_marked_full": 0,
>             "same_up_since": 5184,
>             "same_interval_since": 5240,
>             "same_primary_since": 5096,
>             "last_scrub": "6547'85045",
>             "last_scrub_stamp": "2016-09-08 09:20:06.804646",
>             "last_deep_scrub": "6547'85045",
>             "last_deep_scrub_stamp": "2016-09-08 09:18:22.582767",
>             "last_clean_scrub_stamp": "2016-09-08 09:20:06.804646"
>         },
>         "stats": {
>             "version": "6547'85045",
>             "reported_seq": "219744",
>             "reported_epoch": "6547",
>             "state": "active+clean+scrubbing",
>             "last_fresh": "2016-09-08 09:20:13.712725",
>             "last_change": "2016-09-08 09:20:13.712725",
>             "last_active": "2016-09-08 09:20:13.712725",
>             "last_peered": "2016-09-08 09:20:13.712725",
>             "last_clean": "2016-09-08 09:20:13.712725",
>             "last_became_active": "2016-07-27 18:46:25.926150",
>             "last_became_peered": "2016-07-27 18:46:25.926150",
>             "last_unstale": "2016-09-08 09:20:13.712725",
>             "last_undegraded": "2016-09-08 09:20:13.712725",
>             "last_fullsized": "2016-09-08 09:20:13.712725",
>             "mapping_epoch": 5185,
>             "log_start": "6215'81998",
>             "ondisk_log_start": "6215'81998",
>             "created": 5178,
>             "last_epoch_clean": 5241,
>             "parent": "0.0",
>             "parent_split_bits": 10,
>             "last_scrub": "6547'85045",
>             "last_scrub_stamp": "2016-09-08 09:20:06.804646",
>             "last_deep_scrub": "6547'85045",
>             "last_deep_scrub_stamp": "2016-09-08 09:18:22.582767",
>             "last_clean_scrub_stamp": "2016-09-08 09:20:06.804646",
>             "log_size": 3047,
>             "ondisk_log_size": 3047,
>             "stats_invalid": false,
>             "dirty_stats_invalid": false,
>             "omap_stats_invalid": false,
>             "hitset_stats_invalid": false,
>             "hitset_bytes_stats_invalid": false,
>             "pin_stats_invalid": true,
>             "stat_sum": {
>                 "num_bytes": 6225173162,
>                 "num_objects": 2688,
>                 "num_object_clones": 0,
>                 "num_object_copies": 5376,
>                 "num_objects_missing_on_primary": 0,
>                 "num_objects_missing": 0,
>                 "num_objects_degraded": 0,
>                 "num_objects_misplaced": 0,
>                 "num_objects_unfound": 0,
>                 "num_objects_dirty": 2688,
>                 "num_whiteouts": 0,
>                 "num_read": 3416,
>                 "num_read_kb": 710270,
>                 "num_write": 16467,
>                 "num_write_kb": 2275320,
>                 "num_scrub_errors": 0,
>                 "num_shallow_scrub_errors": 0,
>                 "num_deep_scrub_errors": 0,
>                 "num_objects_recovered": 0,
>                 "num_bytes_recovered": 0,
>                 "num_keys_recovered": 0,
>                 "num_objects_omap": 0,
>                 "num_objects_hit_set_archive": 0,
>                 "num_bytes_hit_set_archive": 0,
>                 "num_flush": 0,
>                 "num_flush_kb": 0,
>                 "num_evict": 0,
>                 "num_evict_kb": 0,
>                 "num_promote": 0,
>                 "num_flush_mode_high": 0,
>                 "num_flush_mode_low": 0,
>                 "num_evict_mode_some": 0,
>                 "num_evict_mode_full": 0,
>                 "num_objects_pinned": 0
>             },
>             "up": [
>                 24,
>                 3
>             ],
>             "acting": [
>                 24,
>                 3
>             ],
>             "blocked_by": [],
>             "up_primary": 24,
>             "acting_primary": 24
>         },
>         "empty": 0,
>         "dne": 0,
>         "incomplete": 0,
>         "last_epoch_started": 5241,
>         "hit_set_history": {
>             "current_last_update": "0'0",
>             "history": []
>         }
>     },
>     "peer_info": [
>         {
>             "peer": "3",
>             "pgid": "11.34a",
>             "last_update": "6547'85045",
>             "last_complete": "6547'85045",
>             "log_tail": "4988'75612",
>             "last_user_version": 0,
>             "last_backfill": "MAX",
>             "last_backfill_bitwise": 1,
>             "purged_snaps": "[]",
>             "history": {
>                 "epoch_created": 5178,
>                 "last_epoch_started": 5241,
>                 "last_epoch_clean": 5241,
>                 "last_epoch_split": 0,
>                 "last_epoch_marked_full": 0,
>                 "same_up_since": 5184,
>                 "same_interval_since": 5240,
>                 "same_primary_since": 5096,
>                 "last_scrub": "6547'85045",
>                 "last_scrub_stamp": "2016-09-08 09:20:06.804646",
>                 "last_deep_scrub": "6547'85045",
>                 "last_deep_scrub_stamp": "2016-09-08 09:18:22.582767",
>                 "last_clean_scrub_stamp": "2016-09-08 09:20:06.804646"
>             },
>             "stats": {
>                 "version": "5174'78681",
>                 "reported_seq": "68548",
>                 "reported_epoch": "5239",
>                 "state": "active+remapped+backfilling",
>                 "last_fresh": "2016-07-27 18:46:23.904812",
>                 "last_change": "2016-07-27 18:39:52.227105",
>                 "last_active": "2016-07-27 18:46:23.904812",
>                 "last_peered": "2016-07-27 18:46:23.904812",
>                 "last_clean": "2016-07-27 18:32:30.929929",
>                 "last_became_active": "2016-07-27 18:34:25.035629",
>                 "last_became_peered": "2016-07-27 18:34:25.035629",
>                 "last_unstale": "2016-07-27 18:46:23.904812",
>                 "last_undegraded": "2016-07-27 18:46:23.904812",
>                 "last_fullsized": "2016-07-27 18:46:23.904812",
>                 "mapping_epoch": 5185,
>                 "log_start": "4988'75612",
>                 "ondisk_log_start": "4988'75612",
>                 "created": 5178,
>                 "last_epoch_clean": 5183,
>                 "parent": "0.0",
>                 "parent_split_bits": 10,
>                 "last_scrub": "5015'78540",
>                 "last_scrub_stamp": "2016-07-22 10:10:55.296356",
>                 "last_deep_scrub": "5015'78540",
>                 "last_deep_scrub_stamp": "2016-07-22 10:10:55.296356",
>                 "last_clean_scrub_stamp": "2016-07-22 10:10:55.296356",
>                 "log_size": 3069,
>                 "ondisk_log_size": 3069,
>                 "stats_invalid": true,
>                 "dirty_stats_invalid": false,
>                 "omap_stats_invalid": false,
>                 "hitset_stats_invalid": false,
>                 "hitset_bytes_stats_invalid": false,
>                 "pin_stats_invalid": true,
>                 "stat_sum": {
>                     "num_bytes": 4469376265,
>                     "num_objects": 1704,
>                     "num_object_clones": 0,
>                     "num_object_copies": 5112,
>                     "num_objects_missing_on_primary": 0,
>                     "num_objects_missing": 0,
>                     "num_objects_degraded": 0,
>                     "num_objects_misplaced": 1711,
>                     "num_objects_unfound": 0,
>                     "num_objects_dirty": 1704,
>                     "num_whiteouts": 0,
>                     "num_read": 9692,
>                     "num_read_kb": 6474215,
>                     "num_write": 43858,
>                     "num_write_kb": 14418818,
>                     "num_scrub_errors": 0,
>                     "num_shallow_scrub_errors": 0,
>                     "num_deep_scrub_errors": 0,
>                     "num_objects_recovered": 4190,
>                     "num_bytes_recovered": 10727412780,
>                     "num_keys_recovered": 0,
>                     "num_objects_omap": 0,
>                     "num_objects_hit_set_archive": 0,
>                     "num_bytes_hit_set_archive": 0,
>                     "num_flush": 0,
>                     "num_flush_kb": 0,
>                     "num_evict": 0,
>                     "num_evict_kb": 0,
>                     "num_promote": 0,
>                     "num_flush_mode_high": 0,
>                     "num_flush_mode_low": 0,
>                     "num_evict_mode_some": 0,
>                     "num_evict_mode_full": 0,
>                     "num_objects_pinned": 0
>                 },
>                 "up": [
>                     24,
>                     3
>                 ],
>                 "acting": [
>                     24,
>                     3
>                 ],
>                 "blocked_by": [],
>                 "up_primary": 24,
>                 "acting_primary": 24
>             },
>             "empty": 0,
>             "dne": 0,
>             "incomplete": 0,
>             "last_epoch_started": 5241,
>             "hit_set_history": {
>                 "current_last_update": "0'0",
>                 "history": []
>             }
>         }
>     ],
>     "recovery_state": [
>         {
>             "name": "Started\/Primary\/Active",
>             "enter_time": "2016-07-27 18:46:25.890580",
>             "might_have_unfound": [],
>             "recovery_progress": {
>                 "backfill_targets": [],
>                 "waiting_on_backfill": [],
>                 "last_backfill_started": "MIN",
>                 "backfill_info": {
>                     "begin": "MIN",
>                     "end": "MIN",
>                     "objects": []
>                 },
>                 "peer_backfill_info": [],
>                 "backfills_in_flight": [],
>                 "recovering": [],
>                 "pg_backend": {
>                     "pull_from_peer": [],
>                     "pushing": []
>                 }
>             },
>             "scrub": {
>                 "scrubber.epoch_start": "5240",
>                 "scrubber.active": 1,
>                 "scrubber.state": "WAIT_REPLICAS",
>                 "scrubber.start": "11:52c3e5be::::0",
>                 "scrubber.end": "11:52c45d0a::::0",
>                 "scrubber.subset_last_update": "6538'84947",
>                 "scrubber.deep": false,
>                 "scrubber.seed": 4294967295,
>                 "scrubber.waiting_on": 1,
>                 "scrubber.waiting_on_whom": [
>                     "3"
>                 ]
>             }
>         },
>         {
>             "name": "Started",
>             "enter_time": "2016-07-27 18:46:24.832320"
>         }
>     ],
>     "agent_state": {}
> }
>
>
>
> On Thu, Sep 8, 2016 at 10:16 AM, Goncalo Borges <
> goncalo.bor...@sydney.edu.au> wrote:
>
>> Can you please share the result of
>>
>>     ceph pg 11.34a query
>>
>> ?
>>
>>
>> On 09/08/2016 05:03 PM, Arvydas Opulskis wrote:
>>
>>> 2016-09-08 08:45:01.441945 osd.24 [INF] 11.34a scrub starts
>>> 2016-09-08 08:45:03.585039 osd.24 [INF] 11.34a scrub ok
>>>
>>
>> --
>> Goncalo Borges
>> Research Computing
>> ARC Centre of Excellence for Particle Physics at the Terascale
>> School of Physics A28 | University of Sydney, NSW  2006
>> T: +61 2 93511937
>>
>> _______________________________________________
>> ceph-users mailing list
>> ceph-users@lists.ceph.com
>> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
>>
>
>
_______________________________________________
ceph-users mailing list
ceph-users@lists.ceph.com
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

Reply via email to