Hi Adrien,

Looks like neither of:

- #48615 (add option mds_log_periods_per_segment to solve the problem of large 
subtreemap event)
or
- #48648 (adjust the capacity of a journal segment according to the size of 
subtreemap event) 

ever made it into any Ceph release.

- #48822 (switch submit_mutex to fair mutex for MDLog) that emerged from the 
same tracker [1] was merged and backported to Quincy v17.2.6, but it doesn't 
seem related to the issue you're facing.

Maybe try asking on the #ceph-devel Slack channel.

Regards,
Frédéric.

[1] https://github.com/ceph/ceph/pull/44180#issuecomment-1309661152

----- Le 23 Mai 25, à 11:12, Adrien Georget adrien.geor...@cc.in2p3.fr a écrit :

> Hi,
> 
> We have a CephFS Quincy cluster (17.2.7) used by Openshift for PVC
> provisioning (and a lot of snapshots) with the ceph-csi driver.
> On the metadata pool, we observed a continuous increase of the write
> throughput activity from 10MB/s just after a restart of the MDS to
> +200MB/s after 2 weeks.
> 
> We first thought it was related to the snapshot deletion activity (a lot
> of creations and deletions every day) because we saw the same kind of
> increase in the number of strays.
> We tried to evaluate strays using the recursive scrub [1]. Strays
> decreased from 450K to 350K, but there was no impact on the metadata
> write throughput.
> Only an MDS restart brings the write throughput back to normal.
> 
> I saw this very similar issue https://tracker.ceph.com/issues/53542 and
> I'm wondering if this has been fixed in Quincy?
> I tried to increase "mds_log_events_per_segment" and
> "mds_log_max_segments" but nothing helped.
> As I answered in the tracker, I also observed large 4M objects in
> objecter_requests.
> 
> Is there anything we can do to prevent this and avoid restarting the MDS
> every week? If we don't do that, PVC operations stay blocked on the
> OpenShift side.
> 
> Cheers,
> Adrien
> 
> [1]
> https://docs.ceph.com/en/quincy/cephfs/scrub/#evaluate-strays-using-recursive-scrub
> 
> The MDS perf dump, in case it helps:
> 
> {
>     "AsyncMessenger::Worker-0": {
>         "msgr_recv_messages": 452889868,
>         "msgr_send_messages": 210932005,
>         "msgr_recv_bytes": 381554341884,
>         "msgr_send_bytes": 31897539519042,
>         "msgr_created_connections": 5392,
>         "msgr_active_connections": 213,
>         "msgr_running_total_time": 121964.941943159,
>         "msgr_running_send_time": 85193.342708858,
>         "msgr_running_recv_time": 40274.249544605,
>         "msgr_running_fast_dispatch_time": 15899.702855460,
>         "msgr_send_messages_queue_lat": {
>             "avgcount": 210932003,
>             "sum": 140532.121408536,
>             "avgtime": 0.000666243
>         },
>         "msgr_handle_ack_lat": {
>             "avgcount": 110313532,
>             "sum": 120.513184629,
>             "avgtime": 0.000001092
>         }
>     },
>     "AsyncMessenger::Worker-1": {
>         "msgr_recv_messages": 393334601,
>         "msgr_send_messages": 170841678,
>         "msgr_recv_bytes": 307649876710,
>         "msgr_send_bytes": 23017109865662,
>         "msgr_created_connections": 12712,
>         "msgr_active_connections": 218,
>         "msgr_running_total_time": 93136.265793790,
>         "msgr_running_send_time": 63604.143090523,
>         "msgr_running_recv_time": 30881.136884349,
>         "msgr_running_fast_dispatch_time": 12763.126679902,
>         "msgr_send_messages_queue_lat": {
>             "avgcount": 170841678,
>             "sum": 153574.265735747,
>             "avgtime": 0.000898927
>         },
>         "msgr_handle_ack_lat": {
>             "avgcount": 81594240,
>             "sum": 65.480936528,
>             "avgtime": 0.000000802
>         }
>     },
>     "AsyncMessenger::Worker-2": {
>         "msgr_recv_messages": 481956104,
>         "msgr_send_messages": 301348856,
>         "msgr_recv_bytes": 349838769013,
>         "msgr_send_bytes": 26783354654792,
>         "msgr_created_connections": 34394,
>         "msgr_active_connections": 215,
>         "msgr_running_total_time": 108807.807447203,
>         "msgr_running_send_time": 74577.666991790,
>         "msgr_running_recv_time": 41392.667149426,
>         "msgr_running_fast_dispatch_time": 13971.142479134,
>         "msgr_send_messages_queue_lat": {
>             "avgcount": 301348847,
>             "sum": 138655.862052108,
>             "avgtime": 0.000460117
>         },
>         "msgr_handle_ack_lat": {
>             "avgcount": 156375610,
>             "sum": 144.168097813,
>             "avgtime": 0.000000921
>         }
>     },
>     "cct": {
>         "total_workers": 1,
>         "unhealthy_workers": 0
>     },
>     "finisher-MDSRank": {
>         "queue_len": 0,
>         "complete_latency": {
>             "avgcount": 121564865,
>             "sum": 160601.142778677,
>             "avgtime": 0.001321114
>         }
>     },
>     "finisher-PurgeQueue": {
>         "queue_len": 0,
>         "complete_latency": {
>             "avgcount": 27338289,
>             "sum": 8785.146872773,
>             "avgtime": 0.000321349
>         }
>     },
>     "mds": {
>         "request": 210298249,
>         "reply": 210297956,
>         "reply_latency": {
>             "avgcount": 210297956,
>             "sum": 1148337.019419176,
>             "avgtime": 0.005460523
>         },
>         "slow_reply": 15,
>         "forward": 0,
>         "dir_fetch": 49843551,
>         "dir_commit": 65281430,
>         "dir_split": 6,
>         "dir_merge": 17,
>         "inodes": 5113817,
>         "inodes_top": 2080514,
>         "inodes_bottom": 2263929,
>         "inodes_pin_tail": 769374,
>         "inodes_pinned": 2141653,
>         "inodes_expired": 538454667,
>         "inodes_with_caps": 413837,
>         "caps": 457168,
>         "subtrees": 2,
>         "traverse": 262113637,
>         "traverse_hit": 245551660,
>         "traverse_forward": 0,
>         "traverse_discover": 0,
>         "traverse_dir_fetch": 1642726,
>         "traverse_remote_ino": 0,
>         "traverse_lock": 140893,
>         "load_cent": 392174,
>         "q": 0,
>         "exported": 0,
>         "exported_inodes": 0,
>         "imported": 0,
>         "imported_inodes": 0,
>         "openino_dir_fetch": 407633,
>         "openino_backtrace_fetch": 7011755,
>         "openino_peer_discover": 0,
>         "root_rfiles": 11618539,
>         "root_rbytes": 11106041675168,
>         "root_rsnaps": 7576,
>         "scrub_backtrace_fetch": 7009788,
>         "scrub_set_tag": 0,
>         "scrub_backtrace_repaired": 0,
>         "scrub_inotable_repaired": 0,
>         "scrub_dir_inodes": 1526050,
>         "scrub_dir_base_inodes": 3,
>         "scrub_dirfrag_rstats": 1526047,
>         "scrub_file_inodes": 5483733,
>         "handle_inode_file_caps": 0,
>         "ceph_cap_op_revoke": 5076186,
>         "ceph_cap_op_grant": 60192925,
>         "ceph_cap_op_trunc": 658164,
>         "ceph_cap_op_flushsnap_ack": 0,
>         "ceph_cap_op_flush_ack": 6,
>         "handle_client_caps": 70349707,
>         "handle_client_caps_dirty": 21547109,
>         "handle_client_cap_release": 11468771,
>         "process_request_cap_release": 83631146
>     },
>     "mds_cache": {
>         "num_strays": 347989,
>         "num_strays_delayed": 0,
>         "num_strays_enqueuing": 0,
>         "strays_created": 19259955,
>         "strays_enqueued": 20275628,
>         "strays_reintegrated": 469,
>         "strays_migrated": 0,
>         "num_recovering_processing": 0,
>         "num_recovering_enqueued": 0,
>         "num_recovering_prioritized": 0,
>         "recovery_started": 484,
>         "recovery_completed": 484,
>         "ireq_enqueue_scrub": 2,
>         "ireq_exportdir": 0,
>         "ireq_flush": 0,
>         "ireq_fragmentdir": 23,
>         "ireq_fragstats": 0,
>         "ireq_inodestats": 0
>     },
>     "mds_log": {
>         "evadd": 148604638,
>         "evex": 148687537,
>         "evtrm": 148686957,
>         "ev": 4640,
>         "evexg": 0,
>         "evexd": 580,
>         "segadd": 5143165,
>         "segex": 5143142,
>         "segtrm": 5143128,
>         "seg": 166,
>         "segexg": 0,
>         "segexd": 14,
>         "expos": 2655552712972056,
>         "wrpos": 2655556750517207,
>         "rdpos": 2574717722176006,
>         "jlat": {
>             "avgcount": 23948614,
>             "sum": 1026045.477642289,
>             "avgtime": 0.042843626
>         },
>         "replayed": 86959
>     },
>     "mds_mem": {
>         "ino": 5091233,
>         "ino+": 537662754,
>         "ino-": 532571521,
>         "dir": 385511,
>         "dir+": 48473070,
>         "dir-": 48087559,
>         "dn": 5114534,
>         "dn+": 564239550,
>         "dn-": 559125016,
>         "cap": 457198,
>         "cap+": 296065469,
>         "cap-": 295608271,
>         "rss": 21947020,
>         "heap": 223516
>     },
>     "mds_server": {
>         "dispatch_client_request": 265192774,
>         "dispatch_server_request": 0,
>         "handle_client_request": 210298249,
>         "handle_client_session": 44456292,
>         "handle_peer_request": 0,
>         "req_create_latency": {
>             "avgcount": 19045215,
>             "sum": 243874.341572325,
>             "avgtime": 0.012805019
>         },
>         "req_getattr_latency": {
>             "avgcount": 13244393,
>             "sum": 122372.637049784,
>             "avgtime": 0.009239580
>         },
>         "req_getfilelock_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_link_latency": {
>             "avgcount": 108,
>             "sum": 0.089209875,
>             "avgtime": 0.000826017
>         },
>         "req_lookup_latency": {
>             "avgcount": 42591076,
>             "sum": 61153.611919024,
>             "avgtime": 0.001435831
>         },
>         "req_lookuphash_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_lookupino_latency": {
>             "avgcount": 6119,
>             "sum": 12.051986148,
>             "avgtime": 0.001969600
>         },
>         "req_lookupname_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_lookupparent_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_lookupsnap_latency": {
>             "avgcount": 11789,
>             "sum": 1.916955778,
>             "avgtime": 0.000162605
>         },
>         "req_lssnap_latency": {
>             "avgcount": 174999,
>             "sum": 59.628852495,
>             "avgtime": 0.000340738
>         },
>         "req_mkdir_latency": {
>             "avgcount": 386018,
>             "sum": 5323.027974258,
>             "avgtime": 0.013789584
>         },
>         "req_mknod_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_mksnap_latency": {
>             "avgcount": 11784,
>             "sum": 4342.774640033,
>             "avgtime": 0.368531452
>         },
>         "req_open_latency": {
>             "avgcount": 1719118,
>             "sum": 2957.961208770,
>             "avgtime": 0.001720627
>         },
>         "req_readdir_latency": {
>             "avgcount": 49303877,
>             "sum": 153658.488737674,
>             "avgtime": 0.003116559
>         },
>         "req_rename_latency": {
>             "avgcount": 1021332,
>             "sum": 4173.200914205,
>             "avgtime": 0.004086037
>         },
>         "req_renamesnap_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_rmdir_latency": {
>             "avgcount": 355348,
>             "sum": 6609.629921457,
>             "avgtime": 0.018600442
>         },
>         "req_rmsnap_latency": {
>             "avgcount": 11476,
>             "sum": 4647.171801363,
>             "avgtime": 0.404947002
>         },
>         "req_rmxattr_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_setattr_latency": {
>             "avgcount": 951092,
>             "sum": 60918.695526648,
>             "avgtime": 0.064051317
>         },
>         "req_setdirlayout_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_setfilelock_latency": {
>             "avgcount": 42236246,
>             "sum": 87407.285106607,
>             "avgtime": 0.002069485
>         },
>         "req_setlayout_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         },
>         "req_setxattr_latency": {
>             "avgcount": 20450454,
>             "sum": 351779.625311317,
>             "avgtime": 0.017201555
>         },
>         "req_symlink_latency": {
>             "avgcount": 2,
>             "sum": 0.002286490,
>             "avgtime": 0.001143245
>         },
>         "req_unlink_latency": {
>             "avgcount": 18777510,
>             "sum": 39044.878444925,
>             "avgtime": 0.002079342
>         },
>         "cap_revoke_eviction": 0,
>         "cap_acquisition_throttle": 0,
>         "req_getvxattr_latency": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "mds_sessions": {
>         "session_count": 545,
>         "session_add": 26598,
>         "session_remove": 26053,
>         "sessions_open": 545,
>         "sessions_stale": 0,
>         "total_load": 11076,
>         "average_load": 20,
>         "avg_session_uptime": 37409721,
>         "mdthresh_evicted": 0
>     },
>     "mempool": {
>         "bloom_filter_bytes": 10309112,
>         "bloom_filter_items": 10309112,
>         "bluestore_alloc_bytes": 0,
>         "bluestore_alloc_items": 0,
>         "bluestore_cache_data_bytes": 0,
>         "bluestore_cache_data_items": 0,
>         "bluestore_cache_onode_bytes": 0,
>         "bluestore_cache_onode_items": 0,
>         "bluestore_cache_meta_bytes": 0,
>         "bluestore_cache_meta_items": 0,
>         "bluestore_cache_other_bytes": 0,
>         "bluestore_cache_other_items": 0,
>         "bluestore_Buffer_bytes": 0,
>         "bluestore_Buffer_items": 0,
>         "bluestore_Extent_bytes": 0,
>         "bluestore_Extent_items": 0,
>         "bluestore_Blob_bytes": 0,
>         "bluestore_Blob_items": 0,
>         "bluestore_SharedBlob_bytes": 0,
>         "bluestore_SharedBlob_items": 0,
>         "bluestore_inline_bl_bytes": 0,
>         "bluestore_inline_bl_items": 0,
>         "bluestore_fsck_bytes": 0,
>         "bluestore_fsck_items": 0,
>         "bluestore_txc_bytes": 0,
>         "bluestore_txc_items": 0,
>         "bluestore_writing_deferred_bytes": 0,
>         "bluestore_writing_deferred_items": 0,
>         "bluestore_writing_bytes": 0,
>         "bluestore_writing_items": 0,
>         "bluefs_bytes": 0,
>         "bluefs_items": 0,
>         "bluefs_file_reader_bytes": 0,
>         "bluefs_file_reader_items": 0,
>         "bluefs_file_writer_bytes": 0,
>         "bluefs_file_writer_items": 0,
>         "buffer_anon_bytes": 284731060,
>         "buffer_anon_items": 5959770,
>         "buffer_meta_bytes": 616,
>         "buffer_meta_items": 7,
>         "osd_bytes": 0,
>         "osd_items": 0,
>         "osd_mapbl_bytes": 0,
>         "osd_mapbl_items": 0,
>         "osd_pglog_bytes": 0,
>         "osd_pglog_items": 0,
>         "osdmap_bytes": 81480,
>         "osdmap_items": 2396,
>         "osdmap_mapping_bytes": 0,
>         "osdmap_mapping_items": 0,
>         "pgmap_bytes": 0,
>         "pgmap_items": 0,
>         "mds_co_bytes": 16277298996,
>         "mds_co_items": 260156681,
>         "unittest_1_bytes": 0,
>         "unittest_1_items": 0,
>         "unittest_2_bytes": 0,
>         "unittest_2_items": 0
>     },
>     "objecter": {
>         "op_active": 11,
>         "op_laggy": 0,
>         "op_send": 253406063,
>         "op_send_bytes": 81014361089067,
>         "op_resend": 0,
>         "op_reply": 253406052,
>         "oplen_avg": {
>             "avgcount": 253406063,
>             "sum": 451959672
>         },
>         "op": 253406063,
>         "op_r": 58269533,
>         "op_w": 195136530,
>         "op_rmw": 0,
>         "op_pg": 0,
>         "osdop_stat": 64974069,
>         "osdop_create": 11505623,
>         "osdop_read": 1275145,
>         "osdop_write": 34882891,
>         "osdop_writefull": 1169760,
>         "osdop_writesame": 0,
>         "osdop_append": 0,
>         "osdop_zero": 2,
>         "osdop_truncate": 4,
>         "osdop_delete": 79836916,
>         "osdop_mapext": 0,
>         "osdop_sparse_read": 0,
>         "osdop_clonerange": 0,
>         "osdop_getxattr": 56700621,
>         "osdop_setxattr": 17474636,
>         "osdop_cmpxattr": 0,
>         "osdop_rmxattr": 0,
>         "osdop_resetxattrs": 0,
>         "osdop_call": 0,
>         "osdop_watch": 0,
>         "osdop_notify": 0,
>         "osdop_src_cmpxattr": 0,
>         "osdop_pgls": 0,
>         "osdop_pgls_filter": 0,
>         "osdop_other": 683839,
>         "linger_active": 0,
>         "linger_send": 0,
>         "linger_resend": 0,
>         "linger_ping": 0,
>         "poolop_active": 0,
>         "poolop_send": 0,
>         "poolop_resend": 0,
>         "poolstat_active": 0,
>         "poolstat_send": 0,
>         "poolstat_resend": 0,
>         "statfs_active": 0,
>         "statfs_send": 0,
>         "statfs_resend": 0,
>         "command_active": 0,
>         "command_send": 0,
>         "command_resend": 0,
>         "map_epoch": 685274,
>         "map_full": 0,
>         "map_inc": 19880,
>         "osd_sessions": 96,
>         "osd_session_open": 96,
>         "osd_session_close": 0,
>         "osd_laggy": 0,
>         "omap_wr": 78737471,
>         "omap_rd": 99825478,
>         "omap_del": 4893217
>     },
>     "oft": {
>         "omap_total_objs": 4,
>         "omap_total_kv_pairs": 105334,
>         "omap_total_updates": 80286291,
>         "omap_total_removes": 72648222
>     },
>     "purge_queue": {
>         "pq_executing_ops": 0,
>         "pq_executing_ops_high_water": 1559,
>         "pq_executing": 0,
>         "pq_executing_high_water": 64,
>         "pq_executed": 20275628,
>         "pq_item_in_journal": 0
>     },
>     "throttle-msgr_dispatch_throttler-mds": {
>         "val": 0,
>         "max": 104857600,
>         "get_started": 0,
>         "get": 1328180571,
>         "get_sum": 935681276002,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 1328180571,
>         "take": 0,
>         "take_sum": 0,
>         "put": 1328180571,
>         "put_sum": 935681276002,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-objecter_bytes": {
>         "val": 21734389,
>         "max": 104857600,
>         "get_started": 0,
>         "get": 0,
>         "get_sum": 0,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 0,
>         "take": 253406063,
>         "take_sum": 81151894362586,
>         "put": 253406052,
>         "put_sum": 81151872628197,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-objecter_ops": {
>         "val": 11,
>         "max": 1024,
>         "get_started": 0,
>         "get": 0,
>         "get_sum": 0,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 0,
>         "take": 253406063,
>         "take_sum": 253406063,
>         "put": 253406052,
>         "put_sum": 253406052,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-write_buf_throttle": {
>         "val": 0,
>         "max": 3758096384,
>         "get_started": 0,
>         "get": 20275628,
>         "get_sum": 2230259884,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 20275628,
>         "take": 0,
>         "take_sum": 0,
>         "put": 1242717,
>         "put_sum": 2230259884,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-write_buf_throttle-0x56401344c0a0": {
>         "val": 0,
>         "max": 3758096384,
>         "get_started": 0,
>         "get": 148604638,
>         "get_sum": 80839025768070,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 148604638,
>         "take": 0,
>         "take_sum": 0,
>         "put": 23948617,
>         "put_sum": 80839025768070,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     }
> }
> _______________________________________________
> ceph-users mailing list -- ceph-users@ceph.io
> To unsubscribe send an email to ceph-users-le...@ceph.io
_______________________________________________
ceph-users mailing list -- ceph-users@ceph.io
To unsubscribe send an email to ceph-users-le...@ceph.io

Reply via email to