Hi Adrien, Looks like neither of:
- #48615 (add option mds_log_periods_per_segment to solve the problem of large subtreemap event) or - #48648 (adjust the capacity of a journal segment according to the size of subtreemap event) ever made it into any Ceph release. - #48822 (switch submit_mutex to fair mutex for MDLog) that emerged from the same tracker [1] was merged and backported to Quincy v17.2.6, but it doesn't seem related to the issue you're facing. Maybe try asking on the #ceph-devel Slack channel. Regards, Frédéric. [1] https://github.com/ceph/ceph/pull/44180#issuecomment-1309661152 ----- Le 23 Mai 25, à 11:12, Adrien Georget adrien.geor...@cc.in2p3.fr a écrit : > Hi, > > We have a CephFS Quincy cluster (17.2.7) used by OpenShift for PVC > provisioning (and a lot of snapshots) with the ceph-csi driver. > On the metadata pool, we observed a continuous increase in the write > throughput from 10MB/s just after a restart of the MDS to > +200MB/s after 2 weeks. > > We first thought it was related to the snapshot deletion activity (a lot > of creations and deletions every day) because we saw the same kind of > increase in the number of strays. > We tried to evaluate strays using the recursive scrub [1]; strays > decreased from 450K to 350K, but there was no impact on the metadata write throughput. > Only an MDS restart brings the write throughput back to normal. > > I saw this very familiar issue https://tracker.ceph.com/issues/53542 and > I'm wondering if this has been fixed in Quincy? > I tried to increase "mds_log_events_per_segment" and > "mds_log_max_segments" but nothing helped. > As I answered in the tracker, I also observed large 4M objects in > objecter_requests. > > Is there anything we can do to prevent this and avoid restarting the MDS every > week? If we don't do that, PVC operations stay blocked on the OpenShift side. 
> > Cheers, > Adrien > > [1] > https://docs.ceph.com/en/quincy/cephfs/scrub/#evaluate-strays-using-recursive-scrub > > The MDS perf dump if it can helps : > > { > "AsyncMessenger::Worker-0": { > "msgr_recv_messages": 452889868, > "msgr_send_messages": 210932005, > "msgr_recv_bytes": 381554341884, > "msgr_send_bytes": 31897539519042, > "msgr_created_connections": 5392, > "msgr_active_connections": 213, > "msgr_running_total_time": 121964.941943159, > "msgr_running_send_time": 85193.342708858, > "msgr_running_recv_time": 40274.249544605, > "msgr_running_fast_dispatch_time": 15899.702855460, > "msgr_send_messages_queue_lat": { > "avgcount": 210932003, > "sum": 140532.121408536, > "avgtime": 0.000666243 > }, > "msgr_handle_ack_lat": { > "avgcount": 110313532, > "sum": 120.513184629, > "avgtime": 0.000001092 > } > }, > "AsyncMessenger::Worker-1": { > "msgr_recv_messages": 393334601, > "msgr_send_messages": 170841678, > "msgr_recv_bytes": 307649876710, > "msgr_send_bytes": 23017109865662, > "msgr_created_connections": 12712, > "msgr_active_connections": 218, > "msgr_running_total_time": 93136.265793790, > "msgr_running_send_time": 63604.143090523, > "msgr_running_recv_time": 30881.136884349, > "msgr_running_fast_dispatch_time": 12763.126679902, > "msgr_send_messages_queue_lat": { > "avgcount": 170841678, > "sum": 153574.265735747, > "avgtime": 0.000898927 > }, > "msgr_handle_ack_lat": { > "avgcount": 81594240, > "sum": 65.480936528, > "avgtime": 0.000000802 > } > }, > "AsyncMessenger::Worker-2": { > "msgr_recv_messages": 481956104, > "msgr_send_messages": 301348856, > "msgr_recv_bytes": 349838769013, > "msgr_send_bytes": 26783354654792, > "msgr_created_connections": 34394, > "msgr_active_connections": 215, > "msgr_running_total_time": 108807.807447203, > "msgr_running_send_time": 74577.666991790, > "msgr_running_recv_time": 41392.667149426, > "msgr_running_fast_dispatch_time": 13971.142479134, > "msgr_send_messages_queue_lat": { > "avgcount": 301348847, > "sum": 
138655.862052108, > "avgtime": 0.000460117 > }, > "msgr_handle_ack_lat": { > "avgcount": 156375610, > "sum": 144.168097813, > "avgtime": 0.000000921 > } > }, > "cct": { > "total_workers": 1, > "unhealthy_workers": 0 > }, > "finisher-MDSRank": { > "queue_len": 0, > "complete_latency": { > "avgcount": 121564865, > "sum": 160601.142778677, > "avgtime": 0.001321114 > } > }, > "finisher-PurgeQueue": { > "queue_len": 0, > "complete_latency": { > "avgcount": 27338289, > "sum": 8785.146872773, > "avgtime": 0.000321349 > } > }, > "mds": { > "request": 210298249, > "reply": 210297956, > "reply_latency": { > "avgcount": 210297956, > "sum": 1148337.019419176, > "avgtime": 0.005460523 > }, > "slow_reply": 15, > "forward": 0, > "dir_fetch": 49843551, > "dir_commit": 65281430, > "dir_split": 6, > "dir_merge": 17, > "inodes": 5113817, > "inodes_top": 2080514, > "inodes_bottom": 2263929, > "inodes_pin_tail": 769374, > "inodes_pinned": 2141653, > "inodes_expired": 538454667, > "inodes_with_caps": 413837, > "caps": 457168, > "subtrees": 2, > "traverse": 262113637, > "traverse_hit": 245551660, > "traverse_forward": 0, > "traverse_discover": 0, > "traverse_dir_fetch": 1642726, > "traverse_remote_ino": 0, > "traverse_lock": 140893, > "load_cent": 392174, > "q": 0, > "exported": 0, > "exported_inodes": 0, > "imported": 0, > "imported_inodes": 0, > "openino_dir_fetch": 407633, > "openino_backtrace_fetch": 7011755, > "openino_peer_discover": 0, > "root_rfiles": 11618539, > "root_rbytes": 11106041675168, > "root_rsnaps": 7576, > "scrub_backtrace_fetch": 7009788, > "scrub_set_tag": 0, > "scrub_backtrace_repaired": 0, > "scrub_inotable_repaired": 0, > "scrub_dir_inodes": 1526050, > "scrub_dir_base_inodes": 3, > "scrub_dirfrag_rstats": 1526047, > "scrub_file_inodes": 5483733, > "handle_inode_file_caps": 0, > "ceph_cap_op_revoke": 5076186, > "ceph_cap_op_grant": 60192925, > "ceph_cap_op_trunc": 658164, > "ceph_cap_op_flushsnap_ack": 0, > "ceph_cap_op_flush_ack": 6, > "handle_client_caps": 
70349707, > "handle_client_caps_dirty": 21547109, > "handle_client_cap_release": 11468771, > "process_request_cap_release": 83631146 > }, > "mds_cache": { > "num_strays": 347989, > "num_strays_delayed": 0, > "num_strays_enqueuing": 0, > "strays_created": 19259955, > "strays_enqueued": 20275628, > "strays_reintegrated": 469, > "strays_migrated": 0, > "num_recovering_processing": 0, > "num_recovering_enqueued": 0, > "num_recovering_prioritized": 0, > "recovery_started": 484, > "recovery_completed": 484, > "ireq_enqueue_scrub": 2, > "ireq_exportdir": 0, > "ireq_flush": 0, > "ireq_fragmentdir": 23, > "ireq_fragstats": 0, > "ireq_inodestats": 0 > }, > "mds_log": { > "evadd": 148604638, > "evex": 148687537, > "evtrm": 148686957, > "ev": 4640, > "evexg": 0, > "evexd": 580, > "segadd": 5143165, > "segex": 5143142, > "segtrm": 5143128, > "seg": 166, > "segexg": 0, > "segexd": 14, > "expos": 2655552712972056, > "wrpos": 2655556750517207, > "rdpos": 2574717722176006, > "jlat": { > "avgcount": 23948614, > "sum": 1026045.477642289, > "avgtime": 0.042843626 > }, > "replayed": 86959 > }, > "mds_mem": { > "ino": 5091233, > "ino+": 537662754, > "ino-": 532571521, > "dir": 385511, > "dir+": 48473070, > "dir-": 48087559, > "dn": 5114534, > "dn+": 564239550, > "dn-": 559125016, > "cap": 457198, > "cap+": 296065469, > "cap-": 295608271, > "rss": 21947020, > "heap": 223516 > }, > "mds_server": { > "dispatch_client_request": 265192774, > "dispatch_server_request": 0, > "handle_client_request": 210298249, > "handle_client_session": 44456292, > "handle_peer_request": 0, > "req_create_latency": { > "avgcount": 19045215, > "sum": 243874.341572325, > "avgtime": 0.012805019 > }, > "req_getattr_latency": { > "avgcount": 13244393, > "sum": 122372.637049784, > "avgtime": 0.009239580 > }, > "req_getfilelock_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_link_latency": { > "avgcount": 108, > "sum": 0.089209875, > "avgtime": 0.000826017 > }, > 
"req_lookup_latency": { > "avgcount": 42591076, > "sum": 61153.611919024, > "avgtime": 0.001435831 > }, > "req_lookuphash_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_lookupino_latency": { > "avgcount": 6119, > "sum": 12.051986148, > "avgtime": 0.001969600 > }, > "req_lookupname_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_lookupparent_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_lookupsnap_latency": { > "avgcount": 11789, > "sum": 1.916955778, > "avgtime": 0.000162605 > }, > "req_lssnap_latency": { > "avgcount": 174999, > "sum": 59.628852495, > "avgtime": 0.000340738 > }, > "req_mkdir_latency": { > "avgcount": 386018, > "sum": 5323.027974258, > "avgtime": 0.013789584 > }, > "req_mknod_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_mksnap_latency": { > "avgcount": 11784, > "sum": 4342.774640033, > "avgtime": 0.368531452 > }, > "req_open_latency": { > "avgcount": 1719118, > "sum": 2957.961208770, > "avgtime": 0.001720627 > }, > "req_readdir_latency": { > "avgcount": 49303877, > "sum": 153658.488737674, > "avgtime": 0.003116559 > }, > "req_rename_latency": { > "avgcount": 1021332, > "sum": 4173.200914205, > "avgtime": 0.004086037 > }, > "req_renamesnap_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_rmdir_latency": { > "avgcount": 355348, > "sum": 6609.629921457, > "avgtime": 0.018600442 > }, > "req_rmsnap_latency": { > "avgcount": 11476, > "sum": 4647.171801363, > "avgtime": 0.404947002 > }, > "req_rmxattr_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_setattr_latency": { > "avgcount": 951092, > "sum": 60918.695526648, > "avgtime": 0.064051317 > }, > "req_setdirlayout_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_setfilelock_latency": { > "avgcount": 42236246, > "sum": 
87407.285106607, > "avgtime": 0.002069485 > }, > "req_setlayout_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > }, > "req_setxattr_latency": { > "avgcount": 20450454, > "sum": 351779.625311317, > "avgtime": 0.017201555 > }, > "req_symlink_latency": { > "avgcount": 2, > "sum": 0.002286490, > "avgtime": 0.001143245 > }, > "req_unlink_latency": { > "avgcount": 18777510, > "sum": 39044.878444925, > "avgtime": 0.002079342 > }, > "cap_revoke_eviction": 0, > "cap_acquisition_throttle": 0, > "req_getvxattr_latency": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "mds_sessions": { > "session_count": 545, > "session_add": 26598, > "session_remove": 26053, > "sessions_open": 545, > "sessions_stale": 0, > "total_load": 11076, > "average_load": 20, > "avg_session_uptime": 37409721, > "mdthresh_evicted": 0 > }, > "mempool": { > "bloom_filter_bytes": 10309112, > "bloom_filter_items": 10309112, > "bluestore_alloc_bytes": 0, > "bluestore_alloc_items": 0, > "bluestore_cache_data_bytes": 0, > "bluestore_cache_data_items": 0, > "bluestore_cache_onode_bytes": 0, > "bluestore_cache_onode_items": 0, > "bluestore_cache_meta_bytes": 0, > "bluestore_cache_meta_items": 0, > "bluestore_cache_other_bytes": 0, > "bluestore_cache_other_items": 0, > "bluestore_Buffer_bytes": 0, > "bluestore_Buffer_items": 0, > "bluestore_Extent_bytes": 0, > "bluestore_Extent_items": 0, > "bluestore_Blob_bytes": 0, > "bluestore_Blob_items": 0, > "bluestore_SharedBlob_bytes": 0, > "bluestore_SharedBlob_items": 0, > "bluestore_inline_bl_bytes": 0, > "bluestore_inline_bl_items": 0, > "bluestore_fsck_bytes": 0, > "bluestore_fsck_items": 0, > "bluestore_txc_bytes": 0, > "bluestore_txc_items": 0, > "bluestore_writing_deferred_bytes": 0, > "bluestore_writing_deferred_items": 0, > "bluestore_writing_bytes": 0, > "bluestore_writing_items": 0, > "bluefs_bytes": 0, > "bluefs_items": 0, > "bluefs_file_reader_bytes": 0, > "bluefs_file_reader_items": 0, > 
"bluefs_file_writer_bytes": 0, > "bluefs_file_writer_items": 0, > "buffer_anon_bytes": 284731060, > "buffer_anon_items": 5959770, > "buffer_meta_bytes": 616, > "buffer_meta_items": 7, > "osd_bytes": 0, > "osd_items": 0, > "osd_mapbl_bytes": 0, > "osd_mapbl_items": 0, > "osd_pglog_bytes": 0, > "osd_pglog_items": 0, > "osdmap_bytes": 81480, > "osdmap_items": 2396, > "osdmap_mapping_bytes": 0, > "osdmap_mapping_items": 0, > "pgmap_bytes": 0, > "pgmap_items": 0, > "mds_co_bytes": 16277298996, > "mds_co_items": 260156681, > "unittest_1_bytes": 0, > "unittest_1_items": 0, > "unittest_2_bytes": 0, > "unittest_2_items": 0 > }, > "objecter": { > "op_active": 11, > "op_laggy": 0, > "op_send": 253406063, > "op_send_bytes": 81014361089067, > "op_resend": 0, > "op_reply": 253406052, > "oplen_avg": { > "avgcount": 253406063, > "sum": 451959672 > }, > "op": 253406063, > "op_r": 58269533, > "op_w": 195136530, > "op_rmw": 0, > "op_pg": 0, > "osdop_stat": 64974069, > "osdop_create": 11505623, > "osdop_read": 1275145, > "osdop_write": 34882891, > "osdop_writefull": 1169760, > "osdop_writesame": 0, > "osdop_append": 0, > "osdop_zero": 2, > "osdop_truncate": 4, > "osdop_delete": 79836916, > "osdop_mapext": 0, > "osdop_sparse_read": 0, > "osdop_clonerange": 0, > "osdop_getxattr": 56700621, > "osdop_setxattr": 17474636, > "osdop_cmpxattr": 0, > "osdop_rmxattr": 0, > "osdop_resetxattrs": 0, > "osdop_call": 0, > "osdop_watch": 0, > "osdop_notify": 0, > "osdop_src_cmpxattr": 0, > "osdop_pgls": 0, > "osdop_pgls_filter": 0, > "osdop_other": 683839, > "linger_active": 0, > "linger_send": 0, > "linger_resend": 0, > "linger_ping": 0, > "poolop_active": 0, > "poolop_send": 0, > "poolop_resend": 0, > "poolstat_active": 0, > "poolstat_send": 0, > "poolstat_resend": 0, > "statfs_active": 0, > "statfs_send": 0, > "statfs_resend": 0, > "command_active": 0, > "command_send": 0, > "command_resend": 0, > "map_epoch": 685274, > "map_full": 0, > "map_inc": 19880, > "osd_sessions": 96, > "osd_session_open": 
96, > "osd_session_close": 0, > "osd_laggy": 0, > "omap_wr": 78737471, > "omap_rd": 99825478, > "omap_del": 4893217 > }, > "oft": { > "omap_total_objs": 4, > "omap_total_kv_pairs": 105334, > "omap_total_updates": 80286291, > "omap_total_removes": 72648222 > }, > "purge_queue": { > "pq_executing_ops": 0, > "pq_executing_ops_high_water": 1559, > "pq_executing": 0, > "pq_executing_high_water": 64, > "pq_executed": 20275628, > "pq_item_in_journal": 0 > }, > "throttle-msgr_dispatch_throttler-mds": { > "val": 0, > "max": 104857600, > "get_started": 0, > "get": 1328180571, > "get_sum": 935681276002, > "get_or_fail_fail": 0, > "get_or_fail_success": 1328180571, > "take": 0, > "take_sum": 0, > "put": 1328180571, > "put_sum": 935681276002, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-objecter_bytes": { > "val": 21734389, > "max": 104857600, > "get_started": 0, > "get": 0, > "get_sum": 0, > "get_or_fail_fail": 0, > "get_or_fail_success": 0, > "take": 253406063, > "take_sum": 81151894362586, > "put": 253406052, > "put_sum": 81151872628197, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-objecter_ops": { > "val": 11, > "max": 1024, > "get_started": 0, > "get": 0, > "get_sum": 0, > "get_or_fail_fail": 0, > "get_or_fail_success": 0, > "take": 253406063, > "take_sum": 253406063, > "put": 253406052, > "put_sum": 253406052, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-write_buf_throttle": { > "val": 0, > "max": 3758096384, > "get_started": 0, > "get": 20275628, > "get_sum": 2230259884, > "get_or_fail_fail": 0, > "get_or_fail_success": 20275628, > "take": 0, > "take_sum": 0, > "put": 1242717, > "put_sum": 2230259884, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-write_buf_throttle-0x56401344c0a0": { > "val": 0, > "max": 3758096384, > "get_started": 0, > "get": 148604638, > "get_sum": 
80839025768070, > "get_or_fail_fail": 0, > "get_or_fail_success": 148604638, > "take": 0, > "take_sum": 0, > "put": 23948617, > "put_sum": 80839025768070, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > } > } > _______________________________________________ > ceph-users mailing list -- ceph-users@ceph.io > To unsubscribe send an email to ceph-users-le...@ceph.io _______________________________________________ ceph-users mailing list -- ceph-users@ceph.io To unsubscribe send an email to ceph-users-le...@ceph.io