I am now seeing the exact same issues you are reporting. A heap release did nothing for me.
The only unusual thing I'm doing is migrating data in CephFS from one pool to another. The process looks something like the following: TARGET_DIR=/media/cephfs/labs/ TARGET_POOL="cephfs_ec_data" setfattr -n ceph.dir.layout.pool -v ${TARGET_POOL} ${TARGET_DIR} #for every file ##NEWFILE="${file}.ec" ##cp "${file}" "${NEWFILE}" ##mv "${NEWFILE}" "${file}" I am concerned that this process may not be releasing the inode of ${file} and deleting its objects from RADOS. However, I'm not sure that would have much to do with the MDS, other than it tracking an inode that is no longer accessible. [root@mds0 ~]# rpm -qa | grep ceph ceph-mgr-12.2.4-0.el7.x86_64 ceph-12.2.4-0.el7.x86_64 ceph-osd-12.2.4-0.el7.x86_64 ceph-release-1-1.el7.noarch libcephfs2-12.2.4-0.el7.x86_64 ceph-base-12.2.4-0.el7.x86_64 ceph-mds-12.2.4-0.el7.x86_64 ceph-deploy-2.0.0-0.noarch ceph-common-12.2.4-0.el7.x86_64 ceph-mon-12.2.4-0.el7.x86_64 ceph-radosgw-12.2.4-0.el7.x86_64 python-cephfs-12.2.4-0.el7.x86_64 ceph-selinux-12.2.4-0.el7.x86_64 [root@mds0 ~]# ceph daemon mds.mds0 config get mds_cache_memory_limit { "mds_cache_memory_limit": "80530636800" } [root@mds0 ~]# ceph daemon mds.mds0 perf dump { "AsyncMessenger::Worker-0": { "msgr_recv_messages": 48568037, "msgr_send_messages": 51895350, "msgr_recv_bytes": 50001752194, "msgr_send_bytes": 59667899407, "msgr_created_connections": 28522, "msgr_active_connections": 939, "msgr_running_total_time": 9158.145665485, "msgr_running_send_time": 3270.445768873, "msgr_running_recv_time": 8951.883602486, "msgr_running_fast_dispatch_time": 684.964408603 }, "AsyncMessenger::Worker-1": { "msgr_recv_messages": 81557461, "msgr_send_messages": 88149491, "msgr_recv_bytes": 59543645402, "msgr_send_bytes": 99790426210, "msgr_created_connections": 28705, "msgr_active_connections": 881, "msgr_running_total_time": 14513.332929088, "msgr_running_send_time": 5214.994372044, "msgr_running_recv_time": 13891.320681575, "msgr_running_fast_dispatch_time": 682.921363330 }, "AsyncMessenger::Worker-2": { 
"msgr_recv_messages": 104018424, "msgr_send_messages": 117265828, "msgr_recv_bytes": 70248474177, "msgr_send_bytes": 175930469394, "msgr_created_connections": 30034, "msgr_active_connections": 1043, "msgr_running_total_time": 18836.813930876, "msgr_running_send_time": 7227.884643396, "msgr_running_recv_time": 17825.385233846, "msgr_running_fast_dispatch_time": 692.710777921 }, "finisher-PurgeQueue": { "queue_len": 0, "complete_latency": { "avgcount": 22554047, "sum": 2515.425093728, "avgtime": 0.000111528 } }, "mds": { "request": 156766118, "reply": 156766111, "reply_latency": { "avgcount": 156766111, "sum": 337276.533677320, "avgtime": 0.002151463 }, "forward": 0, "dir_fetch": 6468158, "dir_commit": 539656, "dir_split": 0, "dir_merge": 0, "inode_max": 2147483647, "inodes": 35853368, "inodes_top": 23669670, "inodes_bottom": 12165298, "inodes_pin_tail": 18400, "inodes_pinned": 2039553, "inodes_expired": 142389542, "inodes_with_caps": 831824, "caps": 881384, "subtrees": 2, "traverse": 167546977, "traverse_hit": 53323050, "traverse_forward": 0, "traverse_discover": 0, "traverse_dir_fetch": 4853, "traverse_remote_ino": 0, "traverse_lock": 39597, "load_cent": 15676533928, "q": 0, "exported": 0, "exported_inodes": 0, "imported": 0, "imported_inodes": 0 }, "mds_cache": { "num_strays": 1369, "num_strays_delayed": 12, "num_strays_enqueuing": 0, "strays_created": 2667808, "strays_enqueued": 2666306, "strays_reintegrated": 246, "strays_migrated": 0, "num_recovering_processing": 0, "num_recovering_enqueued": 0, "num_recovering_prioritized": 0, "recovery_started": 524, "recovery_completed": 524, "ireq_enqueue_scrub": 0, "ireq_exportdir": 0, "ireq_flush": 0, "ireq_fragmentdir": 0, "ireq_fragstats": 0, "ireq_inodestats": 0 }, "mds_log": { "evadd": 34813343, "evex": 34809732, "evtrm": 34809732, "ev": 22489, "evexg": 0, "evexd": 728, "segadd": 47980, "segex": 47980, "segtrm": 47980, "seg": 31, "segexg": 0, "segexd": 1, "expos": 8687078876712, "wrpos": 8687143594883, "rdpos": 
8586648077163, "jlat": { "avgcount": 12732690, "sum": 371322.453160705, "avgtime": 0.029162922 }, "replayed": 18878 }, "mds_mem": { "ino": 35852761, "ino+": 174413168, "ino-": 138560407, "dir": 1288886, "dir+": 6398671, "dir-": 5109785, "dn": 35853455, "dn+": 181545805, "dn-": 145692350, "cap": 881384, "cap+": 225924791, "cap-": 225043407, "rss": 124952096, "heap": 313964, "buf": 0 }, "mds_server": { "dispatch_client_request": 169327566, "dispatch_server_request": 0, "handle_client_request": 156766118, "handle_client_session": 1446020, "handle_slave_request": 0, "req_create": 2782862, "req_getattr": 7529707, "req_getfilelock": 5, "req_link": 298, "req_lookup": 123401139, "req_lookuphash": 0, "req_lookupino": 0, "req_lookupname": 89226, "req_lookupparent": 0, "req_lookupsnap": 0, "req_lssnap": 0, "req_mkdir": 42729, "req_mknod": 7, "req_mksnap": 0, "req_open": 5781795, "req_readdir": 8823398, "req_rename": 2066887, "req_renamesnap": 0, "req_rmdir": 32196, "req_rmsnap": 0, "req_rmxattr": 364883, "req_setattr": 161338, "req_setdirlayout": 0, "req_setfilelock": 5038771, "req_setlayout": 0, "req_setxattr": 2657833, "req_symlink": 3617, "req_unlink": 772280 }, "mds_sessions": { "session_count": 20, "session_add": 47, "session_remove": 27 }, "objecter": { "op_active": 2, "op_laggy": 0, "op_send": 47982687, "op_send_bytes": 105127128306, "op_resend": 13, "op_reply": 47982672, "op": 47982674, "op_r": 6735267, "op_w": 41247407, "op_rmw": 0, "op_pg": 0, "osdop_stat": 537779, "osdop_create": 4155246, "osdop_read": 173832, "osdop_write": 12906485, "osdop_writefull": 199372, "osdop_writesame": 0, "osdop_append": 0, "osdop_zero": 2, "osdop_truncate": 0, "osdop_delete": 22440537, "osdop_mapext": 0, "osdop_sparse_read": 0, "osdop_clonerange": 0, "osdop_getxattr": 6531688, "osdop_setxattr": 6577232, "osdop_cmpxattr": 0, "osdop_rmxattr": 0, "osdop_resetxattrs": 0, "osdop_tmap_up": 0, "osdop_tmap_put": 0, "osdop_tmap_get": 0, "osdop_call": 0, "osdop_watch": 0, "osdop_notify": 0, 
"osdop_src_cmpxattr": 0, "osdop_pgls": 0, "osdop_pgls_filter": 0, "osdop_other": 999516, "linger_active": 0, "linger_send": 0, "linger_resend": 0, "linger_ping": 0, "poolop_active": 0, "poolop_send": 0, "poolop_resend": 0, "poolstat_active": 0, "poolstat_send": 0, "poolstat_resend": 0, "statfs_active": 0, "statfs_send": 0, "statfs_resend": 0, "command_active": 0, "command_send": 0, "command_resend": 0, "map_epoch": 450530, "map_full": 0, "map_inc": 27226, "osd_sessions": 374, "osd_session_open": 87167, "osd_session_close": 86793, "osd_laggy": 0, "omap_wr": 1062388, "omap_rd": 12936360, "omap_del": 313476 }, "purge_queue": { "pq_executing_ops": 0, "pq_executing": 0, "pq_executed": 2666323 }, "throttle-msgr_dispatch_throttler-mds": { "val": 0, "max": 104857600, "get_started": 0, "get": 234143922, "get_sum": 162467221545, "get_or_fail_fail": 0, "get_or_fail_success": 234143922, "take": 0, "take_sum": 0, "put": 234143922, "put_sum": 162467221545, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-objecter_bytes": { "val": 18070, "max": 104857600, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 47982674, "take_sum": 105435633841, "put": 24512906, "put_sum": 105435615771, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-objecter_ops": { "val": 2, "max": 1024, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 47982674, "take_sum": 47982674, "put": 47982672, "put_sum": 47982672, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-write_buf_throttle": { "val": 0, "max": 3758096384, "get_started": 0, "get": 2666306, "get_sum": 247966490, "get_or_fail_fail": 0, "get_or_fail_success": 2666306, "take": 0, "take_sum": 0, "put": 173754, "put_sum": 247966490, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-write_buf_throttle-0x5601defe43a0": { "val": 0, 
"max": 3758096384, "get_started": 0, "get": 34813343, "get_sum": 100495515382, "get_or_fail_fail": 0, "get_or_fail_success": 34813343, "take": 0, "take_sum": 0, "put": 12732692, "put_sum": 100495515382, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } } } On Thu, Apr 19, 2018 at 12:49 AM, Alexandre DERUMIER <aderum...@odiso.com> wrote: > >>I don't find any clue. Next time it happens, could you please try > >>"ceph tell mds.xxx heap release" > > don't seem to work > > > > > USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND > ceph 1211357 13.1 18.6 12676452 12286508 ? Ssl avril05 2567:28 > /usr/bin/ceph-mds -f --cluster ceph --id ceph4-2.odiso.net --setuser ceph > --setgroup ceph > > > # ceph tell mds.ceph4-2.odiso.net heap release > mds.ceph4-2.odiso.net releasing free RAM back to system. > > > USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND > ceph 1211357 13.1 18.6 12676452 12286508 ? Ssl avril05 2567:36 > /usr/bin/ceph-mds -f --cluster ceph --id ceph4-2.odiso.net --setuser ceph > --setgroup ceph > > > > I'll try to monitor memory to see when exactly it's growing, it's seem to > grow time to time, but not continously. 
> > > here the stats after heap release: > > > # ceph daemon mds.ceph4-2.odiso.net cache status > { > "pool": { > "items": 15123841, > "bytes": 5167594872 > } > } > > # ceph daemon mds.ceph4-2.odiso.net perf dump > { > "AsyncMessenger::Worker-0": { > "msgr_recv_messages": 327887491, > "msgr_send_messages": 329555332, > "msgr_recv_bytes": 1530854660227, > "msgr_send_bytes": 1174619728658, > "msgr_created_connections": 146, > "msgr_active_connections": 145, > "msgr_running_total_time": 16828.645933488, > "msgr_running_send_time": 6368.459700090, > "msgr_running_recv_time": 14406.742804542, > "msgr_running_fast_dispatch_time": 1378.745242725 > }, > "AsyncMessenger::Worker-1": { > "msgr_recv_messages": 177837885, > "msgr_send_messages": 170796581, > "msgr_recv_bytes": 1426860751988, > "msgr_send_bytes": 166774861696, > "msgr_created_connections": 145, > "msgr_active_connections": 144, > "msgr_running_total_time": 9298.266921246, > "msgr_running_send_time": 3129.373504230, > "msgr_running_recv_time": 7895.052894375, > "msgr_running_fast_dispatch_time": 1322.886415635 > }, > "AsyncMessenger::Worker-2": { > "msgr_recv_messages": 325631551, > "msgr_send_messages": 314206515, > "msgr_recv_bytes": 1403013169198, > "msgr_send_bytes": 308787752784, > "msgr_created_connections": 138, > "msgr_active_connections": 133, > "msgr_running_total_time": 15012.588633448, > "msgr_running_send_time": 5510.205039583, > "msgr_running_recv_time": 14002.408569714, > "msgr_running_fast_dispatch_time": 1260.624028645 > }, > "finisher-PurgeQueue": { > "queue_len": 0, > "complete_latency": { > "avgcount": 731407, > "sum": 8003.599511421, > "avgtime": 0.010942743 > } > }, > "mds": { > "request": 608911096, > "reply": 608910899, > "reply_latency": { > "avgcount": 608910899, > "sum": 1641293.658633345, > "avgtime": 0.002695457 > }, > "forward": 0, > "dir_fetch": 32598533, > "dir_commit": 1230989, > "dir_split": 9535, > "dir_merge": 9523, > "inode_max": 2147483647, > "inodes": 2087784, > 
"inodes_top": 190211, > "inodes_bottom": 154977, > "inodes_pin_tail": 1742596, > "inodes_pinned": 1816054, > "inodes_expired": 5550735649, > "inodes_with_caps": 1814707, > "caps": 3170853, > "subtrees": 2, > "traverse": 645302921, > "traverse_hit": 390729564, > "traverse_forward": 0, > "traverse_discover": 0, > "traverse_dir_fetch": 26620216, > "traverse_remote_ino": 1968, > "traverse_lock": 573, > "load_cent": 60931206319, > "q": 18, > "exported": 0, > "exported_inodes": 0, > "imported": 0, > "imported_inodes": 0 > }, > "mds_cache": { > "num_strays": 1885, > "num_strays_delayed": 0, > "num_strays_enqueuing": 0, > "strays_created": 621082, > "strays_enqueued": 619458, > "strays_reintegrated": 72, > "strays_migrated": 0, > "num_recovering_processing": 0, > "num_recovering_enqueued": 0, > "num_recovering_prioritized": 0, > "recovery_started": 1, > "recovery_completed": 1, > "ireq_enqueue_scrub": 0, > "ireq_exportdir": 0, > "ireq_flush": 0, > "ireq_fragmentdir": 19058, > "ireq_fragstats": 0, > "ireq_inodestats": 0 > }, > "mds_log": { > "evadd": 108025412, > "evex": 108027485, > "evtrm": 108026461, > "ev": 25484, > "evexg": 0, > "evexd": 1024, > "segadd": 131605, > "segex": 131609, > "segtrm": 131608, > "seg": 31, > "segexg": 0, > "segexd": 1, > "expos": 5222483101644, > "wrpos": 5222526671740, > "rdpos": 5036811490502, > "jlat": { > "avgcount": 19597987, > "sum": 41720.071108694, > "avgtime": 0.002128793 > }, > "replayed": 26533 > }, > "mds_mem": { > "ino": 2087350, > "ino+": 5533126211, > "ino-": 5531038861, > "dir": 321262, > "dir+": 5672027, > "dir-": 5350765, > "dn": 2087920, > "dn+": 5553775487, > "dn-": 5551687567, > "cap": 3170853, > "cap+": 646307641, > "cap-": 643136788, > "rss": 12286508, > "heap": 313916, > "buf": 0 > }, > "mds_server": { > "dispatch_client_request": 651833084, > "dispatch_server_request": 0, > "handle_client_request": 608911096, > "handle_client_session": 5163844, > "handle_slave_request": 0, > "req_create": 754987, > "req_getattr": 
5199299, > "req_getfilelock": 0, > "req_link": 170, > "req_lookup": 476304151, > "req_lookuphash": 0, > "req_lookupino": 0, > "req_lookupname": 16868, > "req_lookupparent": 0, > "req_lookupsnap": 0, > "req_lssnap": 0, > "req_mkdir": 12204, > "req_mknod": 0, > "req_mksnap": 0, > "req_open": 106156167, > "req_readdir": 20293077, > "req_rename": 28443, > "req_renamesnap": 0, > "req_rmdir": 17522, > "req_rmsnap": 0, > "req_rmxattr": 0, > "req_setattr": 34735, > "req_setdirlayout": 0, > "req_setfilelock": 238574, > "req_setlayout": 0, > "req_setxattr": 2, > "req_symlink": 122, > "req_unlink": 609565 > }, > "mds_sessions": { > "session_count": 307, > "session_add": 398, > "session_remove": 91 > }, > "objecter": { > "op_active": 0, > "op_laggy": 0, > "op_send": 60152761, > "op_send_bytes": 189780235877, > "op_resend": 4, > "op_reply": 60152757, > "op": 60152757, > "op_r": 32760612, > "op_w": 27392145, > "op_rmw": 0, > "op_pg": 0, > "osdop_stat": 1131412, > "osdop_create": 791110, > "osdop_read": 27868, > "osdop_write": 19625820, > "osdop_writefull": 81003, > "osdop_writesame": 0, > "osdop_append": 0, > "osdop_zero": 2, > "osdop_truncate": 4161, > "osdop_delete": 931372, > "osdop_mapext": 0, > "osdop_sparse_read": 0, > "osdop_clonerange": 0, > "osdop_getxattr": 9914736, > "osdop_setxattr": 1582220, > "osdop_cmpxattr": 0, > "osdop_rmxattr": 0, > "osdop_resetxattrs": 0, > "osdop_tmap_up": 0, > "osdop_tmap_put": 0, > "osdop_tmap_get": 0, > "osdop_call": 0, > "osdop_watch": 0, > "osdop_notify": 0, > "osdop_src_cmpxattr": 0, > "osdop_pgls": 0, > "osdop_pgls_filter": 0, > "osdop_other": 4645746, > "linger_active": 0, > "linger_send": 0, > "linger_resend": 0, > "linger_ping": 0, > "poolop_active": 0, > "poolop_send": 0, > "poolop_resend": 0, > "poolstat_active": 0, > "poolstat_send": 0, > "poolstat_resend": 0, > "statfs_active": 0, > "statfs_send": 0, > "statfs_resend": 0, > "command_active": 0, > "command_send": 0, > "command_resend": 0, > "map_epoch": 3121, > "map_full": 0, > 
"map_inc": 76, > "osd_sessions": 18, > "osd_session_open": 20, > "osd_session_close": 2, > "osd_laggy": 0, > "omap_wr": 2227270, > "omap_rd": 65197068, > "omap_del": 48058 > }, > "purge_queue": { > "pq_executing_ops": 0, > "pq_executing": 0, > "pq_executed": 619458 > }, > "throttle-msgr_dispatch_throttler-mds": { > "val": 0, > "max": 104857600, > "get_started": 0, > "get": 831356927, > "get_sum": 4299208168815, > "get_or_fail_fail": 0, > "get_or_fail_success": 831356927, > "take": 0, > "take_sum": 0, > "put": 831356927, > "put_sum": 4299208168815, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-objecter_bytes": { > "val": 0, > "max": 104857600, > "get_started": 0, > "get": 0, > "get_sum": 0, > "get_or_fail_fail": 0, > "get_or_fail_success": 0, > "take": 60152757, > "take_sum": 189890861007, > "put": 54571445, > "put_sum": 189890861007, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-objecter_ops": { > "val": 0, > "max": 1024, > "get_started": 0, > "get": 0, > "get_sum": 0, > "get_or_fail_fail": 0, > "get_or_fail_success": 0, > "take": 60152757, > "take_sum": 60152757, > "put": 60152757, > "put_sum": 60152757, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-write_buf_throttle": { > "val": 0, > "max": 3758096384, > "get_started": 0, > "get": 619458, > "get_sum": 57609986, > "get_or_fail_fail": 0, > "get_or_fail_success": 619458, > "take": 0, > "take_sum": 0, > "put": 27833, > "put_sum": 57609986, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-write_buf_throttle-0x559471d00140": { > "val": 105525, > "max": 3758096384, > "get_started": 0, > "get": 108025412, > "get_sum": 185715179864, > "get_or_fail_fail": 0, > "get_or_fail_success": 108025412, > "take": 0, > "take_sum": 0, > "put": 19597987, > "put_sum": 185715074339, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > 
"avgtime": 0.000000000 > } > } > } > > ----- Mail original ----- > De: "Zheng Yan" <uker...@gmail.com> > À: "aderumier" <aderum...@odiso.com> > Cc: "Patrick Donnelly" <pdonn...@redhat.com>, "ceph-users" < > ceph-users@lists.ceph.com> > Envoyé: Mardi 17 Avril 2018 05:20:18 > Objet: Re: [ceph-users] ceph mds memory usage 20GB : is it normal ? > > On Sat, Apr 14, 2018 at 9:23 PM, Alexandre DERUMIER <aderum...@odiso.com> > wrote: > > Hi, > > > > Still leaking again after update to 12.2.4, around 17G after 9 days > > > > > > > > > > USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND > > > > ceph 629903 50.7 25.9 17473680 17082432 ? Ssl avril05 6498:21 > /usr/bin/ceph-mds -f --cluster ceph --id ceph4-1.odiso.net --setuser ceph > --setgroup ceph > > > > > > > > > > > > ~# ceph daemon mds.ceph4-1.odiso.net cache status > > { > > "pool": { > > "items": 16019302, > > "bytes": 5100941968 > > } > > } > > > > > > > > > > > > # ceph daemon mds.ceph4-1.odiso.net perf dump > > { > > "AsyncMessenger::Worker-0": { > > "msgr_recv_messages": 648541059, > > "msgr_send_messages": 666102301, > > "msgr_recv_bytes": 4943336751206, > > "msgr_send_bytes": 868468165048, > > "msgr_created_connections": 167, > > "msgr_active_connections": 166, > > "msgr_running_total_time": 33884.943400671, > > "msgr_running_send_time": 12229.226645264, > > "msgr_running_recv_time": 26234.680757843, > > "msgr_running_fast_dispatch_time": 4650.248980986 > > }, > > "AsyncMessenger::Worker-1": { > > "msgr_recv_messages": 732301444, > > "msgr_send_messages": 750526966, > > "msgr_recv_bytes": 4248782228635, > > "msgr_send_bytes": 2379403291660, > > "msgr_created_connections": 172, > > "msgr_active_connections": 171, > > "msgr_running_total_time": 38490.093448635, > > "msgr_running_send_time": 14692.222019414, > > "msgr_running_recv_time": 31000.304091618, > > "msgr_running_fast_dispatch_time": 3945.573521893 > > }, > > "AsyncMessenger::Worker-2": { > > "msgr_recv_messages": 503228767, > > "msgr_send_messages": 
485729577, > > "msgr_recv_bytes": 3644656184942, > > "msgr_send_bytes": 526380645708, > > "msgr_created_connections": 156, > > "msgr_active_connections": 156, > > "msgr_running_total_time": 26566.051442840, > > "msgr_running_send_time": 9335.249687474, > > "msgr_running_recv_time": 22643.927960456, > > "msgr_running_fast_dispatch_time": 3426.566334706 > > }, > > "finisher-PurgeQueue": { > > "queue_len": 0, > > "complete_latency": { > > "avgcount": 2077128, > > "sum": 10029.468276512, > > "avgtime": 0.004828526 > > } > > }, > > "mds": { > > "request": 1320419754, > > "reply": 1320418963, > > "reply_latency": { > > "avgcount": 1320418963, > > "sum": 3567340.917522550, > > "avgtime": 0.002701673 > > }, > > "forward": 0, > > "dir_fetch": 95955541, > > "dir_commit": 5380286, > > "dir_split": 29080, > > "dir_merge": 28453, > > "inode_max": 2147483647, > > "inodes": 2049324, > > "inodes_top": 55759, > > "inodes_bottom": 118910, > > "inodes_pin_tail": 1874655, > > "inodes_pinned": 1969667, > > "inodes_expired": 14225864524, > > "inodes_with_caps": 1969030, > > "caps": 3010600, > > "subtrees": 2, > > "traverse": 1433042396, > > "traverse_hit": 855810795, > > "traverse_forward": 0, > > "traverse_discover": 0, > > "traverse_dir_fetch": 75553963, > > "traverse_remote_ino": 5462, > > "traverse_lock": 217, > > "load_cent": 132079451933, > > "q": 41, > > "exported": 0, > > "exported_inodes": 0, > > "imported": 0, > > "imported_inodes": 0 > > }, > > "mds_cache": { > > "num_strays": 150, > > "num_strays_delayed": 0, > > "num_strays_enqueuing": 0, > > "strays_created": 2317004, > > "strays_enqueued": 2316671, > > "strays_reintegrated": 288, > > "strays_migrated": 0, > > "num_recovering_processing": 0, > > "num_recovering_enqueued": 0, > > "num_recovering_prioritized": 0, > > "recovery_started": 0, > > "recovery_completed": 0, > > "ireq_enqueue_scrub": 0, > > "ireq_exportdir": 0, > > "ireq_flush": 0, > > "ireq_fragmentdir": 57533, > > "ireq_fragstats": 0, > > "ireq_inodestats": 0 > > 
}, > > "mds_log": { > > "evadd": 293928039, > > "evex": 293928281, > > "evtrm": 293926233, > > "ev": 26595, > > "evexg": 0, > > "evexd": 2048, > > "segadd": 365381, > > "segex": 365382, > > "segtrm": 365380, > > "seg": 32, > > "segexg": 0, > > "segexd": 2, > > "expos": 4997676796422, > > "wrpos": 4997732797135, > > "rdpos": 4232612352311, > > "jlat": { > > "avgcount": 62629276, > > "sum": 260619.838247062, > > "avgtime": 0.004161310 > > }, > > "replayed": 24789 > > }, > > "mds_mem": { > > "ino": 2048405, > > "ino+": 14160488289, > > "ino-": 14158439884, > > "dir": 377882, > > "dir+": 15421679, > > "dir-": 15043797, > > "dn": 2049614, > > "dn+": 14231703198, > > "dn-": 14229653584, > > "cap": 3010600, > > "cap+": 1555206662, > > "cap-": 1552196062, > > "rss": 17082432, > > "heap": 313916, > > "buf": 0 > > }, > > "mds_server": { > > "dispatch_client_request": 1437033326, > > "dispatch_server_request": 0, > > "handle_client_request": 1320419754, > > "handle_client_session": 11542297, > > "handle_slave_request": 0, > > "req_create": 18618128, > > "req_getattr": 11195570, > > "req_getfilelock": 0, > > "req_link": 411, > > "req_lookup": 1005844421, > > "req_lookuphash": 0, > > "req_lookupino": 0, > > "req_lookupname": 37344, > > "req_lookupparent": 0, > > "req_lookupsnap": 0, > > "req_lssnap": 0, > > "req_mkdir": 691747, > > "req_mknod": 18, > > "req_mksnap": 0, > > "req_open": 230213054, > > "req_readdir": 50618109, > > "req_rename": 17377032, > > "req_renamesnap": 0, > > "req_rmdir": 463707, > > "req_rmsnap": 0, > > "req_rmxattr": 0, > > "req_setattr": 1963949, > > "req_setdirlayout": 0, > > "req_setfilelock": 210187, > > "req_setlayout": 0, > > "req_setxattr": 8, > > "req_symlink": 1971, > > "req_unlink": 1801435 > > }, > > "mds_sessions": { > > "session_count": 305, > > "session_add": 473, > > "session_remove": 168 > > }, > > "objecter": { > > "op_active": 0, > > "op_laggy": 0, > > "op_send": 197270397, > > "op_send_bytes": 796275884964, > > "op_resend": 7, > > 
"op_reply": 197270390, > > "op": 197270390, > > "op_r": 96075672, > > "op_w": 101194718, > > "op_rmw": 0, > > "op_pg": 0, > > "osdop_stat": 4428036, > > "osdop_create": 19400797, > > "osdop_read": 31288, > > "osdop_write": 62709547, > > "osdop_writefull": 165583, > > "osdop_writesame": 0, > > "osdop_append": 0, > > "osdop_zero": 2, > > "osdop_truncate": 13280, > > "osdop_delete": 3185444, > > "osdop_mapext": 0, > > "osdop_sparse_read": 0, > > "osdop_clonerange": 0, > > "osdop_getxattr": 27007173, > > "osdop_setxattr": 38801594, > > "osdop_cmpxattr": 0, > > "osdop_rmxattr": 0, > > "osdop_resetxattrs": 0, > > "osdop_tmap_up": 0, > > "osdop_tmap_put": 0, > > "osdop_tmap_get": 0, > > "osdop_call": 0, > > "osdop_watch": 0, > > "osdop_notify": 0, > > "osdop_src_cmpxattr": 0, > > "osdop_pgls": 0, > > "osdop_pgls_filter": 0, > > "osdop_other": 10143158, > > "linger_active": 0, > > "linger_send": 0, > > "linger_resend": 0, > > "linger_ping": 0, > > "poolop_active": 0, > > "poolop_send": 0, > > "poolop_resend": 0, > > "poolstat_active": 0, > > "poolstat_send": 0, > > "poolstat_resend": 0, > > "statfs_active": 0, > > "statfs_send": 0, > > "statfs_resend": 0, > > "command_active": 0, > > "command_send": 0, > > "command_resend": 0, > > "map_epoch": 3044, > > "map_full": 0, > > "map_inc": 160, > > "osd_sessions": 18, > > "osd_session_open": 20, > > "osd_session_close": 2, > > "osd_laggy": 0, > > "omap_wr": 9743114, > > "omap_rd": 191911089, > > "omap_del": 684272 > > }, > > "purge_queue": { > > "pq_executing_ops": 0, > > "pq_executing": 0, > > "pq_executed": 2316671 > > }, > > "throttle-msgr_dispatch_throttler-mds": { > > "val": 0, > > "max": 104857600, > > "get_started": 0, > > "get": 1884071270, > > "get_sum": 12697353890803, > > "get_or_fail_fail": 0, > > "get_or_fail_success": 1884071270, > > "take": 0, > > "take_sum": 0, > > "put": 1884071270, > > "put_sum": 12697353890803, > > "wait": { > > "avgcount": 0, > > "sum": 0.000000000, > > "avgtime": 0.000000000 > > } > > }, > > 
"throttle-objecter_bytes": { > > "val": 0, > > "max": 104857600, > > "get_started": 0, > > "get": 0, > > "get_sum": 0, > > "get_or_fail_fail": 0, > > "get_or_fail_success": 0, > > "take": 197270390, > > "take_sum": 796529593788, > > "put": 183928495, > > "put_sum": 796529593788, > > "wait": { > > "avgcount": 0, > > "sum": 0.000000000, > > "avgtime": 0.000000000 > > } > > }, > > "throttle-objecter_ops": { > > "val": 0, > > "max": 1024, > > "get_started": 0, > > "get": 0, > > "get_sum": 0, > > "get_or_fail_fail": 0, > > "get_or_fail_success": 0, > > "take": 197270390, > > "take_sum": 197270390, > > "put": 197270390, > > "put_sum": 197270390, > > "wait": { > > "avgcount": 0, > > "sum": 0.000000000, > > "avgtime": 0.000000000 > > } > > }, > > "throttle-write_buf_throttle": { > > "val": 0, > > "max": 3758096384, > > "get_started": 0, > > "get": 2316671, > > "get_sum": 215451035, > > "get_or_fail_fail": 0, > > "get_or_fail_success": 2316671, > > "take": 0, > > "take_sum": 0, > > "put": 31223, > > "put_sum": 215451035, > > "wait": { > > "avgcount": 0, > > "sum": 0.000000000, > > "avgtime": 0.000000000 > > } > > }, > > "throttle-write_buf_throttle-0x563c33bea220": { > > "val": 29763, > > "max": 3758096384, > > "get_started": 0, > > "get": 293928039, > > "get_sum": 765120443785, > > "get_or_fail_fail": 0, > > "get_or_fail_success": 293928039, > > "take": 0, > > "take_sum": 0, > > "put": 62629276, > > "put_sum": 765120414022, > > "wait": { > > "avgcount": 0, > > "sum": 0.000000000, > > "avgtime": 0.000000000 > > } > > } > > } > > > > I don't find any clue. 
Next time it happens, could you please try > "ceph tell mds.xxx heap release" > > > > > > > # ceph status > > cluster: > > id: e22b8e83-3036-4fe5-8fd5-5ce9d539beca > > health: HEALTH_OK > > > > services: > > mon: 3 daemons, quorum ceph4-1,ceph4-2,ceph4-3 > > mgr: ceph4-2.odiso.net(active), standbys: ceph4-3.odiso.net, > ceph4-1.odiso.net > > mds: cephfs4-1/1/1 up {0=ceph4-1.odiso.net=up:active}, 2 up:standby > > osd: 18 osds: 18 up, 18 in > > > > data: > > pools: 11 pools, 1992 pgs > > objects: 72258k objects, 5918 GB > > usage: 20088 GB used, 6737 GB / 26825 GB avail > > pgs: 1992 active+clean > > > > io: > > client: 3099 kB/s rd, 6412 kB/s wr, 108 op/s rd, 481 op/s wr > > > > > > ----- Mail original ----- > > De: "Patrick Donnelly" <pdonn...@redhat.com> > > À: "aderumier" <aderum...@odiso.com> > > Cc: "ceph-users" <ceph-users@lists.ceph.com> > > Envoyé: Mardi 27 Mars 2018 20:35:08 > > Objet: Re: [ceph-users] ceph mds memory usage 20GB : is it normal ? > > > > Hello Alexandre, > > > > On Thu, Mar 22, 2018 at 2:29 AM, Alexandre DERUMIER <aderum...@odiso.com> > wrote: > >> Hi, > >> > >> I'm running cephfs since 2 months now, > >> > >> and my active msd memory usage is around 20G now (still growing). > >> > >> ceph 1521539 10.8 31.2 20929836 20534868 ? Ssl janv.26 8573:34 > /usr/bin/ceph-mds -f --cluster ceph --id 2 --setuser ceph --setgroup ceph > >> USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND > >> > >> > >> this is on luminous 12.2.2 > >> > >> only tuning done is: > >> > >> mds_cache_memory_limit = 5368709120 > >> > >> > >> (5GB). I known it's a soft limit, but 20G seem quite huge vs 5GB .... > >> > >> > >> Is it normal ? > > > > No, that's definitely not normal! 
> > > > > >> # ceph daemon mds.2 perf dump mds > >> { > >> "mds": { > >> "request": 1444009197, > >> "reply": 1443999870, > >> "reply_latency": { > >> "avgcount": 1443999870, > >> "sum": 1657849.656122933, > >> "avgtime": 0.001148095 > >> }, > >> "forward": 0, > >> "dir_fetch": 51740910, > >> "dir_commit": 9069568, > >> "dir_split": 64367, > >> "dir_merge": 58016, > >> "inode_max": 2147483647, > >> "inodes": 2042975, > >> "inodes_top": 152783, > >> "inodes_bottom": 138781, > >> "inodes_pin_tail": 1751411, > >> "inodes_pinned": 1824714, > >> "inodes_expired": 7258145573, > >> "inodes_with_caps": 1812018, > >> "caps": 2538233, > >> "subtrees": 2, > >> "traverse": 1591668547, > >> "traverse_hit": 1259482170, > >> "traverse_forward": 0, > >> "traverse_discover": 0, > >> "traverse_dir_fetch": 30827836, > >> "traverse_remote_ino": 7510, > >> "traverse_lock": 86236, > >> "load_cent": 144401980319, > >> "q": 49, > >> "exported": 0, > >> "exported_inodes": 0, > >> "imported": 0, > >> "imported_inodes": 0 > >> } > >> } > > > > Can you also share `ceph daemon mds.2 cache status`, the full `ceph > > daemon mds.2 perf dump`, and `ceph status`? > > > > Note [1] will be in 12.2.5 and may help with your issue. > > > > [1] https://github.com/ceph/ceph/pull/20527 > > > > -- > > Patrick Donnelly > > > > _______________________________________________ > > ceph-users mailing list > > ceph-users@lists.ceph.com > > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com > > _______________________________________________ > ceph-users mailing list > ceph-users@lists.ceph.com > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com >
_______________________________________________ ceph-users mailing list ceph-users@lists.ceph.com http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com