Implement collection of RPC errors and their export to the userspace FUSE daemon via relayfs. Errors are grouped by IP address (the port is ignored) and exported as an array.
Also move pcs_net* functions to a separate file. Affects: #VSTOR-70161 https://pmc.acronis.work/browse/VSTOR-70161 Signed-off-by: Yuriy Vasilev <yuriy.vasi...@virtuozzo.com> --- fs/fuse/Makefile | 3 +- fs/fuse/kio/pcs/fuse_ktrace.h | 1 + fs/fuse/kio/pcs/fuse_prometheus.h | 19 ++++ fs/fuse/kio/pcs/fuse_prometheus_prot.h | 15 ++++ fs/fuse/kio/pcs/pcs_cs.c | 33 +------ fs/fuse/kio/pcs/pcs_fuse_kdirect.c | 120 +++++++++++++++++++++++-- fs/fuse/kio/pcs/pcs_net_addr.c | 93 +++++++++++++++++++ fs/fuse/kio/pcs/pcs_net_addr.h | 16 ++++ fs/fuse/kio/pcs/pcs_rpc.c | 28 ++++-- fs/fuse/kio/pcs/pcs_sock_conn.c | 25 ------ fs/fuse/kio/pcs/pcs_sock_conn.h | 1 - 11 files changed, 280 insertions(+), 74 deletions(-) create mode 100644 fs/fuse/kio/pcs/pcs_net_addr.c create mode 100644 fs/fuse/kio/pcs/pcs_net_addr.h diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index cc0d0c1d63b3..37ef32ffa13d 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -31,6 +31,7 @@ fuse_kio_pcs-objs := kio/pcs/pcs_fuse_kdirect.o \ kio/pcs/pcs_auth.o \ kio/pcs/pcs_rdma_io.o \ kio/pcs/pcs_rdma_rw.o \ - kio/pcs/pcs_rdma_conn.o + kio/pcs/pcs_rdma_conn.o \ + kio/pcs/pcs_net_addr.o virtiofs-y := virtio_fs.o diff --git a/fs/fuse/kio/pcs/fuse_ktrace.h b/fs/fuse/kio/pcs/fuse_ktrace.h index aef368fcacef..bfee4e831a70 100644 --- a/fs/fuse/kio/pcs/fuse_ktrace.h +++ b/fs/fuse/kio/pcs/fuse_ktrace.h @@ -29,6 +29,7 @@ struct fuse_ktrace struct dentry *prometheus_dentry; struct kfuse_metrics __percpu *prometheus_metrics; u8 * __percpu buf; + struct fuse_error_metrics *error_metrics; }; static inline void * fuse_trace_prepare(struct fuse_ktrace * tr, int type, int len) diff --git a/fs/fuse/kio/pcs/fuse_prometheus.h b/fs/fuse/kio/pcs/fuse_prometheus.h index 3f1b31c290d6..01c700bcc343 100644 --- a/fs/fuse/kio/pcs/fuse_prometheus.h +++ b/fs/fuse/kio/pcs/fuse_prometheus.h @@ -15,4 +15,23 @@ struct fuse_prometheus_data struct kfuse_histogram __percpu *histo; }; +struct fuse_rpc_error { + PCS_NET_ADDR_T addr; + u64 
err[RPC_ERROR_MAX]; +}; + +struct fuse_rpc_error_metric { + struct fuse_rpc_error m; + struct list_head list; +}; + +struct fuse_error_metrics { + spinlock_t lock; + u64 rpc_size; + struct list_head fuse_rpc_error_metric_list; +}; + +void fuse_rpc_error_account(struct fuse_error_metrics *metrics, + PCS_NET_ADDR_T const *addr, int err, u64 val); + #endif /* __FUSE_PROMETHEUS_H__ */ diff --git a/fs/fuse/kio/pcs/fuse_prometheus_prot.h b/fs/fuse/kio/pcs/fuse_prometheus_prot.h index e7b6f8a52d26..d760d6fdb7d4 100644 --- a/fs/fuse/kio/pcs/fuse_prometheus_prot.h +++ b/fs/fuse/kio/pcs/fuse_prometheus_prot.h @@ -39,6 +39,17 @@ struct kfuse_counter { u64 val_total; }; +#define RPC_ERROR_ABORT 0 +#define RPC_ERROR_TOUT 1 +#define RPC_ERROR_MAX 2 + +#define MAX_RPC_ADDR_LEN 46 + +struct kfuse_rpc_error { + char address[MAX_RPC_ADDR_LEN]; + u64 error[RPC_ERROR_MAX]; +}; + struct kfuse_metrics { /* Histograms are compatible with old version of proto * between userspace and kio where the counters were skipped. 
@@ -51,6 +62,10 @@ struct kfuse_metrics { u64 stucked_reqs_cnt_8s; u64 stucked_reqs_cnt_30s; u64 stucked_reqs_cnt_120s; + + /* All rpc errors are cleaned after full reading */ + u32 rpc_errors_size; + struct kfuse_rpc_error rpc_errors[]; }; #endif /* __FUSE_PROMETHEUS_PROT__ */ diff --git a/fs/fuse/kio/pcs/pcs_cs.c b/fs/fuse/kio/pcs/pcs_cs.c index 9d81fe8bf22c..2e7c37113178 100644 --- a/fs/fuse/kio/pcs/pcs_cs.c +++ b/fs/fuse/kio/pcs/pcs_cs.c @@ -24,6 +24,7 @@ #include "pcs_ioctl.h" #include "log.h" #include "fuse_ktrace.h" +#include "pcs_net_addr.h" /* Lock order: cs->lock -> css->lock (lru, hash, bl_list) */ @@ -210,38 +211,6 @@ static void add_cs(struct pcs_cs_set *csset, struct pcs_cs *cs) hlist_add_head_rcu(&cs->hlist, &csset->ht[hash]); } -static inline int netaddr_cmp(PCS_NET_ADDR_T const *addr1, PCS_NET_ADDR_T const *addr2, int ignore_port) -{ - unsigned int d; - size_t sz = 0; - - d = addr1->type - addr2->type; - if (d) - return d; - d = addr1->port - addr2->port; - if (!ignore_port && d) - return d; - - switch (addr1->type) { - case PCS_ADDRTYPE_IP: - case PCS_ADDRTYPE_RDMA: - sz = sizeof(struct in_addr); - break; - case PCS_ADDRTYPE_IP6: - sz = sizeof(struct in6_addr); - break; - default: - BUG(); - } - - return memcmp(addr1->address, addr2->address, sz); -} - -static int pcs_netaddr_cmp(PCS_NET_ADDR_T const *addr1, PCS_NET_ADDR_T const *addr2) -{ - return netaddr_cmp(addr1, addr2, 0); -} - /* Return locked cs */ struct pcs_cs *pcs_cs_find_create(struct pcs_cs_set *csset, PCS_NODE_ID_T *id, PCS_NET_ADDR_T *addr, int flags) { diff --git a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c index af008e63294c..031d6f823bad 100644 --- a/fs/fuse/kio/pcs/pcs_fuse_kdirect.c +++ b/fs/fuse/kio/pcs/pcs_fuse_kdirect.c @@ -34,6 +34,7 @@ #include "pcs_rpc.h" #include "fuse_ktrace.h" #include "fuse_prometheus.h" +#include "pcs_net_addr.h" unsigned int pcs_loglevel = LOG_TRACE; module_param(pcs_loglevel, uint, 0644); @@ -1243,6 +1244,8 @@ static 
void kpcs_req_send(struct fuse_req *req, bool bg) return; } +static void fuse_rpc_error_metrics_clean(struct fuse_error_metrics *metrics); + static void fuse_trace_free(struct fuse_ktrace *tr) { relay_close(tr->rchan); @@ -1256,6 +1259,11 @@ static void fuse_trace_free(struct fuse_ktrace *tr) debugfs_remove(tr->dir); if (tr->fc) fuse_conn_put(tr->fc); + + if (tr->error_metrics) { + fuse_rpc_error_metrics_clean(tr->error_metrics); + kfree(tr->error_metrics); + } kfree(tr); } @@ -1359,6 +1367,60 @@ void fuse_stat_account(struct fuse_conn *fc, int op, u64 val) } } +struct fuse_rpc_error_metric *fuse_rpc_error_metric_get_or_create( + struct fuse_error_metrics *metrics, PCS_NET_ADDR_T const *addr) +{ + struct fuse_rpc_error_metric *metric = NULL; + + spin_lock(&metrics->lock); + + list_for_each_entry(metric, &metrics->fuse_rpc_error_metric_list, list) + if (pcs_netaddr_cmp_ignore_port(&metric->m.addr, addr) == 0) + goto out; + + metric = kzalloc(sizeof(*metric), GFP_KERNEL); + if (!metric) + goto out; + + metric->m.addr = *addr; + metrics->rpc_size++; + + list_add_tail(&metric->list, &metrics->fuse_rpc_error_metric_list); + +out: + spin_unlock(&metrics->lock); + return metric; +} + +static void fuse_rpc_error_metrics_clean(struct fuse_error_metrics *metrics) +{ + struct fuse_rpc_error_metric *entry, *next; + + spin_lock(&metrics->lock); + list_for_each_entry_safe(entry, next, + &metrics->fuse_rpc_error_metric_list, list) { + list_del(&entry->list); + kfree(entry); + } + metrics->rpc_size = 0; + spin_unlock(&metrics->lock); +} + +void fuse_rpc_error_account(struct fuse_error_metrics *metrics, + PCS_NET_ADDR_T const *addr, int err, u64 val) +{ + struct fuse_rpc_error_metric *metric; + + if (!metrics || err >= RPC_ERROR_MAX) { + WARN_ON_ONCE(1); + return; + } + + metric = fuse_rpc_error_metric_get_or_create(metrics, addr); + if (metric) + metric->m.err[err] += val; +} + static int prometheus_file_open(struct inode *inode, struct file *filp) { struct fuse_ktrace * tr = 
inode->i_private; @@ -1397,6 +1459,26 @@ static void prometheus_req_iter(struct fuse_file *ff, struct fuse_req *req, stats->stucked_reqs_cnt_120s++; } +static void fuse_rpc_collect_errors(struct fuse_error_metrics *metrics, u64 size, struct kfuse_metrics *stats) +{ + struct fuse_rpc_error_metric *entry; + u64 i; + + spin_lock(&metrics->lock); + stats->rpc_errors_size = size; + i = 0; + list_for_each_entry(entry, &metrics->fuse_rpc_error_metric_list, list) { + /* If the rpc errors size was changed after allocation */ + if (i > stats->rpc_errors_size - 1) + break; + pcs_format_netaddr_ignore_port(stats->rpc_errors[i].address, + MAX_RPC_ADDR_LEN, &entry->m.addr); + memcpy(stats->rpc_errors[i].error, entry->m.err, sizeof(stats->rpc_errors[i].error)); + i++; + } + spin_unlock(&metrics->lock); +} + /* NOTE: old versions of userspace could read only histograms */ static ssize_t prometheus_file_read(struct file *filp, char __user *buffer, @@ -1405,21 +1487,27 @@ static ssize_t prometheus_file_read(struct file *filp, { struct fuse_ktrace *tr = filp->private_data; struct kfuse_metrics *stats; + u64 sz, alloc_rpc_size; int cpu; - if (*ppos >= sizeof(struct kfuse_metrics)) + if (!tr->prometheus_metrics) + return -EINVAL; + + if (!tr->error_metrics) + return -EINVAL; + + alloc_rpc_size = tr->error_metrics->rpc_size; + sz = sizeof(struct kfuse_metrics) + + sizeof(struct kfuse_rpc_error) * alloc_rpc_size; + if (*ppos >= sz) return 0; - if (*ppos + count > sizeof(struct kfuse_metrics)) - count = sizeof(struct kfuse_metrics) - *ppos; + if (*ppos + count > sz) + count = sz - *ppos; - stats = (void *)get_zeroed_page(GFP_KERNEL); - BUILD_BUG_ON(sizeof(*stats) > PAGE_SIZE); + stats = kzalloc(sz, GFP_KERNEL); if (!stats) return -ENOMEM; - if (!tr->prometheus_metrics) - return -EINVAL; - for_each_possible_cpu(cpu) { struct kfuse_metrics *m; @@ -1446,12 +1534,18 @@ static ssize_t prometheus_file_read(struct file *filp, pcs_kio_req_list(tr->fc, prometheus_req_iter, stats); 
spin_unlock(&tr->fc->lock); + fuse_rpc_collect_errors(tr->error_metrics, alloc_rpc_size, stats); + if (copy_to_user(buffer, (char *)stats + *ppos, count)) count = -EFAULT; else *ppos += count; - free_page((unsigned long)stats); + /* clean all rpc errors as they are fully read */ + if (*ppos == sz && tr->error_metrics->rpc_size == alloc_rpc_size) + fuse_rpc_error_metrics_clean(tr->error_metrics); + + kfree(stats); return count; } @@ -1468,6 +1562,7 @@ static int fuse_ktrace_setup(struct fuse_conn * fc) struct fuse_ktrace * old_tr; struct dentry * dir; struct kfuse_metrics __percpu * metrics; + struct fuse_error_metrics * error_metrics; int cpu; char name[16]; @@ -1517,6 +1612,13 @@ static int fuse_ktrace_setup(struct fuse_conn * fc) tr->buf = __alloc_percpu(KTRACE_LOG_BUF_SIZE, 16); + error_metrics = kzalloc(sizeof(struct fuse_error_metrics), GFP_KERNEL); + if (!error_metrics) + goto err; + INIT_LIST_HEAD(&error_metrics->fuse_rpc_error_metric_list); + spin_lock_init(&error_metrics->lock); + tr->error_metrics = error_metrics; + atomic_set(&tr->refcnt, 1); ret = -EBUSY; diff --git a/fs/fuse/kio/pcs/pcs_net_addr.c b/fs/fuse/kio/pcs/pcs_net_addr.c new file mode 100644 index 000000000000..f75b42e1e7c9 --- /dev/null +++ b/fs/fuse/kio/pcs/pcs_net_addr.c @@ -0,0 +1,93 @@ +/* + * fs/fuse/kio/pcs/pcs_net_addr.c + * + * Copyright (c) 2023 Virtuozzo International GmbH. All rights reserved. 
+ * + */ + +#include <net/sock.h> + +#include "pcs_types.h" + +int pcs_netaddr2sockaddr(PCS_NET_ADDR_T const *addr, struct sockaddr *sa, int *salen) +{ + BUG_ON(!sa); + if (addr->type == PCS_ADDRTYPE_IP || addr->type == PCS_ADDRTYPE_RDMA) { + struct sockaddr_in *saddr4 = (struct sockaddr_in *)sa; + *saddr4 = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_port = (u16)addr->port, + }; + memcpy(&saddr4->sin_addr, addr->address, sizeof(saddr4->sin_addr)); + *salen = sizeof(*saddr4); + } else if (addr->type == PCS_ADDRTYPE_IP6) { + struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)sa; + *saddr6 = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_port = (u16)addr->port, + }; + memcpy(&saddr6->sin6_addr, addr->address, sizeof(saddr6->sin6_addr)); + *salen = sizeof(*saddr6); + } else + return -EINVAL; + + return 0; +} + +static inline int netaddr_cmp(PCS_NET_ADDR_T const *addr1, + PCS_NET_ADDR_T const *addr2, int ignore_port) +{ + unsigned int d; + size_t sz = 0; + + d = addr1->type - addr2->type; + if (d) + return d; + d = addr1->port - addr2->port; + if (!ignore_port && d) + return d; + + switch (addr1->type) { + case PCS_ADDRTYPE_IP: + case PCS_ADDRTYPE_RDMA: + sz = sizeof(struct in_addr); + break; + case PCS_ADDRTYPE_IP6: + sz = sizeof(struct in6_addr); + break; + default: + BUG(); + } + + return memcmp(addr1->address, addr2->address, sz); +} + +int pcs_netaddr_cmp(PCS_NET_ADDR_T const *addr1, PCS_NET_ADDR_T const *addr2) +{ + return netaddr_cmp(addr1, addr2, 0); +} + +int pcs_netaddr_cmp_ignore_port(PCS_NET_ADDR_T const *addr1, PCS_NET_ADDR_T const *addr2) +{ + return netaddr_cmp(addr1, addr2, 1); +} + +int pcs_format_netaddr_ignore_port(char *str, int len, PCS_NET_ADDR_T const *addr) +{ + int ret; + + switch (addr->type) { + case PCS_ADDRTYPE_IP: + case PCS_ADDRTYPE_RDMA: + ret = snprintf(str, len, "%pI4", addr->address); + break; + case PCS_ADDRTYPE_IP6: + ret = snprintf(str, len, "%pI6", addr->address); + break; + default: + ret = snprintf(str, 
len, "unknown"); + break; + } + + return ret; +} diff --git a/fs/fuse/kio/pcs/pcs_net_addr.h b/fs/fuse/kio/pcs/pcs_net_addr.h new file mode 100644 index 000000000000..032d265472fd --- /dev/null +++ b/fs/fuse/kio/pcs/pcs_net_addr.h @@ -0,0 +1,16 @@ +/* + * fs/fuse/kio/pcs/pcs_net_addr.h + * + * Copyright (c) 2023 Virtuozzo International GmbH. All rights reserved. + * + */ + +#ifndef __PCS_NET_ADDR_H__ +#define __PCS_NET_ADDR_H__ 1 + +int pcs_netaddr2sockaddr(PCS_NET_ADDR_T const *addr, struct sockaddr *sa, int *salen); +int pcs_netaddr_cmp(PCS_NET_ADDR_T const *addr1, PCS_NET_ADDR_T const *addr2); +int pcs_netaddr_cmp_ignore_port(PCS_NET_ADDR_T const *addr1, PCS_NET_ADDR_T const *addr2); +int pcs_format_netaddr_ignore_port(char *str, int len, PCS_NET_ADDR_T const *addr); + +#endif /* __PCS_NET_ADDR_H__ */ diff --git a/fs/fuse/kio/pcs/pcs_rpc.c b/fs/fuse/kio/pcs/pcs_rpc.c index 7f8d4b250277..223962c75941 100644 --- a/fs/fuse/kio/pcs/pcs_rpc.c +++ b/fs/fuse/kio/pcs/pcs_rpc.c @@ -34,6 +34,7 @@ #include "pcs_cluster.h" #include "log.h" #include "fuse_ktrace.h" +#include "fuse_prometheus.h" static unsigned int rpc_affinity_mode = RPC_AFFINITY_RETENT; @@ -102,6 +103,17 @@ static void rpc_del_hash(struct pcs_rpc * ep) } } +static void rpc_report_error(struct pcs_rpc *ep, int err) +{ + if (!ep->eng || !cc_from_rpc(ep->eng)->fc->ktrace) { + WARN_ON_ONCE(1); + return; + } + + fuse_rpc_error_account(cc_from_rpc(ep->eng)->fc->ktrace->error_metrics, + &ep->addr, err, 1); +} + struct pcs_msg * pcs_rpc_lookup_xid(struct pcs_rpc * ep, PCS_XID_T * xid) { @@ -230,12 +242,15 @@ void rpc_abort(struct pcs_rpc * ep, int fatal, int error) queue_delayed_work(cc->wq, &ep->timer_work, ep->params.holddown_timeout); } - while (!list_empty(&failed_list)) { - struct pcs_msg * msg = list_first_entry(&failed_list, struct pcs_msg, list); - list_del_init(&msg->list); - pcs_set_rpc_error(&msg->error, error, ep); - BUG_ON(!hlist_unhashed(&msg->kill_link)); - msg->done(msg); + if 
(!list_empty(&failed_list)) { + rpc_report_error(ep, RPC_ERROR_ABORT); + while (!list_empty(&failed_list)) { + struct pcs_msg * msg = list_first_entry(&failed_list, struct pcs_msg, list); + list_del_init(&msg->list); + pcs_set_rpc_error(&msg->error, error, ep); + BUG_ON(!hlist_unhashed(&msg->kill_link)); + msg->done(msg); + } } if (ep->state != PCS_RPC_ABORT) @@ -1279,6 +1294,7 @@ static void timer_work(struct work_struct *w) case PCS_RPC_WORK: { int err = list_empty(&ep->pending_queue) ? PCS_ERR_RESPONSE_TIMEOUT : PCS_ERR_WRITE_TIMEOUT; + rpc_report_error(ep, RPC_ERROR_TOUT); FUSE_KTRACE(cc_from_rpc(ep->eng)->fc, "rpc timer expired, killing connection to " PEER_FMT ", %d", PEER_ARGS(ep), err); rpc_abort(ep, 0, err); diff --git a/fs/fuse/kio/pcs/pcs_sock_conn.c b/fs/fuse/kio/pcs/pcs_sock_conn.c index f463c1ecef9d..6acbd2bc6d2b 100644 --- a/fs/fuse/kio/pcs/pcs_sock_conn.c +++ b/fs/fuse/kio/pcs/pcs_sock_conn.c @@ -33,31 +33,6 @@ static inline void pcs_sock_cork(struct socket *sock) tcp_sock_set_cork(sock->sk, true); } -int pcs_netaddr2sockaddr(PCS_NET_ADDR_T const* addr, struct sockaddr *sa, int *salen) -{ - BUG_ON(!sa); - if (addr->type == PCS_ADDRTYPE_IP || addr->type == PCS_ADDRTYPE_RDMA) { - struct sockaddr_in *saddr4 = (struct sockaddr_in *)sa; - *saddr4 = (struct sockaddr_in) { - .sin_family = AF_INET, - .sin_port = (u16)addr->port, - }; - memcpy(&saddr4->sin_addr, addr->address, sizeof(saddr4->sin_addr)); - *salen = sizeof(*saddr4); - } else if (addr->type == PCS_ADDRTYPE_IP6) { - struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)sa; - *saddr6 = (struct sockaddr_in6) { - .sin6_family = AF_INET6, - .sin6_port = (u16)addr->port, - }; - memcpy(&saddr6->sin6_addr, addr->address, sizeof(saddr6->sin6_addr)); - *salen = sizeof(*saddr6); - } else - return -EINVAL; - - return 0; -} - void pcs_sockconnect_start(struct pcs_rpc *ep) { struct pcs_sockio *sio; diff --git a/fs/fuse/kio/pcs/pcs_sock_conn.h b/fs/fuse/kio/pcs/pcs_sock_conn.h index bf39a29f78e9..554958b419d0 
100644 --- a/fs/fuse/kio/pcs/pcs_sock_conn.h +++ b/fs/fuse/kio/pcs/pcs_sock_conn.h @@ -9,6 +9,5 @@ #define _PCS_SOCK_CONN_H_ 1 void pcs_sockconnect_start(struct pcs_rpc *ep); -int pcs_netaddr2sockaddr(PCS_NET_ADDR_T const* addr, struct sockaddr *sa, int *salen); #endif /* _PCS_SOCK_CONN_H_ */ -- 2.34.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel