> -----Original Message-----
> From: netdev-ow...@vger.kernel.org [mailto:netdev-ow...@vger.kernel.org]
> On Behalf Of Roopa Prabhu
> Sent: Sunday, March 13, 2016 3:56 AM
> To: netdev@vger.kernel.org
> Cc: j...@mojatatu.com; da...@davemloft.net
> Subject: [PATCH net-next 1/2] rtnetlink: add new RTM_GETSTATS message to
> dump link stats
> 
> From: Roopa Prabhu <ro...@cumulusnetworks.com>
> 
> This patch adds a new RTM_GETSTATS message to query link stats via
> netlink from the kernel. RTM_NEWLINK also dumps stats today, but
> RTM_NEWLINK returns a lot more than just stats and is expensive in some
> cases when frequent polling for stats from userspace is a common
> operation.
> 
> RTM_GETSTATS is an attempt to provide a lightweight netlink message to
> explicitly query only link stats from the kernel on an interface.
> The idea is to also keep it extensible so that new kinds of stats can be
> added to it in the future.
> 
> This patch adds the following attribute for NETDEV stats:
> struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
>         [IFLA_STATS_LINK64]  = { .len = sizeof(struct rtnl_link_stats64)
> }, };
> 
> This patch also allows for af family stats (an example af stats for IPV6
> is available with the second patch in the series).
> 
> Like any other rtnetlink message, RTM_GETSTATS can be used to get stats
> of a single interface or all interfaces with NLM_F_DUMP.
> 
> Future possible new types of stat attributes:
> - IFLA_MPLS_STATS  (nested. for mpls/mdev stats)
> - IFLA_EXTENDED_STATS (nested. extended software netdev stats like
> bridge,
>   vlan, vxlan etc)
> - IFLA_EXTENDED_HW_STATS (nested. extended hardware stats which are
>   available via ethtool today)
> 
> This patch also declares a filter mask for all stat attributes.
> User has to provide a mask of stats attributes to query. This will be
> specified in a new hdr 'struct if_stats_msg' for stats messages.
> 
> Without any attributes in the filter_mask, no stats will be returned.
> 
> This patch has been tested with modified iproute2 ifstat.
> 
> Suggested-by: Jamal Hadi Salim <j...@mojatatu.com>
> Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com>
> ---
>  include/net/rtnetlink.h        |   5 ++
>  include/uapi/linux/if_link.h   |  19 ++++
>  include/uapi/linux/rtnetlink.h |   7 ++
>  net/core/rtnetlink.c           | 200
> +++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 231 insertions(+)
> 
> diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index
> 2f87c1b..fa68158 100644
> --- a/include/net/rtnetlink.h
> +++ b/include/net/rtnetlink.h
> @@ -131,6 +131,11 @@ struct rtnl_af_ops {
>                                                   const struct nlattr *attr);
>       int                     (*set_link_af)(struct net_device *dev,
>                                              const struct nlattr *attr);
> +     size_t                  (*get_link_af_stats_size)(const struct
> net_device *dev,
> +                                                       u32 filter_mask);
> +     int                     (*fill_link_af_stats)(struct sk_buff *skb,
> +                                                   const struct net_device 
> *dev,
> +                                                   u32 filter_mask);
>  };
> 
>  void __rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git
> a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index
> 249eef9..0840f3e 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -741,4 +741,23 @@ enum {
> 
>  #define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
> 
> +/* STATS section */
> +
> +struct if_stats_msg {
> +     __u8  family;
> +     __u32 ifindex;
> +     __u32 filter_mask;
> +};
> +
> +enum {
> +     IFLA_STATS_UNSPEC,
> +     IFLA_STATS_LINK64,
> +     IFLA_STATS_INET6,
> +     __IFLA_STATS_MAX,
> +};
> +
> +#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
> +
> +#define IFLA_STATS_FILTER_BIT(ATTR)  (1 << (ATTR))
> +
>  #endif /* _UAPI_LINUX_IF_LINK_H */
> diff --git a/include/uapi/linux/rtnetlink.h
> b/include/uapi/linux/rtnetlink.h index ca764b5..2bbb300 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -139,6 +139,13 @@ enum {
>       RTM_GETNSID = 90,
>  #define RTM_GETNSID RTM_GETNSID
> 
> +     RTM_NEWSTATS = 92,
> +#define RTM_NEWSTATS RTM_NEWSTATS

I think RTM_NEWSTATS and RTM_DELSTATS aren't good names, since the user
doesn't add or delete statistics, but only queries them.
Maybe just keep RTM_GETSTATS, and have the message sent back to the user be
RTM_GETSTATS as well?

> +     RTM_DELSTATS = 93,
> +#define RTM_DELSTATS RTM_DELSTATS

RTM_DELSTATS is not used anywhere in this patch.

> +     RTM_GETSTATS = 94,
> +#define RTM_GETSTATS RTM_GETSTATS
> +
>       __RTM_MAX,
>  #define RTM_MAX              (((__RTM_MAX + 3) & ~3) - 1)
>  };
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index
> d2d9e5e..d1e3d17 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -3410,6 +3410,203 @@ out:
>       return err;
>  }
> 
> +static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device
> *dev,
> +                            int type, u32 pid, u32 seq, u32 change,
> +                            unsigned int flags, unsigned int filter_mask) {
> +     const struct rtnl_link_stats64 *stats;
> +     struct rtnl_link_stats64 temp;
> +     struct if_stats_msg *ifsm;
> +     struct nlmsghdr *nlh;
> +     struct rtnl_af_ops *af_ops;
> +     struct nlattr *attr;
> +
> +     ASSERT_RTNL();
> +
> +     nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
> +     if (!nlh)
> +             return -EMSGSIZE;
> +
> +     ifsm = nlmsg_data(nlh);
> +     ifsm->ifindex = dev->ifindex;
> +     ifsm->filter_mask = filter_mask;
> +
> +     if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64)) {
> +             attr = nla_reserve(skb, IFLA_STATS_LINK64,
> +                                sizeof(struct rtnl_link_stats64));
> +             if (!attr)
> +                     return -EMSGSIZE;
> +
> +             stats = dev_get_stats(dev, &temp);
> +
> +             copy_rtnl_link_stats64(nla_data(attr), stats);
> +     }
> +
> +     list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> +             if (af_ops->fill_link_af_stats) {
> +                     int err;
> +
> +                     err = af_ops->fill_link_af_stats(skb, dev, filter_mask);
> +                     if (err < 0)
> +                             goto nla_put_failure;
> +             }
> +     }
> +
> +     nlmsg_end(skb, nlh);
> +
> +     return 0;
> +
> +nla_put_failure:
> +     nlmsg_cancel(skb, nlh);
> +
> +     return -EMSGSIZE;
> +}
> +
> +static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] =
> {
> +     [IFLA_STATS_LINK64]     = { .len = sizeof(struct rtnl_link_stats64)
> },
> +};
> +
> +static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
> +                                       u32 filter_mask)
> +{
> +     struct rtnl_af_ops *af_ops;
> +     size_t size = 0;
> +
> +     list_for_each_entry(af_ops, &rtnl_af_ops, list) {
> +             if (af_ops->get_link_af_stats_size)
> +                     size += af_ops->get_link_af_stats_size(dev,
> +                                                            filter_mask);
> +     }
> +
> +     return size;
> +}
> +
> +static noinline size_t if_nlmsg_stats_size(const struct net_device
> *dev,
> +                                        u32 filter_mask)
> +{
> +     size_t size = 0;
> +
> +     if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK64))
> +             size += nla_total_size(sizeof(struct rtnl_link_stats64));
> +
> +     size += rtnl_link_get_af_stats_size(dev, filter_mask);
> +
> +     return size;
> +}
> +
> +static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh) {
> +     struct net *net = sock_net(skb->sk);
> +     struct if_stats_msg *ifsm;
> +     struct net_device *dev = NULL;
> +     struct sk_buff *nskb;
> +     u32 filter_mask;
> +     int err;
> +
> +     ifsm = nlmsg_data(nlh);
> +     if (ifsm->ifindex > 0)
> +             dev = __dev_get_by_index(net, ifsm->ifindex);
> +     else
> +             return -EINVAL;
> +
> +     if (!dev)
> +             return -ENODEV;
> +
> +     filter_mask = ifsm->filter_mask;
> +     if (!filter_mask)
> +             return -EINVAL;
> +
> +     nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask),
> GFP_KERNEL);
> +     if (!nskb)
> +             return -ENOBUFS;
> +
> +     err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
> +                               NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
> +                               0, filter_mask);
> +     if (err < 0) {
> +             /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
> +             WARN_ON(err == -EMSGSIZE);
> +             kfree_skb(nskb);
> +     } else {
> +             err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
> +     }
> +
> +     return err;
> +}
> +
> +static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
> +{
> +     struct net *net = sock_net(skb->sk);
> +     struct net_device *dev;
> +     u16 min_ifinfo_dump_size = 0;
> +     struct if_stats_msg *ifsm;
> +     u32 filter_mask;
> +
> +     ifsm = nlmsg_data(nlh);
> +     filter_mask = ifsm->filter_mask;
> +
> +     /* traverse the list of net devices and compute the minimum
> +      * buffer size based upon the filter mask.
> +      */
> +     list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> +             min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
> +                                          if_nlmsg_stats_size(dev,
> +                                                              filter_mask));
> +     }
> +
> +     return min_ifinfo_dump_size;
> +}
> +
> +static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback
> +*cb) {
> +     struct net *net = sock_net(skb->sk);
> +     struct if_stats_msg *ifsm;
> +     int h, s_h;
> +     int idx = 0, s_idx;
> +     struct net_device *dev;
> +     struct hlist_head *head;
> +     unsigned int flags = NLM_F_MULTI;
> +     u32 filter_mask = 0;
> +     int err;
> +
> +     s_h = cb->args[0];
> +     s_idx = cb->args[1];
> +
> +     cb->seq = net->dev_base_seq;
> +
> +     ifsm = nlmsg_data(cb->nlh);
> +     filter_mask = ifsm->filter_mask;
> +
> +     for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
> +             idx = 0;
> +             head = &net->dev_index_head[h];
> +             hlist_for_each_entry(dev, head, index_hlist) {
> +                     if (idx < s_idx)
> +                             goto cont;
> +                     err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
> +                                               NETLINK_CB(cb->skb).portid,
> +                                               cb->nlh->nlmsg_seq, 0,
> +                                               flags, filter_mask);
> +                     /* If we ran out of room on the first message,
> +                      * we're in trouble
> +                      */
> +                     WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
> +
> +                     if (err < 0)
> +                             goto out;
> +
> +                     nl_dump_check_consistent(cb, nlmsg_hdr(skb));
> +cont:
> +                     idx++;
> +             }
> +     }
> +out:
> +     cb->args[1] = idx;
> +     cb->args[0] = h;
> +
> +     return skb->len;
> +}
> +
>  /* Process one rtnetlink message. */
> 
>  static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
> @@ -3559,4 +3756,7 @@ void __init rtnetlink_init(void)
>       rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink,
> NULL);
>       rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL,
> NULL);
>       rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL,
> NULL);
> +
> +     rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get,
> rtnl_stats_dump,
> +                   rtnl_stats_calcit);
>  }
> --
> 1.9.1

Reply via email to