I'm certainly very happy with a re-write: this seems like a much nicer way of doing things.
-Mike. > -----Original Message----- > From: Justin Pettit [mailto:jpet...@nicira.com] > Sent: 05 December 2011 00:57 > To: dev@openvswitch.org > Cc: Mike Bursell; Jamal Hadi Salim > Subject: [PATCH] netdev-linux: Don't restrict policing to IPv4 and don't call > "tc". > > Mike Bursell pointed out that our policer only works on IPv4 traffic--and > specifically not IPv6. By using the "basic" filter, we can enforce policing > on all > traffic for a particular interface. > > Jamal Hadi Salim pointed out that calling "tc" directly with system() is > pretty > ugly. This commit switches our remaining "tc" calls to directly sending the > appropriate netlink messages. > > Suggested-by: Mike Bursell <mike.burs...@citrix.com> > Suggested-by: Jamal Hadi Salim <h...@cyberus.ca> > --- > AUTHORS | 2 + > INSTALL.Linux | 6 +- > lib/netdev-linux.c | 191 +++++++++++++++++++++++++++++++++++------- > --------- > 3 files changed, 136 insertions(+), 63 deletions(-) > > diff --git a/AUTHORS b/AUTHORS > index 6cf99da..964e32d 100644 > --- a/AUTHORS > +++ b/AUTHORS > @@ -78,6 +78,7 @@ Hassan Khan hassan.k...@seecs.edu.pk > Hector Oron hector.o...@gmail.com > Henrik Amren hen...@nicira.com > Jad Naous jna...@gmail.com > +Jamal Hadi Salim h...@cyberus.ca > Jan Medved jmed...@juniper.net > Janis Hamme janis.ha...@student.kit.edu > Jari Sundell sundell.softw...@gmail.com > @@ -90,6 +91,7 @@ Krishna Miriyala kris...@nicira.com > Luiz Henrique Ozaki luiz.oz...@gmail.com > Michael Hu m...@nicira.com > Michael Mao m...@nicira.com > +Mike Bursell mike.burs...@citrix.com > Murphy McCauley murphy.mccau...@gmail.com > Mikael Doverhag mdover...@nicira.com > Niklas Andersson nanders...@nicira.com > diff --git a/INSTALL.Linux b/INSTALL.Linux index 4477a60..7a55ccd 100644 > --- a/INSTALL.Linux > +++ b/INSTALL.Linux > @@ -46,9 +46,9 @@ INSTALL.userspace for more information. > bridge") before starting the datapath. > > For optional support of ingress policing, you must enable kernel > - configuration options NET_CLS_ACT, NET_CLS_U32, NET_SCH_INGRESS, > - and NET_ACT_POLICE, either built-in or as modules. > - (NET_CLS_POLICE is obsolete and not needed.) > + configuration options NET_CLS_BASIC, NET_SCH_INGRESS, and > + NET_ACT_POLICE, either built-in or as modules. (NET_CLS_POLICE is > + obsolete and not needed.) > > If GRE tunneling is being used it is recommended that the kernel > be compiled with IPv6 support (CONFIG_IPV6). This allows for diff > --git > a/lib/netdev-linux.c b/lib/netdev-linux.c index 90e88c7..8293bb1 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -30,6 +30,7 @@ > #include <linux/types.h> > #include <linux/ethtool.h> > #include <linux/mii.h> > +#include <linux/pkt_cls.h> > #include <linux/pkt_sched.h> > #include <linux/rtnetlink.h> > #include <linux/sockios.h> > @@ -326,6 +327,9 @@ static unsigned int tc_buffer_per_jiffy(unsigned int > rate); static struct tcmsg *tc_make_request(const struct netdev *, int type, > unsigned int flags, struct ofpbuf *); > static int > tc_transact(struct ofpbuf *request, struct ofpbuf **replyp); > +static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add); > +static int tc_add_policer(struct netdev *netdev, int kbits_rate, > + int kbits_burst); > > static int tc_parse_qdisc(const struct ofpbuf *, const char **kind, > struct nlattr **options); @@ -1564,50 +1568,8 @@ > netdev_linux_set_advertisements(struct netdev *netdev, uint32_t > advertise) > ETHTOOL_SSET, "ETHTOOL_SSET"); } > > -#define POLICE_ADD_CMD "/sbin/tc qdisc add dev %s handle ffff: ingress" > -#define POLICE_CONFIG_CMD "/sbin/tc filter add dev %s parent ffff: > protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst %dk > mtu > 65535 drop flowid :1" > - > -/* Remove ingress policing from 'netdev'. Returns 0 if successful, otherwise > a > - * positive errno value. > - * > - * This function is equivalent to running > - * /sbin/tc qdisc del dev %s handle ffff: ingress > - * but it is much, much faster. > - */ > -static int > -netdev_linux_remove_policing(struct netdev *netdev) -{ > - struct netdev_dev_linux *netdev_dev = > - netdev_dev_linux_cast(netdev_get_dev(netdev)); > - const char *netdev_name = netdev_get_name(netdev); > - > - struct ofpbuf request; > - struct tcmsg *tcmsg; > - int error; > - > - tcmsg = tc_make_request(netdev, RTM_DELQDISC, 0, &request); > - if (!tcmsg) { > - return ENODEV; > - } > - tcmsg->tcm_handle = tc_make_handle(0xffff, 0); > - tcmsg->tcm_parent = TC_H_INGRESS; > - nl_msg_put_string(&request, TCA_KIND, "ingress"); > - nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); > - > - error = tc_transact(&request, NULL); > - if (error && error != ENOENT && error != EINVAL) { > - VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", > - netdev_name, strerror(error)); > - return error; > - } > - > - netdev_dev->kbits_rate = 0; > - netdev_dev->kbits_burst = 0; > - netdev_dev->cache_valid |= VALID_POLICING; > - return 0; > -} > - > -/* Attempts to set input rate limiting (policing) policy. */ > +/* Attempts to set input rate limiting (policing) policy. Returns 0 if > + * successful, otherwise a positive errno value. */ > static int > netdev_linux_set_policing(struct netdev *netdev, > uint32_t kbits_rate, uint32_t kbits_burst) @@ > -1615,7 +1577,7 > @@ netdev_linux_set_policing(struct netdev *netdev, > struct netdev_dev_linux *netdev_dev = > netdev_dev_linux_cast(netdev_get_dev(netdev)); > const char *netdev_name = netdev_get_name(netdev); > - char command[1024]; > + int error; > > COVERAGE_INC(netdev_set_policing); > > @@ -1630,27 +1592,36 @@ netdev_linux_set_policing(struct netdev > *netdev, > return 0; > } > > - netdev_linux_remove_policing(netdev); > + /* Remove any existing ingress qdisc. */ > + error = tc_add_del_ingress_qdisc(netdev, false); > + if (error) { > + const char *netdev_name = netdev_get_name(netdev); > + > + VLOG_WARN_RL(&rl, "%s: removing policing failed: %s", > + netdev_name, strerror(error)); > + return error; > + } > + > if (kbits_rate) { > - snprintf(command, sizeof(command), POLICE_ADD_CMD, > netdev_name); > - if (system(command) != 0) { > - VLOG_WARN_RL(&rl, "%s: problem adding policing", netdev_name); > - return -1; > + error = tc_add_del_ingress_qdisc(netdev, true); > + if (error) { > + VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s", > + netdev_name, strerror(error)); > + return error; > } > > - snprintf(command, sizeof(command), POLICE_CONFIG_CMD, > netdev_name, > - kbits_rate, kbits_burst); > - if (system(command) != 0) { > - VLOG_WARN_RL(&rl, "%s: problem configuring policing", > - netdev_name); > - return -1; > + error = tc_add_policer(netdev, kbits_rate, kbits_burst); > + if (error){ > + VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s", > + netdev_name, strerror(error)); > + return error; > } > - > - netdev_dev->kbits_rate = kbits_rate; > - netdev_dev->kbits_burst = kbits_burst; > - netdev_dev->cache_valid |= VALID_POLICING; > } > > + netdev_dev->kbits_rate = kbits_rate; > + netdev_dev->kbits_burst = kbits_rate ? kbits_burst: 0; > + netdev_dev->cache_valid |= VALID_POLICING; > + > return 0; > } > > @@ -3491,6 +3462,106 @@ tc_transact(struct ofpbuf *request, struct ofpbuf > **replyp) > return error; > } > > +/* Adds or deletes a root ingress qdisc on 'netdev'. We use this for > + * policing configuration. > + * > + * This function is equivalent to running the following when 'add' is true: > + * /sbin/tc qdisc add dev <devname> handle ffff: ingress > + * > + * This function is equivalent to running the following when 'add' is false: > + * /sbin/tc qdisc del dev <devname> handle ffff: ingress > + * > + * The configuration and stats may be seen with the following command: > + * /sbin/tc -s qdisc show dev <devname> > + * > + * Returns 0 if successful, otherwise a positive errno value. > + */ > +static int > +tc_add_del_ingress_qdisc(struct netdev *netdev, bool add) { > + struct ofpbuf request; > + struct tcmsg *tcmsg; > + int error; > + int type = add ? RTM_NEWQDISC : RTM_DELQDISC; > + int flags = add ? NLM_F_EXCL | NLM_F_CREATE : 0; > + > + tcmsg = tc_make_request(netdev, type, flags, &request); > + if (!tcmsg) { > + return ENODEV; > + } > + tcmsg->tcm_handle = tc_make_handle(0xffff, 0); > + tcmsg->tcm_parent = TC_H_INGRESS; > + nl_msg_put_string(&request, TCA_KIND, "ingress"); > + nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0); > + > + error = tc_transact(&request, NULL); > + if (error) { > + /* If we're deleting the qdisc, don't worry about some of the > + * error conditions. */ > + if (!add && (error == ENOENT || error == EINVAL)) { > + return 0; > + } > + return error; > + } > + > + return 0; > +} > + > +/* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst > +size > + * of 'kbits_burst'. > + * > + * This function is equivalent to running: > + * /sbin/tc filter add dev <devname> parent ffff: protocol all prio 49 > + * basic police rate <kbits_rate>kbit burst <kbits_burst>k > + * mtu 65535 drop > + * > + * The configuration and stats may be seen with the following command: > + * /sbin/tc -s filter show <devname> eth0 parent ffff: > + * > + * Returns 0 if successful, otherwise a positive errno value. > + */ > +static int > +tc_add_policer(struct netdev *netdev, int kbits_rate, int kbits_burst) > +{ > + struct tc_police tc_police; > + struct ofpbuf request; > + struct tcmsg *tcmsg; > + size_t basic_offset; > + size_t police_offset; > + int error; > + int mtu = 65535; > + > + memset(&tc_police, 0, sizeof tc_police); > + tc_police.action = TC_POLICE_SHOT; > + tc_police.mtu = mtu; > + tc_fill_rate(&tc_police.rate, kbits_rate/8 * 1000, mtu); > + tc_police.burst = tc_bytes_to_ticks(tc_police.rate.rate, > + kbits_burst * 1024); > + > + tcmsg = tc_make_request(netdev, RTM_NEWTFILTER, > + NLM_F_EXCL|NLM_F_CREATE, &request); > + if (!tcmsg) { > + return ENODEV; > + } > + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); > + tcmsg->tcm_info = tc_make_handle(49, htons(ETH_P_ALL)); > + > + nl_msg_put_string(&request, TCA_KIND, "basic"); > + basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS); > + police_offset = nl_msg_start_nested(&request, TCA_BASIC_POLICE); > + nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof > tc_police); > + tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate); > + nl_msg_end_nested(&request, police_offset); > + nl_msg_end_nested(&request, basic_offset); > + > + error = tc_transact(&request, NULL); > + if (error) { > + return error; > + } > + > + return 0; > +} > + > static void > read_psched(void) > { > -- > 1.7.4.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev