On Tue, 19 Dec 2017 15:33:46 +0900 Chris Mi <chr...@mellanox.com> wrote:
> Currently in tc batch mode, only one command is read from the batch > file and sent to kernel to process. With this patch, we can accumulate > several commands before sending to kernel. The batch size is specified > using option -bs or -batchsize. > > To accumulate the commands in tc, we allocate an array of struct iovec. > If batchsize is bigger than 1 and we haven't accumulated enough > commands, rtnl_talk() will return without actually sending the message. > One exception is that there is no more command in the batch file. > > But please note that kernel still processes the requests one by one. > To process the requests in parallel in kernel is another effort. > The time we're saving in this patch is the user mode and kernel mode > context switch. So this patch works on top of the current kernel. > > Using the following script in kernel, we can generate 1,000,000 rules. > tools/testing/selftests/tc-testing/tdc_batch.py > > Without this patch, 'tc -b $file' exection time is: > > real 0m14.916s > user 0m6.808s > sys 0m8.046s > > With this patch, 'tc -b $file -bs 10' exection time is: > > real 0m12.286s > user 0m5.903s > sys 0m6.312s > > The insertion rate is improved more than 10%. > > Signed-off-by: Chris Mi <chr...@mellanox.com> > --- > include/libnetlink.h | 27 ++++++++++++++++ > include/utils.h | 8 +++++ > lib/libnetlink.c | 30 +++++++++++++----- > lib/utils.c | 20 ++++++++++++ > man/man8/tc.8 | 5 +++ > tc/m_action.c | 63 ++++++++++++++++++++++++++++--------- > tc/tc.c | 27 ++++++++++++++-- > tc/tc_common.h | 3 ++ > tc/tc_filter.c | 89 > ++++++++++++++++++++++++++++++++++++---------------- > 9 files changed, 221 insertions(+), 51 deletions(-) In addition to my earlier comments, these are the implementation issues. > > diff --git a/include/libnetlink.h b/include/libnetlink.h > index a4d83b9e..07e88c94 100644 > --- a/include/libnetlink.h > +++ b/include/libnetlink.h > @@ -13,6 +13,8 @@ > #include <linux/netconf.h> > #include <arpa/inet.h> > > +#define MSG_IOV_MAX 256 > + > struct rtnl_handle { > int fd; > struct sockaddr_nl local; > @@ -93,6 +95,31 @@ int rtnl_dump_filter_nc(struct rtnl_handle *rth, > void *arg, __u16 nc_flags); > #define rtnl_dump_filter(rth, filter, arg) \ > rtnl_dump_filter_nc(rth, filter, arg, 0) > + > +extern int msg_iov_index; > +static inline int get_msg_iov_index(void) > +{ > + return msg_iov_index; > +} > +static inline void set_msg_iov_index(int index) > +{ > + msg_iov_index = index; > +} > +static inline void incr_msg_iov_index(void) > +{ > + ++msg_iov_index; > +} > + > +extern int batch_size; > +static inline int get_batch_size(void) > +{ > + return batch_size; > +} > +static inline void set_batch_size(int size) > +{ > + batch_size = size; > +} Iproute2 is C not C++; no accessors for every variable. > int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, > struct nlmsghdr **answer) > __attribute__((warn_unused_result)); > diff --git a/include/utils.h b/include/utils.h > index d3895d56..66cb4747 100644 > --- a/include/utils.h > +++ b/include/utils.h > @@ -235,6 +235,14 @@ void print_nlmsg_timestamp(FILE *fp, const struct > nlmsghdr *n); > > extern int cmdlineno; > ssize_t getcmdline(char **line, size_t *len, FILE *in); > + > +extern int cmdlinetotal; > +static inline int getcmdlinetotal(void) > +{ > + return cmdlinetotal; > +} > +void setcmdlinetotal(const char *name); > + > int makeargs(char *line, char *argv[], int maxargs); > int inet_get_addr(const char *src, __u32 *dst, struct in6_addr *dst6); > > diff --git a/lib/libnetlink.c b/lib/libnetlink.c > index 00e6ce0c..f9be1c6d 100644 > --- a/lib/libnetlink.c > +++ b/lib/libnetlink.c > @@ -24,6 +24,7 @@ > #include <sys/uio.h> > > #include "libnetlink.h" > +#include "utils.h" > > #ifndef SOL_NETLINK > #define SOL_NETLINK 270 > @@ -581,6 +582,10 @@ static void rtnl_talk_error(struct nlmsghdr *h, struct > nlmsgerr *err, > strerror(-err->error)); > } > > +static struct iovec msg_iov[MSG_IOV_MAX]; > +int msg_iov_index; > +int batch_size = 1; > + > static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, > struct nlmsghdr **answer, > bool show_rtnl_err, nl_ext_ack_fn_t errfn) > @@ -589,23 +594,34 @@ static int __rtnl_talk(struct rtnl_handle *rtnl, struct > nlmsghdr *n, > unsigned int seq; > struct nlmsghdr *h; > struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; > - struct iovec iov = { > - .iov_base = n, > - .iov_len = n->nlmsg_len > - }; > + struct iovec *iov = &msg_iov[get_msg_iov_index()]; > + int index; > + char *buf; > + > + iov->iov_base = n; > + iov->iov_len = n->nlmsg_len; > + > + incr_msg_iov_index(); > struct msghdr msg = { > .msg_name = &nladdr, > .msg_namelen = sizeof(nladdr), > - .msg_iov = &iov, > - .msg_iovlen = 1, > + .msg_iov = msg_iov, > + .msg_iovlen = get_msg_iov_index(), > }; > - char *buf; > > n->nlmsg_seq = seq = ++rtnl->seq; > > if (answer == NULL) > n->nlmsg_flags |= NLM_F_ACK; Your real performance win is just not asking for ACK for every rule. If you switched to pure streaming mode, then the batching wouldn't be necessary. > > + index = get_msg_iov_index() % get_batch_size(); > + if (index != 0 && get_batch_size() > 1 && > + cmdlineno != getcmdlinetotal() && > + (n->nlmsg_type == RTM_NEWTFILTER || > + n->nlmsg_type == RTM_NEWACTION)) > + return 3; > + set_msg_iov_index(index); > + > status = sendmsg(rtnl->fd, &msg, 0); > if (status < 0) { > perror("Cannot talk to rtnetlink"); > diff --git a/lib/utils.c b/lib/utils.c > index 7ced8c06..53ca389f 100644 > --- a/lib/utils.c > +++ b/lib/utils.c > @@ -1202,6 +1202,26 @@ ssize_t getcmdline(char **linep, size_t *lenp, FILE > *in) > return cc; > } > > +int cmdlinetotal; > + > +void setcmdlinetotal(const char *name) > +{ > + char *line = NULL; > + size_t len = 0; > + > + if (name && strcmp(name, "-") != 0) { > + if (freopen(name, "r", stdin) == NULL) { > + fprintf(stderr, "Cannot open file \"%s\" for reading: > %s\n", > + name, strerror(errno)); > + return; > + } > + } > + > + cmdlinetotal = 0; > + while (getcmdline(&line, &len, stdin) != -1) > + cmdlinetotal++; > +} > + > /* split command line into argument vector */ > int makeargs(char *line, char *argv[], int maxargs) > { > diff --git a/man/man8/tc.8 b/man/man8/tc.8 > index ff071b33..de137e16 100644 > --- a/man/man8/tc.8 > +++ b/man/man8/tc.8 > @@ -601,6 +601,11 @@ must exist already. > read commands from provided file or standard input and invoke them. > First failure will cause termination of tc. > > +.TP > +.BR "\-bs", " \-bs size", " \-batchsize", " \-batchsize size" > +How many commands are accumulated before sending to kernel. > +By default, it is 1. It only takes effect in batch mode. > + > .TP > .BR "\-force" > don't terminate tc on errors in batch mode. > diff --git a/tc/m_action.c b/tc/m_action.c > index 13f942bf..574b78ca 100644 > --- a/tc/m_action.c > +++ b/tc/m_action.c > @@ -23,6 +23,7 @@ > #include <arpa/inet.h> > #include <string.h> > #include <dlfcn.h> > +#include <errno.h> > > #include "utils.h" > #include "tc_common.h" > @@ -546,33 +547,65 @@ bad_val: > return ret; > } > > +typedef struct { > + struct nlmsghdr n; > + struct tcamsg t; > + char buf[MAX_MSG]; > +} tc_action_req; > + > +static tc_action_req *action_reqs; > + > +void free_action_reqs(void) > +{ > + if (action_reqs) > + free(action_reqs); > +} > + > +static tc_action_req *get_action_req(void) > +{ > + tc_action_req *req; > + > + if (action_reqs == NULL) { > + action_reqs = malloc(get_batch_size() * sizeof (tc_action_req)); > + if (action_reqs == NULL) > + return NULL; > + } > + req = &action_reqs[get_msg_iov_index()]; > + memset(req, 0, sizeof (*req)); > + > + return req; > +} > + > static int tc_action_modify(int cmd, unsigned int flags, > int *argc_p, char ***argv_p) > { > int argc = *argc_p; > char **argv = *argv_p; > int ret = 0; > - struct { > - struct nlmsghdr n; > - struct tcamsg t; > - char buf[MAX_MSG]; > - } req = { > - .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)), > - .n.nlmsg_flags = NLM_F_REQUEST | flags, > - .n.nlmsg_type = cmd, > - .t.tca_family = AF_UNSPEC, > - }; > - struct rtattr *tail = NLMSG_TAIL(&req.n); > + tc_action_req *req; > + > + req = get_action_req(); > + if (req == NULL) { > + fprintf(stderr, "get_action_req error: not enough buffer\n"); > + return -ENOMEM; > + } > + > + req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcamsg)); > + req->n.nlmsg_flags = NLM_F_REQUEST | flags; > + req->n.nlmsg_type = cmd; > + req->t.tca_family = AF_UNSPEC; > + struct rtattr *tail = NLMSG_TAIL(&req->n); > > argc -= 1; > argv += 1; > - if (parse_action(&argc, &argv, TCA_ACT_TAB, &req.n)) { > + if (parse_action(&argc, &argv, TCA_ACT_TAB, &req->n)) { > fprintf(stderr, "Illegal \"action\"\n"); > return -1; > } > - tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail; > + tail->rta_len = (void *) NLMSG_TAIL(&req->n) - (void *) tail; > > - if (rtnl_talk(&rth, &req.n, NULL) < 0) { > + ret = rtnl_talk(&rth, &req->n, NULL); > + if (ret < 0) { > fprintf(stderr, "We have an error talking to the kernel\n"); > ret = -1; > } > @@ -739,5 +772,5 @@ int do_action(int argc, char **argv) > return -1; > } > > - return 0; > + return ret; > } > diff --git a/tc/tc.c b/tc/tc.c > index ad9f07e9..9c8e828b 100644 > --- a/tc/tc.c > +++ b/tc/tc.c > @@ -189,7 +189,7 @@ static void usage(void) > fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n" > " tc [-force] -batch filename\n" > "where OBJECT := { qdisc | class | filter | action | > monitor | exec }\n" > - " OPTIONS := { -s[tatistics] | -d[etails] | > -r[aw] | -p[retty] | -b[atch] [filename] | -n[etns] name |\n" > + " OPTIONS := { -s[tatistics] | -d[etails] | > -r[aw] | -p[retty] | -b[atch] [filename] | -bs | -batchsize [size] | -n[etns] > name |\n" > " -nm | -nam[es] | { -cf | -conf } > path } | -j[son]\n"); > } > > @@ -223,6 +223,9 @@ static int batch(const char *name) > size_t len = 0; > int ret = 0; > > + if (get_batch_size() > 1) > + setcmdlinetotal(name); > + > batch_mode = 1; > if (name && strcmp(name, "-") != 0) { > if (freopen(name, "r", stdin) == NULL) { > @@ -248,15 +251,22 @@ static int batch(const char *name) > if (largc == 0) > continue; /* blank line */ > > - if (do_cmd(largc, largv)) { > + ret = do_cmd(largc, largv); > + if (ret != 0 && ret != 3) { > fprintf(stderr, "Command failed %s:%d\n", name, > cmdlineno); > ret = 1; > if (!force) > break; > } > } > + if (ret == 3) > + fprintf(stderr, > + "Command is not sent to kernel due to internal error > %s:%d\n", > + name, cmdlineno); > if (line) > free(line); > + free_filter_reqs(); > + free_action_reqs(); > > rtnl_close(&rth); > return ret; > @@ -267,6 +277,7 @@ int main(int argc, char **argv) > { > int ret; > char *batch_file = NULL; > + int size; > > while (argc > 1) { > if (argv[1][0] != '-') > @@ -297,6 +308,16 @@ int main(int argc, char **argv) > if (argc <= 1) > usage(); > batch_file = argv[1]; > + } else if (matches(argv[1], "-batchsize") == 0 || > + matches(argv[1], "-bs") == 0) { > + argc--; argv++; > + if (argc <= 1) > + usage(); > + size = atoi(argv[1]); > + if (size > MSG_IOV_MAX) > + set_batch_size(MSG_IOV_MAX); > + else > + set_batch_size(size); > } else if (matches(argv[1], "-netns") == 0) { > NEXT_ARG(); > if (netns_switch(argv[1])) > @@ -342,6 +363,8 @@ int main(int argc, char **argv) > } > > ret = do_cmd(argc-1, argv+1); > + free_filter_reqs(); > + free_action_reqs(); > Exit: > rtnl_close(&rth); > > diff --git a/tc/tc_common.h b/tc/tc_common.h > index 264fbdac..fde8db1b 100644 > --- a/tc/tc_common.h > +++ b/tc/tc_common.h > @@ -24,5 +24,8 @@ struct tc_sizespec; > extern int parse_size_table(int *p_argc, char ***p_argv, struct tc_sizespec > *s); > extern int check_size_table_opts(struct tc_sizespec *s); > > +extern void free_filter_reqs(void); > +extern void free_action_reqs(void); > + > extern int show_graph; > extern bool use_names; > diff --git a/tc/tc_filter.c b/tc/tc_filter.c > index 545cc3a1..dc53c128 100644 > --- a/tc/tc_filter.c > +++ b/tc/tc_filter.c > @@ -19,6 +19,7 @@ > #include <arpa/inet.h> > #include <string.h> > #include <linux/if_ether.h> > +#include <errno.h> > > #include "rt_names.h" > #include "utils.h" > @@ -42,18 +43,51 @@ static void usage(void) > "OPTIONS := ... try tc filter add <desired FILTER_KIND> > help\n"); > } > > +typedef struct { > + struct nlmsghdr n; > + struct tcmsg t; > + char buf[MAX_MSG]; > +} tc_filter_req; > + > +static tc_filter_req *filter_reqs; > + > +void free_filter_reqs(void) > +{ > + if (filter_reqs) > + free(filter_reqs); > +} You don't need to check for NULL when calling free. free(NULL) is a nop.