On Fri, Jul 12, 2013 at 2:54 PM, Ben Pfaff <b...@nicira.com> wrote:
> This disentangles "struct nl_dump" from "struct nl_sock", clearing the way > to make the use of either one thread-safe in an obviously correct manner. > > Signed-off-by: Ben Pfaff <b...@nicira.com> > --- > lib/dpif-linux.c | 20 ++--- > lib/netdev-linux.c | 18 +---- > lib/netlink-socket.c | 201 > +++++++++++++++++++++++-------------------------- > lib/netlink-socket.h | 9 ++- > lib/route-table.c | 26 +------ > 5 files changed, 115 insertions(+), 159 deletions(-) > > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > index 804a90f..958873c 100644 > --- a/lib/dpif-linux.c > +++ b/lib/dpif-linux.c > @@ -162,7 +162,6 @@ static int ovs_flow_family; > static int ovs_packet_family; > > /* Generic Netlink socket. */ > -static struct nl_sock *genl_sock; > static struct nln *nln = NULL; > > static int dpif_linux_init(void); > @@ -692,7 +691,7 @@ dpif_linux_port_dump_start(const struct dpif *dpif_, > void **statep) > > buf = ofpbuf_new(1024); > dpif_linux_vport_to_ofpbuf(&request, buf); > - nl_dump_start(&state->dump, genl_sock, buf); > + nl_dump_start(&state->dump, NETLINK_GENERIC, buf); > ofpbuf_delete(buf); > > return 0; > @@ -898,7 +897,7 @@ dpif_linux_flow_dump_start(const struct dpif *dpif_, > void **statep) > > buf = ofpbuf_new(1024); > dpif_linux_flow_to_ofpbuf(&request, buf); > - nl_dump_start(&state->dump, genl_sock, buf); > + nl_dump_start(&state->dump, NETLINK_GENERIC, buf); > ofpbuf_delete(buf); > > state->buf = NULL; > @@ -1005,7 +1004,7 @@ dpif_linux_execute__(int dp_ifindex, const struct > dpif_execute *execute) > > ofpbuf_use_stub(&request, request_stub, sizeof request_stub); > dpif_linux_encode_execute(dp_ifindex, execute, &request); > - error = nl_sock_transact(genl_sock, &request, NULL); > + error = nl_transact(NETLINK_GENERIC, &request, NULL); > ofpbuf_uninit(&request); > > return error; > @@ -1090,7 +1089,7 @@ dpif_linux_operate__(struct dpif *dpif_, struct > dpif_op **ops, size_t n_ops) > for (i = 0; i < n_ops; i++) { > txnsp[i] = &auxes[i].txn; > } > - nl_sock_transact_multiple(genl_sock, txnsp, n_ops); > + nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops); > > for (i = 0; i < n_ops; i++) { > struct op_auxdata *aux = &auxes[i]; > @@ -1464,9 +1463,6 @@ dpif_linux_init(void) > &ovs_packet_family); > } > if (!error) { > - error = nl_sock_create(NETLINK_GENERIC, &genl_sock); > - } > - if (!error) { > error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, > OVS_VPORT_MCGROUP, > &ovs_vport_mcgroup, > OVS_VPORT_MCGROUP_FALLBACK_ID); > @@ -1659,7 +1655,7 @@ dpif_linux_vport_transact(const struct > dpif_linux_vport *request, > > request_buf = ofpbuf_new(1024); > dpif_linux_vport_to_ofpbuf(request, request_buf); > - error = nl_sock_transact(genl_sock, request_buf, bufp); > + error = nl_transact(NETLINK_GENERIC, request_buf, bufp); > ofpbuf_delete(request_buf); > > if (reply) { > @@ -1780,7 +1776,7 @@ dpif_linux_dp_dump_start(struct nl_dump *dump) > > buf = ofpbuf_new(1024); > dpif_linux_dp_to_ofpbuf(&request, buf); > - nl_dump_start(dump, genl_sock, buf); > + nl_dump_start(dump, NETLINK_GENERIC, buf); > ofpbuf_delete(buf); > } > > @@ -1801,7 +1797,7 @@ dpif_linux_dp_transact(const struct dpif_linux_dp > *request, > > request_buf = ofpbuf_new(1024); > dpif_linux_dp_to_ofpbuf(request, request_buf); > - error = nl_sock_transact(genl_sock, request_buf, bufp); > + error = nl_transact(NETLINK_GENERIC, request_buf, bufp); > ofpbuf_delete(request_buf); > > if (reply) { > @@ -1965,7 +1961,7 @@ dpif_linux_flow_transact(struct dpif_linux_flow > *request, > > request_buf = ofpbuf_new(1024); > dpif_linux_flow_to_ofpbuf(request, request_buf); > - error = nl_sock_transact(genl_sock, request_buf, bufp); > + error = nl_transact(NETLINK_GENERIC, request_buf, bufp); > ofpbuf_delete(request_buf); > > if (reply) { > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 8790f14..197e51d 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -409,9 +409,6 @@ static const struct netdev_rx_class > netdev_rx_linux_class; > /* Sockets used for ioctl operations. */ > static int af_inet_sock = -1; /* AF_INET, SOCK_DGRAM. */ > > -/* A Netlink routing socket that is not subscribed to any multicast > groups. */ > -static struct nl_sock *rtnl_sock; > - > /* This is set pretty low because we probably won't learn anything from > the > * additional log messages. */ > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > @@ -477,15 +474,6 @@ netdev_linux_init(void) > if (status) { > VLOG_ERR("failed to create inet socket: %s", > ovs_strerror(status)); > } > - > - /* Create rtnetlink socket. */ > - if (!status) { > - status = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); > - if (status) { > - VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s", > - ovs_strerror(status)); > - } > - } > } > return status; > } > @@ -2027,7 +2015,7 @@ start_queue_dump(const struct netdev *netdev, struct > nl_dump *dump) > return false; > } > tcmsg->tcm_parent = 0; > - nl_dump_start(dump, rtnl_sock, &request); > + nl_dump_start(dump, NETLINK_ROUTE, &request); > ofpbuf_uninit(&request); > return true; > } > @@ -3646,7 +3634,7 @@ tc_make_request(const struct netdev *netdev, int > type, unsigned int flags, > static int > tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) > { > - int error = nl_sock_transact(rtnl_sock, request, replyp); > + int error = nl_transact(NETLINK_ROUTE, request, replyp); > ofpbuf_uninit(request); > return error; > } > @@ -4322,7 +4310,7 @@ get_stats_via_netlink(int ifindex, struct > netdev_stats *stats) > ifi = ofpbuf_put_zeros(&request, sizeof *ifi); > ifi->ifi_family = PF_UNSPEC; > ifi->ifi_index = ifindex; > - error = nl_sock_transact(rtnl_sock, &request, &reply); > + error = nl_transact(NETLINK_ROUTE, &request, &reply); > ofpbuf_uninit(&request); > if (error) { > return error; > diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c > index aa7fca2..8e08841 100644 > --- a/lib/netlink-socket.c > +++ b/lib/netlink-socket.c > @@ -63,7 +63,6 @@ struct nl_sock { > uint32_t next_seq; > uint32_t pid; > int protocol; > - struct nl_dump *dump; > unsigned int rcvbuf; /* Receive buffer size (SO_RCVBUF). */ > }; > > @@ -77,11 +76,12 @@ struct nl_sock { > * Initialized by nl_sock_create(). */ > static int max_iovs; > > -static int nl_sock_cow__(struct nl_sock *); > +static int nl_pool_alloc(int protocol, struct nl_sock **sockp); > +static void nl_pool_release(struct nl_sock *); > > /* Creates a new netlink socket for the given netlink 'protocol' > * (NETLINK_ROUTE, NETLINK_GENERIC, ...). Returns 0 and sets '*sockp' to > the > - * new socket if successful, otherwise returns a positive errno value. */ > + * new socket if successful, otherwise returns a positive errno value. */ > int > nl_sock_create(int protocol, struct nl_sock **sockp) > { > @@ -117,7 +117,6 @@ nl_sock_create(int protocol, struct nl_sock **sockp) > goto error; > } > sock->protocol = protocol; > - sock->dump = NULL; > sock->next_seq = 1; > > rcvbuf = 1024 * 1024; > @@ -191,12 +190,8 @@ void > nl_sock_destroy(struct nl_sock *sock) > { > if (sock) { > - if (sock->dump) { > - sock->dump = NULL; > - } else { > - close(sock->fd); > - free(sock); > - } > + close(sock->fd); > + free(sock); > } > } > > @@ -214,10 +209,6 @@ nl_sock_destroy(struct nl_sock *sock) > int > nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group) > { > - int error = nl_sock_cow__(sock); > - if (error) { > - return error; > - } > if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, > &multicast_group, sizeof multicast_group) < 0) { > VLOG_WARN("could not join multicast group %u (%s)", > @@ -240,7 +231,6 @@ nl_sock_join_mcgroup(struct nl_sock *sock, unsigned > int multicast_group) > int > nl_sock_leave_mcgroup(struct nl_sock *sock, unsigned int multicast_group) > { > - ovs_assert(!sock->dump); > if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, > &multicast_group, sizeof multicast_group) < 0) { > VLOG_WARN("could not leave multicast group %u (%s)", > @@ -301,10 +291,6 @@ int > nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg, > uint32_t nlmsg_seq, bool wait) > { > - int error = nl_sock_cow__(sock); > - if (error) { > - return error; > - } > return nl_sock_send__(sock, msg, nlmsg_seq, wait); > } > > @@ -395,10 +381,6 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf > *buf, bool wait) > int > nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait) > { > - int error = nl_sock_cow__(sock); > - if (error) { > - return error; > - } > return nl_sock_recv__(sock, buf, wait); > } > > @@ -571,12 +553,6 @@ nl_sock_transact_multiple(struct nl_sock *sock, > return; > } > > - error = nl_sock_cow__(sock); > - if (error) { > - nl_sock_record_errors__(transactions, n, error); > - return; > - } > - > /* In theory, every request could have a 64 kB reply. But the > default and > * maximum socket rcvbuf size with typical Dom0 memory sizes both > tend to > * be a bit below 128 kB, so that would only allow a single message > in a > @@ -690,93 +666,36 @@ nl_sock_transact(struct nl_sock *sock, const struct > ofpbuf *request, > int > nl_sock_drain(struct nl_sock *sock) > { > - int error = nl_sock_cow__(sock); > - if (error) { > - return error; > - } > return drain_rcvbuf(sock->fd); > } > > -/* The client is attempting some operation on 'sock'. If 'sock' has an > ongoing > - * dump operation, then replace 'sock''s fd with a new socket and hand > 'sock''s > - * old fd over to the dump. */ > -static int > -nl_sock_cow__(struct nl_sock *sock) > -{ > - struct nl_sock *copy; > - uint32_t tmp_pid; > - int tmp_fd; > - int error; > - > - if (!sock->dump) { > - return 0; > - } > - > - error = nl_sock_clone(sock, ©); > - if (error) { > - return error; > - } > - > - tmp_fd = sock->fd; > - sock->fd = copy->fd; > - copy->fd = tmp_fd; > - > - tmp_pid = sock->pid; > - sock->pid = copy->pid; > - copy->pid = tmp_pid; > - > - sock->dump->sock = copy; > - sock->dump = NULL; > - > - return 0; > -} > - > -/* Starts a Netlink "dump" operation, by sending 'request' to the kernel > via > - * 'sock', and initializes 'dump' to reflect the state of the operation. > +/* Starts a Netlink "dump" operation, by sending 'request' to the kernel > on a > + * Netlink socket created with the given 'protocol', and initializes > 'dump' to > + * reflect the state of the operation. > * > * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid > will > - * be set to 'sock''s pid, before the message is sent. NLM_F_DUMP and > - * NLM_F_ACK will be set in nlmsg_flags. > - * > - * This Netlink socket library is designed to ensure that the dump is > reliable > - * and that it will not interfere with other operations on 'sock', > including > - * destroying or sending and receiving messages on 'sock'. One corner > case is > - * not handled: > + * be set to the Netlink socket's pid, before the message is sent. > NLM_F_DUMP > + * and NLM_F_ACK will be set in nlmsg_flags. > * > - * - If 'sock' has been used to send a request (e.g. with > nl_sock_send()) > - * whose response has not yet been received (e.g. with > nl_sock_recv()). > - * This is unusual: usually nl_sock_transact() is used to send a > message > - * and receive its reply all in one go. > + * The design of this Netlink socket library ensures that the dump is > reliable. > * > * This function provides no status indication. An error status for the > entire > * dump operation is provided when it is completed by calling > nl_dump_done(). > * > * The caller is responsible for destroying 'request'. > - * > - * The new 'dump' is independent of 'sock'. 'sock' and 'dump' may be > destroyed > - * in either order. > */ > void > -nl_dump_start(struct nl_dump *dump, > - struct nl_sock *sock, const struct ofpbuf *request) > +nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf > *request) > { > ofpbuf_init(&dump->buffer, 4096); > - if (sock->dump) { > - /* 'sock' already has an ongoing dump. Clone the socket because > - * Netlink only allows one dump at a time. */ > - dump->status = nl_sock_clone(sock, &dump->sock); > - if (dump->status) { > - return; > - } > - } else { > - sock->dump = dump; > - dump->sock = sock; > - dump->status = 0; > + dump->status = nl_pool_alloc(protocol, &dump->sock); > + if (dump->status) { > + return; > } > > nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK; > - dump->status = nl_sock_send__(sock, request, > nl_sock_allocate_seq(sock, 1), > - true); > + dump->status = nl_sock_send__(dump->sock, request, > + nl_sock_allocate_seq(dump->sock, 1), > true); > dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq; > } > > @@ -862,21 +781,16 @@ int > nl_dump_done(struct nl_dump *dump) > { > /* Drain any remaining messages that the client didn't read. > Otherwise the > - * kernel will continue to queue them up and waste buffer space. */ > + * kernel will continue to queue them up and waste buffer space. > + * > + * XXX We could just destroy and discard the socket in this case. */ > while (!dump->status) { > struct ofpbuf reply; > if (!nl_dump_next(dump, &reply)) { > ovs_assert(dump->status); > } > } > - > - if (dump->sock) { > - if (dump->sock->dump) { > - dump->sock->dump = NULL; > - } else { > - nl_sock_destroy(dump->sock); > - } > - } > + nl_pool_release(dump->sock); > ofpbuf_uninit(&dump->buffer); > return dump->status == EOF ? 0 : dump->status; > } > @@ -1090,6 +1004,79 @@ nl_lookup_genl_family(const char *name, int *number) > return *number > 0 ? 0 : -*number; > } > > +struct nl_pool { > + struct nl_sock *socks[16]; > + int n; > +}; > + > +static struct nl_pool pools[MAX_LINKS]; > + > +static int > +nl_pool_alloc(int protocol, struct nl_sock **sockp) > +{ > + struct nl_pool *pool; > + > + ovs_assert(protocol >= 0 && protocol < ARRAY_SIZE(pools)); > + > + pool = &pools[protocol]; > + if (pool->n > 0) { > + *sockp = pool->socks[--pool->n]; > + return 0; > + } else { > + return nl_sock_create(protocol, sockp); > + } > +} > + > +static void > +nl_pool_release(struct nl_sock *sock) > +{ > + if (sock) { > + struct nl_pool *pool = &pools[sock->protocol]; > + > + if (pool->n < ARRAY_SIZE(pool->socks)) { > + pool->socks[pool->n++] = sock; > + } else { > + nl_sock_destroy(sock); > + } > + } > +} > + > +int > +nl_transact(int protocol, const struct ofpbuf *request, > + struct ofpbuf **replyp) > +{ > + struct nl_sock *sock; > + int error; > + > + error = nl_pool_alloc(protocol, &sock); > + if (error) { > + *replyp = NULL; > + return error; > + } > + > + error = nl_sock_transact(sock, request, replyp); > + > + nl_pool_release(sock); > + return error; > +} > + > +void > +nl_transact_multiple(int protocol, > + struct nl_transaction **transactions, size_t n) > +{ > + struct nl_sock *sock; > + int error; > + > + error = nl_pool_alloc(protocol, &sock); > + if (!error) { > + nl_sock_transact_multiple(sock, transactions, n); > + nl_pool_release(sock); > + } else { > + nl_sock_record_errors__(transactions, n, error); > + } > +} > + > + > static uint32_t > nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n) > { > diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h > index 78dd7b2..c77050e 100644 > --- a/lib/netlink-socket.h > +++ b/lib/netlink-socket.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. > + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -84,6 +84,11 @@ struct nl_transaction { > void nl_sock_transact_multiple(struct nl_sock *, > struct nl_transaction **, size_t n); > > +/* Transactions without an allocated socket. */ > +int nl_transact(int protocol, const struct ofpbuf *request, > + struct ofpbuf **replyp); > +void nl_transact_multiple(int protocol, struct nl_transaction **, size_t > n); > + > /* Table dumping. */ > struct nl_dump { > struct nl_sock *sock; /* Socket being dumped. */ > @@ -92,7 +97,7 @@ struct nl_dump { > int status; /* 0=OK, EOF=done, or positive errno > value. */ > }; > > -void nl_dump_start(struct nl_dump *, struct nl_sock *, > +void nl_dump_start(struct nl_dump *, int protocol, > const struct ofpbuf *request); > bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply); > int nl_dump_done(struct nl_dump *); > diff --git a/lib/route-table.c b/lib/route-table.c > index 5891ae8..d572e8c 100644 > --- a/lib/route-table.c > +++ b/lib/route-table.c > @@ -221,22 +221,13 @@ route_table_wait(void) > static int > route_table_reset(void) > { > - int error; > struct nl_dump dump; > struct rtgenmsg *rtmsg; > struct ofpbuf request, reply; > - struct nl_sock *rtnl_sock; > > route_map_clear(); > route_table_valid = true; > > - error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); > - if (error) { > - VLOG_WARN_RL(&rl, "failed to reset routing table, " > - "cannot create RTNETLINK_ROUTE socket"); > - return error; > - } > - > ofpbuf_init(&request, 0); > > nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE, > NLM_F_REQUEST); > @@ -244,7 +235,7 @@ route_table_reset(void) > rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); > rtmsg->rtgen_family = AF_INET; > > - nl_dump_start(&dump, rtnl_sock, &request); > + nl_dump_start(&dump, NETLINK_ROUTE, &request); > ofpbuf_uninit(&request); > > while (nl_dump_next(&dump, &reply)) { > @@ -255,10 +246,7 @@ route_table_reset(void) > } > } > > - error = nl_dump_done(&dump); > - nl_sock_destroy(rtnl_sock); > - > - return error; > + return nl_dump_done(&dump); > } > > > @@ -417,26 +405,19 @@ name_table_uninit(void) > static int > name_table_reset(void) > { > - int error; > struct nl_dump dump; > struct rtgenmsg *rtmsg; > struct ofpbuf request, reply; > - struct nl_sock *rtnl_sock; > > name_table_valid = true; > name_map_clear(); > - error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock); > - if (error) { > - VLOG_WARN_RL(&rl, "failed to create NETLINK_ROUTE socket"); > - return error; > - } > > ofpbuf_init(&request, 0); > nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK, > NLM_F_REQUEST); > rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); > rtmsg->rtgen_family = AF_INET; > > - nl_dump_start(&dump, rtnl_sock, &request); > + nl_dump_start(&dump, NETLINK_ROUTE, &request); > ofpbuf_uninit(&request); > > while (nl_dump_next(&dump, &reply)) { > @@ -453,7 +434,6 @@ name_table_reset(void) > hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0)); > } > } > - nl_sock_destroy(rtnl_sock); > return nl_dump_done(&dump); > } > > -- > Looks good to me.
_______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev