On Fri, Jul 12, 2013 at 2:54 PM, Ben Pfaff <b...@nicira.com> wrote:

> This disentangles "struct nl_dump" from "struct nl_sock", clearing the way
> to make the use of either one thread-safe in an obviously correct manner.
>
> Signed-off-by: Ben Pfaff <b...@nicira.com>
> ---
>  lib/dpif-linux.c     |   20 ++---
>  lib/netdev-linux.c   |   18 +----
>  lib/netlink-socket.c |  201
> +++++++++++++++++++++++--------------------------
>  lib/netlink-socket.h |    9 ++-
>  lib/route-table.c    |   26 +------
>  5 files changed, 115 insertions(+), 159 deletions(-)
>
> diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
> index 804a90f..958873c 100644
> --- a/lib/dpif-linux.c
> +++ b/lib/dpif-linux.c
> @@ -162,7 +162,6 @@ static int ovs_flow_family;
>  static int ovs_packet_family;
>
>  /* Generic Netlink socket. */
> -static struct nl_sock *genl_sock;
>  static struct nln *nln = NULL;
>
>  static int dpif_linux_init(void);
> @@ -692,7 +691,7 @@ dpif_linux_port_dump_start(const struct dpif *dpif_,
> void **statep)
>
>      buf = ofpbuf_new(1024);
>      dpif_linux_vport_to_ofpbuf(&request, buf);
> -    nl_dump_start(&state->dump, genl_sock, buf);
> +    nl_dump_start(&state->dump, NETLINK_GENERIC, buf);
>      ofpbuf_delete(buf);
>
>      return 0;
> @@ -898,7 +897,7 @@ dpif_linux_flow_dump_start(const struct dpif *dpif_,
> void **statep)
>
>      buf = ofpbuf_new(1024);
>      dpif_linux_flow_to_ofpbuf(&request, buf);
> -    nl_dump_start(&state->dump, genl_sock, buf);
> +    nl_dump_start(&state->dump, NETLINK_GENERIC, buf);
>      ofpbuf_delete(buf);
>
>      state->buf = NULL;
> @@ -1005,7 +1004,7 @@ dpif_linux_execute__(int dp_ifindex, const struct
> dpif_execute *execute)
>
>      ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
>      dpif_linux_encode_execute(dp_ifindex, execute, &request);
> -    error = nl_sock_transact(genl_sock, &request, NULL);
> +    error = nl_transact(NETLINK_GENERIC, &request, NULL);
>      ofpbuf_uninit(&request);
>
>      return error;
> @@ -1090,7 +1089,7 @@ dpif_linux_operate__(struct dpif *dpif_, struct
> dpif_op **ops, size_t n_ops)
>      for (i = 0; i < n_ops; i++) {
>          txnsp[i] = &auxes[i].txn;
>      }
> -    nl_sock_transact_multiple(genl_sock, txnsp, n_ops);
> +    nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops);
>
>      for (i = 0; i < n_ops; i++) {
>          struct op_auxdata *aux = &auxes[i];
> @@ -1464,9 +1463,6 @@ dpif_linux_init(void)
>                                            &ovs_packet_family);
>          }
>          if (!error) {
> -            error = nl_sock_create(NETLINK_GENERIC, &genl_sock);
> -        }
> -        if (!error) {
>              error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY,
> OVS_VPORT_MCGROUP,
>                                             &ovs_vport_mcgroup,
>                                             OVS_VPORT_MCGROUP_FALLBACK_ID);
> @@ -1659,7 +1655,7 @@ dpif_linux_vport_transact(const struct
> dpif_linux_vport *request,
>
>      request_buf = ofpbuf_new(1024);
>      dpif_linux_vport_to_ofpbuf(request, request_buf);
> -    error = nl_sock_transact(genl_sock, request_buf, bufp);
> +    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
>      ofpbuf_delete(request_buf);
>
>      if (reply) {
> @@ -1780,7 +1776,7 @@ dpif_linux_dp_dump_start(struct nl_dump *dump)
>
>      buf = ofpbuf_new(1024);
>      dpif_linux_dp_to_ofpbuf(&request, buf);
> -    nl_dump_start(dump, genl_sock, buf);
> +    nl_dump_start(dump, NETLINK_GENERIC, buf);
>      ofpbuf_delete(buf);
>  }
>
> @@ -1801,7 +1797,7 @@ dpif_linux_dp_transact(const struct dpif_linux_dp
> *request,
>
>      request_buf = ofpbuf_new(1024);
>      dpif_linux_dp_to_ofpbuf(request, request_buf);
> -    error = nl_sock_transact(genl_sock, request_buf, bufp);
> +    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
>      ofpbuf_delete(request_buf);
>
>      if (reply) {
> @@ -1965,7 +1961,7 @@ dpif_linux_flow_transact(struct dpif_linux_flow
> *request,
>
>      request_buf = ofpbuf_new(1024);
>      dpif_linux_flow_to_ofpbuf(request, request_buf);
> -    error = nl_sock_transact(genl_sock, request_buf, bufp);
> +    error = nl_transact(NETLINK_GENERIC, request_buf, bufp);
>      ofpbuf_delete(request_buf);
>
>      if (reply) {
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 8790f14..197e51d 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -409,9 +409,6 @@ static const struct netdev_rx_class
> netdev_rx_linux_class;
>  /* Sockets used for ioctl operations. */
>  static int af_inet_sock = -1;   /* AF_INET, SOCK_DGRAM. */
>
> -/* A Netlink routing socket that is not subscribed to any multicast
> groups. */
> -static struct nl_sock *rtnl_sock;
> -
>  /* This is set pretty low because we probably won't learn anything from
> the
>   * additional log messages. */
>  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> @@ -477,15 +474,6 @@ netdev_linux_init(void)
>          if (status) {
>              VLOG_ERR("failed to create inet socket: %s",
> ovs_strerror(status));
>          }
> -
> -        /* Create rtnetlink socket. */
> -        if (!status) {
> -            status = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
> -            if (status) {
> -                VLOG_ERR_RL(&rl, "failed to create rtnetlink socket: %s",
> -                            ovs_strerror(status));
> -            }
> -        }
>      }
>      return status;
>  }
> @@ -2027,7 +2015,7 @@ start_queue_dump(const struct netdev *netdev, struct
> nl_dump *dump)
>          return false;
>      }
>      tcmsg->tcm_parent = 0;
> -    nl_dump_start(dump, rtnl_sock, &request);
> +    nl_dump_start(dump, NETLINK_ROUTE, &request);
>      ofpbuf_uninit(&request);
>      return true;
>  }
> @@ -3646,7 +3634,7 @@ tc_make_request(const struct netdev *netdev, int
> type, unsigned int flags,
>  static int
>  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
>  {
> -    int error = nl_sock_transact(rtnl_sock, request, replyp);
> +    int error = nl_transact(NETLINK_ROUTE, request, replyp);
>
     ofpbuf_uninit(request);
>      return error;
>  }
> @@ -4322,7 +4310,7 @@ get_stats_via_netlink(int ifindex, struct
> netdev_stats *stats)
>      ifi = ofpbuf_put_zeros(&request, sizeof *ifi);
>      ifi->ifi_family = PF_UNSPEC;
>      ifi->ifi_index = ifindex;
> -    error = nl_sock_transact(rtnl_sock, &request, &reply);
> +    error = nl_transact(NETLINK_ROUTE, &request, &reply);
>      ofpbuf_uninit(&request);
>      if (error) {
>          return error;
> diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
> index aa7fca2..8e08841 100644
> --- a/lib/netlink-socket.c
> +++ b/lib/netlink-socket.c
> @@ -63,7 +63,6 @@ struct nl_sock {
>      uint32_t next_seq;
>      uint32_t pid;
>      int protocol;
> -    struct nl_dump *dump;
>      unsigned int rcvbuf;        /* Receive buffer size (SO_RCVBUF). */
>  };
>
> @@ -77,11 +76,12 @@ struct nl_sock {
>   * Initialized by nl_sock_create(). */
>  static int max_iovs;
>
> -static int nl_sock_cow__(struct nl_sock *);
> +static int nl_pool_alloc(int protocol, struct nl_sock **sockp);
> +static void nl_pool_release(struct nl_sock *);
>
>  /* Creates a new netlink socket for the given netlink 'protocol'
>   * (NETLINK_ROUTE, NETLINK_GENERIC, ...).  Returns 0 and sets '*sockp' to
> the
> - * new socket if successful, otherwise returns a positive errno value.  */
> + * new socket if successful, otherwise returns a positive errno value. */
>  int
>  nl_sock_create(int protocol, struct nl_sock **sockp)
>  {
> @@ -117,7 +117,6 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
>          goto error;
>      }
>      sock->protocol = protocol;
> -    sock->dump = NULL;
>      sock->next_seq = 1;
>
>      rcvbuf = 1024 * 1024;
> @@ -191,12 +190,8 @@ void
>  nl_sock_destroy(struct nl_sock *sock)
>  {
>      if (sock) {
> -        if (sock->dump) {
> -            sock->dump = NULL;
> -        } else {
> -            close(sock->fd);
> -            free(sock);
> -        }
> +        close(sock->fd);
> +        free(sock);
>      }
>  }
>
> @@ -214,10 +209,6 @@ nl_sock_destroy(struct nl_sock *sock)
>  int
>  nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group)
>  {
> -    int error = nl_sock_cow__(sock);
> -    if (error) {
> -        return error;
> -    }
>      if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
>                     &multicast_group, sizeof multicast_group) < 0) {
>          VLOG_WARN("could not join multicast group %u (%s)",
> @@ -240,7 +231,6 @@ nl_sock_join_mcgroup(struct nl_sock *sock, unsigned
> int multicast_group)
>  int
>  nl_sock_leave_mcgroup(struct nl_sock *sock, unsigned int multicast_group)
>  {
> -    ovs_assert(!sock->dump);
>      if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP,
>                     &multicast_group, sizeof multicast_group) < 0) {
>          VLOG_WARN("could not leave multicast group %u (%s)",
> @@ -301,10 +291,6 @@ int
>  nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg,
>                   uint32_t nlmsg_seq, bool wait)
>  {
> -    int error = nl_sock_cow__(sock);
> -    if (error) {
> -        return error;
> -    }
>      return nl_sock_send__(sock, msg, nlmsg_seq, wait);
>  }
>
> @@ -395,10 +381,6 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf
> *buf, bool wait)
>  int
>  nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
>  {
> -    int error = nl_sock_cow__(sock);
> -    if (error) {
> -        return error;
> -    }
>      return nl_sock_recv__(sock, buf, wait);
>  }
>
> @@ -571,12 +553,6 @@ nl_sock_transact_multiple(struct nl_sock *sock,
>          return;
>      }
>
> -    error = nl_sock_cow__(sock);
> -    if (error) {
> -        nl_sock_record_errors__(transactions, n, error);
> -        return;
> -    }
> -
>      /* In theory, every request could have a 64 kB reply.  But the
> default and
>       * maximum socket rcvbuf size with typical Dom0 memory sizes both
> tend to
>       * be a bit below 128 kB, so that would only allow a single message
> in a
> @@ -690,93 +666,36 @@ nl_sock_transact(struct nl_sock *sock, const struct
> ofpbuf *request,
>  int
>  nl_sock_drain(struct nl_sock *sock)
>  {
> -    int error = nl_sock_cow__(sock);
> -    if (error) {
> -        return error;
> -    }
>      return drain_rcvbuf(sock->fd);
>  }
>
> -/* The client is attempting some operation on 'sock'.  If 'sock' has an
> ongoing
> - * dump operation, then replace 'sock''s fd with a new socket and hand
> 'sock''s
> - * old fd over to the dump. */
> -static int
> -nl_sock_cow__(struct nl_sock *sock)
> -{
> -    struct nl_sock *copy;
> -    uint32_t tmp_pid;
> -    int tmp_fd;
> -    int error;
> -
> -    if (!sock->dump) {
> -        return 0;
> -    }
> -
> -    error = nl_sock_clone(sock, &copy);
> -    if (error) {
> -        return error;
> -    }
> -
> -    tmp_fd = sock->fd;
> -    sock->fd = copy->fd;
> -    copy->fd = tmp_fd;
> -
> -    tmp_pid = sock->pid;
> -    sock->pid = copy->pid;
> -    copy->pid = tmp_pid;
> -
> -    sock->dump->sock = copy;
> -    sock->dump = NULL;
> -
> -    return 0;
> -}
> -
> -/* Starts a Netlink "dump" operation, by sending 'request' to the kernel
> via
> - * 'sock', and initializes 'dump' to reflect the state of the operation.
> +/* Starts a Netlink "dump" operation, by sending 'request' to the kernel
> on a
> + * Netlink socket created with the given 'protocol', and initializes
> 'dump' to
> + * reflect the state of the operation.
>   *
>   * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid
> will
> - * be set to 'sock''s pid, before the message is sent.  NLM_F_DUMP and
> - * NLM_F_ACK will be set in nlmsg_flags.
> - *
> - * This Netlink socket library is designed to ensure that the dump is
> reliable
> - * and that it will not interfere with other operations on 'sock',
> including
> - * destroying or sending and receiving messages on 'sock'.  One corner
> case is
> - * not handled:
> + * be set to the Netlink socket's pid, before the message is sent.
>  NLM_F_DUMP
> + * and NLM_F_ACK will be set in nlmsg_flags.
>   *
> - *   - If 'sock' has been used to send a request (e.g. with
> nl_sock_send())
> - *     whose response has not yet been received (e.g. with
> nl_sock_recv()).
> - *     This is unusual: usually nl_sock_transact() is used to send a
> message
> - *     and receive its reply all in one go.
> + * The design of this Netlink socket library ensures that the dump is
> reliable.
>   *
>   * This function provides no status indication.  An error status for the
> entire
>   * dump operation is provided when it is completed by calling
> nl_dump_done().
>   *
>   * The caller is responsible for destroying 'request'.
> - *
> - * The new 'dump' is independent of 'sock'.  'sock' and 'dump' may be
> destroyed
> - * in either order.
>   */
>  void
> -nl_dump_start(struct nl_dump *dump,
> -              struct nl_sock *sock, const struct ofpbuf *request)
> +nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf
> *request)
>  {
>      ofpbuf_init(&dump->buffer, 4096);
> -    if (sock->dump) {
> -        /* 'sock' already has an ongoing dump.  Clone the socket because
> -         * Netlink only allows one dump at a time. */
> -        dump->status = nl_sock_clone(sock, &dump->sock);
> -        if (dump->status) {
> -            return;
> -        }
> -    } else {
> -        sock->dump = dump;
> -        dump->sock = sock;
> -        dump->status = 0;
> +    dump->status = nl_pool_alloc(protocol, &dump->sock);
> +    if (dump->status) {
> +        return;
>      }
>
>      nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
> -    dump->status = nl_sock_send__(sock, request,
> nl_sock_allocate_seq(sock, 1),
> -                                  true);
> +    dump->status = nl_sock_send__(dump->sock, request,
> +                                  nl_sock_allocate_seq(dump->sock, 1),
> true);
>      dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq;
>  }
>
> @@ -862,21 +781,16 @@ int
>  nl_dump_done(struct nl_dump *dump)
>  {
>      /* Drain any remaining messages that the client didn't read.
>  Otherwise the
> -     * kernel will continue to queue them up and waste buffer space. */
> +     * kernel will continue to queue them up and waste buffer space.
> +     *
> +     * XXX We could just destroy and discard the socket in this case. */
>      while (!dump->status) {
>          struct ofpbuf reply;
>          if (!nl_dump_next(dump, &reply)) {
>              ovs_assert(dump->status);
>          }
>      }
> -
> -    if (dump->sock) {
> -        if (dump->sock->dump) {
> -            dump->sock->dump = NULL;
> -        } else {
> -            nl_sock_destroy(dump->sock);
> -        }
> -    }
> +    nl_pool_release(dump->sock);
>      ofpbuf_uninit(&dump->buffer);
>      return dump->status == EOF ? 0 : dump->status;
>  }
> @@ -1090,6 +1004,79 @@ nl_lookup_genl_family(const char *name, int *number)
>      return *number > 0 ? 0 : -*number;
>  }
>
> +struct nl_pool {
> +    struct nl_sock *socks[16];
> +    int n;
> +};
> +
> +static struct nl_pool pools[MAX_LINKS];
>
+
> +static int
> +nl_pool_alloc(int protocol, struct nl_sock **sockp)
> +{
> +    struct nl_pool *pool;
> +
> +    ovs_assert(protocol >= 0 && protocol < ARRAY_SIZE(pools));
> +
> +    pool = &pools[protocol];
> +    if (pool->n > 0) {
> +        *sockp = pool->socks[--pool->n];
> +        return 0;
> +    } else {
> +        return nl_sock_create(protocol, sockp);
> +    }
> +}
> +
> +static void
> +nl_pool_release(struct nl_sock *sock)
> +{
> +    if (sock) {
> +        struct nl_pool *pool = &pools[sock->protocol];
> +
> +        if (pool->n < ARRAY_SIZE(pool->socks)) {
> +            pool->socks[pool->n++] = sock;
> +        } else {
> +            nl_sock_destroy(sock);
> +        }
> +    }
> +}
> +
> +int
> +nl_transact(int protocol, const struct ofpbuf *request,
> +            struct ofpbuf **replyp)
> +{
> +    struct nl_sock *sock;
> +    int error;
> +
> +    error = nl_pool_alloc(protocol, &sock);
> +    if (error) {
> +        *replyp = NULL;
> +        return error;
> +    }
> +
> +    error = nl_sock_transact(sock, request, replyp);
> +
> +    nl_pool_release(sock);
> +    return error;
> +}
> +
> +void
> +nl_transact_multiple(int protocol,
> +                     struct nl_transaction **transactions, size_t n)
> +{
> +    struct nl_sock *sock;
> +    int error;
> +
> +    error = nl_pool_alloc(protocol, &sock);
> +    if (!error) {
> +        nl_sock_transact_multiple(sock, transactions, n);
> +        nl_pool_release(sock);
> +    } else {
> +        nl_sock_record_errors__(transactions, n, error);
> +    }
> +}
> +
> +
>  static uint32_t
>  nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n)
>  {
> diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
> index 78dd7b2..c77050e 100644
> --- a/lib/netlink-socket.h
> +++ b/lib/netlink-socket.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
> + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -84,6 +84,11 @@ struct nl_transaction {
>  void nl_sock_transact_multiple(struct nl_sock *,
>                                 struct nl_transaction **, size_t n);
>
> +/* Transactions without an allocated socket. */
> +int nl_transact(int protocol, const struct ofpbuf *request,
> +                struct ofpbuf **replyp);
> +void nl_transact_multiple(int protocol, struct nl_transaction **, size_t
> n);
> +
>  /* Table dumping. */
>  struct nl_dump {
>      struct nl_sock *sock;       /* Socket being dumped. */
> @@ -92,7 +97,7 @@ struct nl_dump {
>      int status;                 /* 0=OK, EOF=done, or positive errno
> value. */
>  };
>
> -void nl_dump_start(struct nl_dump *, struct nl_sock *,
> +void nl_dump_start(struct nl_dump *, int protocol,
>                     const struct ofpbuf *request);
>  bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply);
>  int nl_dump_done(struct nl_dump *);
> diff --git a/lib/route-table.c b/lib/route-table.c
> index 5891ae8..d572e8c 100644
> --- a/lib/route-table.c
> +++ b/lib/route-table.c
> @@ -221,22 +221,13 @@ route_table_wait(void)
>  static int
>  route_table_reset(void)
>  {
> -    int error;
>      struct nl_dump dump;
>      struct rtgenmsg *rtmsg;
>      struct ofpbuf request, reply;
> -    struct nl_sock *rtnl_sock;
>
>      route_map_clear();
>      route_table_valid = true;
>
> -    error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
> -    if (error) {
> -        VLOG_WARN_RL(&rl, "failed to reset routing table, "
> -                     "cannot create RTNETLINK_ROUTE socket");
> -        return error;
> -    }
> -
>      ofpbuf_init(&request, 0);
>
>      nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE,
> NLM_F_REQUEST);
> @@ -244,7 +235,7 @@ route_table_reset(void)
>      rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
>      rtmsg->rtgen_family = AF_INET;
>
> -    nl_dump_start(&dump, rtnl_sock, &request);
> +    nl_dump_start(&dump, NETLINK_ROUTE, &request);
>      ofpbuf_uninit(&request);
>
>      while (nl_dump_next(&dump, &reply)) {
> @@ -255,10 +246,7 @@ route_table_reset(void)
>          }
>      }
>
> -    error = nl_dump_done(&dump);
> -    nl_sock_destroy(rtnl_sock);
> -
> -    return error;
> +    return nl_dump_done(&dump);
>  }
>
>
> @@ -417,26 +405,19 @@ name_table_uninit(void)
>  static int
>  name_table_reset(void)
>  {
> -    int error;
>      struct nl_dump dump;
>      struct rtgenmsg *rtmsg;
>      struct ofpbuf request, reply;
> -    struct nl_sock *rtnl_sock;
>
>      name_table_valid = true;
>      name_map_clear();
> -    error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
> -    if (error) {
> -        VLOG_WARN_RL(&rl, "failed to create NETLINK_ROUTE socket");
> -        return error;
> -    }
>
>      ofpbuf_init(&request, 0);
>      nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK,
> NLM_F_REQUEST);
>      rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
>      rtmsg->rtgen_family = AF_INET;
>
> -    nl_dump_start(&dump, rtnl_sock, &request);
> +    nl_dump_start(&dump, NETLINK_ROUTE, &request);
>      ofpbuf_uninit(&request);
>
>      while (nl_dump_next(&dump, &reply)) {
> @@ -453,7 +434,6 @@ name_table_reset(void)
>              hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0));
>          }
>      }
> -    nl_sock_destroy(rtnl_sock);
>      return nl_dump_done(&dump);
>  }
>
> --
>
Looks good to me.
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to