[ovs-dev] [PATCH] ofproto-dpif-upcall: Suppress sparse warning.
Signed-off-by: Alex Wang --- ofproto/ofproto-dpif-upcall.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index f00c17f..654fbd3 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -757,7 +757,7 @@ upcall_init(struct upcall *upcall, struct flow *flow, struct ofpbuf *packet, xlate_actions(&xin, &upcall->xout); } -void +static void free_upcall(struct upcall *upcall) { xlate_out_uninit(&upcall->xout); @@ -845,7 +845,7 @@ read_upcalls(struct handler *handler, return n_upcalls; } -int +static int convert_upcall(struct udpif *udpif, struct upcall *upcall) { struct dpif_upcall *dupcall = &upcall->dpif_upcall; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Suppress sparse warning.
sorry for the confusion, please ignore my patch~ On Sun, Jul 27, 2014 at 6:29 PM, Justin Pettit wrote: > Your patch had two fixes, so he should probably grab yours. > > --Justin > > > > On Jul 27, 2014, at 5:37 PM, Ben Pfaff wrote: > > > > Yeah, but I'm happy if Alex wants to push it since I didn't get around to > > it yet. > >> On Jul 27, 2014 5:32 PM, "Joe Stringer" wrote: > >> > >> http://openvswitch.org/pipermail/dev/2014-July/043119.html > >> ___ > >> dev mailing list > >> dev@openvswitch.org > >> http://openvswitch.org/mailman/listinfo/dev > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/2] connmgr: Fix a typo.
Signed-off-by: Alex Wang --- ofproto/connmgr.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index b7d5d3b..c6432bf 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1496,7 +1496,7 @@ ofconn_wants_packet_in_on_miss(struct ofconn *ofconn, /* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the * packet rather than to send the packet to the controller. * - * This function returns false to indicate that a packet_in message + * This function returns true to indicate that a packet_in message * for a "table-miss" should be sent to at least one controller. * That is there is at least one controller with controller_id 0 * which connected using an OpenFlow version earlier than OpenFlow1.3. -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/2] connmgr: Make call to connmgr_wants_packet_in_on_miss() thread safe.
connmgr_wants_packet_in_on_miss() is called by multiple threads and thusly should be protected by the mutex. Signed-off-by: Alex Wang --- ofproto/connmgr.c |6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index c6432bf..89af6b6 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1506,19 +1506,23 @@ ofconn_wants_packet_in_on_miss(struct ofconn *ofconn, * This logic assumes that "table-miss" packet_in messages * are always sent to controller_id 0. */ bool -connmgr_wants_packet_in_on_miss(struct connmgr *mgr) +connmgr_wants_packet_in_on_miss(struct connmgr *mgr) OVS_EXCLUDED(ofproto_mutex) { struct ofconn *ofconn; +ovs_mutex_lock(&ofproto_mutex); LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); if (ofconn->controller_id == 0 && (protocol == OFPUTIL_P_NONE || ofputil_protocol_to_ofp_version(protocol) < OFP13_VERSION)) { +ovs_mutex_unlock(&ofproto_mutex); return true; } } +ovs_mutex_unlock(&ofproto_mutex); + return false; } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC] connmgr: Make connmgr_wants_packet_in_on_miss() only check controller connections.
The connmgr_wants_packet_in_on_miss() should only check 'ofconn's of type OFCONN_PRIMARY (i.e. controller connections). This commit makes it happen. Signed-off-by: Alex Wang --- ofproto/connmgr.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 89af6b6..49ced1f 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1512,13 +1512,15 @@ connmgr_wants_packet_in_on_miss(struct connmgr *mgr) OVS_EXCLUDED(ofproto_mutex) ovs_mutex_lock(&ofproto_mutex); LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { -enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); - -if (ofconn->controller_id == 0 && -(protocol == OFPUTIL_P_NONE || - ofputil_protocol_to_ofp_version(protocol) < OFP13_VERSION)) { -ovs_mutex_unlock(&ofproto_mutex); -return true; +if (ofconn->type == OFCONN_PRIMARY) { +enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); + +if (ofconn->controller_id == 0 && +(protocol == OFPUTIL_P_NONE || + ofputil_protocol_to_ofp_version(protocol) < OFP13_VERSION)) { +ovs_mutex_unlock(&ofproto_mutex); +return true; +} } } ovs_mutex_unlock(&ofproto_mutex); -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC] connmgr: Make connmgr_wants_packet_in_on_miss() only check controller connections.
Hey Simon, I think we should not check 'ofconn's for OFCONN_SERVICE... This change broke several tests: *Please send `tests/testsuite.log' and all information you think might help:* * To: >* * Subject: [openvswitch 2.3.0] testsuite: 737 739 747 817 failed* Wherein we use the ofctl monitor to simulate controller and capture the packet_ins. So, could you help me clarify if we need consider ofctl conn as a controller connection? Thanks, Alex Wang, On Tue, Jul 29, 2014 at 11:24 AM, Alex Wang wrote: > The connmgr_wants_packet_in_on_miss() should only check 'ofconn's > of type OFCONN_PRIMARY (i.e. controller connections). This commit > makes it happen. > > Signed-off-by: Alex Wang > --- > ofproto/connmgr.c | 16 +--- > 1 file changed, 9 insertions(+), 7 deletions(-) > > diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c > index 89af6b6..49ced1f 100644 > --- a/ofproto/connmgr.c > +++ b/ofproto/connmgr.c > @@ -1512,13 +1512,15 @@ connmgr_wants_packet_in_on_miss(struct connmgr > *mgr) OVS_EXCLUDED(ofproto_mutex) > > ovs_mutex_lock(&ofproto_mutex); > LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { > -enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); > - > -if (ofconn->controller_id == 0 && > -(protocol == OFPUTIL_P_NONE || > - ofputil_protocol_to_ofp_version(protocol) < OFP13_VERSION)) { > -ovs_mutex_unlock(&ofproto_mutex); > -return true; > +if (ofconn->type == OFCONN_PRIMARY) { > +enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); > + > +if (ofconn->controller_id == 0 && > +(protocol == OFPUTIL_P_NONE || > + ofputil_protocol_to_ofp_version(protocol) < > OFP13_VERSION)) { > +ovs_mutex_unlock(&ofproto_mutex); > +return true; > +} > } > } > ovs_mutex_unlock(&ofproto_mutex); > -- > 1.7.9.5 > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] connmgr: Make call to connmgr_wants_packet_in_on_miss() thread safe.
Thx for the suggestion, I think it will make the connmgr module safer if we annotate the ofproto->connmgr. With a quick look, I think it will take more checking and efforts, (for functions we know that are only called by the main thread, there is no lock protection) For branch-2.3, I want to use this fix. Do you think it is okay? Thanks, Alex Wang, On Tue, Jul 29, 2014 at 1:23 PM, Andy Zhou wrote: > Should we also consider lock annotate the ofproto->connmgr variable? > > Also, it seems to me the lock can be per ofproto, instead of a global > lock, but I am not sure this use case will benefit from a finer grain > lock. > > > On Tue, Jul 29, 2014 at 11:20 AM, Alex Wang wrote: > > connmgr_wants_packet_in_on_miss() is called by multiple threads > > and thusly should be protected by the mutex. > > > > Signed-off-by: Alex Wang > > --- > > ofproto/connmgr.c |6 +- > > 1 file changed, 5 insertions(+), 1 deletion(-) > > > > diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c > > index c6432bf..89af6b6 100644 > > --- a/ofproto/connmgr.c > > +++ b/ofproto/connmgr.c > > @@ -1506,19 +1506,23 @@ ofconn_wants_packet_in_on_miss(struct ofconn > *ofconn, > > * This logic assumes that "table-miss" packet_in messages > > * are always sent to controller_id 0. */ > > bool > > -connmgr_wants_packet_in_on_miss(struct connmgr *mgr) > > +connmgr_wants_packet_in_on_miss(struct connmgr *mgr) > OVS_EXCLUDED(ofproto_mutex) > > { > > struct ofconn *ofconn; > > > > +ovs_mutex_lock(&ofproto_mutex); > > LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { > > enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); > > > > if (ofconn->controller_id == 0 && > > (protocol == OFPUTIL_P_NONE || > > ofputil_protocol_to_ofp_version(protocol) < > OFP13_VERSION)) { > > +ovs_mutex_unlock(&ofproto_mutex); > > return true; > > } > > } > > +ovs_mutex_unlock(&ofproto_mutex); > > + > > return false; > > } > > > > -- > > 1.7.9.5 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] connmgr: Make call to connmgr_wants_packet_in_on_miss() thread safe.
On Tue, Jul 29, 2014 at 2:16 PM, Andy Zhou wrote: > On Tue, Jul 29, 2014 at 2:09 PM, Alex Wang wrote: > > Thx for the suggestion, I think it will make the connmgr module safer > > if we annotate the ofproto->connmgr. With a quick look, I think it will > > take more checking and efforts, (for functions we know that are only > called > > by the main thread, there is no lock protection) > Is this safe if ofproto->connmgr can be accessed by other threads? > I think it is safe. since main thread creates/deletes the connmgr, > > > > For branch-2.3, I want to use this fix. Do you think it is okay? > I suppose so, as long as the fix is safe. Your call. > Thx, will backport it soon, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] connmgr: Make call to connmgr_wants_packet_in_on_miss() thread safe.
Thx, pushed both patches to master and branch-2.3. I'll try improving the thread-safety of connmgr module later, On Tue, Jul 29, 2014 at 2:32 PM, Alex Wang wrote: > > > > On Tue, Jul 29, 2014 at 2:16 PM, Andy Zhou wrote: > >> On Tue, Jul 29, 2014 at 2:09 PM, Alex Wang wrote: >> > Thx for the suggestion, I think it will make the connmgr module safer >> > if we annotate the ofproto->connmgr. With a quick look, I think it will >> > take more checking and efforts, (for functions we know that are only >> called >> > by the main thread, there is no lock protection) >> Is this safe if ofproto->connmgr can be accessed by other threads? >> > > > I think it is safe. since main thread creates/deletes the connmgr, > > > > >> > >> > For branch-2.3, I want to use this fix. Do you think it is okay? >> I suppose so, as long as the fix is safe. Your call. >> > > > Thx, will backport it soon, > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC] connmgr: Make connmgr_wants_packet_in_on_miss() only check controller connections.
Hey Ben, Sorry for the confusion, On Tue, Jul 29, 2014 at 2:52 PM, Ben Pfaff wrote: > On Tue, Jul 29, 2014 at 11:30:12AM -0700, Alex Wang wrote: > > I think we should not check 'ofconn's for OFCONN_SERVICE... This > > change broke several tests: > > > > *Please send `tests/testsuite.log' and all information you think might > > help:* > > > > * To: >* > > > > * Subject: [openvswitch 2.3.0] testsuite: 737 739 747 817 failed* > > I'm a little confused now. > > I'm not sure why you addressed this to Simon. I don't think he has any > recent commits. > 1. The test failure is caused by this RFC patch, 2. connmgr_wants_packet_in_on_miss() was added by Simon. I think this function should only check for controller connections. And want to confirm it with Simon. 3. unit test 737 corresponds to this: (which is also added by Simon) 527ae97e (Simon Horman 2014-03-13 15:52:54 +0900 1565) 527ae97e (Simon Horman 2014-03-13 15:52:54 +0900 1566) AT_SETUP([ofproto-dpif - table-miss flow (OpenFlow 1.0)]) 527ae97e (Simon Horman 2014-03-13 15:52:54 +0900 1567) OVS_VSWITCHD_START([dnl 527ae97e (Simon Horman 2014-03-13 15:52:54 +0900 1568)add-port br0 p1 -- set Interface p1 type=dummy 527ae97e (Simon Horman 2014-03-13 15:52:54 +0900 1569) ]) I think that's why I sent email asking him. > > From the [openvswitch 2.3.0] it looks like you ran the tests against > branch-2.3. I don't see the same failures locally when I run the tests > on branch-2.3 (commit 9067b54) or master (commit 57fa816). Are these > intermittent failures or do you see them every time? Do they appear > only after the two recent additional commits that you pushed? > 4. I saw them every time. Those failures are caused only by this RFC patch (unrelated to the two recent commits I just pused) 5. The cause of failure is that connmgr_wants_packet_in_on_miss() now checks only the 'ofconn's of type OFCONN_PRIMARY. so, the 'ofconn' for 'ofctl monitor' is ignored, and we always drop the pkt_ins. Thanks, Alex Wang, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC] connmgr: Make connmgr_wants_packet_in_on_miss() only check controller connections.
> > > I don't wish to add to any confusion but I will none the less > make a comment on this change which you may choose to ignore if you like. > > The motivation for adding connmgr_wants_packet_in_on_miss() is to help > implement per-OpenFlow version behaviour of packet_in messages. So if > monitors should have per-OpenFlow version behaviour then I am not sure your > patch is correct. My recollection is that this behaviour of monitors seemed > useful for the purpose of testing the per-OpenFlow version behaviour of > packet in messages. But it was a while ago so my recollection may not be > accurate. > > > Hey Simon, Thanks for the explanation, I think we do want to have per-OpenFlow version 'table-miss' behavior for 'ofctl monitor'. My original understanding was that if there is no established 'ofconn' to controller, connmgr_wants_packet_in_on_miss() should always returns false, and 'ofctl monitor' should not work. After checking the code further, I think we treat 'ofconn' from 'ovs-ofctl monitor' as 'controller' also. And i see why you wrote connmgr_wants_packet_in_on_miss(). So, I'm dropping this patch. Thanks, Alex Wang, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/2] bfd: Flip the default value of bfd ip source and destination.
This commit flips the default value of bfd ip source and destination, so that they match the default value of ip destination and source of vtep schema. Signed-off-by: Alex Wang --- lib/bfd.c|4 ++-- vswitchd/vswitch.xml |4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index 892dfe8..9069582 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -453,14 +453,14 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, if (ip_src && bfd_lookup_ip(ip_src, &in_addr)) { memcpy(&bfd->ip_src, &in_addr, sizeof in_addr); } else { -bfd->ip_src = htonl(0xA9FE0100); /* 169.254.1.0. */ +bfd->ip_src = htonl(0xA9FE0101); /* 169.254.1.1. */ } ip_dst = smap_get(cfg, "bfd_dst_ip"); if (ip_dst && bfd_lookup_ip(ip_dst, &in_addr)) { memcpy(&bfd->ip_dst, &in_addr, sizeof in_addr); } else { -bfd->ip_dst = htonl(0xA9FE0101); /* 169.254.1.1. */ +bfd->ip_dst = htonl(0xA9FE0100); /* 169.254.1.0. */ } forwarding_if_rx = smap_get_bool(cfg, "forwarding_if_rx", false); diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index d47fc1a..bff8fb6 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2091,12 +2091,12 @@ Set to an IPv4 address to set the IP address used as source for - transmitted BFD packets. The default is 169.254.1.0. + transmitted BFD packets. The default is 169.254.1.1. Set to an IPv4 address to set the IP address used as destination - for transmitted BFD packets. The default is 169.254.1.1. + for transmitted BFD packets. The default is 169.254.1.0. -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/2] bfd: Allow users to set local/remote src/dst MAC address.
This commit adds four options for configuring the MAC addresses in BFD state machine. Therein, the "bfd_local_src_mac" and "bfd_local_dst_mac" configure the MAC address of sent BFD packets. The "bfd_remote_src_mac" and "bfd_remote_dst_mac" configure the matching of MAC address on recevied BFD packets. Signed-off-by: Alex Wang --- lib/bfd.c| 60 +++--- vswitchd/vswitch.xml | 32 +++ 2 files changed, 75 insertions(+), 17 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index 9069582..165eb61 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -169,8 +169,15 @@ struct bfd { uint32_t rmt_disc;/* bfd.RemoteDiscr. */ -uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */ -bool eth_dst_set; /* 'eth_dst' set through database. */ +uint8_t local_eth_src[ETH_ADDR_LEN]; /* Local eth src address. */ +bool local_eth_src_set; /* Local 'eth_src' set through db. */ +uint8_t local_eth_dst[ETH_ADDR_LEN]; /* Local eth dst address. */ +bool local_eth_dst_set; /* Local 'eth_dst' set through db. */ + +uint8_t rmt_eth_src[ETH_ADDR_LEN]; /* Remote eth src address. */ +bool rmt_eth_src_set;/* Remote 'eth_src' set through db. */ +uint8_t rmt_eth_dst[ETH_ADDR_LEN]; /* Remote eth dst address. */ +bool rmt_eth_dst_set;/* Remote 'eth_dst' set through db. */ ovs_be32 ip_src; /* IPv4 source address. */ ovs_be32 ip_dst; /* IPv4 destination address. */ @@ -388,8 +395,6 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, bfd_set_state(bfd, STATE_DOWN, DIAG_NONE); -memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); - bfd_status_changed(bfd); } @@ -440,13 +445,36 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, need_poll = true; } -hwaddr = smap_get(cfg, "bfd_dst_mac"); +hwaddr = smap_get(cfg, "bfd_local_src_mac"); +if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) { +memcpy(bfd->local_eth_src, ea, ETH_ADDR_LEN); +bfd->local_eth_src_set = true; +} else { +bfd->local_eth_src_set = false; +} + +hwaddr = smap_get(cfg, "bfd_local_dst_mac"); if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) { -memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN); -bfd->eth_dst_set = true; -} else if (bfd->eth_dst_set) { -memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); -bfd->eth_dst_set = false; +memcpy(bfd->local_eth_dst, ea, ETH_ADDR_LEN); +bfd->local_eth_dst_set = true; +} else { +bfd->local_eth_dst_set = false; +} + +hwaddr = smap_get(cfg, "bfd_remote_src_mac"); +if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) { +memcpy(bfd->rmt_eth_src, ea, ETH_ADDR_LEN); +bfd->rmt_eth_src_set = true; +} else { +bfd->rmt_eth_src_set = false; +} + +hwaddr = smap_get(cfg, "bfd_remote_dst_mac"); +if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) { +memcpy(bfd->rmt_eth_dst, ea, ETH_ADDR_LEN); +bfd->rmt_eth_dst_set = true; +} else { +bfd->rmt_eth_dst_set = false; } ip_src = smap_get(cfg, "bfd_src_ip"); @@ -600,8 +628,10 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */ eth = ofpbuf_put_uninit(p, sizeof *eth); -memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); -memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); +memcpy(eth->eth_src, bfd->local_eth_src_set ? bfd->local_eth_src : eth_src, + ETH_ADDR_LEN); +memcpy(eth->eth_dst, bfd->local_eth_dst_set ? bfd->local_eth_dst +: eth_addr_bfd, ETH_ADDR_LEN); eth->eth_type = htons(ETH_TYPE_IP); ip = ofpbuf_put_zeros(p, sizeof *ip); @@ -656,8 +686,12 @@ bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow, struct bfd *bfd = CONST_CAST(struct bfd *, bfd_); bool check_tnl_key; +memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); -if (bfd->eth_dst_set && memcmp(bfd->eth_dst, flow->dl_dst, ETH_ADDR_LEN)) { +if ((bfd->rmt_eth_src_set && memcmp(bfd->rmt_eth_src, flow->dl_src, +ETH_ADDR_LEN)) +|| (bfd->rmt_eth_dst_set && memcmp
[ovs-dev] [PATCH V2] bfd: Allow users to set local/remote src/dst MAC address.
This commit adds options for configuring the MAC addresses in BFD state machine. Therein, the "bfd_local_src_mac" and "bfd_local_dst_mac" configure the MAC address of sent BFD packets. The "bfd_remote_dst_mac" configure the matching of MAC address on recevied BFD packets. Signed-off-by: Alex Wang --- PATCH -> V2: - simplify the code. --- lib/bfd.c| 47 +-- vswitchd/vswitch.xml | 23 +++ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index 9069582..5c86451 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -169,8 +169,10 @@ struct bfd { uint32_t rmt_disc;/* bfd.RemoteDiscr. */ -uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */ -bool eth_dst_set; /* 'eth_dst' set through database. */ +uint8_t local_eth_src[ETH_ADDR_LEN]; /* Local eth src address. */ +uint8_t local_eth_dst[ETH_ADDR_LEN]; /* Local eth dst address. */ + +uint8_t rmt_eth_dst[ETH_ADDR_LEN]; /* Remote eth dst address. */ ovs_be32 ip_src; /* IPv4 source address. */ ovs_be32 ip_dst; /* IPv4 destination address. */ @@ -388,8 +390,6 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, bfd_set_state(bfd, STATE_DOWN, DIAG_NONE); -memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); - bfd_status_changed(bfd); } @@ -440,13 +440,25 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, need_poll = true; } -hwaddr = smap_get(cfg, "bfd_dst_mac"); -if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) { -memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN); -bfd->eth_dst_set = true; -} else if (bfd->eth_dst_set) { -memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); -bfd->eth_dst_set = false; +hwaddr = smap_get(cfg, "bfd_local_src_mac"); +if (hwaddr && eth_addr_from_string(hwaddr, ea)) { +memcpy(bfd->local_eth_src, ea, ETH_ADDR_LEN); +} else { +memset(bfd->local_eth_src, 0, ETH_ADDR_LEN); +} + +hwaddr = smap_get(cfg, "bfd_local_dst_mac"); +if (hwaddr && eth_addr_from_string(hwaddr, ea)) { +memcpy(bfd->local_eth_dst, ea, ETH_ADDR_LEN); +} else { +memset(bfd->local_eth_dst, 0, ETH_ADDR_LEN); +} + +hwaddr = smap_get(cfg, "bfd_remote_dst_mac"); +if (hwaddr && eth_addr_from_string(hwaddr, ea)) { +memcpy(bfd->rmt_eth_dst, ea, ETH_ADDR_LEN); +} else { +memset(bfd->rmt_eth_dst, 0, ETH_ADDR_LEN); } ip_src = smap_get(cfg, "bfd_src_ip"); @@ -600,8 +612,14 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */ eth = ofpbuf_put_uninit(p, sizeof *eth); -memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); -memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); +memcpy(eth->eth_src, + eth_addr_is_zero(bfd->local_eth_src) ? eth_src +: bfd->local_eth_src, + ETH_ADDR_LEN); +memcpy(eth->eth_dst, + eth_addr_is_zero(bfd->local_eth_dst) ? eth_addr_bfd +: bfd->local_eth_dst, + ETH_ADDR_LEN); eth->eth_type = htons(ETH_TYPE_IP); ip = ofpbuf_put_zeros(p, sizeof *ip); @@ -657,7 +675,8 @@ bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow, bool check_tnl_key; memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); -if (bfd->eth_dst_set && memcmp(bfd->eth_dst, flow->dl_dst, ETH_ADDR_LEN)) { +if (!eth_addr_is_zero(bfd->rmt_eth_dst) +&& memcmp(bfd->rmt_eth_dst, flow->dl_dst, ETH_ADDR_LEN)) { return false; } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index bff8fb6..ff9be17 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2081,12 +2081,27 @@ tunnel key. - + Set to an Ethernet address in the form xx:xx:xx:xx:xx:xx - to set the MAC used as destination for transmitted BFD packets and - expected as destination for received BFD packets. The default is - 00:23:20:00:00:01. + to set the MAC used as source for transmitted BFD packets. The + default is the mac address of the BFD enabled interface. + + + + Set to an Ethernet address in the form + xx:xx:xx:xx:xx:xx + to set the MAC used as destination for transmitted BFD packets. The + default is 00:23:20:00:00:01. + + + + Set to an Ethernet address in
[ovs-dev] [dpdk patch 2/8] netdev-dpdk: Add function for getting the socket_id of netdev-dpdk.
Signed-off-by: Alex Wang --- lib/netdev-dpdk.c | 14 ++ lib/netdev-dpdk.h |8 2 files changed, 22 insertions(+) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 6ee9803..7298334 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1442,6 +1442,20 @@ netdev_dpdk_register(void) } } +/* Returns the 'socket_id' if 'netdev_' is class dpdk. Otherwise, + * return -1. */ +int +netdev_dpdk_get_socket_id(const struct netdev *netdev_) +{ +if (is_dpdk_class(netdev_->netdev_class)) { +struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); + +return netdev->socket_id; +} else { +return -1; +} +} + int pmd_thread_setaffinity_cpu(int cpu) { diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index e4ba6fc..75f6a0b 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -4,6 +4,7 @@ #include struct dpif_packet; +struct netdev; #ifdef DPDK_NETDEV @@ -22,6 +23,7 @@ struct dpif_packet; int dpdk_init(int argc, char **argv); void netdev_dpdk_register(void); +int netdev_dpdk_get_socket_id(const struct netdev *); void free_dpdk_buf(struct dpif_packet *); int pmd_thread_setaffinity_cpu(int cpu); void thread_set_nonpmd(void); @@ -40,6 +42,12 @@ netdev_dpdk_register(void) /* Nothing */ } +static inline int +netdev_dpdk_get_socket_id(const struct netdev *netdev_ OVS_UNUSED) +{ +return -1; +} + static inline void free_dpdk_buf(struct dpif_packet *buf OVS_UNUSED) { -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch 5/8] dpif-netdev: Create 'number of dpdk ifaces on cpu socket' pmd threads for each cpu socket.
The pmd threads are pinned to available cpu cores on the corresponding cpu socket. Note, core 0 is not pinnable and is reserved for all non-pmd threads. Signed-off-by: Alex Wang --- lib/dpif-netdev.c | 254 + lib/dpif-netdev.h |2 +- lib/netdev-dpdk.c | 40 ++--- lib/netdev-dpdk.h | 15 4 files changed, 244 insertions(+), 67 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c637d9f..14784bf 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -52,6 +52,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-rcu.h" #include "packet-dpif.h" #include "packets.h" @@ -71,6 +72,7 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); #define NETDEV_RULE_PRIORITY 0x8000 #define FLOW_DUMP_MAX_BATCH 50 + /* Use per thread recirc_depth to prevent recirculation loop. */ #define MAX_RECIRC_DEPTH 5 DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) @@ -142,11 +144,9 @@ struct dp_netdev { struct fat_rwlock upcall_rwlock; exec_upcall_cb *upcall_cb; /* Callback function for executing upcalls. */ -/* Forwarding threads. */ -struct latch exit_latch; -struct pmd_thread *pmd_threads; -size_t n_pmd_threads; -int pmd_count; +/* Per-cpu-socket struct for configuring pmd threads. */ +struct pmd_socket *pmd_sockets; +int n_pmd_sockets; }; static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, @@ -281,6 +281,15 @@ struct dp_netdev_actions *dp_netdev_flow_get_actions( const struct dp_netdev_flow *); static void dp_netdev_actions_free(struct dp_netdev_actions *); +/* Represents the PMD configuration on a cpu socket. */ +struct pmd_socket { +struct dp_netdev *dp; +struct latch exit_latch; +struct pmd_thread *pmd_threads; +int socket_id; +int n_pmd_threads; +}; + /* PMD: Poll modes drivers. PMD accesses devices via polling to eliminate * the performance overhead of interrupt processing. Therefore netdev can * not implement rx-wait for these devices. dpif-netdev needs to poll @@ -293,9 +302,10 @@ static void dp_netdev_actions_free(struct dp_netdev_actions *); * table, and executes the actions it finds. **/ struct pmd_thread { -struct dp_netdev *dp; +struct pmd_socket *socket; pthread_t thread; -int id; +int index; +int core_id; atomic_uint change_seq; }; @@ -335,8 +345,10 @@ static void dp_netdev_port_input(struct dp_netdev *dp, struct dpif_packet **packets, int cnt, odp_port_t port_no); -static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n); static void dp_netdev_disable_upcall(struct dp_netdev *); +static void dp_netdev_destroy_all_pmd_sockets(struct dp_netdev *); +static void dp_netdev_unset_pmd_threads(struct dp_netdev *, int socket_id); +static void dp_netdev_set_pmd_threads(struct dp_netdev *, int socket_id); static struct dpif_netdev * dpif_netdev_cast(const struct dpif *dpif) @@ -450,7 +462,7 @@ create_dp_netdev(const char *name, const struct dpif_class *class, OVS_REQUIRES(dp_netdev_mutex) { struct dp_netdev *dp; -int error; +int n_sockets, error; dp = xzalloc(sizeof *dp); shash_add(&dp_netdevs, name, dp); @@ -469,13 +481,32 @@ create_dp_netdev(const char *name, const struct dpif_class *class, ovs_mutex_init(&dp->port_mutex); cmap_init(&dp->ports); dp->port_seq = seq_create(); -latch_init(&dp->exit_latch); fat_rwlock_init(&dp->upcall_rwlock); /* Disable upcalls by default. */ dp_netdev_disable_upcall(dp); dp->upcall_cb = NULL; +/* Reserves the core 0 for main thread. */ +ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID); + +/* Creates 'pmd_socket' struct for each cpu socket. */ +n_sockets = ovs_numa_get_n_sockets(); +if (n_sockets != OVS_SOCKET_UNSPEC) { +int i; + +dp->n_pmd_sockets = n_sockets; +dp->pmd_sockets = xzalloc(dp->n_pmd_sockets * sizeof *dp->pmd_sockets); +for (i = 0; i < dp->n_pmd_sockets; i++) { +dp->pmd_sockets[i].dp = dp; +dp->pmd_sockets[i].socket_id = i; +latch_init(&dp->pmd_sockets[i].exit_latch); +} +} else { +dp->n_pmd_sockets = 0; +dp->pmd_sockets = NULL; +} + ovs_mutex_lock(&dp->port_mutex); error = do_add_port(dp, name, "internal", ODPP_LOCAL); ovs_mutex_unlock(&dp->port_mutex); @@ -525,8 +556,8 @@ dp_netdev_free(struct dp_netdev *dp) shash_find_and_delete(&dp_netdevs, dp->name); -dp_netdev_set_pmd_threads(dp, 0); -free(dp->pmd_threads); +dp_netdev_destroy_all_pmd_sockets(dp); +free(dp->pmd_socket
[ovs-dev] [dpdk patch 3/8] netdev-dpdk: Make memory pool name contain the socket id.
This commit makes the memory pool name contain the socket id. Since dpdk library do not allow creation of memory pool with same name, this commit serves as a simple way of making each name unique. Signed-off-by: Alex Wang Acked-by: Thomas Graf --- lib/netdev-dpdk.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 7298334..109006f 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -300,7 +300,8 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex) dmp->mtu = mtu; dmp->refcount = 1; -if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d", dmp->mtu) < 0) { +if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d", dmp->mtu, + dmp->socket_id) < 0) { return NULL; } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch 7/8] ovs-numa: Add support for cpu-mask configuration.
Later patch will use this patch to configure the pmd thread cpu affinity. Signed-off-by: Alex Wang --- lib/ovs-numa.c | 87 lib/ovs-numa.h |7 + 2 files changed, 82 insertions(+), 12 deletions(-) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index acedd30..ec8c81a 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -50,6 +50,7 @@ struct cpu_core { struct list list_node; /* In 'cpu_socket->cores' list. */ struct cpu_socket *socket; /* Socket containing the core. */ int core_id; /* Core id. */ +bool pinnable; /* If the core can be pinned. */ bool pinned; /* If a thread has been pinned to the core. */ }; @@ -110,6 +111,7 @@ discover_sockets_and_cores(void) hash_int(core_id, 0)); list_insert(&s->cores, &c->list_node); c->core_id = core_id; +c->pinnable = true; n_cpus++; } } @@ -185,7 +187,8 @@ ovs_numa_get_n_cores_on_socket(int socket_id) return OVS_CORE_UNSPEC; } -/* Returns the number of unpinned cpu cores on socket. */ +/* Returns the number of cpu cores on socket that are unpinned and + * pinnable. */ int ovs_numa_get_n_unpinned_cores_on_socket(int socket_id) { @@ -199,7 +202,7 @@ ovs_numa_get_n_unpinned_cores_on_socket(int socket_id) hash_int(socket_id, 0)), struct cpu_socket, hmap_node); LIST_FOR_EACH(core, list_node, &socket->cores) { -if (!core->pinned) { +if (core->pinnable && !core->pinned) { count++; } } @@ -211,7 +214,7 @@ ovs_numa_get_n_unpinned_cores_on_socket(int socket_id) } /* Given 'core_id', tries to pin that core. Returns true, if succeeds. - * False, if the core has already been pinned. */ + * False, if the core has already been pinned or is un-pinnable. */ bool ovs_numa_try_pin_core_specific(int core_id) { @@ -222,7 +225,7 @@ ovs_numa_try_pin_core_specific(int core_id) core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, hash_int(core_id, 0)), struct cpu_core, hmap_node); -if (!core->pinned) { +if (core->pinnable && !core->pinned) { core->pinned = true; return true; } @@ -230,15 +233,15 @@ ovs_numa_try_pin_core_specific(int core_id) return false; } -/* Searches through all cores for an unpinned core. Returns the core_id - * if found and set the 'core->pinned' to true. Otherwise, returns -1. */ +/* Searches through all cores for an unpinned and pinnable core. Returns + * the core_id if found. Otherwise, returns -1. */ int ovs_numa_get_unpinned_core_any(void) { struct cpu_core *core; HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { -if (!core->pinned) { +if (core->pinnable && !core->pinned) { core->pinned = true; return core->core_id; } @@ -247,9 +250,8 @@ ovs_numa_get_unpinned_core_any(void) return OVS_CORE_UNSPEC; } -/* Searches through all cores on socket with 'socket_id' for an unpinned core. - * Returns the core_id if found and sets the 'core->pinned' to true. - * Otherwise, returns -1. */ +/* Searches through all cores on socket with 'socket_id' for an unpinned and + * pinnable core. Returns the core_id if found. Otherwise, returns -1. */ int ovs_numa_get_unpinned_core_on_socket(int socket_id) { @@ -262,7 +264,7 @@ ovs_numa_get_unpinned_core_on_socket(int socket_id) hash_int(socket_id, 0)), struct cpu_socket, hmap_node); LIST_FOR_EACH(core, list_node, &socket->cores) { -if (!core->pinned) { +if (core->pinnable && !core->pinned) { core->pinned = true; return core->core_id; } @@ -271,7 +273,7 @@ ovs_numa_get_unpinned_core_on_socket(int socket_id) return OVS_CORE_UNSPEC; } -/* Resets the 'core->pinned' for the core with 'core_id'. */ +/* Unpins the core with 'core_id'. */ void ovs_numa_unpin_core(int core_id) { @@ -285,4 +287,65 @@ ovs_numa_unpin_core(int core_id) core->pinned = false; } +/* Reads the cpu mask configuration from 'cmask' and sets the + * 'pinnable' of corresponding cores. For unspecified cores, + * sets 'pinnable' to true. */ +void +ovs_numa_set_cpu_mask(const char *cmask) +{ +int core_id = 0; +int i; + +if (!found_sockets_and_cores) { +return; +} + +/* If no mask specified, resets th
[ovs-dev] [dpdk patch 6/8] netdev-dpdk: Add function for configuring rx queues for all dpdk interfaces.
Later patch will use this function to configure rx queues. Signed-off-by: Alex Wang --- lib/netdev-dpdk.c | 42 +- lib/netdev-dpdk.h |7 +++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 012ee68..3013df5 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -130,6 +130,8 @@ enum { DRAIN_TSC = 20ULL }; static int rte_eal_init_ret = ENODEV; +static size_t dpdk_rx_queues OVS_GUARDED_BY(dpdk_mutex); + static struct ovs_mutex dpdk_mutex = OVS_MUTEX_INITIALIZER; /* Contains all 'struct dpdk_dev's. */ @@ -491,7 +493,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk ovs_mutex_lock(&netdev->mutex); -netdev->n_rx_q = dpdk_get_n_devs(netdev->socket_id); +netdev->n_rx_q = dpdk_rx_queues ? dpdk_rx_queues + : dpdk_get_n_devs(netdev->socket_id); /* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q * for each of them. */ @@ -1514,6 +1517,43 @@ netdev_dpdk_flush_non_local(struct netdev *netdev_, int qid) dpdk_queue_flush(dev, qid); } +/* Sets the number of rx queues for each dpdk interface. If the + * configuration fails on one interface, do not try restoring its + * old configuration and just returns the error. + * Caller should make decision on the error case. One option is to + * call the function again with smaller n_queues or 0 (default). */ +int +netdev_dpdk_set_rx_queues(int n_queues) +{ +struct netdev_dpdk *dev; +int err = 0; + +ovs_mutex_lock(&dpdk_mutex); + +if (dpdk_rx_queues == n_queues) { +goto out; +} + +LIST_FOR_EACH (dev, list_node, &dpdk_list) { +ovs_mutex_lock(&dev->mutex); +rte_eth_dev_stop(dev->port_id); +dev->n_rx_q = n_queues ? n_queues + : dpdk_get_n_devs(dev->socket_id); +err = dpdk_eth_dev_init(dev); +if (err) { +ovs_mutex_unlock(&dev->mutex); +goto out; +} +dev->up.n_rxq = n_queues; +ovs_mutex_unlock(&dev->mutex); +} +dpdk_rx_queues = n_queues; +out: +ovs_mutex_unlock(&dpdk_mutex); + +return err; +} + int pmd_thread_setaffinity_cpu(int cpu) { diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 5e8f296..38f916c 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -26,6 +26,7 @@ void netdev_dpdk_register(void); int netdev_dpdk_get_socket_id(const struct netdev *); int netdev_dpdk_get_n_devs_on_socket(int socket_id); void netdev_dpdk_flush_non_local(struct netdev *, int qid); +int netdev_dpdk_set_rx_queues(int n_queues); void free_dpdk_buf(struct dpif_packet *); int pmd_thread_setaffinity_cpu(int cpu); void thread_set_nonpmd(void); @@ -56,6 +57,12 @@ netdev_dpdk_get_n_devs_on_socket(int socket_id OVS_UNUSED) return -1; } +static inline int +netdev_dpdk_set_rx_queues(int n_queues OVS_UNUSED) +{ +return -1; +} + static inline void netdev_dpdk_flush_non_local(struct netdev *netdev OVS_UNUSED, int qid OVS_UNUSED) -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch 0/8] DKDP PMD multithreading.
9806658144 | | Tx.TxDuration| 100.0 | 100.0 | | Tx.StreamBlock | 32 udp 64 |32 udp 64-2| | Tx.Load |100|100| | Tx.Saturation| 99.9809428128 | 99.9806658144 | |Tx.DropRate | 0.000304743297568 | 0.000304743297568 | ++---+---+ Alex Wang (8): ovs-numa: Add ovs-numa.{c,h} for extracting and storing cpu socket and cpu core info. netdev-dpdk: Add function for getting the socket_id of netdev-dpdk. netdev-dpdk: Make memory pool name contain the socket id. netdev-dpdk: Create 'number of dpdk ifaces on same cpu socket' rx queues and 'number of cpu cores' tx queues for each dpdk interface. dpif-netdev: Create 'number of dpdk ifaces on cpu socket' pmd threads for each cpu socket. netdev-dpdk: Add function for configuring rx queues for all dpdk interfaces. ovs-numa: Add support for cpu-mask configuration. vswitchd: Add option to configure cpu-mask and number of dpdk rx_queues. lib/automake.mk|2 + lib/dpif-linux.c |1 + lib/dpif-netdev.c | 354 +--- lib/dpif-netdev.h |3 +- lib/dpif-provider.h|7 + lib/dpif.c | 17 +++ lib/dpif.h |1 + lib/netdev-dpdk.c | 150 --- lib/netdev-dpdk.h | 30 lib/ovs-numa.c | 351 +++ lib/ovs-numa.h | 118 +++ ofproto/ofproto-dpif.c |2 + ofproto/ofproto-provider.h |6 + ofproto/ofproto.c | 16 ++ ofproto/ofproto.h |2 + tests/ofproto-macros.at|1 + vswitchd/bridge.c |5 + vswitchd/vswitch.xml | 29 18 files changed, 1017 insertions(+), 78 deletions(-) create mode 100644 lib/ovs-numa.c create mode 100644 lib/ovs-numa.h -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch 8/8] dpdk: Allow configuration of pmd multithreading and dpdk interface rx queues.
This commits adds the multithreading functionality to OVS dpdk module. Users are able to create multiple pmd threads and set their cpu affinity via specifying the cpu mask string similar to the EAL '-c COREMASK' option. Also, the number of rx queues for each dpdk interface is made configurable to help distribution of rx packets among multiple pmd threads. Signed-off-by: Alex Wang --- lib/dpif-linux.c |1 + lib/dpif-netdev.c | 130 ++-- lib/dpif-provider.h|7 +++ lib/dpif.c | 17 ++ lib/dpif.h |1 + lib/ovs-numa.c |2 +- ofproto/ofproto-dpif.c |2 + ofproto/ofproto-provider.h |6 ++ ofproto/ofproto.c | 16 ++ ofproto/ofproto.h |2 + vswitchd/bridge.c |3 + vswitchd/vswitch.xml | 29 ++ 12 files changed, 198 insertions(+), 18 deletions(-) diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index aa01cef..c2d48a2 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1941,6 +1941,7 @@ const struct dpif_class dpif_linux_class = { dpif_linux_operate, dpif_linux_recv_set, dpif_linux_handlers_set, +NULL, /* poll_thread_set */ dpif_linux_queue_to_priority, dpif_linux_recv, dpif_linux_recv_wait, diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 14784bf..d136df3 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -147,6 +147,9 @@ struct dp_netdev { /* Per-cpu-socket struct for configuring pmd threads. */ struct pmd_socket *pmd_sockets; int n_pmd_sockets; + +size_t n_dpdk_rxqs; +char *pmd_cmask; }; static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, @@ -346,9 +349,11 @@ static void dp_netdev_port_input(struct dp_netdev *dp, odp_port_t port_no); static void dp_netdev_disable_upcall(struct dp_netdev *); -static void dp_netdev_destroy_all_pmd_sockets(struct dp_netdev *); +static void dp_netdev_destroy_all_pmd_sockets(struct dp_netdev *, + bool del_latch); static void dp_netdev_unset_pmd_threads(struct dp_netdev *, int socket_id); static void dp_netdev_set_pmd_threads(struct dp_netdev *, int socket_id); +static void dp_netdev_reset_pmd_threads(struct dp_netdev *); static struct dpif_netdev * dpif_netdev_cast(const struct dpif *dpif) @@ -556,7 +561,7 @@ dp_netdev_free(struct dp_netdev *dp) shash_find_and_delete(&dp_netdevs, dp->name); -dp_netdev_destroy_all_pmd_sockets(dp); +dp_netdev_destroy_all_pmd_sockets(dp, true); free(dp->pmd_sockets); dp_netdev_flow_flush(dp); @@ -1572,6 +1577,79 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) return 0; } +static bool +pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask) +{ +if (dp->n_dpdk_rxqs != rxqs) { +return true; +} else { +if (dp->pmd_cmask != NULL && cmask != NULL) { +return strcmp(dp->pmd_cmask, cmask); +} else { +return (dp->pmd_cmask != NULL || cmask != NULL); +} +} +} + +static int +dpif_netdev_pmd_set(struct dpif *dpif, size_t n_rxqs, const char *cmask) +{ +struct dp_netdev *dp = get_dp_netdev(dpif); +int err = 0; + +/* If there is any configuration change, destroys all pmd threads, + * adopts the new config, and restarts pmd threads. */ +if (pmd_config_changed(dp, n_rxqs, cmask)) { +struct dp_netdev_port *port; + +dp_netdev_destroy_all_pmd_sockets(dp, false); + +/* Closes all dpdk port rx queues. */ +CMAP_FOR_EACH (port, node, &dp->ports) { +if (netdev_is_pmd(port->netdev)) { +int i; + +for (i = 0; i < netdev_n_rxq(port->netdev); i++) { +netdev_rxq_close(port->rxq[i]); +} +} +} + +err = netdev_dpdk_set_rx_queues(n_rxqs); +if (err) { +VLOG_ERR("Failed to set dpdk rx_queue to: %"PRIu64"," + "reset the default configuration", n_rxqs); +/* On error, resets the rx_queues to default. */ +dp->n_dpdk_rxqs = 0; +netdev_dpdk_set_rx_queues(dp->n_dpdk_rxqs); +} else { +dp->n_dpdk_rxqs = n_rxqs; +} + +/* Reloads all dpdk port rx queues. */ +CMAP_FOR_EACH (port, node, &dp->ports) { +if (netdev_is_pmd(port->netdev)) { +int i; + +port->rxq = xrealloc(port->rxq, sizeof *port->rxq + * netdev_n_rxq(port->netdev)); +for (i = 0; i < netdev_n_rxq(port->netdev); i++) { +netdev_
[ovs-dev] [dpdk patch 4/8] netdev-dpdk: Create 'number of dpdk ifaces on same cpu socket' rx queues and 'number of cpu cores' tx queues for each dpdk interface.
Before this commit, ovs only creates one tx/rx queue for each dpdk interface and uses only one poll thread for handling the I/O of all dpdk interfaces. As one step toward using multiple poll threads, this commit makes ovs, by default, create same number of rx queues as the number dpdk interfaces on the cpu socket. Also each dpdk interface will have one tx queue for each cpu core, even though not all of those queues will be used. Signed-off-by: Alex Wang --- lib/dpif-netdev.h |1 - lib/netdev-dpdk.c | 55 + 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 410fcfa..50c1198 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -40,7 +40,6 @@ static inline void dp_packet_pad(struct ofpbuf *b) } } -#define NR_QUEUE 1 #define NR_PMD_THREADS 1 #ifdef __cplusplus diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 109006f..432524f 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -36,6 +36,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-thread.h" #include "ovs-rcu.h" #include "packet-dpif.h" @@ -179,7 +180,9 @@ struct netdev_dpdk { int port_id; int max_packet_len; -struct dpdk_tx_queue tx_q[NR_QUEUE]; +struct dpdk_tx_queue *tx_q; +int n_tx_q; +int n_rx_q; struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex); @@ -384,6 +387,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED) return NULL; } +/* Returns the number of dpdk ifaces on the cpu socket. */ +static int +dpdk_get_n_devs(int socket_id) +{ +int count = 0; +int i; + +ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id)); + +for (i = 0; i < rte_eth_dev_count(); i++) { +if (rte_eth_dev_socket_id(i) == socket_id) { +count++; +} +} +ovs_assert(count); + +return count; +} + static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) { @@ -396,13 +418,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) return ENODEV; } -diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, &port_conf); +diag = rte_eth_dev_configure(dev->port_id, dev->n_rx_q, dev->n_tx_q, + &port_conf); if (diag) { VLOG_ERR("eth dev config error %d",diag); return -diag; } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->n_tx_q; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, dev->socket_id, &tx_conf); if (diag) { @@ -411,7 +434,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) } } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->n_rx_q; i++) { diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, dev->socket_id, &rx_conf, dev->dpdk_mp->mp); @@ -463,13 +486,25 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk { struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); int err = 0; -int i; +int n_cores, i; ovs_mutex_init(&netdev->mutex); ovs_mutex_lock(&netdev->mutex); -for (i = 0; i < NR_QUEUE; i++) { +netdev->n_rx_q = dpdk_get_n_devs(netdev->socket_id); + +/* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q + * for each of them. */ +n_cores = ovs_numa_get_n_cores(); +if (n_cores == OVS_CORE_UNSPEC) { +VLOG_WARN_RL(&rl, "netdev_dpdk init failed due to no cpu core info"); +err = ENOENT; +goto unlock; +} +netdev->n_tx_q = n_cores; +netdev->tx_q = dpdk_rte_mzalloc(netdev->n_tx_q * sizeof *netdev->tx_q); +for (i = 0; i < netdev->n_tx_q; i++) { rte_spinlock_init(&netdev->tx_q[i].tx_lock); } @@ -492,11 +527,14 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk if (err) { goto unlock; } -netdev_->n_rxq = NR_QUEUE; +netdev_->n_rxq = netdev->n_rx_q; list_push_back(&dpdk_list, &netdev->list_node); unlock: +if (err) { +rte_free(netdev->tx_q); +} ovs_mutex_unlock(&netdev->mutex); return err; } @@ -548,6 +586,7 @@ netdev_dpdk_destruct(struct netdev *netdev_) ovs_mutex_unlock(&dev->mutex); ovs_mutex_lock(&dpdk_mutex); +rte_free(dev->tx_q); list_remove(&dev->list_node); dpdk_mp_put(dev->dpdk_mp); ovs_mutex_unlock(&dpdk_mutex); @@ -786,7 +825,7 @@ netdev_dpdk_send(struct
[ovs-dev] [dpdk patch 1/8] ovs-numa: Add ovs-numa.{c, h} for extracting and storing cpu socket and cpu core info.
Signed-off-by: Alex Wang --- Changes since first post: - use readdir() for the buffer safety. - only compile ovs-numa.c in linux. - make ovs_numa_get_n_sockets/cores() return *UNSPEC if there is no sockets or cores extracted. - refine code based on Ben's review. - Address review comments from Thomas Graf. - Add dummy interface for WIN32 case. --- lib/automake.mk |2 + lib/ovs-numa.c | 288 +++ lib/ovs-numa.h | 111 ++ tests/ofproto-macros.at |1 + vswitchd/bridge.c |2 + 5 files changed, 404 insertions(+) create mode 100644 lib/ovs-numa.c create mode 100644 lib/ovs-numa.h diff --git a/lib/automake.mk b/lib/automake.mk index f4426f2..dfd9c57 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -310,6 +310,8 @@ lib_libopenvswitch_la_SOURCES += \ lib/netlink-protocol.h \ lib/netlink-socket.c \ lib/netlink-socket.h \ + lib/ovs-numa.c \ + lib/ovs-numa.h \ lib/rtnetlink-link.c \ lib/rtnetlink-link.h \ lib/route-table.c \ diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c new file mode 100644 index 000..acedd30 --- /dev/null +++ b/lib/ovs-numa.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* To avoid sparse warning. */ +#ifdef __linux__ + +#include +#include "ovs-numa.h" + +#include +#include +#include +#include +#include +#include + +#include "hash.h" +#include "hmap.h" +#include "list.h" +#include "ovs-thread.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(ovs_numa); + +#define MAX_CPU_SOCKETS 128 + +/* Cpu socket. */ +struct cpu_socket { +struct hmap_node hmap_node; /* In the 'all_cpu_sockets'. */ +struct list cores; /* List of cpu cores on the socket. */ +int socket_id; /* Socket id. */ +}; + +/* Cpu core on a cpu socket. */ +struct cpu_core { +struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */ +struct list list_node; /* In 'cpu_socket->cores' list. */ +struct cpu_socket *socket; /* Socket containing the core. */ +int core_id; /* Core id. */ +bool pinned; /* If a thread has been pinned to the core. */ +}; + +/* Contains all 'struct cpu_socket's. */ +static struct hmap all_cpu_sockets = HMAP_INITIALIZER(&all_cpu_sockets); +/* Contains all 'struct cpu_core's. */ +static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores); +/* True if socket and core info are correctly extracted. */ +static bool found_sockets_and_cores; + +/* Returns true if 'str' contains all digits. Returns false otherwise. */ +static bool +contain_all_digits(const char *str) +{ +return str[strspn(str, "0123456789")] == '\0'; +} + +/* Discovers all cpu sockets and the corresponding cpu cores for each socket. + * Constructs the 'struct cpu_socket' and 'struct cpu_core'. */ +static void +discover_sockets_and_cores(void) +{ +int n_cpus = 0; +int i; + +for (i = 0; i < MAX_CPU_SOCKETS; i++) { +DIR *dir; +char* path; + +/* Constructs the path to node /sys/devices/system/nodeX. */ +path = xasprintf("/sys/devices/system/node/node%d", i); + +if (strlen(path) >= PATH_MAX) { +VLOG_WARN("Path to cpu socket %d exceeds the length limit", i); +break; +} + +dir = opendir(path); + +/* Creates 'struct cpu_socket' if the 'dir' is non-null. */ +if (dir) { +struct cpu_socket *s = xzalloc(sizeof *s); +struct dirent *subdir; +char *endptr = NULL; + +hmap_insert(&all_cpu_sockets, &s->hmap_node, hash_int(i, 0)); +list_init(&s->cores); +s->socket_id = i; + +while ((subdir = readdir(dir)) != NULL) { +if (!strncmp(subdir->d_name, "cpu", 3) +&& contain_all_digits(subdir->d_name + 3)){ +struct cpu_core *c = xzalloc(sizeof *c); +uint32_t core_id; + +core_id = strtoul(subdir->d_name + 3, &endptr, 10); +
Re: [ovs-dev] [PATCH] learning-switch: Make test-controller work with OpenFlow 1.3+.
Looks good to me, Acked-by: Alex Wang On Mon, Aug 4, 2014 at 1:02 PM, Ben Pfaff wrote: > On Wed, Jul 16, 2014 at 03:00:15PM -0700, Ben Pfaff wrote: > > The controller setup in my personal test environment has been broken for > a > > while. I figured that it wasn't anything important, though, because no > one > > else had reported similar problems. Anyway, it turns out that enabling > > OpenFlow 1.3 by default broke test-controller because OpenFlow 1.3 > doesn't > > send table misses to the controller by default. This commit fixes the > > problem. > > > > Signed-off-by: Ben Pfaff > > This fixes a bug that it would be nice to have fixed in the tree. > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ofp-errors: Use EXT-444 extension error codes for properties in OF1.3.
Looks good to me, On Mon, Aug 11, 2014 at 2:36 PM, Ben Pfaff wrote: > These error codes are proposed in the ONF extensibility working group as an > OpenFlow 1.3 extension. > > Error codes are also proposed for the other three "bad property" error > codes, but those already have standardized OpenFlow 1.3 error codes and the > proposal says that implementations should use the standard ones. > > ONF-JIRA: EXT-444. > Signed-off-by: Ben Pfaff > --- > include/openflow/openflow-common.h |3 +++ > lib/ofp-errors.h | 12 ++-- > 2 files changed, 9 insertions(+), 6 deletions(-) > > diff --git a/include/openflow/openflow-common.h > b/include/openflow/openflow-common.h > index 05d5368..3a11725 100644 > --- a/include/openflow/openflow-common.h > +++ b/include/openflow/openflow-common.h > @@ -98,8 +98,11 @@ enum ofp_version { > * > *- ONF_VENDOR_ID is being used within the ONF "extensibility" working > * group to identify extensions being proposed for standardization. > + * > + * The list is sorted numerically. > */ > #define OF_VENDOR_ID0 > +#define HPL_VENDOR_ID 0x04EA /* HP Labs. */ > #define NX_VENDOR_ID0x2320 /* Nicira. */ > #define ONF_VENDOR_ID 0x4f4e4600 /* Open Networking Foundation. */ > > diff --git a/lib/ofp-errors.h b/lib/ofp-errors.h > index e32b751..643fa72 100644 > --- a/lib/ofp-errors.h > +++ b/lib/ofp-errors.h > @@ -557,22 +557,22 @@ enum ofperr { > * [Known as OFPTFFC_BAD_ARGUMENT in OF1.3.] */ > OFPERR_OFPBPC_BAD_VALUE, > > -/* OF1.4+(14,3). Can't handle this many properties. */ > +/* HPL1.3(4443), OF1.4+(14,3). Can't handle this many properties. */ > OFPERR_OFPBPC_TOO_MANY, > > -/* OF1.4+(14,4). A property type was duplicated. */ > +/* HPL1.3(), OF1.4+(14,4). A property type was duplicated. */ > OFPERR_OFPBPC_DUP_TYPE, > > -/* OF1.4+(14,5). Unknown experimenter id specified. */ > +/* HPL1.3(4445), OF1.4+(14,5). Unknown experimenter id specified. */ > OFPERR_OFPBPC_BAD_EXPERIMENTER, > > -/* OF1.4+(14,6). Unknown exp_type for experimenter id. */ > +/* HPL1.3(4446), OF1.4+(14,6). Unknown exp_type for experimenter id. > */ > OFPERR_OFPBPC_BAD_EXP_TYPE, > > -/* OF1.4+(14,7). Unknown value for experimenter id. */ > +/* HPL1.3(4447), OF1.4+(14,7). Unknown value for experimenter id. */ > OFPERR_OFPBPC_BAD_EXP_VALUE, > > -/* OF1.4+(14,8). Permissions error. */ > +/* HPL1.3(4448), OF1.4+(14,8). Permissions error. */ > OFPERR_OFPBPC_EPERM, > > /* ## ## */ > -- > 1.7.10.4 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 1/8] ovs-numa: Add ovs-numa.{c, h} for extracting and storing cpu socket and cpu core info.
Thanks Ben, for the review, > > + > > +/* To avoid sparse warning. */ > > +#ifdef __linux__ > ... > > +#endif /* __linux__ */ > > I didn't see the connection to sparse at first. I'm still not sure > there is one. Maybe a better comment: > /* On non-Linux, these functions are defined inline in ovs-numa.h. */ > > Yeah, this is better. It is cumbersome to describe that autoconf defines the LINUX that cause the compilation of ovs-numa.c, but sparse does not __linux__. > +/* Constructs the path to node /sys/devices/system/nodeX. */ > +path = xasprintf("/sys/devices/system/node/node%d", i); > + > +if (strlen(path) >= PATH_MAX) { > +VLOG_WARN("Path to cpu socket %d exceeds the length limit", > i); > +break; > +} > The above check seems odd. PATH_MAX is about 4 kB, I think. I can't > see any possible way that path would exceed it and, even if it did, > why not let opendir() catch the problem? > > Makes sense, I'll remove it,. > In discover_sockets_and_cores(), you can remove endptr. It is > assigned a value that is never used. > Thx, removed it, > > It looks to me like ovs-numa does not verify that every socket has at > least one core. Should it? > I think it is okay to not check for that, instead, the code will prevent the creation of pmd threads on that cpu socket and issue warning... also, I added this, to log the number cores on each socket: @@ -113,11 +107,18 @@ discover_sockets_and_cores(void) n_cpus++; } } -free(subdir); +VLOG_INFO("Discovered %"PRIu64" CPU cores on CPU socket %d", + list_size(&s->cores), i); +free(path); +closedir(dir); } else { +if (errno != ENOENT) { +VLOG_WARN("opendir(%s) failed (%s)", path, + ovs_strerror(errno)); +} +free(path); break; } do you think it is good? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ovs-numa: Add ovs-numa.{c, h} for extracting and storing cpu socket and cpu core info.
Signed-off-by: Alex Wang --- Changes since first post: - use readdir() for the buffer safety. - only compile ovs-numa.c in linux. - make ovs_numa_get_n_sockets/cores() return *UNSPEC if there is no sockets or cores extracted. - refine code based on Ben's review. - Address review comments from Thomas Graf. - Add dummy interface for WIN32 case. --- lib/automake.mk |2 + lib/ovs-numa.c | 289 +++ lib/ovs-numa.h | 111 ++ tests/ofproto-macros.at |1 + vswitchd/bridge.c |2 + 5 files changed, 405 insertions(+) create mode 100644 lib/ovs-numa.c create mode 100644 lib/ovs-numa.h diff --git a/lib/automake.mk b/lib/automake.mk index e95191d..d46613f 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -310,6 +310,8 @@ lib_libopenvswitch_la_SOURCES += \ lib/netlink-protocol.h \ lib/netlink-socket.c \ lib/netlink-socket.h \ + lib/ovs-numa.c \ + lib/ovs-numa.h \ lib/rtnetlink-link.c \ lib/rtnetlink-link.h \ lib/route-table.c \ diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c new file mode 100644 index 000..bcf9b68 --- /dev/null +++ b/lib/ovs-numa.c @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* On non-Linux, these functions are defined inline in ovs-numa.h. */ +#ifdef __linux__ + +#include +#include "ovs-numa.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "hash.h" +#include "hmap.h" +#include "list.h" +#include "ovs-thread.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(ovs_numa); + +#define MAX_CPU_SOCKETS 128 + +/* Cpu socket. */ +struct cpu_socket { +struct hmap_node hmap_node; /* In the 'all_cpu_sockets'. */ +struct list cores; /* List of cpu cores on the socket. */ +int socket_id; /* Socket id. */ +}; + +/* Cpu core on a cpu socket. */ +struct cpu_core { +struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */ +struct list list_node; /* In 'cpu_socket->cores' list. */ +struct cpu_socket *socket; /* Socket containing the core. */ +int core_id; /* Core id. */ +bool pinned; /* If a thread has been pinned to the core. */ +}; + +/* Contains all 'struct cpu_socket's. */ +static struct hmap all_cpu_sockets = HMAP_INITIALIZER(&all_cpu_sockets); +/* Contains all 'struct cpu_core's. */ +static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores); +/* True if socket and core info are correctly extracted. */ +static bool found_sockets_and_cores; + +/* Returns true if 'str' contains all digits. Returns false otherwise. */ +static bool +contain_all_digits(const char *str) +{ +return str[strspn(str, "0123456789")] == '\0'; +} + +/* Discovers all cpu sockets and the corresponding cpu cores for each socket. + * Constructs the 'struct cpu_socket' and 'struct cpu_core'. */ +static void +discover_sockets_and_cores(void) +{ +int n_cpus = 0; +int i; + +for (i = 0; i < MAX_CPU_SOCKETS; i++) { +DIR *dir; +char* path; + +/* Constructs the path to node /sys/devices/system/nodeX. */ +path = xasprintf("/sys/devices/system/node/node%d", i); +dir = opendir(path); + +/* Creates 'struct cpu_socket' if the 'dir' is non-null. */ +if (dir) { +struct cpu_socket *s = xzalloc(sizeof *s); +struct dirent *subdir; + +hmap_insert(&all_cpu_sockets, &s->hmap_node, hash_int(i, 0)); +list_init(&s->cores); +s->socket_id = i; + +while ((subdir = readdir(dir)) != NULL) { +if (!strncmp(subdir->d_name, "cpu", 3) +&& contain_all_digits(subdir->d_name + 3)){ +struct cpu_core *c = xzalloc(sizeof *c); +uint32_t core_id; + +core_id = strtoul(subdir->d_name + 3, NULL, 10); +hmap_insert(&all_cpu_cores, &c->hmap_node, +hash_int(core_id, 0)); +list_insert(&am
Re: [ovs-dev] [PATCH] ovs-numa: Add ovs-numa.{c, h} for extracting and storing cpu socket and cpu core info.
On Wed, Aug 13, 2014 at 12:42 PM, Ben Pfaff wrote: > Acked-by: Ben Pfaff > Thx, applied to master! ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ovs-numa: Correct printf format specifiers.
Thx for the fix, Acked-by: Alex Wang On Wed, Aug 13, 2014 at 2:19 PM, Ben Pfaff wrote: > list_size() yields a size_t, not uint64_t, and these types are different on > 32-bit targets. > > Signed-off-by: Ben Pfaff > --- > lib/ovs-numa.c |4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c > index bcf9b68..44df40f 100644 > --- a/lib/ovs-numa.c > +++ b/lib/ovs-numa.c > @@ -107,7 +107,7 @@ discover_sockets_and_cores(void) > n_cpus++; > } > } > -VLOG_INFO("Discovered %"PRIu64" CPU cores on CPU socket %d", > +VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on CPU socket %d", >list_size(&s->cores), s->socket_id); > free(path); > closedir(dir); > @@ -121,7 +121,7 @@ discover_sockets_and_cores(void) > } > } > > -VLOG_INFO("Discovered %"PRIu64" CPU Sockets and %d CPU cores", > +VLOG_INFO("Discovered %"PRIuSIZE" CPU Sockets and %d CPU cores", > hmap_count(&all_cpu_sockets), n_cpus); > if (hmap_count(&all_cpu_sockets) && hmap_count(&all_cpu_cores)) { > found_sockets_and_cores = true; > -- > 1.7.10.4 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH V2] bfd: Allow users to set local/remote src/dst MAC address.
Thx, applied both patches to master and branch-2.3. On Wed, Aug 13, 2014 at 6:49 PM, Ethan Jackson wrote: > Acked-by: Ethan Jackson > > > On Mon, Aug 11, 2014 at 6:34 PM, Alex Wang wrote: > > This commit adds options for configuring the MAC addresses > > in BFD state machine. Therein, the "bfd_local_src_mac" and > > "bfd_local_dst_mac" configure the MAC address of sent BFD > > packets. The "bfd_remote_dst_mac" configure the matching > > of MAC address on recevied BFD packets. > > > > Signed-off-by: Alex Wang > > > > --- > > PATCH -> V2: > > - simplify the code. > > --- > > lib/bfd.c| 47 > +-- > > vswitchd/vswitch.xml | 23 +++ > > 2 files changed, 52 insertions(+), 18 deletions(-) > > > > diff --git a/lib/bfd.c b/lib/bfd.c > > index 9069582..5c86451 100644 > > --- a/lib/bfd.c > > +++ b/lib/bfd.c > > @@ -169,8 +169,10 @@ struct bfd { > > > > uint32_t rmt_disc;/* bfd.RemoteDiscr. */ > > > > -uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */ > > -bool eth_dst_set; /* 'eth_dst' set through database. */ > > +uint8_t local_eth_src[ETH_ADDR_LEN]; /* Local eth src address. */ > > +uint8_t local_eth_dst[ETH_ADDR_LEN]; /* Local eth dst address. */ > > + > > +uint8_t rmt_eth_dst[ETH_ADDR_LEN]; /* Remote eth dst address. */ > > > > ovs_be32 ip_src; /* IPv4 source address. */ > > ovs_be32 ip_dst; /* IPv4 destination address. */ > > @@ -388,8 +390,6 @@ bfd_configure(struct bfd *bfd, const char *name, > const struct smap *cfg, > > > > bfd_set_state(bfd, STATE_DOWN, DIAG_NONE); > > > > -memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); > > - > > bfd_status_changed(bfd); > > } > > > > @@ -440,13 +440,25 @@ bfd_configure(struct bfd *bfd, const char *name, > const struct smap *cfg, > > need_poll = true; > > } > > > > -hwaddr = smap_get(cfg, "bfd_dst_mac"); > > -if (hwaddr && eth_addr_from_string(hwaddr, ea) && > !eth_addr_is_zero(ea)) { > > -memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN); > > -bfd->eth_dst_set = true; > > -} else if (bfd->eth_dst_set) { > > -memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN); > > -bfd->eth_dst_set = false; > > +hwaddr = smap_get(cfg, "bfd_local_src_mac"); > > +if (hwaddr && eth_addr_from_string(hwaddr, ea)) { > > +memcpy(bfd->local_eth_src, ea, ETH_ADDR_LEN); > > +} else { > > +memset(bfd->local_eth_src, 0, ETH_ADDR_LEN); > > +} > > + > > +hwaddr = smap_get(cfg, "bfd_local_dst_mac"); > > +if (hwaddr && eth_addr_from_string(hwaddr, ea)) { > > +memcpy(bfd->local_eth_dst, ea, ETH_ADDR_LEN); > > +} else { > > +memset(bfd->local_eth_dst, 0, ETH_ADDR_LEN); > > +} > > + > > +hwaddr = smap_get(cfg, "bfd_remote_dst_mac"); > > +if (hwaddr && eth_addr_from_string(hwaddr, ea)) { > > +memcpy(bfd->rmt_eth_dst, ea, ETH_ADDR_LEN); > > +} else { > > +memset(bfd->rmt_eth_dst, 0, ETH_ADDR_LEN); > > } > > > > ip_src = smap_get(cfg, "bfd_src_ip"); > > @@ -600,8 +612,14 @@ bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, > > > > ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. > */ > > eth = ofpbuf_put_uninit(p, sizeof *eth); > > -memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); > > -memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); > > +memcpy(eth->eth_src, > > + eth_addr_is_zero(bfd->local_eth_src) ? eth_src > > +: bfd->local_eth_src, > > + ETH_ADDR_LEN); > > +memcpy(eth->eth_dst, > > + eth_addr_is_zero(bfd->local_eth_dst) ? eth_addr_bfd > > +: bfd->local_eth_dst, > > + ETH_ADDR_LEN); > > eth->eth_type = htons(ETH_TYPE_IP); > > > > ip = ofpbuf_put_zeros(p, sizeof *ip); > > @@ -657,7 +675,8 @@ bfd_should_process_flow(const struct bfd *bfd_, > const struct flow *flow, > > bool check_tnl_key; > > > > memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl
[ovs-dev] [PATCH] bfd: Clarify the BFD diagnostic.
This commit adds more explanation for the bfd diagnostic and bfd remote diagnostic in vswitch and vtep documentation. Requested-by:Bruce Davie Signed-off-by: Alex Wang --- vswitchd/vswitch.xml | 10 ++ vtep/vtep.xml| 10 ++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index ff9be17..96a607b 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2138,8 +2138,9 @@ - In case of a problem, set to a short message that reports what the - local BFD session thinks is wrong. + In case of a problem, set to an error message that reports what the + local BFD session thinks is wrong. The error messages are defined + in section 4.1 of [RFC 5880]. - In case of a problem, set to a short message that reports what the - remote endpoint's BFD session thinks is wrong. + In case of a problem, set to an error message that reports what the + remote endpoint's BFD session thinks is wrong. The error messages + are defined in section 4.1 of [RFC 5880]. -In case of a problem, set to a short message that reports what the -local BFD session thinks is wrong. +In case of a problem, set to an error message that reports what the +local BFD session thinks is wrong. The error messages are defined +in section 4.1 of [RFC 5880]. -In case of a problem, set to a short message that reports what the -remote endpoint's BFD session thinks is wrong. +In case of a problem, set to an error message that reports what the +remote endpoint's BFD session thinks is wrong. The error messages +are defined in section 4.1 of [RFC 5880]. -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] bfd: Clarify the BFD diagnostic.
Thx, applied to master and branch-2.3. On Thu, Aug 14, 2014 at 1:59 PM, Bruce Davie wrote: > Looks good, thanks. > > Bruce > > On Aug 14, 2014, at 11:02 AM, Alex Wang wrote: > > > This commit adds more explanation for the bfd diagnostic and > > bfd remote diagnostic in vswitch and vtep documentation. > > > > Requested-by:Bruce Davie > > Signed-off-by: Alex Wang > > --- > > vswitchd/vswitch.xml | 10 ++ > > vtep/vtep.xml| 10 ++ > > 2 files changed, 12 insertions(+), 8 deletions(-) > > > > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > > index ff9be17..96a607b 100644 > > --- a/vswitchd/vswitch.xml > > +++ b/vswitchd/vswitch.xml > > @@ -2138,8 +2138,9 @@ > > > > > > > > - In case of a problem, set to a short message that reports what > the > > - local BFD session thinks is wrong. > > + In case of a problem, set to an error message that reports what > the > > + local BFD session thinks is wrong. The error messages are > defined > > + in section 4.1 of [RFC 5880]. > > > > > >> @@ -2149,8 +2150,9 @@ > > > > > > > > - In case of a problem, set to a short message that reports what > the > > - remote endpoint's BFD session thinks is wrong. > > + In case of a problem, set to an error message that reports what > the > > + remote endpoint's BFD session thinks is wrong. The error > messages > > + are defined in section 4.1 of [RFC 5880]. > > > > > > > diff --git a/vtep/vtep.xml b/vtep/vtep.xml > > index 63ed005..4393a16 100644 > > --- a/vtep/vtep.xml > > +++ b/vtep/vtep.xml > > @@ -474,8 +474,9 @@ > > > > > > > > -In case of a problem, set to a short message that reports what > the > > -local BFD session thinks is wrong. > > +In case of a problem, set to an error message that reports what > the > > +local BFD session thinks is wrong. The error messages are > defined > > +in section 4.1 of [RFC 5880]. > > > > > >> @@ -485,8 +486,9 @@ > > > > > > > > -In case of a problem, set to a short message that reports what > the > > -remote endpoint's BFD session thinks is wrong. > > +In case of a problem, set to an error message that reports what > the > > +remote endpoint's BFD session thinks is wrong. The error > messages > > +are defined in section 4.1 of [RFC 5880]. > > > > > > > > -- > > 1.7.9.5 > > > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ofproto-dpif-upcall: Fix use of cleared stack memory.
Commit cc377352d (ofproto: Reorganize in preparation for direct dpdk upcalls.) introduced the bug that uses variable defined on the stack inside while loop for reading dpif upcalls and keeps reference to attributes of the variable within the same function after the stack is cleared. This bug can cause ovs abort. This commit fixes the above issue by defining an array of the variable on the function stack. Signed-off-by: Alex Wang --- ofproto/ofproto-dpif-upcall.c | 27 ++- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 180684c..9f68a7d 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -574,55 +574,56 @@ recv_upcalls(struct handler *handler) struct udpif *udpif = handler->udpif; uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8]; struct ofpbuf recv_bufs[UPCALL_MAX_BATCH]; +struct dpif_upcall dupcalls[UPCALL_MAX_BATCH]; struct upcall upcalls[UPCALL_MAX_BATCH]; size_t n_upcalls, i; n_upcalls = 0; while (n_upcalls < UPCALL_MAX_BATCH) { struct ofpbuf *recv_buf = &recv_bufs[n_upcalls]; +struct dpif_upcall *dupcall = &dupcalls[n_upcalls]; struct upcall *upcall = &upcalls[n_upcalls]; -struct dpif_upcall dupcall; struct pkt_metadata md; struct flow flow; int error; ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls], sizeof recv_stubs[n_upcalls]); -if (dpif_recv(udpif->dpif, handler->handler_id, &dupcall, recv_buf)) { +if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, recv_buf)) { ofpbuf_uninit(recv_buf); break; } -if (odp_flow_key_to_flow(dupcall.key, dupcall.key_len, &flow) +if (odp_flow_key_to_flow(dupcall->key, dupcall->key_len, &flow) == ODP_FIT_ERROR) { goto free_dupcall; } -error = upcall_receive(upcall, udpif->backer, &dupcall.packet, - dupcall.type, dupcall.userdata, &flow); +error = upcall_receive(upcall, udpif->backer, &dupcall->packet, + dupcall->type, dupcall->userdata, &flow); if (error) { if (error == ENODEV) { /* Received packet on datapath port for which we couldn't * associate an ofproto. This can happen if a port is removed * while traffic is being received. Print a rate-limited * message in case it happens frequently. */ -dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall.key, - dupcall.key_len, NULL, 0, NULL, 0, NULL); +dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key, + dupcall->key_len, NULL, 0, NULL, 0, NULL); VLOG_INFO_RL(&rl, "received packet on unassociated datapath " "port %"PRIu32, flow.in_port.odp_port); } goto free_dupcall; } -upcall->key = dupcall.key; -upcall->key_len = dupcall.key_len; +upcall->key = dupcall->key; +upcall->key_len = dupcall->key_len; -if (vsp_adjust_flow(upcall->ofproto, &flow, &dupcall.packet)) { +if (vsp_adjust_flow(upcall->ofproto, &flow, &dupcall->packet)) { upcall->vsp_adjusted = true; } md = pkt_metadata_from_flow(&flow); -flow_extract(&dupcall.packet, &md, &flow); +flow_extract(&dupcall->packet, &md, &flow); error = process_upcall(udpif, upcall, NULL); if (error) { @@ -635,14 +636,14 @@ recv_upcalls(struct handler *handler) cleanup: upcall_uninit(upcall); free_dupcall: -ofpbuf_uninit(&dupcall.packet); +ofpbuf_uninit(&dupcall->packet); ofpbuf_uninit(recv_buf); } if (n_upcalls) { handle_upcalls(handler->udpif, upcalls, n_upcalls); for (i = 0; i < n_upcalls; i++) { -ofpbuf_uninit(CONST_CAST(struct ofpbuf *, upcalls[i].packet)); +ofpbuf_uninit(&dupcalls[i].packet); ofpbuf_uninit(&recv_bufs[i]); upcall_uninit(&upcalls[i]); } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Fix use of cleared stack memory.
Thx, applied to master, On Fri, Aug 15, 2014 at 1:33 PM, Ethan Jackson wrote: > Acked-by: Ethan Jackson > > Thanks for taking care of this. > > Ethan > > On Fri, Aug 15, 2014 at 1:15 AM, Alex Wang wrote: > > Commit cc377352d (ofproto: Reorganize in preparation for direct > > dpdk upcalls.) introduced the bug that uses variable defined on > > the stack inside while loop for reading dpif upcalls and keeps > > reference to attributes of the variable within the same function > > after the stack is cleared. This bug can cause ovs abort. > > > > This commit fixes the above issue by defining an array of the > > variable on the function stack. > > > > Signed-off-by: Alex Wang > > --- > > ofproto/ofproto-dpif-upcall.c | 27 ++- > > 1 file changed, 14 insertions(+), 13 deletions(-) > > > > diff --git a/ofproto/ofproto-dpif-upcall.c > b/ofproto/ofproto-dpif-upcall.c > > index 180684c..9f68a7d 100644 > > --- a/ofproto/ofproto-dpif-upcall.c > > +++ b/ofproto/ofproto-dpif-upcall.c > > @@ -574,55 +574,56 @@ recv_upcalls(struct handler *handler) > > struct udpif *udpif = handler->udpif; > > uint64_t recv_stubs[UPCALL_MAX_BATCH][512 / 8]; > > struct ofpbuf recv_bufs[UPCALL_MAX_BATCH]; > > +struct dpif_upcall dupcalls[UPCALL_MAX_BATCH]; > > struct upcall upcalls[UPCALL_MAX_BATCH]; > > size_t n_upcalls, i; > > > > n_upcalls = 0; > > while (n_upcalls < UPCALL_MAX_BATCH) { > > struct ofpbuf *recv_buf = &recv_bufs[n_upcalls]; > > +struct dpif_upcall *dupcall = &dupcalls[n_upcalls]; > > struct upcall *upcall = &upcalls[n_upcalls]; > > -struct dpif_upcall dupcall; > > struct pkt_metadata md; > > struct flow flow; > > int error; > > > > ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls], > > sizeof recv_stubs[n_upcalls]); > > -if (dpif_recv(udpif->dpif, handler->handler_id, &dupcall, > recv_buf)) { > > +if (dpif_recv(udpif->dpif, handler->handler_id, dupcall, > recv_buf)) { > > ofpbuf_uninit(recv_buf); > > break; > > } > > > > -if (odp_flow_key_to_flow(dupcall.key, dupcall.key_len, &flow) > > +if (odp_flow_key_to_flow(dupcall->key, dupcall->key_len, &flow) > > == ODP_FIT_ERROR) { > > goto free_dupcall; > > } > > > > -error = upcall_receive(upcall, udpif->backer, &dupcall.packet, > > - dupcall.type, dupcall.userdata, &flow); > > +error = upcall_receive(upcall, udpif->backer, &dupcall->packet, > > + dupcall->type, dupcall->userdata, &flow); > > if (error) { > > if (error == ENODEV) { > > /* Received packet on datapath port for which we > couldn't > > * associate an ofproto. This can happen if a port is > removed > > * while traffic is being received. Print a > rate-limited > > * message in case it happens frequently. */ > > -dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall.key, > > - dupcall.key_len, NULL, 0, NULL, 0, NULL); > > +dpif_flow_put(udpif->dpif, DPIF_FP_CREATE, dupcall->key, > > + dupcall->key_len, NULL, 0, NULL, 0, NULL); > > VLOG_INFO_RL(&rl, "received packet on unassociated > datapath " > > "port %"PRIu32, flow.in_port.odp_port); > > } > > goto free_dupcall; > > } > > > > -upcall->key = dupcall.key; > > -upcall->key_len = dupcall.key_len; > > +upcall->key = dupcall->key; > > +upcall->key_len = dupcall->key_len; > > > > -if (vsp_adjust_flow(upcall->ofproto, &flow, &dupcall.packet)) { > > +if (vsp_adjust_flow(upcall->ofproto, &flow, &dupcall->packet)) { > > upcall->vsp_adjusted = true; > > } > > > > md = pkt_metadata_from_flow(&flow); > > -flow_extract(&dupcall.packet, &md, &flow); > > +flow_extract(&dupcall->packet, &md, &flow); > > > >
[ovs-dev] [PATCH] bfd: Clarify bfd:enable.
This commit clarifies the documentation for 'bfd:enable' when it is not specified. Signed-off-by: Alex Wang --- vswitchd/vswitch.xml |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index ab738da..6c49250 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2026,7 +2026,8 @@ - True to enable BFD on this . + True to enable BFD on this . If not + specified, BFD will not be enabled by default. http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] bfd: Clarify bfd:enable.
Thx, applied to master and branch-2.3 On Tue, Aug 19, 2014 at 11:52 AM, Ben Pfaff wrote: > On Tue, Aug 19, 2014 at 11:06:04AM -0700, Alex Wang wrote: > > This commit clarifies the documentation for 'bfd:enable' > > when it is not specified. > > > > Signed-off-by: Alex Wang > > Acked-by: Ben Pfaff > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] vswitch.xml: Fix a typo.
Signed-off-by: Alex Wang --- vswitchd/vswitch.xml |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 6c49250..42e944b 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2096,7 +2096,7 @@ default is 00:23:20:00:00:01. - + Set to an Ethernet address in the form xx:xx:xx:xx:xx:xx to set the MAC used for checking the destination of received BFD packets. -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] vswitch.xml: Fix a typo.
Thx, applied to master and branch-2.3, On Tue, Aug 26, 2014 at 12:26 AM, Justin Pettit wrote: > Acked-by: Justin Pettit > > --Justin > > > On August 25, 2014 at 10:42:24 PM, Alex Wang (al...@nicira.com) wrote: > > Signed-off-by: Alex Wang > > --- > > vswitchd/vswitch.xml | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > > index 6c49250..42e944b 100644 > > --- a/vswitchd/vswitch.xml > > +++ b/vswitchd/vswitch.xml > > @@ -2096,7 +2096,7 @@ > > default is 00:23:20:00:00:01. > > > > > > - > > + > > Set to an Ethernet address in the form > > xx:xx:xx:xx:xx:xx > > to set the MAC used for checking the destination of received BFD packets. > > -- > > 1.7.9.5 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > > > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 2/8] netdev-dpdk: Add function for getting the socket_id of netdev-dpdk.
> > > This should be part of netdev_class api rather than dpdk specific API. > we should try to minimize dpdk specific call in dpif-netdev layer. > > One more issue I noticed in this series is that socket and numa is > used interchangeably, can we use just one? > > I'll add it to netdev_class api. and I'll always use 'numa'. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 4/8] netdev-dpdk: Create 'number of dpdk ifaces on same cpu socket' rx queues and 'number of cpu cores' tx queues for each dpdk interface.
On Sat, Aug 30, 2014 at 12:02 PM, Pravin Shelar wrote: > On Mon, Aug 11, 2014 at 9:56 PM, Alex Wang wrote: > > Before this commit, ovs only creates one tx/rx queue for each > > dpdk interface and uses only one poll thread for handling the > > I/O of all dpdk interfaces. As one step toward using multiple > > poll threads, this commit makes ovs, by default, create same > > number of rx queues as the number dpdk interfaces on the cpu > > socket. Also each dpdk interface will have one tx queue for > > each cpu core, even though not all of those queues will be > > used. > > > > Generally we should describe subject in less than 70 characters. > Commit msg should explain why we are introducing this change. It is > not clear from the patch the relation between number of core on socket > and rx queues. > > Thx, I'll use the 70 characters rule, thought it was 80, Also, I'll shed light on how upcoming patches relates to this patch. > > > @@ -179,7 +180,9 @@ struct netdev_dpdk { > > int port_id; > > int max_packet_len; > > > > -struct dpdk_tx_queue tx_q[NR_QUEUE]; > > +struct dpdk_tx_queue *tx_q; > > +int n_tx_q; > > +int n_rx_q; > > > There is already member in struct netdev called n_rxq to represent > number of rx_queues, we should use that directly. > tx_queues are not visible to dpif-netdev, but later patches will make > them visible, so we should add another member n_txq to struct netdev. > netdev-provide should be a passive layer, driven by dpif-netdev. Logic > of calculating number of queues should is in dpif-netdev, We can add > another API to open multi queue devices like dpdk. > i'm good with adding n_txq to the 'struct netdev'. so i think what you suggest are the following: - add n_txq to 'struct netdev', and add new struct 'struct netdev_txq' - add functions netdev_set_multiq() in 'netdev-provider.h' for configuring the n_txq, n_rxq - like rxq_recv() functions, the send() function will take in 'struct netdev_txq' as argument right? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 5/8] dpif-netdev: Create 'number of dpdk ifaces on cpu socket' pmd threads for each cpu socket.
> > > static struct dp_netdev_port *dp_netdev_lookup_port(const struct > dp_netdev *dp, > > @@ -281,6 +281,15 @@ struct dp_netdev_actions > *dp_netdev_flow_get_actions( > > const struct dp_netdev_flow *); > > static void dp_netdev_actions_free(struct dp_netdev_actions *); > > > > +/* Represents the PMD configuration on a cpu socket. */ > > +struct pmd_socket { > > +struct dp_netdev *dp; > > +struct latch exit_latch; > > +struct pmd_thread *pmd_threads; > > +int socket_id; > > +int n_pmd_threads; > > +}; > > + > We should keep socket to core mapping in numa module rather than in > dpif-netdev. > In my future patches (for per pmd cls/flowtable), i removed the pmd_socket. I'll just move the change forward... > I am not sure why exit latch needs to be per socket, it is global > today, it should be ok for now, no? > > I'll make the 'exit latch' per pmd thread, because, for this optimization: - when dpdk port is deleted, if it is the last dpdk port on the socket, all pmd threads on the socket will be removed, - using global exit latch will cause all pmd threads removed, > > static void * > > pmd_thread_main(void *f_) > > { > > struct pmd_thread *f = f_; > > -struct dp_netdev *dp = f->dp; > > +struct dp_netdev *dp = f->socket->dp; > > unsigned int lc = 0; > > struct rxq_poll *poll_list; > > +struct non_local_pmd_dev *dev_list; > > unsigned int port_seq; > > -int poll_cnt; > > +int poll_cnt, dev_cnt; > > int i; > > > > poll_cnt = 0; > > +dev_cnt = 0; > > poll_list = NULL; > > +dev_list = NULL; > > > > -pmd_thread_setaffinity_cpu(f->id); > > +pmd_thread_setaffinity_cpu(f->core_id); > > reload: > > poll_cnt = pmd_load_queues(f, &poll_list, poll_cnt); > > +dev_cnt = pmd_get_non_local_pmd_dev(f, &dev_list, dev_cnt); > > atomic_read(&f->change_seq, &port_seq); > > > > for (;;) { > > @@ -1682,6 +1777,10 @@ reload: > > dp_netdev_process_rxq_port(dp, poll_list[i].port, > poll_list[i].rx); > > } > > > > +for (i = 0; i < dev_cnt; i++) { > > +netdev_dpdk_flush_non_local(dev_list[i].dev, f->core_id); > > +} > > + > > In transmit function we can flush if this is remote queue. To optimize > remote queue check on every xmit, we can add remote flag to > dpdk-netdev queue. > may i know more about the reason you want to flush for every remote tx pkt? - is it for packet order concern? i'm not sure how expensive it is to call the tx function? but still think we should batch the remote tx here, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 6/8] netdev-dpdk: Add function for configuring rx queues for all dpdk interfaces.
> > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 012ee68..3013df5 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -130,6 +130,8 @@ enum { DRAIN_TSC = 20ULL }; > > > > static int rte_eal_init_ret = ENODEV; > > > > +static size_t dpdk_rx_queues OVS_GUARDED_BY(dpdk_mutex); > > + > There is no need to have this variable if netdev_open_multiq() accepts > number of rx and tx queues. > Agree, based on comments from previous patches, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 4/8] netdev-dpdk: Create 'number of dpdk ifaces on same cpu socket' rx queues and 'number of cpu cores' tx queues for each dpdk interface.
> > > > Generally we should describe subject in less than 70 characters. > > > Commit msg should explain why we are introducing this change. It is > > > not clear from the patch the relation between number of core on socket > > > and rx queues. > > > > > > > > > > Thx, I'll use the 70 characters rule, thought it was 80, > > 80 isn't too pushing the rules too much (sometimes it takes a few > extra characters to describe well), but the first line here is more > like 136 characters. > I newline'ed the title line and it showed as two lines in my git log. i think git format-patch recombined them. anyway, i should use a short title and explain it in the commit log. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] vtep: additions to BFD configuration and status reporting
On Tue, Sep 2, 2014 at 11:27 AM, wrote: > It also brings the usage > of the BFD diagnostic keys into line with the recent clarifications > made for OVS. > Does this commit miss this part? did not see any change/line move related to diagnostic, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch 3/8] netdev-dpdk: Make memory pool name contain the socket id.
Thx, applied this to master. On Sat, Aug 30, 2014 at 12:01 PM, Pravin Shelar wrote: > On Mon, Aug 11, 2014 at 9:56 PM, Alex Wang wrote: > > This commit makes the memory pool name contain the socket id. > > Since dpdk library do not allow creation of memory pool with > > same name, this commit serves as a simple way of making each > > name unique. > > > > Signed-off-by: Alex Wang > > Acked-by: Thomas Graf > > Looks good. > > Acked-by: Pravin B Shelar > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/2] dpif-netdev: Introduce port_try_ref() to prevent a race.
When pmd thread interates through all ports for queue loading, the main thread may unreference and 'rcu-free' a port before pmd thread take new reference of it. This could cause pmd thread fail the reference and access freed memory later. This commit fixes this race by introducing port_try_ref() which uses ovs_refcount_try_ref_rcu(). And the pmd thread will only load the port's queue, if port_try_ref() returns true. Found by inspection. Signed-off-by: Alex Wang --- lib/dpif-netdev.c | 17 - 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2476435..4297db0 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -880,6 +880,16 @@ port_ref(struct dp_netdev_port *port) } } +static bool +port_try_ref(struct dp_netdev_port *port) +{ +if (port) { +return ovs_refcount_try_ref_rcu(&port->ref_cnt); +} + +return false; +} + static void port_destroy__(struct dp_netdev_port *port) { @@ -1864,7 +1874,10 @@ pmd_load_queues(struct pmd_thread *f, index = 0; CMAP_FOR_EACH (port, node, &f->dp->ports) { -if (netdev_is_pmd(port->netdev)) { +/* Calls port_try_ref() to prevent the main thread + * from deleting the port. */ +if (netdev_is_pmd(port->netdev) +&& port_try_ref(port)) { int i; for (i = 0; i < netdev_n_rxq(port->netdev); i++) { @@ -1878,6 +1891,8 @@ pmd_load_queues(struct pmd_thread *f, } index++; } +/* Unrefs the port_try_ref(). */ +port_unref(port); } } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/2] netdev-dpdk: Show interface status for dpdk0.
This commit fixes a bug which prevents the display of interface status for dpdk0. Found by inspection. Signed-off-by: Alex Wang --- lib/netdev-dpdk.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 049ea5e..a4ed355 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1127,7 +1127,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, struct smap *args) struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_); struct rte_eth_dev_info dev_info; -if (dev->port_id <= 0) +if (dev->port_id < 0) return ENODEV; ovs_mutex_lock(&dev->mutex); -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce port_try_ref() to prevent a race.
> > > @@ -1864,7 +1874,10 @@ pmd_load_queues(struct pmd_thread *f, > > index = 0; > > > > CMAP_FOR_EACH (port, node, &f->dp->ports) { > > -if (netdev_is_pmd(port->netdev)) { > > +/* Calls port_try_ref() to prevent the main thread > > + * from deleting the port. */ > > +if (netdev_is_pmd(port->netdev) > > +&& port_try_ref(port)) { > > int i; > > > port_try_ref() check should be first in the condition. > > Otherwise looks good. > Acked-by: Pravin B Shelar > > Could you explain more about why? if port_try_ref() is called first, then we will try reference the both dpdk and non_dpdk port. And we need to un-reference the port outside the if statement. However, when we try to un-reference the port, we dont know if the reference succeeds. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/2] netdev-dpdk: Show interface status for dpdk0.
Thx, applied to master On Tue, Sep 2, 2014 at 7:27 PM, Pravin Shelar wrote: > On Tue, Sep 2, 2014 at 6:44 PM, Alex Wang wrote: > > This commit fixes a bug which prevents the display of interface > > status for dpdk0. > > > > Found by inspection. > > > > Signed-off-by: Alex Wang > Looks good. > > Acked-by: Pravin B Shelar > > > > --- > > lib/netdev-dpdk.c |2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 049ea5e..a4ed355 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -1127,7 +1127,7 @@ netdev_dpdk_get_status(const struct netdev > *netdev_, struct smap *args) > > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_); > > struct rte_eth_dev_info dev_info; > > > > -if (dev->port_id <= 0) > > +if (dev->port_id < 0) > > return ENODEV; > > > > ovs_mutex_lock(&dev->mutex); > > -- > > 1.7.9.5 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce port_try_ref() to prevent a race.
After discussing with Pravin offline, it is good to use first get the reference and then access the 'port->netdev' for pmd check. So, use a nested if statement: @@ -1864,20 +1874,27 @@ pmd_load_queues(struct pmd_thread *f, index = 0; CMAP_FOR_EACH (port, node, &f->dp->ports) { -if (netdev_is_pmd(port->netdev)) { -int i; - -for (i = 0; i < netdev_n_rxq(port->netdev); i++) { -if ((index % dp->n_pmd_threads) == id) { -poll_list = xrealloc(poll_list, sizeof *poll - -port_ref(port); -poll_list[poll_cnt].port = port; -poll_list[poll_cnt].rx = port->rxq[i]; -poll_cnt++; +/* Calls port_try_ref() to prevent the main thread + * from deleting the port. */ +if (port_try_ref(port)) { +if (netdev_is_pmd(port->netdev)) { +int i; + +for (i = 0; i < netdev_n_rxq(port->netdev); i++) +if ((index % dp->n_pmd_threads) == id) { +poll_list = xrealloc(poll_list, +sizeof *poll_list * (pol + +port_ref(port); +poll_list[poll_cnt].port = port; +poll_list[poll_cnt].rx = port->rxq[i]; +poll_cnt++; +} +index++; } -index++; } +/* Unrefs the port_try_ref(). */ +port_unref(port); } Will apply to master soon, On Tue, Sep 2, 2014 at 9:39 PM, Alex Wang wrote: > > @@ -1864,7 +1874,10 @@ pmd_load_queues(struct pmd_thread *f, >> > index = 0; >> > >> > CMAP_FOR_EACH (port, node, &f->dp->ports) { >> > -if (netdev_is_pmd(port->netdev)) { >> > +/* Calls port_try_ref() to prevent the main thread >> > + * from deleting the port. */ >> > +if (netdev_is_pmd(port->netdev) >> > +&& port_try_ref(port)) { >> > int i; >> > >> port_try_ref() check should be first in the condition. >> >> Otherwise looks good. >> Acked-by: Pravin B Shelar >> >> > > Could you explain more about why? if port_try_ref() is called first, > then we will try reference the both dpdk and non_dpdk port. And > we need to un-reference the port outside the if statement. > > However, when we try to un-reference the port, we dont know if the > reference succeeds. > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/2] ovs-numa: Relax the ovs_numa_*() input argument check.
Many of the ovs_numa_*() functions abort the program when the input cpu socket or core id is invalid. This commit relaxes the input check and makes these functions return OVS_*_UNSPEC when the check fails. Signed-off-by: Alex Wang --- lib/ovs-numa.c | 77 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 44df40f..545b075 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -143,13 +143,13 @@ ovs_numa_init(void) bool ovs_numa_cpu_socket_id_is_valid(int sid) { -return sid < ovs_numa_get_n_sockets(); +return found_sockets_and_cores && sid < ovs_numa_get_n_sockets(); } bool ovs_numa_cpu_core_id_is_valid(int cid) { -return cid < ovs_numa_get_n_cores(); +return found_sockets_and_cores && cid < ovs_numa_get_n_cores(); } /* Returns the number of cpu sockets. */ @@ -168,14 +168,14 @@ ovs_numa_get_n_cores(void) : OVS_CORE_UNSPEC; } -/* Returns the number of cpu cores on socket. */ +/* Returns the number of cpu cores on socket. Returns OVS_CORE_UNSPEC + * if 'socket_id' is invalid. */ int ovs_numa_get_n_cores_on_socket(int socket_id) { -if (found_sockets_and_cores) { +if (ovs_numa_cpu_socket_id_is_valid(socket_id)) { struct cpu_socket *socket; -ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id)); socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_sockets, hash_int(socket_id, 0)), struct cpu_socket, hmap_node); @@ -186,16 +186,16 @@ ovs_numa_get_n_cores_on_socket(int socket_id) return OVS_CORE_UNSPEC; } -/* Returns the number of unpinned cpu cores on socket. */ +/* Returns the number of unpinned cpu cores on socket. Returns + * OVS_CORE_UNSPEC if 'socket_id' is invalid. */ int ovs_numa_get_n_unpinned_cores_on_socket(int socket_id) { -if (found_sockets_and_cores) { +if (ovs_numa_cpu_socket_id_is_valid(socket_id)) { struct cpu_socket *socket; struct cpu_core *core; int count = 0; -ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id)); socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_sockets, hash_int(socket_id, 0)), struct cpu_socket, hmap_node); @@ -212,27 +212,28 @@ ovs_numa_get_n_unpinned_cores_on_socket(int socket_id) } /* Given 'core_id', tries to pin that core. Returns true, if succeeds. - * False, if the core has already been pinned. */ + * False, if the core has already been pinned or if 'core_id' is invalid. */ bool ovs_numa_try_pin_core_specific(int core_id) { -struct cpu_core *core; - -ovs_assert(ovs_numa_cpu_core_id_is_valid(core_id)); +if (ovs_numa_cpu_core_id_is_valid(core_id)) { +struct cpu_core *core; -core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, - hash_int(core_id, 0)), -struct cpu_core, hmap_node); -if (!core->pinned) { -core->pinned = true; -return true; +core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, + hash_int(core_id, 0)), +struct cpu_core, hmap_node); +if (!core->pinned) { +core->pinned = true; +return true; +} } return false; } /* Searches through all cores for an unpinned core. Returns the core_id - * if found and set the 'core->pinned' to true. Otherwise, returns -1. */ + * if found and set the 'core->pinned' to true. Otherwise, returns + * OVS_CORE_UNSPEC. */ int ovs_numa_get_unpinned_core_any(void) { @@ -250,22 +251,22 @@ ovs_numa_get_unpinned_core_any(void) /* Searches through all cores on socket with 'socket_id' for an unpinned core. * Returns the core_id if found and sets the 'core->pinned' to true. - * Otherwise, returns -1. */ + * Otherwise, returns OVS_CORE_UNSPEC. */ int ovs_numa_get_unpinned_core_on_socket(int socket_id) { -struct cpu_socket *socket; -struct cpu_core *core; - -ovs_assert(ovs_numa_cpu_socket_id_is_valid(socket_id)); +if(ovs_numa_cpu_socket_id_is_valid(socket_id)) { +struct cpu_socket *socket; +struct cpu_core *core; -socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_sockets, - hash_int(socket_id, 0)), - struct cpu_socket, hmap_node); -LIST_FOR_EACH(core, list_node, &socket->cores) { -if (!core->pinned) { -core->pinned = true; -return core->core_id; +socket = CONTAINER_OF(hmap_first_with_hash(&all_cpu_s
[ovs-dev] [PATCH 2/2] ovs-numa: Add function for getting cpu socket id from core id.
Signed-off-by: Alex Wang --- lib/ovs-numa.c | 18 ++ lib/ovs-numa.h |7 +++ 2 files changed, 25 insertions(+) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 545b075..2de2aa2 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -168,6 +168,24 @@ ovs_numa_get_n_cores(void) : OVS_CORE_UNSPEC; } +/* Given 'core_id', returns the corresponding socket id. Returns + * OVS_SOCKET_UNSPEC if 'core_id' is invalid. */ +int +ovs_numa_get_socket_id(int core_id) +{ +if (ovs_numa_cpu_core_id_is_valid(core_id)) { +struct cpu_core *core; + +core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, + hash_int(core_id, 0)), +struct cpu_core, hmap_node); + +return core->socket->socket_id; +} + +return OVS_SOCKET_UNSPEC; +} + /* Returns the number of cpu cores on socket. Returns OVS_CORE_UNSPEC * if 'socket_id' is invalid. */ int diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 95884c5..46d5750 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -32,6 +32,7 @@ bool ovs_numa_cpu_socket_id_is_valid(int sid); bool ovs_numa_cpu_core_id_is_valid(int cid); int ovs_numa_get_n_sockets(void); int ovs_numa_get_n_cores(void); +int ovs_numa_get_socket_id(int core_id); int ovs_numa_get_n_cores_on_socket(int socket_id); int ovs_numa_get_n_unpinned_cores_on_socket(int socket_id); bool ovs_numa_try_pin_core_specific(int core_id); @@ -72,6 +73,12 @@ ovs_numa_get_n_cores(void) } static inline int +ovs_numa_get_socket_id(int core_id OVS_UNUSED) +{ +return OVS_SOCKET_UNSPEC; +} + +static inline int ovs_numa_get_n_cores_on_socket(int socket_id OVS_UNUSED) { return OVS_CORE_UNSPEC; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ovs-numa: Add module description.
Add a short description of the module and its assumption. Signed-off-by: Alex Wang --- lib/ovs-numa.c | 12 1 file changed, 12 insertions(+) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 2de2aa2..ad44d95 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -36,6 +36,18 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa); +/* ovs-numa module + * === + * + * This module stores the affinity information of cpu sockets and cpu cores. + * It also provides functions to bookkeep the pin of threads on cpu cores. + * + * It is assumed that the cpu socket ids and cpu core ids all start from 0 and + * range continuously. So, for example, if 'ovs_numa_get_n_cores()' returns N, + * user can assume core ids from 0 to N-1 are all valid and there is a + * 'struct cpu_core' for each id. + */ + #define MAX_CPU_SOCKETS 128 /* Cpu socket. */ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ovs-numa: Add module description.
Thx for the clarification, I'll replace the 'socket' with 'numa' and repost the series. On Thu, Sep 4, 2014 at 1:32 PM, Pravin Shelar wrote: > On Thu, Sep 4, 2014 at 1:26 PM, Alex Wang wrote: > > Add a short description of the module and its assumption. > > > > Signed-off-by: Alex Wang > > --- > > lib/ovs-numa.c | 12 > > 1 file changed, 12 insertions(+) > > > > diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c > > index 2de2aa2..ad44d95 100644 > > --- a/lib/ovs-numa.c > > +++ b/lib/ovs-numa.c > > @@ -36,6 +36,18 @@ > > > > VLOG_DEFINE_THIS_MODULE(ovs_numa); > > > > +/* ovs-numa module > > + * === > > + * > > + * This module stores the affinity information of cpu sockets and cpu > cores. > > + * It also provides functions to bookkeep the pin of threads on cpu > cores. > > + * > > + * It is assumed that the cpu socket ids and cpu core ids all start > from 0 and > > + * range continuously. So, for example, if 'ovs_numa_get_n_cores()' > returns N, > > + * user can assume core ids from 0 to N-1 are all valid and there is a > > + * 'struct cpu_core' for each id. > > + */ > > + > > #define MAX_CPU_SOCKETS 128 > > numa and socket is used interchangeably in this module which is not > always the case. Most of cases numa node is same as socket but some > platform can have multiple socket on a numa node. We should be more > specific and use only one term. > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/4] ovs-numa: Replace name 'cpu_socket' with 'numa_node'.
'numa' and 'socket' are currently used interchangeably in ovs-numa. But they are not always equivalent as some platform can have multiple sockets on a numa node. To avoid confusion, this commit renames all the 'cpu_socket' to 'numa_node'. Signed-off-by: Alex Wang --- lib/ovs-numa.c | 138 lib/ovs-numa.h | 28 ++-- 2 files changed, 83 insertions(+), 83 deletions(-) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 44df40f..3583663 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -36,30 +36,30 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa); -#define MAX_CPU_SOCKETS 128 +#define MAX_NUMA_NODES 128 -/* Cpu socket. */ -struct cpu_socket { -struct hmap_node hmap_node; /* In the 'all_cpu_sockets'. */ -struct list cores; /* List of cpu cores on the socket. */ -int socket_id; /* Socket id. */ +/* numa node. */ +struct numa_node { +struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */ +struct list cores; /* List of cpu cores on the numa node. */ +int numa_id;/* numa node id. */ }; -/* Cpu core on a cpu socket. */ +/* Cpu core on a numa node. */ struct cpu_core { struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */ -struct list list_node; /* In 'cpu_socket->cores' list. */ -struct cpu_socket *socket; /* Socket containing the core. */ +struct list list_node; /* In 'numa_node->cores' list. */ +struct numa_node *numa;/* numa node containing the core. */ int core_id; /* Core id. */ bool pinned; /* If a thread has been pinned to the core. */ }; -/* Contains all 'struct cpu_socket's. */ -static struct hmap all_cpu_sockets = HMAP_INITIALIZER(&all_cpu_sockets); +/* Contains all 'struct numa_node's. */ +static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes); /* Contains all 'struct cpu_core's. */ static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores); -/* True if socket and core info are correctly extracted. */ -static bool found_sockets_and_cores; +/* True if numa node and core info are correctly extracted. */ +static bool found_numa_and_core; /* Returns true if 'str' contains all digits. Returns false otherwise. */ static bool @@ -68,15 +68,15 @@ contain_all_digits(const char *str) return str[strspn(str, "0123456789")] == '\0'; } -/* Discovers all cpu sockets and the corresponding cpu cores for each socket. - * Constructs the 'struct cpu_socket' and 'struct cpu_core'. */ +/* Discovers all numa nodes and the corresponding cpu cores. + * Constructs the 'struct numa_node' and 'struct cpu_core'. */ static void -discover_sockets_and_cores(void) +discover_numa_and_core(void) { int n_cpus = 0; int i; -for (i = 0; i < MAX_CPU_SOCKETS; i++) { +for (i = 0; i < MAX_NUMA_NODES; i++) { DIR *dir; char* path; @@ -84,14 +84,14 @@ discover_sockets_and_cores(void) path = xasprintf("/sys/devices/system/node/node%d", i); dir = opendir(path); -/* Creates 'struct cpu_socket' if the 'dir' is non-null. */ +/* Creates 'struct numa_node' if the 'dir' is non-null. */ if (dir) { -struct cpu_socket *s = xzalloc(sizeof *s); +struct numa_node *n = xzalloc(sizeof *n); struct dirent *subdir; -hmap_insert(&all_cpu_sockets, &s->hmap_node, hash_int(i, 0)); -list_init(&s->cores); -s->socket_id = i; +hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0)); +list_init(&n->cores); +n->numa_id = i; while ((subdir = readdir(dir)) != NULL) { if (!strncmp(subdir->d_name, "cpu", 3) @@ -102,13 +102,13 @@ discover_sockets_and_cores(void) core_id = strtoul(subdir->d_name + 3, NULL, 10); hmap_insert(&all_cpu_cores, &c->hmap_node, hash_int(core_id, 0)); -list_insert(&s->cores, &c->list_node); +list_insert(&n->cores, &c->list_node); c->core_id = core_id; n_cpus++; } } -VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on CPU socket %d", - list_size(&s->cores), s->socket_id); +VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d", + list_size(&n->cores), n->numa_id); free(p
[ovs-dev] [PATCH 2/4] ovs-numa: Relax the ovs_numa_*() input argument check.
Many of the ovs_numa_*() functions abort the program when the input cpu socket or core id is invalid. This commit relaxes the input check and makes these functions return OVS_*_UNSPEC when the check fails. Signed-off-by: Alex Wang --- lib/ovs-numa.c | 77 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 3583663..bcdea3b 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -143,13 +143,13 @@ ovs_numa_init(void) bool ovs_numa_numa_id_is_valid(int numa_id) { -return numa_id < ovs_numa_get_n_numas(); +return found_numa_and_core && numa_id < ovs_numa_get_n_numas(); } bool ovs_numa_core_id_is_valid(int core_id) { -return core_id < ovs_numa_get_n_cores(); +return found_numa_and_core && core_id < ovs_numa_get_n_cores(); } /* Returns the number of numa nodes. */ @@ -168,14 +168,14 @@ ovs_numa_get_n_cores(void) : OVS_CORE_UNSPEC; } -/* Returns the number of cpu cores on numa node. */ +/* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC + * if 'numa_id' is invalid. */ int ovs_numa_get_n_cores_on_numa(int numa_id) { -if (found_numa_and_core) { +if (ovs_numa_numa_id_is_valid(numa_id)) { struct numa_node *numa; -ovs_assert(ovs_numa_numa_id_is_valid(numa_id)); numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, hash_int(numa_id, 0)), struct numa_node, hmap_node); @@ -186,16 +186,16 @@ ovs_numa_get_n_cores_on_numa(int numa_id) return OVS_CORE_UNSPEC; } -/* Returns the number of unpinned cpu cores on numa node. */ +/* Returns the number of unpinned cpu cores on numa node. Returns + * OVS_CORE_UNSPEC if 'numa_id' is invalid. */ int ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) { -if (found_numa_and_core) { +if (ovs_numa_numa_id_is_valid(numa_id)) { struct numa_node *numa; struct cpu_core *core; int count = 0; -ovs_assert(ovs_numa_numa_id_is_valid(numa_id)); numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, hash_int(numa_id, 0)), struct numa_node, hmap_node); @@ -212,27 +212,28 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) } /* Given 'core_id', tries to pin that core. Returns true, if succeeds. - * False, if the core has already been pinned. */ + * False, if the core has already been pinned or if 'core_id' is invalid. */ bool ovs_numa_try_pin_core_specific(int core_id) { -struct cpu_core *core; - -ovs_assert(ovs_numa_core_id_is_valid(core_id)); +if (ovs_numa_core_id_is_valid(core_id)) { +struct cpu_core *core; -core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, - hash_int(core_id, 0)), -struct cpu_core, hmap_node); -if (!core->pinned) { -core->pinned = true; -return true; +core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, + hash_int(core_id, 0)), +struct cpu_core, hmap_node); +if (!core->pinned) { +core->pinned = true; +return true; +} } return false; } /* Searches through all cores for an unpinned core. Returns the core_id - * if found and set the 'core->pinned' to true. Otherwise, returns -1. */ + * if found and set the 'core->pinned' to true. Otherwise, returns + * OVS_CORE_UNSPEC. */ int ovs_numa_get_unpinned_core_any(void) { @@ -250,22 +251,22 @@ ovs_numa_get_unpinned_core_any(void) /* Searches through all cores on numa node with 'numa_id' for an unpinned * core. Returns the core_id if found and sets the 'core->pinned' to true. - * Otherwise, returns -1. */ + * Otherwise, returns OVS_CORE_UNSPEC. */ int ovs_numa_get_unpinned_core_on_numa(int numa_id) { -struct numa_node *numa; -struct cpu_core *core; - -ovs_assert(ovs_numa_numa_id_is_valid(numa_id)); +if (ovs_numa_numa_id_is_valid(numa_id)) { +struct numa_node *numa; +struct cpu_core *core; -numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, - hash_int(numa_id, 0)), -struct numa_node, hmap_node); -LIST_FOR_EACH(core, list_node, &numa->cores) { -if (!core->pinned) { -core->pinned = true; -return core->core_id; +numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, + hash_int(numa_id, 0)), +struct numa_
[ovs-dev] [PATCH 4/4] ovs-numa: Add module description.
Add a short description of the module and its assumption. Signed-off-by: Alex Wang --- lib/ovs-numa.c | 12 1 file changed, 12 insertions(+) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 4698967..c909d5a 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -36,6 +36,18 @@ VLOG_DEFINE_THIS_MODULE(ovs_numa); +/* ovs-numa module + * === + * + * This module stores the affinity information of numa nodes and cpu cores. + * It also provides functions to bookkeep the pin of threads on cpu cores. + * + * It is assumed that the numa node ids and cpu core ids all start from 0 and + * range continuously. So, for example, if 'ovs_numa_get_n_cores()' returns N, + * user can assume core ids from 0 to N-1 are all valid and there is a + * 'struct cpu_core' for each id. + */ + #define MAX_NUMA_NODES 128 /* numa node. */ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 3/4] ovs-numa: Add function for getting numa node id from core id.
Signed-off-by: Alex Wang --- lib/ovs-numa.c | 17 + lib/ovs-numa.h |7 +++ 2 files changed, 24 insertions(+) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index bcdea3b..4698967 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -168,6 +168,23 @@ ovs_numa_get_n_cores(void) : OVS_CORE_UNSPEC; } +/* Given 'core_id', returns the corresponding numa node id. Returns + * OVS_NUMA_UNSPEC if 'core_id' is invalid. */ +int +ovs_numa_get_numa_id(int core_id) +{ +if (ovs_numa_core_id_is_valid(core_id)) { +struct cpu_core *core; + +core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, + hash_int(core_id, 0)), +struct cpu_core, hmap_node); + +return core->numa->numa_id; +} +return OVS_NUMA_UNSPEC; +} + /* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC * if 'numa_id' is invalid. */ int diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index d4a57a5..83732fc 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -32,6 +32,7 @@ bool ovs_numa_numa_id_is_valid(int numa_id); bool ovs_numa_core_id_is_valid(int core_id); int ovs_numa_get_n_numas(void); int ovs_numa_get_n_cores(void); +int ovs_numa_get_numa_id(int core_id); int ovs_numa_get_n_cores_on_numa(int numa_id); int ovs_numa_get_n_unpinned_cores_on_numa(int numa_id); bool ovs_numa_try_pin_core_specific(int core_id); @@ -72,6 +73,12 @@ ovs_numa_get_n_cores(void) } static inline int +ovs_numa_get_numa_id(int core_id OVS_UNUSED) +{ +return OVS_NUMA_UNSPEC; +} + +static inline int ovs_numa_get_n_cores_on_numa(int numa_id OVS_UNUSED) { return OVS_CORE_UNSPEC; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/4] ovs-numa: Relax the ovs_numa_*() input argument check.
Thx for pointing it out, I'll make a note in the comment. On Fri, Sep 5, 2014 at 2:19 PM, Pravin Shelar wrote: > On Thu, Sep 4, 2014 at 3:17 PM, Alex Wang wrote: > > Many of the ovs_numa_*() functions abort the program when the > > input cpu socket or core id is invalid. This commit relaxes > > the input check and makes these functions return OVS_*_UNSPEC > > when the check fails. > > > > Signed-off-by: Alex Wang > > Due to CPU hot plugging, the range based check for core id and numa id > is not good enough, but it can improved later on. > Otherwise looks good. > > Acked-by: Pravin B Shelar > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/4] ovs-numa: Add module description.
Thanks, applied all to master On Fri, Sep 5, 2014 at 2:20 PM, Pravin Shelar wrote: > On Thu, Sep 4, 2014 at 3:17 PM, Alex Wang wrote: > > Add a short description of the module and its assumption. > > > > Signed-off-by: Alex Wang > > > LGTM > Acked-by: Pravin B Shelar > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ovs-numa: Fix a missing initialization.
This commit updates the pointer to 'struct numa_node' when initializing the 'struct cpu_core'. Signed-off-by: Alex Wang --- lib/ovs-numa.c |1 + 1 file changed, 1 insertion(+) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 4f0ba0d..7da1407 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -122,6 +122,7 @@ discover_numa_and_core(void) hash_int(core_id, 0)); list_insert(&n->cores, &c->list_node); c->core_id = core_id; +c->numa = n; n_cpus++; } } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ofproto-dpif-upcall: Fix a free of uninitialized memory.
On current master, when 'upcall_receive()' returns error, the ofpbuf 'upcall->put_actions' is uninitialized. In most cases, the failure of 'upcall_receive()' will cause uninitialize of 'upcall->put_actions' and free of uninitialized pointer. This commit fixes the issue by making 'upcall_receive()' always initialize the 'upcall->put_actions'. Found by inspection. Signed-off-by: Alex Wang --- ofproto/ofproto-dpif-upcall.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index cce89dd..d6ffee3 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -828,6 +828,8 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, { int error; +ofpbuf_init(&upcall->put_actions, 0); + error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix, &upcall->sflow, NULL, &upcall->in_port); if (error) { @@ -838,7 +840,6 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, upcall->packet = packet; upcall->type = type; upcall->userdata = userdata; -ofpbuf_init(&upcall->put_actions, 0); upcall->xout_initialized = false; upcall->vsp_adjusted = false; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Fix a free of uninitialized memory.
Thx Ben, > It's a little unusual for an initialization function that fails to > still leave the object that it initializes ready to be destroyed. If > upcall_receive() fails, is there other data in 'upcall' that needs to > be destroyed? No, I don't think there is other data to be destroyed.. > If there is, then the other caller of upcall_receive() > is wrong because it does not call upcall_uninit(). If there is not, > then upcall_cb() can simply "return error;" instead of calling > upcall_uninit(). > Makes sense, I'll also add a note to the upcall_receive() comment ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ofproto-dpif-upcall: Fix a free of uninitialized memory.
On current master, when 'upcall_receive()' returns error, the ofpbuf 'upcall->put_actions' is uninitialized. In some usecase, the failure of 'upcall_receive()' will cause uninitialize of 'upcall->put_actions' and free of uninitialized pointer. This commit fixes the issue by making the caller not conduct the uninitialize of the 'upcall' when there is error. Found by inspection. Signed-off-by: Alex Wang --- PATCH -> V2: 1. make the caller just return the error. 2. fix the comment to warn the user. --- ofproto/ofproto-dpif-upcall.c |9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index cce89dd..8e890f8 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -818,9 +818,10 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout, buf); } -/* The upcall must be destroyed with upcall_uninit() before quiescing, - * as the referred objects are guaranteed to exist only until the calling - * thread quiesces. */ +/* If there is no error, the upcall must be destroyed with upcall_uninit() + * before quiescing, as the referred objects are guaranteed to exist only + * until the calling thread quiesces. Otherwise, do not call upcall_uninit() + * since the 'upcall->put_actions' remains uninitialized. */ static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct ofpbuf *packet, enum dpif_upcall_type type, @@ -944,7 +945,7 @@ upcall_cb(const struct ofpbuf *packet, const struct flow *flow, error = upcall_receive(&upcall, udpif->backer, packet, type, userdata, flow); if (error) { -goto out; +return error; } error = process_upcall(udpif, &upcall, actions); -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v2 6/6] dpif-netdev: Create multiple pmd threads by default.
With this commit, ovs by default will try creating 'number of dpdk interfaces on numa node' pmd threads for each numa node and pin the pmd threads to available cpu cores on the numa node. NON_PMD_CORE_ID (currently 0) is used to reserve a particular cpu core for the I/O of all non-pmd threads. No pmd thread can be pinned to this reserved core. As side-effects of this commit: - the exact-match cache for non-pmd threads is removed from 'struct dp_netdev'. Instead, all non-pmd threads will use the exact-match cache defined in the 'struct dp_netdev_pmd_thread' for NON_PMD_CORE_ID. - the received packet processing functions are refactored to use 'struct dp_netdev_pmd_thread' as input. - the 'netdev_send()' function will be called with the proper queue id. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. --- lib/dpif-netdev.c | 382 +++-- lib/dpif-netdev.h |4 +- lib/netdev-dpdk.c | 17 ++- lib/netdev-dpdk.h |7 + 4 files changed, 274 insertions(+), 136 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 098f5e7..41514cb 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -52,6 +52,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-rcu.h" #include "packet-dpif.h" #include "packets.h" @@ -158,7 +159,6 @@ struct emc_cache { * *dp_netdev_mutex (global) *port_mutex - *emc_mutex *flow_mutex */ struct dp_netdev { @@ -195,17 +195,8 @@ struct dp_netdev { upcall_callback *upcall_cb; /* Callback function for executing upcalls. */ void *upcall_aux; -/* Forwarding threads. */ -struct latch exit_latch; -struct pmd_thread *pmd_threads; -size_t n_pmd_threads; -int pmd_count; - -/* Exact match cache for non-pmd devices. - * Pmd devices use instead each thread's flow_cache for this purpose. - * Protected by emc_mutex */ -struct emc_cache flow_cache OVS_GUARDED; -struct ovs_mutex emc_mutex; +/* Stores all 'struct dp_netdev_pmd_thread's. */ +struct cmap poll_threads; }; static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, @@ -340,15 +331,25 @@ static void dp_netdev_actions_free(struct dp_netdev_actions *); * * DPDK used PMD for accessing NIC. * - * A thread that receives packets from PMD ports, looks them up in the flow - * table, and executes the actions it finds. + * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for + * I/O of all non-pmd threads. There will be no actual thread created + * for the instance. **/ -struct pmd_thread { +struct dp_netdev_pmd_thread { struct dp_netdev *dp; +struct cmap_node node; /* In 'dp->poll_threads'. */ +/* Per thread exact-match cache. Note, the instance for cpu core + * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly + * need to be protected (e.g. by 'dp_netdev_mutex'). All other + * instances will only be accessed by its own pmd thread. */ struct emc_cache flow_cache; +struct latch exit_latch;/* For terminating the pmd thread. */ +atomic_uint change_seq; /* For reloading pmd ports. */ pthread_t thread; -int id; -atomic_uint change_seq; +int index; /* Idx of this pmd thread among pmd*/ +/* threads on same cpu socket. */ +int core_id;/* CPU core id of this pmd thread. */ +int numa_id;/* numa node id of this pmd thread. */ }; #define PMD_INITIAL_SEQ 1 @@ -374,19 +375,23 @@ static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *) OVS_REQUIRES(dp->port_mutex); static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); -static void dp_netdev_execute_actions(struct dp_netdev *dp, +static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, struct dpif_packet **, int c, bool may_steal, struct pkt_metadata *, - struct emc_cache *flow_cache, const struct nlattr *actions, size_t actions_len); -static void dp_netdev_port_input(struct dp_netdev *dp, - struct emc_cache *flow_cache, +static void dp_netdev_port_input(struct dp_netdev_pmd_thread *pmd, struct dpif_packet **packets, int cnt, odp_port_t port_no); -static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n); static void dp_netdev_disable_upc
[ovs-dev] [dpdk patch v2 4/6] netdev-dpdk: Add indicator for flushing tx queue.
Previous commit makes OVS create one tx queue for each cpu core. An upcoming patch will allow multiple pmd threads be created and pinned to cpu cores. So each pmd thread will use the tx queue corresponding to its core id. Moreover, the pmd threads running on different numa node than the dpdk interface (called non-local pmd thread) will not handle the rx of the interface. Consequently, there need to be a way to flush the tx queues of the non-local pmd threads. To address the queue flushing issue, this commit introduces a new flag 'flush_tx' in the 'struct dpdk_tx_queue' which is set if the queue is to be used by a non-local pmd thread. Then, when enqueueing the tx pkts, if the flag is set, the tx queue will always be flushed immediately after the enqueue. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. --- lib/netdev-dpdk.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index c7bc4c5..d6bf0bd 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -158,6 +158,8 @@ struct dpdk_mp { /* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ struct dpdk_tx_queue { rte_spinlock_t tx_lock; +bool flush_tx; /* Set to true to flush queue everytime */ + /* pkts are queued. */ int count; uint64_t tsc; struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; @@ -494,6 +496,9 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk ovs_mutex_lock(&netdev->mutex); +/* XXX: need to discover device node at run time. */ +netdev->socket_id = SOCKET0; + /* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q * for each of them. */ n_cores = ovs_numa_get_n_cores(); @@ -505,7 +510,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk } netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); for (i = 0; i < n_cores; i++) { +int core_id = ovs_numa_get_numa_id(i); + rte_spinlock_init(&netdev->tx_q[i].tx_lock); +/* If the corresponding core is not on the same numa node + * as 'netdev', flags the 'flush_tx'. */ +netdev->tx_q[i].flush_tx = netdev->socket_id == core_id; } netdev_->n_txq = n_cores; netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); @@ -516,9 +526,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk netdev->mtu = ETHER_MTU; netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); -/* XXX: need to discover device node at run time. */ -netdev->socket_id = SOCKET0; - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); if (!netdev->dpdk_mp) { err = ENOMEM; @@ -758,7 +765,7 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid, txq->count += tocopy; i += tocopy; -if (txq->count == MAX_TX_QUEUE_LEN) { +if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) { dpdk_queue_flush__(dev, qid); } diff_tsc = rte_get_timer_cycles() - txq->tsc; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v2 3/6] netdev-dpdk: Create multiple tx/rx queues by default.
Before this commit, ovs creates one tx and one rx queue for each dpdk interface and uses only one poll thread for handling I/O of all dpdk interfaces. An upcoming patch will allow multiple poll threads be created. As a preparation, this commit changes the default to create multiple tx and rx queues. Specifically, the default number of rx queues will be the number of dpdk interfaces on the numa node. And the upcoming work will assign each rx queue to a different poll thread. The default number of tx queues will be the number of cpu cores on the machine. Although not all the tx queues will be used, each poll thread will have its own queue for transmission on the dpdk interface. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. --- lib/dpif-netdev.h |1 - lib/netdev-dpdk.c | 54 - 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index fb9d0e2..adbbf87 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -42,7 +42,6 @@ static inline void dp_packet_pad(struct ofpbuf *b) #define NETDEV_QID_NONE INT_MAX -#define NR_QUEUE 1 #define NR_PMD_THREADS 1 #ifdef __cplusplus diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 4f9c5c2..c7bc4c5 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -36,6 +36,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-thread.h" #include "ovs-rcu.h" #include "packet-dpif.h" @@ -154,6 +155,7 @@ struct dpdk_mp { struct list list_node OVS_GUARDED_BY(dpdk_mutex); }; +/* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ struct dpdk_tx_queue { rte_spinlock_t tx_lock; int count; @@ -182,7 +184,7 @@ struct netdev_dpdk { int port_id; int max_packet_len; -struct dpdk_tx_queue tx_q[NR_QUEUE]; +struct dpdk_tx_queue *tx_q; struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex); @@ -387,6 +389,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED) return NULL; } +/* Returns the number of dpdk ifaces on the numa node 'numa_id'. */ +static int +dpdk_get_n_devs(int numa_id) +{ +int count = 0; +int i; + +ovs_assert(ovs_numa_numa_id_is_valid(numa_id)); + +for (i = 0; i < rte_eth_dev_count(); i++) { +if (rte_eth_dev_socket_id(i) == numa_id) { +count++; +} +} +ovs_assert(count); + +return count; +} + static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) { @@ -399,13 +420,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) return ENODEV; } -diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, &port_conf); +diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq, + &port_conf); if (diag) { VLOG_ERR("eth dev config error %d",diag); return -diag; } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->up.n_txq; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, dev->socket_id, &tx_conf); if (diag) { @@ -414,7 +436,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) } } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->up.n_rxq; i++) { diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, dev->socket_id, &rx_conf, dev->dpdk_mp->mp); @@ -466,15 +488,27 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk { struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); int err = 0; -int i; +int n_cores, i; ovs_mutex_init(&netdev->mutex); ovs_mutex_lock(&netdev->mutex); -for (i = 0; i < NR_QUEUE; i++) { +/* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q + * for each of them. */ +n_cores = ovs_numa_get_n_cores(); +if (n_cores == OVS_CORE_UNSPEC) { +VLOG_WARN_RL(&rl, "netdev_dpdk txq creation failed due to no cpu" + " core info"); +err = ENOENT; +goto unlock; +} +netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); +for (i = 0; i < n_cores; i++) { rte_spinlock_init(&netdev->tx_q[i].tx_lock); } +netdev_->n_txq = n_cores; +netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); netdev->port_id = port_no; @@ -495,12 +529,13 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk if (err) { goto unlock;
[ovs-dev] [dpdk patch v2 2/6] netdev: Add n_txq to 'struct netdev'.
This commit adds new variable n_txq to 'struct netdev' for recording the number of tx queues. Correspondingly, the send_*() functions are extended to accept queue id as input argument. All 'netdev-*' implementation will ignore the queue id since having multiple tx queues is not supported. Upcomping patches will start using it and create multiple tx queues for dpdk netdev by default. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. --- lib/dpif-netdev.c |2 +- lib/dpif-netdev.h |2 ++ lib/netdev-bsd.c |6 +++--- lib/netdev-dpdk.c |6 +++--- lib/netdev-dummy.c|4 ++-- lib/netdev-linux.c|6 +++--- lib/netdev-provider.h | 27 --- lib/netdev.c | 41 + lib/netdev.h |5 +++-- 9 files changed, 58 insertions(+), 41 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 869fb55..098f5e7 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2447,7 +2447,7 @@ dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt, case OVS_ACTION_ATTR_OUTPUT: p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a))); if (OVS_LIKELY(p)) { -netdev_send(p->netdev, packets, cnt, may_steal); +netdev_send(p->netdev, NETDEV_QID_NONE, packets, cnt, may_steal); } else if (may_steal) { for (i = 0; i < cnt; i++) { dpif_packet_delete(packets[i]); diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 410fcfa..fb9d0e2 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -40,6 +40,8 @@ static inline void dp_packet_pad(struct ofpbuf *b) } } +#define NETDEV_QID_NONE INT_MAX + #define NR_QUEUE 1 #define NR_PMD_THREADS 1 diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index dd1893c..32c04a3 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -687,8 +687,8 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_) * system or a tap device. */ static int -netdev_bsd_send(struct netdev *netdev_, struct dpif_packet **pkts, int cnt, -bool may_steal) +netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED, +struct dpif_packet **pkts, int cnt, bool may_steal) { struct netdev_bsd *dev = netdev_bsd_cast(netdev_); const char *name = netdev_get_name(netdev_); @@ -750,7 +750,7 @@ netdev_bsd_send(struct netdev *netdev_, struct dpif_packet **pkts, int cnt, * with netdev_send(). */ static void -netdev_bsd_send_wait(struct netdev *netdev_) +netdev_bsd_send_wait(struct netdev *netdev_, int qid OVS_UNUSED) { struct netdev_bsd *dev = netdev_bsd_cast(netdev_); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 70ee8db..4f9c5c2 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -495,6 +495,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk if (err) { goto unlock; } +netdev_->n_txq = NR_QUEUE; netdev_->n_rxq = NR_QUEUE; list_push_back(&dpdk_list, &netdev->list_node); @@ -792,8 +793,8 @@ dpdk_do_tx_copy(struct netdev *netdev, struct dpif_packet ** pkts, int cnt) } static int -netdev_dpdk_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, - bool may_steal) +netdev_dpdk_send(struct netdev *netdev, int qid, struct dpif_packet **pkts, + int cnt, bool may_steal) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); int ret; @@ -808,7 +809,6 @@ netdev_dpdk_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, } } } else { -int qid; int next_tx_idx = 0; int dropped = 0; diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index 14239f1..050bad8 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -847,8 +847,8 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_) } static int -netdev_dummy_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, - bool may_steal) +netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED, + struct dpif_packet **pkts, int cnt, bool may_steal) { struct netdev_dummy *dev = netdev_dummy_cast(netdev); int error = 0; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 298ccd6..e311122 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1057,8 +1057,8 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_) * The kernel maintains a packet transmission queue, so the caller is not * expected to do additional queuing of packets. */ static int -netdev_linux_send(struct netdev *netdev_, struct dpif_packet **pkts, int cnt, - bool may_steal) +netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, + struct dpif_packet **pkts, int cnt, bool may_steal) { int i; int error = 0; @@ -1161,7 +1161,7 @@ netdev_linux_send(s
[ovs-dev] [dpdk patch v2 5/6] netdev-dpdk: Remove the tx queue spinlock.
The previous commit makes OVS create one tx queue for each cpu core, each pmd thread will use a separate tx queue. Also, tx of non-pmd threads on dpdk interface is all through 'NON_PMD_THREAD_TX_QUEUE', protected by the 'nonpmd_mempool_mutex'. Therefore, the spinlock is no longer needed. And this commit removes it from 'struct dpdk_tx_queue'. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. --- lib/netdev-dpdk.c |6 -- 1 file changed, 6 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index d6bf0bd..26b1591 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -157,7 +157,6 @@ struct dpdk_mp { /* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ struct dpdk_tx_queue { -rte_spinlock_t tx_lock; bool flush_tx; /* Set to true to flush queue everytime */ /* pkts are queued. */ int count; @@ -512,7 +511,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk for (i = 0; i < n_cores; i++) { int core_id = ovs_numa_get_numa_id(i); -rte_spinlock_init(&netdev->tx_q[i].tx_lock); /* If the corresponding core is not on the same numa node * as 'netdev', flags the 'flush_tx'. */ netdev->tx_q[i].flush_tx = netdev->socket_id == core_id; @@ -716,9 +714,7 @@ dpdk_queue_flush(struct netdev_dpdk *dev, int qid) if (txq->count == 0) { return; } -rte_spinlock_lock(&txq->tx_lock); dpdk_queue_flush__(dev, qid); -rte_spinlock_unlock(&txq->tx_lock); } static int @@ -754,7 +750,6 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid, int i = 0; -rte_spinlock_lock(&txq->tx_lock); while (i < cnt) { int freeslots = MAX_TX_QUEUE_LEN - txq->count; int tocopy = MIN(freeslots, cnt-i); @@ -773,7 +768,6 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid, dpdk_queue_flush__(dev, qid); } } -rte_spinlock_unlock(&txq->tx_lock); } /* Tx function. Transmit packets indefinitely */ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v2 1/6] netdev: Add function for getting the numa node id of netdev.
This commit adds a new API to the 'struct netdev_class' which allows user to query the numa node id the 'netdev' is on. Currently, only netdev-dpdk module implements this function. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. --- lib/netdev-bsd.c |1 + lib/netdev-dpdk.c |9 + lib/netdev-dummy.c|1 + lib/netdev-linux.c|1 + lib/netdev-provider.h |7 +++ lib/netdev-vport.c|1 + lib/netdev.c | 12 lib/netdev.h |1 + 8 files changed, 33 insertions(+) diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 82f61ff..dd1893c 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -1562,6 +1562,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off, NULL, /* get_config */ \ NULL, /* set_config */ \ NULL, /* get_tunnel_config */\ +NULL, /* get_numa_id */ \ \ netdev_bsd_send, \ netdev_bsd_send_wait,\ diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 62e4cb9..70ee8db 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -581,6 +581,14 @@ netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args) return 0; } +static int +netdev_dpdk_get_numa_id(const struct netdev *netdev_) +{ +struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); + +return netdev->socket_id; +} + static struct netdev_rxq * netdev_dpdk_rxq_alloc(void) { @@ -1345,6 +1353,7 @@ unlock_dpdk: netdev_dpdk_get_config, \ NULL, /* netdev_dpdk_set_config */ \ NULL, /* get_tunnel_config */ \ +netdev_dpdk_get_numa_id,/* get_numa_id */ \ \ netdev_dpdk_send, /* send */\ NULL, /* send_wait */ \ diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index e3cf72d..14239f1 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1044,6 +1044,7 @@ static const struct netdev_class dummy_class = { netdev_dummy_get_config, netdev_dummy_set_config, NULL, /* get_tunnel_config */ +NULL, /* get_numa_id */ netdev_dummy_send, /* send */ NULL, /* send_wait */ diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index d8a76f9..298ccd6 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2750,6 +2750,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off, NULL, /* get_config */\ NULL, /* set_config */\ NULL, /* get_tunnel_config */ \ +NULL, /* get_numa_id */ \ \ netdev_linux_send, \ netdev_linux_send_wait, \ diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 56244ad..e8f2ebe 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -22,6 +22,7 @@ #include "connectivity.h" #include "netdev.h" #include "list.h" +#include "ovs-numa.h" #include "seq.h" #include "shash.h" #include "smap.h" @@ -30,6 +31,8 @@ extern "C" { #endif +#define NETDEV_NUMA_UNSPEC OVS_NUMA_UNSPEC + /* A network device (e.g. an Ethernet device). * * Network device implementations may read these members but should not modify @@ -250,6 +253,10 @@ struct netdev_class { const struct netdev_tunnel_config * (*get_tunnel_config)(const struct netdev *netdev); +/* Returns the id of the numa node the 'netdev' is on. If there is no + * such info, returns NETDEV_NUMA_UNSPEC. */ +int (*get_numa_id)(const struct netdev *netdev); + /* Sends buffers on 'netdev'. * Returns 0 if successful (for every buffer), otherwise a positive errno value. * Returns EAGAIN without blocking if one or more packets cannot be diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 17adf94..6ab90bf 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -775,6 +775,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats) GET_CONFIG, \ SET_CONFIG, \ GET_TUNNEL_CONFIG, \ +NULL, /* get_numa_id */ \
Re: [ovs-dev] [PATCH] ovs-numa: Fix a missing initialization.
Thx, applied to master, On Mon, Sep 8, 2014 at 10:52 AM, Ben Pfaff wrote: > On Mon, Sep 08, 2014 at 08:29:22AM -0700, Alex Wang wrote: > > This commit updates the pointer to 'struct numa_node' > > when initializing the 'struct cpu_core'. > > > > Signed-off-by: Alex Wang > > Acked-by: Ben Pfaff > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Fix a free of uninitialized memory.
Thx, applied to master, On Mon, Sep 8, 2014 at 11:18 AM, Ben Pfaff wrote: > On Mon, Sep 08, 2014 at 11:13:34AM -0700, Alex Wang wrote: > > On current master, when 'upcall_receive()' returns error, the > > ofpbuf 'upcall->put_actions' is uninitialized. In some usecase, > > the failure of 'upcall_receive()' will cause uninitialize of > > 'upcall->put_actions' and free of uninitialized pointer. > > > > This commit fixes the issue by making the caller not conduct > > the uninitialize of the 'upcall' when there is error. > > > > Found by inspection. > > > > Signed-off-by: Alex Wang > > > > --- > > PATCH -> V2: > > 1. make the caller just return the error. > > 2. fix the comment to warn the user. > > Acked-by: Ben Pfaff > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] ovs-rcu: Make ovsrcu_quiesce() flush the callback event set.
On current master, the per-thread callback event set is flushed when ovsrcu_quiesce_start() is called or when the callback event set is full. For threads that only call 'ovsrcu_quiesce()' to indicate quiescient state, their callback event set will not be flushed for execution until the set is full. And this could take a very long time. Theoretically, this should not be an issue, since rcu postponed callback events should only free the old version of objects. However, current ovs does not follow this rule, and some callback events include other activities like unregistering the netdev from global name-netdev map. The delay of unregistering the netdev (by threads that only calls ovsrcu_quiesce()) will prevent the recreate of same netdev indefinitely. As a short-term workaround, this commit makes every call to ovsrcu_quiesce() flush the callback event set. In the long run, there will be a refactor of the use of ovs-rcu module, in which all callback events only free the old version of objects. Signed-off-by: Alex Wang --- lib/ovs-rcu.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index a052d6c..6fe6103 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -138,8 +138,14 @@ ovsrcu_quiesce_start(void) void ovsrcu_quiesce(void) { +struct ovsrcu_perthread *perthread; + ovsrcu_init_module(); -ovsrcu_perthread_get()->seqno = seq_read(global_seqno); +perthread = pthread_getspecific(perthread_key); +perthread->seqno = seq_read(global_seqno); +if (perthread->cbset) { +ovsrcu_flush_cbset(perthread); +} seq_change(global_seqno); ovsrcu_quiesced(); -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v3 1/6] netdev: Add function for getting the numa node id of netdev.
This commit adds a new API to the 'struct netdev_class' which allows user to query the numa node id the 'netdev' is on. Currently, only netdev-dpdk module implements this function. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. V2 -> V3: - rebase. --- lib/netdev-bsd.c |1 + lib/netdev-dpdk.c |9 + lib/netdev-dummy.c|1 + lib/netdev-linux.c|1 + lib/netdev-provider.h |7 +++ lib/netdev-vport.c|1 + lib/netdev.c | 12 lib/netdev.h |1 + 8 files changed, 33 insertions(+) diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 82f61ff..dd1893c 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -1562,6 +1562,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off, NULL, /* get_config */ \ NULL, /* set_config */ \ NULL, /* get_tunnel_config */\ +NULL, /* get_numa_id */ \ \ netdev_bsd_send, \ netdev_bsd_send_wait,\ diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 62e4cb9..70ee8db 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -581,6 +581,14 @@ netdev_dpdk_get_config(const struct netdev *netdev_, struct smap *args) return 0; } +static int +netdev_dpdk_get_numa_id(const struct netdev *netdev_) +{ +struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); + +return netdev->socket_id; +} + static struct netdev_rxq * netdev_dpdk_rxq_alloc(void) { @@ -1345,6 +1353,7 @@ unlock_dpdk: netdev_dpdk_get_config, \ NULL, /* netdev_dpdk_set_config */ \ NULL, /* get_tunnel_config */ \ +netdev_dpdk_get_numa_id,/* get_numa_id */ \ \ netdev_dpdk_send, /* send */\ NULL, /* send_wait */ \ diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index e3cf72d..14239f1 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1044,6 +1044,7 @@ static const struct netdev_class dummy_class = { netdev_dummy_get_config, netdev_dummy_set_config, NULL, /* get_tunnel_config */ +NULL, /* get_numa_id */ netdev_dummy_send, /* send */ NULL, /* send_wait */ diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index d8a76f9..298ccd6 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2750,6 +2750,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off, NULL, /* get_config */\ NULL, /* set_config */\ NULL, /* get_tunnel_config */ \ +NULL, /* get_numa_id */ \ \ netdev_linux_send, \ netdev_linux_send_wait, \ diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 56244ad..e8f2ebe 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -22,6 +22,7 @@ #include "connectivity.h" #include "netdev.h" #include "list.h" +#include "ovs-numa.h" #include "seq.h" #include "shash.h" #include "smap.h" @@ -30,6 +31,8 @@ extern "C" { #endif +#define NETDEV_NUMA_UNSPEC OVS_NUMA_UNSPEC + /* A network device (e.g. an Ethernet device). * * Network device implementations may read these members but should not modify @@ -250,6 +253,10 @@ struct netdev_class { const struct netdev_tunnel_config * (*get_tunnel_config)(const struct netdev *netdev); +/* Returns the id of the numa node the 'netdev' is on. If there is no + * such info, returns NETDEV_NUMA_UNSPEC. */ +int (*get_numa_id)(const struct netdev *netdev); + /* Sends buffers on 'netdev'. * Returns 0 if successful (for every buffer), otherwise a positive errno value. * Returns EAGAIN without blocking if one or more packets cannot be diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 17adf94..6ab90bf 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -775,6 +775,7 @@ get_stats(const struct netdev *netdev, struct netdev_stats *stats) GET_CONFIG, \ SET_CONFIG, \ GET_TUNNEL_CONFIG, \ +NULL, /* g
[ovs-dev] [dpdk patch v3 2/6] netdev: Add n_txq to 'struct netdev'.
This commit adds new variable n_txq to 'struct netdev' for recording the number of tx queues. Correspondingly, the send_*() functions are extended to accept queue id as input argument. All 'netdev-*' implementation will ignore the queue id since having multiple tx queues is not supported. Upcomping patches will start using it and create multiple tx queues for dpdk netdev by default. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. V2 -> V3: - rebase. --- lib/dpif-netdev.c |2 +- lib/dpif-netdev.h |2 ++ lib/netdev-bsd.c |6 +++--- lib/netdev-dpdk.c |6 +++--- lib/netdev-dummy.c|4 ++-- lib/netdev-linux.c|6 +++--- lib/netdev-provider.h | 27 --- lib/netdev.c | 41 + lib/netdev.h |5 +++-- 9 files changed, 58 insertions(+), 41 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 072ed5d..dcce02e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2435,7 +2435,7 @@ dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt, case OVS_ACTION_ATTR_OUTPUT: p = dp_netdev_lookup_port(dp, u32_to_odp(nl_attr_get_u32(a))); if (OVS_LIKELY(p)) { -netdev_send(p->netdev, packets, cnt, may_steal); +netdev_send(p->netdev, NETDEV_QID_NONE, packets, cnt, may_steal); } else if (may_steal) { for (i = 0; i < cnt; i++) { dpif_packet_delete(packets[i]); diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index 410fcfa..fb9d0e2 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -40,6 +40,8 @@ static inline void dp_packet_pad(struct ofpbuf *b) } } +#define NETDEV_QID_NONE INT_MAX + #define NR_QUEUE 1 #define NR_PMD_THREADS 1 diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index dd1893c..32c04a3 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -687,8 +687,8 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_) * system or a tap device. */ static int -netdev_bsd_send(struct netdev *netdev_, struct dpif_packet **pkts, int cnt, -bool may_steal) +netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED, +struct dpif_packet **pkts, int cnt, bool may_steal) { struct netdev_bsd *dev = netdev_bsd_cast(netdev_); const char *name = netdev_get_name(netdev_); @@ -750,7 +750,7 @@ netdev_bsd_send(struct netdev *netdev_, struct dpif_packet **pkts, int cnt, * with netdev_send(). */ static void -netdev_bsd_send_wait(struct netdev *netdev_) +netdev_bsd_send_wait(struct netdev *netdev_, int qid OVS_UNUSED) { struct netdev_bsd *dev = netdev_bsd_cast(netdev_); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 70ee8db..4f9c5c2 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -495,6 +495,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk if (err) { goto unlock; } +netdev_->n_txq = NR_QUEUE; netdev_->n_rxq = NR_QUEUE; list_push_back(&dpdk_list, &netdev->list_node); @@ -792,8 +793,8 @@ dpdk_do_tx_copy(struct netdev *netdev, struct dpif_packet ** pkts, int cnt) } static int -netdev_dpdk_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, - bool may_steal) +netdev_dpdk_send(struct netdev *netdev, int qid, struct dpif_packet **pkts, + int cnt, bool may_steal) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); int ret; @@ -808,7 +809,6 @@ netdev_dpdk_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, } } } else { -int qid; int next_tx_idx = 0; int dropped = 0; diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index 14239f1..050bad8 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -847,8 +847,8 @@ netdev_dummy_rxq_drain(struct netdev_rxq *rxq_) } static int -netdev_dummy_send(struct netdev *netdev, struct dpif_packet **pkts, int cnt, - bool may_steal) +netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED, + struct dpif_packet **pkts, int cnt, bool may_steal) { struct netdev_dummy *dev = netdev_dummy_cast(netdev); int error = 0; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 298ccd6..e311122 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1057,8 +1057,8 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_) * The kernel maintains a packet transmission queue, so the caller is not * expected to do additional queuing of packets. */ static int -netdev_linux_send(struct netdev *netdev_, struct dpif_packet **pkts, int cnt, - bool may_steal) +netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED, + struct dpif_packet **pkts, int cnt, bool may_steal) { int i; int error = 0; @@ -1161,7
[ovs-dev] [dpdk patch v3 3/6] netdev-dpdk: Create multiple tx/rx queues by default.
Before this commit, ovs creates one tx and one rx queue for each dpdk interface and uses only one poll thread for handling I/O of all dpdk interfaces. An upcoming patch will allow multiple poll threads be created. As a preparation, this commit changes the default to create multiple tx and rx queues. Specifically, the default number of rx queues will be the number of dpdk interfaces on the numa node. And the upcoming work will assign each rx queue to a different poll thread. The default number of tx queues will be the number of cpu cores on the machine. Although not all the tx queues will be used, each poll thread will have its own queue for transmission on the dpdk interface. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. V2 -> V3: - rebase. --- lib/dpif-netdev.h |1 - lib/netdev-dpdk.c | 54 - 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h index fb9d0e2..adbbf87 100644 --- a/lib/dpif-netdev.h +++ b/lib/dpif-netdev.h @@ -42,7 +42,6 @@ static inline void dp_packet_pad(struct ofpbuf *b) #define NETDEV_QID_NONE INT_MAX -#define NR_QUEUE 1 #define NR_PMD_THREADS 1 #ifdef __cplusplus diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 4f9c5c2..c7bc4c5 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -36,6 +36,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-thread.h" #include "ovs-rcu.h" #include "packet-dpif.h" @@ -154,6 +155,7 @@ struct dpdk_mp { struct list list_node OVS_GUARDED_BY(dpdk_mutex); }; +/* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ struct dpdk_tx_queue { rte_spinlock_t tx_lock; int count; @@ -182,7 +184,7 @@ struct netdev_dpdk { int port_id; int max_packet_len; -struct dpdk_tx_queue tx_q[NR_QUEUE]; +struct dpdk_tx_queue *tx_q; struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex); @@ -387,6 +389,25 @@ dpdk_watchdog(void *dummy OVS_UNUSED) return NULL; } +/* Returns the number of dpdk ifaces on the numa node 'numa_id'. */ +static int +dpdk_get_n_devs(int numa_id) +{ +int count = 0; +int i; + +ovs_assert(ovs_numa_numa_id_is_valid(numa_id)); + +for (i = 0; i < rte_eth_dev_count(); i++) { +if (rte_eth_dev_socket_id(i) == numa_id) { +count++; +} +} +ovs_assert(count); + +return count; +} + static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) { @@ -399,13 +420,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) return ENODEV; } -diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, &port_conf); +diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq, + &port_conf); if (diag) { VLOG_ERR("eth dev config error %d",diag); return -diag; } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->up.n_txq; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, dev->socket_id, &tx_conf); if (diag) { @@ -414,7 +436,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) } } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->up.n_rxq; i++) { diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, dev->socket_id, &rx_conf, dev->dpdk_mp->mp); @@ -466,15 +488,27 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk { struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); int err = 0; -int i; +int n_cores, i; ovs_mutex_init(&netdev->mutex); ovs_mutex_lock(&netdev->mutex); -for (i = 0; i < NR_QUEUE; i++) { +/* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q + * for each of them. */ +n_cores = ovs_numa_get_n_cores(); +if (n_cores == OVS_CORE_UNSPEC) { +VLOG_WARN_RL(&rl, "netdev_dpdk txq creation failed due to no cpu" + " core info"); +err = ENOENT; +goto unlock; +} +netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); +for (i = 0; i < n_cores; i++) { rte_spinlock_init(&netdev->tx_q[i].tx_lock); } +netdev_->n_txq = n_cores; +netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); netdev->port_id = port_no; @@ -495,12 +529,13 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk if (err) {
[ovs-dev] [dpdk patch v3 5/6] netdev-dpdk: Remove the tx queue spinlock.
The previous commit makes OVS create one tx queue for each cpu core, each pmd thread will use a separate tx queue. Also, tx of non-pmd threads on dpdk interface is all through 'NON_PMD_THREAD_TX_QUEUE', protected by the 'nonpmd_mempool_mutex'. Therefore, the spinlock is no longer needed. And this commit removes it from 'struct dpdk_tx_queue'. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. V2 -> V3: - rebase. --- lib/netdev-dpdk.c |6 -- 1 file changed, 6 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index d6bf0bd..26b1591 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -157,7 +157,6 @@ struct dpdk_mp { /* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ struct dpdk_tx_queue { -rte_spinlock_t tx_lock; bool flush_tx; /* Set to true to flush queue everytime */ /* pkts are queued. */ int count; @@ -512,7 +511,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk for (i = 0; i < n_cores; i++) { int core_id = ovs_numa_get_numa_id(i); -rte_spinlock_init(&netdev->tx_q[i].tx_lock); /* If the corresponding core is not on the same numa node * as 'netdev', flags the 'flush_tx'. */ netdev->tx_q[i].flush_tx = netdev->socket_id == core_id; @@ -716,9 +714,7 @@ dpdk_queue_flush(struct netdev_dpdk *dev, int qid) if (txq->count == 0) { return; } -rte_spinlock_lock(&txq->tx_lock); dpdk_queue_flush__(dev, qid); -rte_spinlock_unlock(&txq->tx_lock); } static int @@ -754,7 +750,6 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid, int i = 0; -rte_spinlock_lock(&txq->tx_lock); while (i < cnt) { int freeslots = MAX_TX_QUEUE_LEN - txq->count; int tocopy = MIN(freeslots, cnt-i); @@ -773,7 +768,6 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid, dpdk_queue_flush__(dev, qid); } } -rte_spinlock_unlock(&txq->tx_lock); } /* Tx function. Transmit packets indefinitely */ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v3 4/6] netdev-dpdk: Add indicator for flushing tx queue.
Previous commit makes OVS create one tx queue for each cpu core. An upcoming patch will allow multiple pmd threads be created and pinned to cpu cores. So each pmd thread will use the tx queue corresponding to its core id. Moreover, the pmd threads running on different numa node than the dpdk interface (called non-local pmd thread) will not handle the rx of the interface. Consequently, there need to be a way to flush the tx queues of the non-local pmd threads. To address the queue flushing issue, this commit introduces a new flag 'flush_tx' in the 'struct dpdk_tx_queue' which is set if the queue is to be used by a non-local pmd thread. Then, when enqueueing the tx pkts, if the flag is set, the tx queue will always be flushed immediately after the enqueue. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. V2 -> V3: - rebase. --- lib/netdev-dpdk.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index c7bc4c5..d6bf0bd 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -158,6 +158,8 @@ struct dpdk_mp { /* There will one 'struct dpdk_tx_queue' created for each cpu core.*/ struct dpdk_tx_queue { rte_spinlock_t tx_lock; +bool flush_tx; /* Set to true to flush queue everytime */ + /* pkts are queued. */ int count; uint64_t tsc; struct rte_mbuf *burst_pkts[MAX_TX_QUEUE_LEN]; @@ -494,6 +496,9 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk ovs_mutex_lock(&netdev->mutex); +/* XXX: need to discover device node at run time. */ +netdev->socket_id = SOCKET0; + /* There can only be ovs_numa_get_n_cores() pmd threads, so creates a tx_q * for each of them. */ n_cores = ovs_numa_get_n_cores(); @@ -505,7 +510,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk } netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); for (i = 0; i < n_cores; i++) { +int core_id = ovs_numa_get_numa_id(i); + rte_spinlock_init(&netdev->tx_q[i].tx_lock); +/* If the corresponding core is not on the same numa node + * as 'netdev', flags the 'flush_tx'. */ +netdev->tx_q[i].flush_tx = netdev->socket_id == core_id; } netdev_->n_txq = n_cores; netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); @@ -516,9 +526,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk netdev->mtu = ETHER_MTU; netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); -/* XXX: need to discover device node at run time. */ -netdev->socket_id = SOCKET0; - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); if (!netdev->dpdk_mp) { err = ENOMEM; @@ -758,7 +765,7 @@ dpdk_queue_pkts(struct netdev_dpdk *dev, int qid, txq->count += tocopy; i += tocopy; -if (txq->count == MAX_TX_QUEUE_LEN) { +if (txq->count == MAX_TX_QUEUE_LEN || txq->flush_tx) { dpdk_queue_flush__(dev, qid); } diff_tsc = rte_get_timer_cycles() - txq->tsc; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v3 6/6] dpif-netdev: Create multiple pmd threads by default.
With this commit, ovs by default will try creating 'number of dpdk interfaces on numa node' pmd threads for each numa node and pin the pmd threads to available cpu cores on the numa node. NON_PMD_CORE_ID (currently 0) is used to reserve a particular cpu core for the I/O of all non-pmd threads. No pmd thread can be pinned to this reserved core. As side-effects of this commit: - the exact-match cache for non-pmd threads is removed from 'struct dp_netdev'. Instead, all non-pmd threads will use the exact-match cache defined in the 'struct dp_netdev_pmd_thread' for NON_PMD_CORE_ID. - the received packet processing functions are refactored to use 'struct dp_netdev_pmd_thread' as input. - the 'netdev_send()' function will be called with the proper queue id. Signed-off-by: Alex Wang --- PATCH -> V2 - rebase and refactor the code. V2 -> V3: - both pmd and non-pmd threads can call the dpif_netdev_execute(). so, use a per-thread variable to help recognize the calling thread. --- lib/dpif-netdev.c | 407 + lib/dpif-netdev.h |4 +- lib/netdev-dpdk.c | 17 ++- lib/netdev-dpdk.h |7 + 4 files changed, 302 insertions(+), 133 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index dcce02e..29a92b3 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -52,6 +52,7 @@ #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" +#include "ovs-numa.h" #include "ovs-rcu.h" #include "packet-dpif.h" #include "packets.h" @@ -158,7 +159,6 @@ struct emc_cache { * *dp_netdev_mutex (global) *port_mutex - *emc_mutex *flow_mutex */ struct dp_netdev { @@ -195,17 +195,16 @@ struct dp_netdev { upcall_callback *upcall_cb; /* Callback function for executing upcalls. */ void *upcall_aux; -/* Forwarding threads. */ -struct latch exit_latch; -struct pmd_thread *pmd_threads; -size_t n_pmd_threads; -int pmd_count; - -/* Exact match cache for non-pmd devices. - * Pmd devices use instead each thread's flow_cache for this purpose. - * Protected by emc_mutex */ -struct emc_cache flow_cache OVS_GUARDED; -struct ovs_mutex emc_mutex; +/* Stores all 'struct dp_netdev_pmd_thread's. */ +struct cmap poll_threads; + +/* Protects the access of the 'struct dp_netdev_pmd_thread' + * instance for non-pmd thread. */ +struct ovs_mutex non_pmd_mutex; + +/* Each pmd thread will store its pointer to + * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */ +ovsthread_key_t per_pmd_key; }; static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp, @@ -340,15 +339,25 @@ static void dp_netdev_actions_free(struct dp_netdev_actions *); * * DPDK used PMD for accessing NIC. * - * A thread that receives packets from PMD ports, looks them up in the flow - * table, and executes the actions it finds. + * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for + * I/O of all non-pmd threads. There will be no actual thread created + * for the instance. **/ -struct pmd_thread { +struct dp_netdev_pmd_thread { struct dp_netdev *dp; +struct cmap_node node; /* In 'dp->poll_threads'. */ +/* Per thread exact-match cache. Note, the instance for cpu core + * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly + * need to be protected (e.g. by 'dp_netdev_mutex'). All other + * instances will only be accessed by its own pmd thread. */ struct emc_cache flow_cache; +struct latch exit_latch;/* For terminating the pmd thread. */ +atomic_uint change_seq; /* For reloading pmd ports. */ pthread_t thread; -int id; -atomic_uint change_seq; +int index; /* Idx of this pmd thread among pmd*/ +/* threads on same numa node. */ +int core_id;/* CPU core id of this pmd thread. */ +int numa_id;/* numa node id of this pmd thread. */ }; #define PMD_INITIAL_SEQ 1 @@ -374,18 +383,22 @@ static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *) OVS_REQUIRES(dp->port_mutex); static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); -static void dp_netdev_execute_actions(struct dp_netdev *dp, +static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, struct dpif_packet **, int c, bool may_steal, struct pkt_metadata *, - struct emc_cache *flow_cache, const struct nlatt
[ovs-dev] [PATCH 3/4] ovs-appctl: Add option '--option' to show all long options.
Signed-off-by: Alex Wang --- utilities/ovs-appctl.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/utilities/ovs-appctl.c b/utilities/ovs-appctl.c index bb17ec2..8249bf8 100644 --- a/utilities/ovs-appctl.c +++ b/utilities/ovs-appctl.c @@ -81,6 +81,19 @@ main(int argc, char *argv[]) } static void +options(const struct option *opts, size_t len) +{ +int i; + +for (i = 0; i < len - 1; i++) { +const struct option *opt = &opts[i]; + +printf("--%s\n", opt->name); +} +exit(EXIT_SUCCESS); +} + +static void usage(void) { printf("\ @@ -116,6 +129,7 @@ parse_command_line(int argc, char *argv[]) {"target", required_argument, NULL, 't'}, {"execute", no_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, +{"option", no_argument, NULL, 'o'}, {"version", no_argument, NULL, 'V'}, {"timeout", required_argument, NULL, 'T'}, VLOG_LONG_OPTIONS, @@ -157,6 +171,11 @@ parse_command_line(int argc, char *argv[]) usage(); break; +case 'o': +/* Always skips the last one. */ +options(long_options, ARRAY_SIZE(long_options) - 1); +break; + case 'T': time_alarm(atoi(optarg)); break; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/4] ovs-appctl: Make command description all lowercase.
Signed-off-by: Alex Wang --- lib/dpif-netdev.c |4 ++-- lib/netdev-dummy.c |2 +- lib/timeval.c |2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 072ed5d..a4dabb6 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2719,8 +2719,8 @@ dpif_dummy_register(bool override) dpif_dummy_register__("dummy"); unixctl_command_register("dpif-dummy/change-port-number", - "DP PORT NEW-NUMBER", + "dp port new-number", 3, 3, dpif_dummy_change_port_number, NULL); -unixctl_command_register("dpif-dummy/delete-port", "DP PORT", +unixctl_command_register("dpif-dummy/delete-port", "dp port", 2, 2, dpif_dummy_delete_port, NULL); } diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index e3cf72d..39a3fd7 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1328,7 +1328,7 @@ netdev_dummy_conn_state(struct unixctl_conn *conn, int argc, void netdev_dummy_register(bool override) { -unixctl_command_register("netdev-dummy/receive", "NAME PACKET|FLOW...", +unixctl_command_register("netdev-dummy/receive", "name packet|flow...", 2, INT_MAX, netdev_dummy_receive, NULL); unixctl_command_register("netdev-dummy/set-admin-state", "[netdev] up|down", 1, 2, diff --git a/lib/timeval.c b/lib/timeval.c index 9d90f1b..0587834 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -732,7 +732,7 @@ timeval_dummy_register(void) { timewarp_enabled = true; unixctl_command_register("time/stop", "", 0, 0, timeval_stop_cb, NULL); -unixctl_command_register("time/warp", "[LARGE_MSECS] MSECS", 1, 2, +unixctl_command_register("time/warp", "[large_msecs] msecs", 1, 2, timeval_warp_cb, NULL); } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/4] ovsdb-server: Remove the 'enable-dummy' option.
There is no use case of this option in ovsdb-server. Also, it causes dpif-dummy and netdev-dummy module register unrelated unixctl commands. Signed-off-by: Alex Wang --- ovsdb/ovsdb-server.c |7 --- tests/ovsdb-server.at |2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index a0b8f30..7dc2adb 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -26,7 +26,6 @@ #include "command-line.h" #include "daemon.h" #include "dirs.h" -#include "dummy.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "file.h" @@ -1224,7 +1223,6 @@ parse_options(int *argcp, char **argvp[], OPT_UNIXCTL, OPT_RUN, OPT_BOOTSTRAP_CA_CERT, -OPT_ENABLE_DUMMY, VLOG_OPTION_ENUMS, DAEMON_OPTION_ENUMS }; @@ -1242,7 +1240,6 @@ parse_options(int *argcp, char **argvp[], {"private-key", required_argument, NULL, 'p'}, {"certificate", required_argument, NULL, 'c'}, {"ca-cert", required_argument, NULL, 'C'}, -{"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); @@ -1299,10 +1296,6 @@ parse_options(int *argcp, char **argvp[], bootstrap_ca_cert = true; break; -case OPT_ENABLE_DUMMY: -dummy_enable(optarg && !strcmp(optarg, "override")); -break; - case '?': exit(EXIT_FAILURE); diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at index 8b1dd05..35f0d11 100644 --- a/tests/ovsdb-server.at +++ b/tests/ovsdb-server.at @@ -360,7 +360,7 @@ AT_CHECK( "uuid-name": "x", "row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore]) ON_EXIT([kill `cat ovsdb-server.pid`]) -AT_CHECK([ovsdb-server --enable-dummy --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) ovs-appctl -t ovsdb-server time/warp 6000 1000 AT_CHECK( [[ovsdb-client transact unix:socket1 \ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 4/4] ovs-appctl: Remove the include of vlog.h.
Remove the include of vlog.h and all the related macros, since it is not really used. Signed-off-by: Alex Wang --- utilities/ovs-appctl.c |7 --- 1 file changed, 7 deletions(-) diff --git a/utilities/ovs-appctl.c b/utilities/ovs-appctl.c index 8249bf8..174d147 100644 --- a/utilities/ovs-appctl.c +++ b/utilities/ovs-appctl.c @@ -31,7 +31,6 @@ #include "timeval.h" #include "unixctl.h" #include "util.h" -#include "vlog.h" static void usage(void); static const char *parse_command_line(int argc, char *argv[]); @@ -122,9 +121,6 @@ Other options:\n\ static const char * parse_command_line(int argc, char *argv[]) { -enum { -VLOG_OPTION_ENUMS -}; static const struct option long_options[] = { {"target", required_argument, NULL, 't'}, {"execute", no_argument, NULL, 'e'}, @@ -132,7 +128,6 @@ parse_command_line(int argc, char *argv[]) {"option", no_argument, NULL, 'o'}, {"version", no_argument, NULL, 'V'}, {"timeout", required_argument, NULL, 'T'}, -VLOG_LONG_OPTIONS, {NULL, 0, NULL, 0}, }; char *short_options_ = long_options_to_short_options(long_options); @@ -184,8 +179,6 @@ parse_command_line(int argc, char *argv[]) ovs_print_version(0, 0); exit(EXIT_SUCCESS); -VLOG_OPTION_HANDLERS - case '?': exit(EXIT_FAILURE); -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/3] ovsdb-server: Remove the 'enable-dummy' option.
There is no use case of this option in ovsdb-server. Also, it causes dpif-dummy and netdev-dummy module register unrelated unixctl commands. Signed-off-by: Alex Wang --- ovsdb/ovsdb-server.c |7 --- tests/ovsdb-server.at |2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index a0b8f30..7dc2adb 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -26,7 +26,6 @@ #include "command-line.h" #include "daemon.h" #include "dirs.h" -#include "dummy.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "file.h" @@ -1224,7 +1223,6 @@ parse_options(int *argcp, char **argvp[], OPT_UNIXCTL, OPT_RUN, OPT_BOOTSTRAP_CA_CERT, -OPT_ENABLE_DUMMY, VLOG_OPTION_ENUMS, DAEMON_OPTION_ENUMS }; @@ -1242,7 +1240,6 @@ parse_options(int *argcp, char **argvp[], {"private-key", required_argument, NULL, 'p'}, {"certificate", required_argument, NULL, 'c'}, {"ca-cert", required_argument, NULL, 'C'}, -{"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); @@ -1299,10 +1296,6 @@ parse_options(int *argcp, char **argvp[], bootstrap_ca_cert = true; break; -case OPT_ENABLE_DUMMY: -dummy_enable(optarg && !strcmp(optarg, "override")); -break; - case '?': exit(EXIT_FAILURE); diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at index 8b1dd05..35f0d11 100644 --- a/tests/ovsdb-server.at +++ b/tests/ovsdb-server.at @@ -360,7 +360,7 @@ AT_CHECK( "uuid-name": "x", "row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore]) ON_EXIT([kill `cat ovsdb-server.pid`]) -AT_CHECK([ovsdb-server --enable-dummy --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) ovs-appctl -t ovsdb-server time/warp 6000 1000 AT_CHECK( [[ovsdb-client transact unix:socket1 \ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/3] unixctl: Make command description all lowercase.
Signed-off-by: Alex Wang --- lib/dpif-netdev.c |4 ++-- lib/netdev-dummy.c |2 +- lib/timeval.c |2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 072ed5d..a4dabb6 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2719,8 +2719,8 @@ dpif_dummy_register(bool override) dpif_dummy_register__("dummy"); unixctl_command_register("dpif-dummy/change-port-number", - "DP PORT NEW-NUMBER", + "dp port new-number", 3, 3, dpif_dummy_change_port_number, NULL); -unixctl_command_register("dpif-dummy/delete-port", "DP PORT", +unixctl_command_register("dpif-dummy/delete-port", "dp port", 2, 2, dpif_dummy_delete_port, NULL); } diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index e3cf72d..39a3fd7 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1328,7 +1328,7 @@ netdev_dummy_conn_state(struct unixctl_conn *conn, int argc, void netdev_dummy_register(bool override) { -unixctl_command_register("netdev-dummy/receive", "NAME PACKET|FLOW...", +unixctl_command_register("netdev-dummy/receive", "name packet|flow...", 2, INT_MAX, netdev_dummy_receive, NULL); unixctl_command_register("netdev-dummy/set-admin-state", "[netdev] up|down", 1, 2, diff --git a/lib/timeval.c b/lib/timeval.c index 9d90f1b..0587834 100644 --- a/lib/timeval.c +++ b/lib/timeval.c @@ -732,7 +732,7 @@ timeval_dummy_register(void) { timewarp_enabled = true; unixctl_command_register("time/stop", "", 0, 0, timeval_stop_cb, NULL); -unixctl_command_register("time/warp", "[LARGE_MSECS] MSECS", 1, 2, +unixctl_command_register("time/warp", "[large_msecs] msecs", 1, 2, timeval_warp_cb, NULL); } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 3/3] ovs-appctl: Add option '--option' to show all long options.
Signed-off-by: Alex Wang --- utilities/ovs-appctl.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/utilities/ovs-appctl.c b/utilities/ovs-appctl.c index bb17ec2..f279f50 100644 --- a/utilities/ovs-appctl.c +++ b/utilities/ovs-appctl.c @@ -81,6 +81,19 @@ main(int argc, char *argv[]) } static void +options(const struct option *opts, size_t len) +{ +int i; + +for (i = 0; i < len; i++) { +const struct option *opt = &opts[i]; + +printf("--%s\n", opt->name); +} +exit(EXIT_SUCCESS); +} + +static void usage(void) { printf("\ @@ -116,6 +129,7 @@ parse_command_line(int argc, char *argv[]) {"target", required_argument, NULL, 't'}, {"execute", no_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, +{"option", no_argument, NULL, 'o'}, {"version", no_argument, NULL, 'V'}, {"timeout", required_argument, NULL, 'T'}, VLOG_LONG_OPTIONS, @@ -157,6 +171,11 @@ parse_command_line(int argc, char *argv[]) usage(); break; +case 'o': +/* Always skips the last one. */ +options(long_options, ARRAY_SIZE(long_options) - 1); +break; + case 'T': time_alarm(atoi(optarg)); break; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/4] ovs-appctl: Remove the include of vlog.h.
Just realized that I mistakenly sent this series (not ready) out. Please ignore, I have sent the new version (3 patches instead of 4) without adding a version number... sorry, On Wed, Sep 10, 2014 at 12:52 PM, Alex Wang wrote: > Remove the include of vlog.h and all the related macros, since > it is not really used. > > Signed-off-by: Alex Wang > --- > utilities/ovs-appctl.c |7 --- > 1 file changed, 7 deletions(-) > > diff --git a/utilities/ovs-appctl.c b/utilities/ovs-appctl.c > index 8249bf8..174d147 100644 > --- a/utilities/ovs-appctl.c > +++ b/utilities/ovs-appctl.c > @@ -31,7 +31,6 @@ > #include "timeval.h" > #include "unixctl.h" > #include "util.h" > -#include "vlog.h" > > static void usage(void); > static const char *parse_command_line(int argc, char *argv[]); > @@ -122,9 +121,6 @@ Other options:\n\ > static const char * > parse_command_line(int argc, char *argv[]) > { > -enum { > -VLOG_OPTION_ENUMS > -}; > static const struct option long_options[] = { > {"target", required_argument, NULL, 't'}, > {"execute", no_argument, NULL, 'e'}, > @@ -132,7 +128,6 @@ parse_command_line(int argc, char *argv[]) > {"option", no_argument, NULL, 'o'}, > {"version", no_argument, NULL, 'V'}, > {"timeout", required_argument, NULL, 'T'}, > -VLOG_LONG_OPTIONS, > {NULL, 0, NULL, 0}, > }; > char *short_options_ = long_options_to_short_options(long_options); > @@ -184,8 +179,6 @@ parse_command_line(int argc, char *argv[]) > ovs_print_version(0, 0); > exit(EXIT_SUCCESS); > > -VLOG_OPTION_HANDLERS > - > case '?': > exit(EXIT_FAILURE); > > -- > 1.7.9.5 > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 3/3] ovs-appctl: Add option '--option' to show all long options.
for my completion script, i changed design to get subcmds from `ovs-appctl help' and long options from `ovs-appctl --option` parsing manpage is very slow, i'm finishing up the unit tests for the script will post it soon after this series, On Thu, Sep 11, 2014 at 10:49 AM, Ben Pfaff wrote: > On Wed, Sep 10, 2014 at 02:12:31PM -0700, Alex Wang wrote: > > Signed-off-by: Alex Wang > > What's it for? > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 3/3] ovs-appctl: Add option '--option' to show all long options.
Thx Ben, I'll adjust the commit message, Also, i think it makes sense to generalize into a function in command-line module. Alex Wang, On Thu, Sep 11, 2014 at 11:03 AM, Ben Pfaff wrote: > Can you add that to the commit message? > > Does it make any sense to generalize this or is it only useful for > ovs-appctl? > > On Thu, Sep 11, 2014 at 10:59:26AM -0700, Alex Wang wrote: > > for my completion script, i changed design to get subcmds from > `ovs-appctl > > help' and long options from `ovs-appctl --option` > > > > parsing manpage is very slow, i'm finishing up the unit tests for the > script > > will post it soon after this series, > > > > On Thu, Sep 11, 2014 at 10:49 AM, Ben Pfaff wrote: > > > > > On Wed, Sep 10, 2014 at 02:12:31PM -0700, Alex Wang wrote: > > > > Signed-off-by: Alex Wang > > > > > > What's it for? > > > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/3] ovsdb-server: Remove the 'enable-dummy' option.
Thx, applied to master, On Thu, Sep 11, 2014 at 10:45 AM, Ben Pfaff wrote: > On Wed, Sep 10, 2014 at 02:12:29PM -0700, Alex Wang wrote: > > There is no use case of this option in ovsdb-server. Also, > > it causes dpif-dummy and netdev-dummy module register unrelated > > unixctl commands. > > > > Signed-off-by: Alex Wang > > Acked-by: Ben Pfaff > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] command-line: Add function to print all options.
This commit adds function that extracts (both long and short) options and returns them as string. Right now, only ovs-appctl command provides an option to print this function output. And the output is to be used by bash command-line completion script for completion other options of the same command. Signed-off-by: Alex Wang --- lib/command-line.c | 23 +++ lib/command-line.h |1 + utilities/ovs-appctl.c | 18 ++ 3 files changed, 42 insertions(+) diff --git a/lib/command-line.c b/lib/command-line.c index cb73a25..820f3ff 100644 --- a/lib/command-line.c +++ b/lib/command-line.c @@ -19,6 +19,7 @@ #include #include #include +#include "dynamic-string.h" #include "ovs-thread.h" #include "util.h" #include "vlog.h" @@ -51,6 +52,28 @@ long_options_to_short_options(const struct option options[]) return xstrdup(short_options); } +/* Given the GNU-style options in 'options', returns a string which prints all + * options. The caller is responsible for freeing the string. */ +char * +print_options(const struct option options[]) +{ +struct ds ds = DS_EMPTY_INITIALIZER; +char *ret; + +for (; options->name; options++) { +const struct option *o = options; + +ds_put_format(&ds, "--%s%s\n", o->name, o->has_arg ? "=ARG" : ""); +if (o->flag == NULL && o->val > 0 && o->val <= UCHAR_MAX) { +ds_put_format(&ds, "-%c%s\n", o->val, o->has_arg ? " ARG" : ""); +} +} +ret = xstrdup(ds_cstr(&ds)); +ds_destroy(&ds); + +return ret; +} + /* Runs the command designated by argv[0] within the command table specified by * 'commands', which must be terminated by a command whose 'name' member is a * null pointer. diff --git a/lib/command-line.h b/lib/command-line.h index bb12f72..f7d5d23 100644 --- a/lib/command-line.h +++ b/lib/command-line.h @@ -31,6 +31,7 @@ struct command { }; char *long_options_to_short_options(const struct option *options); +char *print_options(const struct option *options); void run_command(int argc, char *argv[], const struct command[]); void proctitle_init(int argc, char **argv); diff --git a/utilities/ovs-appctl.c b/utilities/ovs-appctl.c index bb17ec2..9ae5201 100644 --- a/utilities/ovs-appctl.c +++ b/utilities/ovs-appctl.c @@ -81,6 +81,18 @@ main(int argc, char *argv[]) } static void +print_appctl_options(const struct option options[]) +{ +char *opts; + +opts = print_options(options); +printf("%s", opts); +free(opts); + +exit(EXIT_SUCCESS); +} + +static void usage(void) { printf("\ @@ -110,12 +122,14 @@ static const char * parse_command_line(int argc, char *argv[]) { enum { +OPT_START = UCHAR_MAX + 1, VLOG_OPTION_ENUMS }; static const struct option long_options[] = { {"target", required_argument, NULL, 't'}, {"execute", no_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, +{"option", no_argument, NULL, 'o'}, {"version", no_argument, NULL, 'V'}, {"timeout", required_argument, NULL, 'T'}, VLOG_LONG_OPTIONS, @@ -157,6 +171,10 @@ parse_command_line(int argc, char *argv[]) usage(); break; +case 'o': +print_appctl_options(long_options); +break; + case 'T': time_alarm(atoi(optarg)); break; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch v3 6/6] dpif-netdev: Create multiple pmd threads by default.
One example is when STP config pkts are received from the dpdk port, when processing the pkts in xlate's process_special(), if the stp state machine need to send a config immediately via the dpdk interface, the state machine will directly invoke the stp->send() callback function and sends the pkt immediately (via calling dpif_netdev_execute()). On Thu, Sep 11, 2014 at 10:39 PM, Pravin Shelar wrote: > On Tue, Sep 9, 2014 at 5:00 PM, Alex Wang wrote: > > With this commit, ovs by default will try creating 'number of > > dpdk interfaces on numa node' pmd threads for each numa node > > and pin the pmd threads to available cpu cores on the numa node. > > > > NON_PMD_CORE_ID (currently 0) is used to reserve a particular > > cpu core for the I/O of all non-pmd threads. No pmd thread > > can be pinned to this reserved core. > > > > As side-effects of this commit: > > > > - the exact-match cache for non-pmd threads is removed from > > 'struct dp_netdev'. Instead, all non-pmd threads will use > > the exact-match cache defined in the 'struct dp_netdev_pmd_thread' > > for NON_PMD_CORE_ID. > > > > - the received packet processing functions are refactored to use > > 'struct dp_netdev_pmd_thread' as input. > > > > - the 'netdev_send()' function will be called with the proper > > queue id. > > > > Signed-off-by: Alex Wang > > > > --- > > PATCH -> V2 > > - rebase and refactor the code. > > > > V2 -> V3: > > - both pmd and non-pmd threads can call the dpif_netdev_execute(). > > so, use a per-thread variable to help recognize the calling thread. > > --- > > lib/dpif-netdev.c | 407 > + > > lib/dpif-netdev.h |4 +- > > lib/netdev-dpdk.c | 17 ++- > > lib/netdev-dpdk.h |7 + > > 4 files changed, 302 insertions(+), 133 deletions(-) > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > index dcce02e..29a92b3 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -52,6 +52,7 @@ > > #include "odp-util.h" > > #include "ofp-print.h" > > #include "ofpbuf.h" > > +#include "ovs-numa.h" > > #include "ovs-rcu.h" > > #include "packet-dpif.h" > > #include "packets.h" > > @@ -158,7 +159,6 @@ struct emc_cache { > > * > > *dp_netdev_mutex (global) > > *port_mutex > > - *emc_mutex > > *flow_mutex > > */ > > struct dp_netdev { > > @@ -195,17 +195,16 @@ struct dp_netdev { > > upcall_callback *upcall_cb; /* Callback function for executing > upcalls. */ > > void *upcall_aux; > > > > -/* Forwarding threads. */ > > -struct latch exit_latch; > > -struct pmd_thread *pmd_threads; > > -size_t n_pmd_threads; > > -int pmd_count; > > - > > -/* Exact match cache for non-pmd devices. > > - * Pmd devices use instead each thread's flow_cache for this > purpose. > > - * Protected by emc_mutex */ > > -struct emc_cache flow_cache OVS_GUARDED; > > -struct ovs_mutex emc_mutex; > > +/* Stores all 'struct dp_netdev_pmd_thread's. */ > > +struct cmap poll_threads; > > + > > +/* Protects the access of the 'struct dp_netdev_pmd_thread' > > + * instance for non-pmd thread. */ > > +struct ovs_mutex non_pmd_mutex; > > + > > +/* Each pmd thread will store its pointer to > > + * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */ > > +ovsthread_key_t per_pmd_key; > > }; > > > > static struct dp_netdev_port *dp_netdev_lookup_port(const struct > dp_netdev *dp, > > @@ -340,15 +339,25 @@ static void dp_netdev_actions_free(struct > dp_netdev_actions *); > > * > > * DPDK used PMD for accessing NIC. > > * > > - * A thread that receives packets from PMD ports, looks them up in the > flow > > - * table, and executes the actions it finds. > > + * Note, instance with cpu core id NON_PMD_CORE_ID will be reserved for > > + * I/O of all non-pmd threads. There will be no actual thread created > > + * for the instance. > > **/ > > -struct pmd_thread { > > +struct dp_netdev_pmd_thread { > > struct dp_netdev *dp; > > +struct cmap_node node; /* In 'dp->poll_threads'. */ > > +/* Per thread exact-match cache. Note, the instanc
Re: [ovs-dev] [dpdk patch v3 3/6] netdev-dpdk: Create multiple tx/rx queues by default.
> > > > > Specifically, the default number of rx queues will be the number > > of dpdk interfaces on the numa node. And the upcoming work > > will assign each rx queue to a different poll thread. The default > > number of tx queues will be the number of cpu cores on the machine. > > Although not all the tx queues will be used, each poll thread will > > have its own queue for transmission on the dpdk interface. > > > I thought we had decided to create one rx queue for each core on local > numa node. Is there problem with this ? > creating one rx-queue for each core is more predictable than number of > device on the switch at given point. Actually, I was not aware of that. But I'm okay with it. > +netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); > > +for (i = 0; i < n_cores; i++) { > > rte_spinlock_init(&netdev->tx_q[i].tx_lock); > > } > > +netdev_->n_txq = n_cores; > > +netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); > > > > Rather than calculating n_tx_q and n_rx_q, these values should be > calculated by dpif-netdev and passed down to netdev implementation. I see what you mean. I'll bring forward the netdev_set_multiq() patch (not posted yet). and use it to configure the n_rxq from dpif-netdev. For n_txq, since we always specify one per core, I'd like to still init it in netdev_dpdk_init(). so netdev_set_multiq will just mark the n_txq argument as OVS_UNUSED. what do you think? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch v3 3/6] netdev-dpdk: Create multiple tx/rx queues by default.
You mean netdev_open(), or rxq_open() or some function else? netdev_open() seems not a good place, it can be called at multiple places. and we need to keep record of the n_rxq config at some high level module. for rxq_open(), currently it is used to create a 'struct netdev_rxq', not for specifying the number of queues. On Thu, Sep 11, 2014 at 11:22 PM, Pravin Shelar wrote: > On Thu, Sep 11, 2014 at 10:56 PM, Alex Wang wrote: > >> > > >> > Specifically, the default number of rx queues will be the number > >> > of dpdk interfaces on the numa node. And the upcoming work > >> > will assign each rx queue to a different poll thread. The default > >> > number of tx queues will be the number of cpu cores on the machine. > >> > Although not all the tx queues will be used, each poll thread will > >> > have its own queue for transmission on the dpdk interface. > >> > > >> I thought we had decided to create one rx queue for each core on local > >> numa node. Is there problem with this ? > >> creating one rx-queue for each core is more predictable than number of > >> device on the switch at given point. > > > > > > > > Actually, I was not aware of that. But I'm okay with it. > > > > > >> > +netdev->tx_q = dpdk_rte_mzalloc(n_cores * sizeof *netdev->tx_q); > >> > +for (i = 0; i < n_cores; i++) { > >> > rte_spinlock_init(&netdev->tx_q[i].tx_lock); > >> > } > >> > +netdev_->n_txq = n_cores; > >> > +netdev_->n_rxq = dpdk_get_n_devs(netdev->socket_id); > >> > > >> > >> Rather than calculating n_tx_q and n_rx_q, these values should be > >> calculated by dpif-netdev and passed down to netdev implementation. > > > > > > > > I see what you mean. I'll bring forward the netdev_set_multiq() patch > (not > > posted yet). and use it to configure the n_rxq from dpif-netdev. > > > > For n_txq, since we always specify one per core, I'd like to still init > it > > in > > netdev_dpdk_init(). so netdev_set_multiq will just mark the n_txq > argument > > as OVS_UNUSED. what do you think? > > > > If we pass number of queue via open we can avoid netdev_set_multiq() > function. > in case of any change in number of queue we can close device and > reopen with new configuration. > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC ovs-appctl compgen] ovs-appctl: Add bash command-line completion script.
This patch is a RFC for the bash command-line completion script for ovs-appctl command. Right now, the script can do the following: - accept user specified '--target' and query available completions from the corresponding deamons. - once the subcommand (e.g. ofproto/trace) has been given, the script will print the subcommand format. - the script can convert between keywords like 'bridge/port/interface/dp' and the available record in ovsdb. - the script can print and auto-complete the half input argument. To use the script, either copy it inside /etc/bash_completion.d/ or manually run it via . ovs-appctl-compgen.bash. Also, a unit testsuite is provided as ovs-appctl-compgen-test.bash. It is suggested that this testsuit be run inside ovs-sandbox. Signed-off-by: Alex Wang --- utilities/ovs-appctl-compgen-test.bash | 586 utilities/ovs-appctl-compgen.bash | 516 2 files changed, 1102 insertions(+) create mode 100755 utilities/ovs-appctl-compgen-test.bash create mode 100755 utilities/ovs-appctl-compgen.bash diff --git a/utilities/ovs-appctl-compgen-test.bash b/utilities/ovs-appctl-compgen-test.bash new file mode 100755 index 000..a20788f --- /dev/null +++ b/utilities/ovs-appctl-compgen-test.bash @@ -0,0 +1,586 @@ +#!/bin/bash +# +# Tests for the ovs-appctl-compgen.bash +# +# Please run this with ovs-appctl-compgen.bash script inside +# ovs-sandbox, under the same directory. +# +# For information about running the ovs-sandbox, please refer to +# the tutorial directory. +# +# +# +COMP_OUTPUT= +TMP= +EXPECT= +TEST_RESULT= + +TEST_COUNTER=0 +TEST_TARGETS=(ovs-vswitchd ovsdb-server ovs-ofctl) + +# +# Helper functions. +# +get_command_format() { +local input="$@" + +echo "$(grep -A 1 "Command format" <<< "$input" | tail -n+2)" +} + +get_argument_expansion() { +local input="$@" + +echo "$(grep -- "argument keyword .* is expanded to" <<< "$input" | sed -e 's/^[ \t]*//')" +} + +get_available_completions() { +local input="$@" + +echo "$(sed -e '1,/Available/d' <<< "$input" | tail -n+2)" +} + +reset_globals() { +COMP_OUTPUT= +TMP= +EXPECT= +TEST_RESULT= +} + +# +# $1: Test name. +# $2: ok or fail. +# +print_result() { +(( TEST_COUNTER++ )) +printf "%2d: %-70s %s\n" "$TEST_COUNTER" "$1" "$2" +} + +# +# Sub-tests. +# +ovs_apptcl_TAB() { +local target="$1" +local target_line= +local comp_output tmp expect + +if [ -n "$target" ]; then +target_line="--target $target" +fi +comp_output="$(bash ovs-appctl-compgen.bash debug ovs-appctl $target_line TAB 2>&1)" +tmp="$(get_available_completions "$comp_output")" +expect="$(ovs-appctl --option | sort | sed -n '/^--.*/p' | cut -d '=' -f1) +$(ovs-appctl $target_line help | tail -n +2 | cut -c3- | cut -d ' ' -f1)" +if [ "$tmp" = "$expect" ]; then +echo "ok" +else +echo "fail" +fi +} + +# +# Test preparation. +# +ovs-vsctl add-br br0 +ovs-vsctl add-port br0 p1 + + +# +# Begin the test. +# +cat <&1)" +TMP="$(get_argument_expansion "$COMP_OUTPUT" | sed -e 's/[ \t]*$//')" +EXPECT="argument keyword \"-generate\" is expanded to: -generate +argument keyword \"packet\" is expanded to: packet" +if [ "$TMP" != "$EXPECT" ]; then +TEST_RESULT=fail +break +fi + +# check the available completions. +TMP="$(get_available_completions "$COMP_OUTPUT" | tr '\n' ' ' | sed -e 's/[ \t]*$//')" +EXPECT="-generate packet" +if [ "$TMP" != "$EXPECT" ]; then +TEST_RESULT=fail +break +fi + +# set packet to some random string, there should be no more completions. +COMP_OUTPUT="$(bash ovs-appctl-compgen.bash debug ovs-appctl ofproto/trace ovs-system "in_port(123),mac(),ip,tcp" "ABSJDFLSDJFOIWEQR" TAB 2>&1)" +TMP="$(sed -e '/./,$!d' <<< "$COMP_OUTPUT")" +EXPECT="Command format: +ofproto/trace {[dp_name] odp_flow | bridge br_flow} [-generate|packet]" +if [ "$TMP" != "$EXPECT" ]; then +TEST_RESULT=fail +break +fi + +# set argument to 'br0', should go to the bridge path. +COMP_OUTPUT="$(bash ovs-appctl-compgen.bash debug ovs-appctl ofproto/trace br0 TAB 2>&1)" +TMP="$(get_argument_expansion "$COMP_OUTPUT" |
Re: [ovs-dev] [RFC ovs-appctl compgen] ovs-appctl: Add bash command-line completion script.
depends on this patch: http://openvswitch.org/pipermail/dev/2014-September/045617.html On Fri, Sep 12, 2014 at 12:14 AM, Alex Wang wrote: > This patch is a RFC for the bash command-line completion script > for ovs-appctl command. Right now, the script can do the following: > > - accept user specified '--target' and query available completions > from the corresponding deamons. > > - once the subcommand (e.g. ofproto/trace) has been given, the > script will print the subcommand format. > > - the script can convert between keywords like 'bridge/port/interface/dp' > and the available record in ovsdb. > > - the script can print and auto-complete the half input argument. > > To use the script, either copy it inside /etc/bash_completion.d/ > or manually run it via . ovs-appctl-compgen.bash. > > Also, a unit testsuite is provided as ovs-appctl-compgen-test.bash. > It is suggested that this testsuit be run inside ovs-sandbox. > > Signed-off-by: Alex Wang > --- > utilities/ovs-appctl-compgen-test.bash | 586 > > utilities/ovs-appctl-compgen.bash | 516 > 2 files changed, 1102 insertions(+) > create mode 100755 utilities/ovs-appctl-compgen-test.bash > create mode 100755 utilities/ovs-appctl-compgen.bash > > diff --git a/utilities/ovs-appctl-compgen-test.bash > b/utilities/ovs-appctl-compgen-test.bash > new file mode 100755 > index 000..a20788f > --- /dev/null > +++ b/utilities/ovs-appctl-compgen-test.bash > @@ -0,0 +1,586 @@ > +#!/bin/bash > +# > +# Tests for the ovs-appctl-compgen.bash > +# > +# Please run this with ovs-appctl-compgen.bash script inside > +# ovs-sandbox, under the same directory. > +# > +# For information about running the ovs-sandbox, please refer to > +# the tutorial directory. > +# > +# > +# > +COMP_OUTPUT= > +TMP= > +EXPECT= > +TEST_RESULT= > + > +TEST_COUNTER=0 > +TEST_TARGETS=(ovs-vswitchd ovsdb-server ovs-ofctl) > + > +# > +# Helper functions. > +# > +get_command_format() { > +local input="$@" > + > +echo "$(grep -A 1 "Command format" <<< "$input" | tail -n+2)" > +} > + > +get_argument_expansion() { > +local input="$@" > + > +echo "$(grep -- "argument keyword .* is expanded to" <<< "$input" | > sed -e 's/^[ \t]*//')" > +} > + > +get_available_completions() { > +local input="$@" > + > +echo "$(sed -e '1,/Available/d' <<< "$input" | tail -n+2)" > +} > + > +reset_globals() { > +COMP_OUTPUT= > +TMP= > +EXPECT= > +TEST_RESULT= > +} > + > +# > +# $1: Test name. > +# $2: ok or fail. > +# > +print_result() { > +(( TEST_COUNTER++ )) > +printf "%2d: %-70s %s\n" "$TEST_COUNTER" "$1" "$2" > +} > + > +# > +# Sub-tests. > +# > +ovs_apptcl_TAB() { > +local target="$1" > +local target_line= > +local comp_output tmp expect > + > +if [ -n "$target" ]; then > +target_line="--target $target" > +fi > +comp_output="$(bash ovs-appctl-compgen.bash debug ovs-appctl > $target_line TAB 2>&1)" > +tmp="$(get_available_completions "$comp_output")" > +expect="$(ovs-appctl --option | sort | sed -n '/^--.*/p' | cut -d '=' > -f1) > +$(ovs-appctl $target_line help | tail -n +2 | cut -c3- | cut -d ' ' -f1)" > +if [ "$tmp" = "$expect" ]; then > +echo "ok" > +else > +echo "fail" > +fi > +} > + > +# > +# Test preparation. > +# > +ovs-vsctl add-br br0 > +ovs-vsctl add-port br0 p1 > + > + > +# > +# Begin the test. > +# > +cat < + > +## -- ## > +## ovs-appctl-compgen unit tests. ## > +## -- ## > + > +EOF > + > + > +# complete ovs-appctl --tar[TAB] > + > +reset_globals > + > +COMP_OUTPUT="$(bash ovs-appctl-compgen.bash debug ovs-appctl --tar 2>&1)" > +TMP="$(get_available_completions "$COMP_OUTPUT")" > +EXPECT="--target" > +if [ "$TMP" = "$EXPECT" ]; then > +TEST_RESULT=ok > +else > +TEST_RESULT=fail > +fi > + > +print_result "complete ovs-appctl --targ[TAB]" "$TEST_RESULT" > + > + > +# complete ovs-appctl --target [TAB] > + > +reset_globals > + > +COMP
Re: [ovs-dev] [PATCH] ovs-rcu: Make ovsrcu_quiesce() flush the callback event set.
Thx for pointing it out! Actually, I did 'perthread = pthread_getspecific(perthread_key);' instead of using 'ovsrcu_perthread_get()', which is wrong. Will fix it and remote the call to ovsrcu_init_module(). On Fri, Sep 12, 2014 at 9:47 AM, Ben Pfaff wrote: > On Tue, Sep 09, 2014 at 11:23:34AM -0700, Alex Wang wrote: > > On current master, the per-thread callback event set is flushed > > when ovsrcu_quiesce_start() is called or when the callback > > event set is full. For threads that only call 'ovsrcu_quiesce()' > > to indicate quiescient state, their callback event set will not > > be flushed for execution until the set is full. And this could > > take a very long time. > > > > Theoretically, this should not be an issue, since rcu postponed > > callback events should only free the old version of objects. > > However, current ovs does not follow this rule, and some callback > > events include other activities like unregistering the netdev > > from global name-netdev map. The delay of unregistering the netdev > > (by threads that only calls ovsrcu_quiesce()) will prevent the > > recreate of same netdev indefinitely. > > > > As a short-term workaround, this commit makes every call to > > ovsrcu_quiesce() flush the callback event set. In the long run, > > there will be a refactor of the use of ovs-rcu module, in which all > > callback events only free the old version of objects. > > > > Signed-off-by: Alex Wang > > --- > > lib/ovs-rcu.c |8 +++- > > 1 file changed, 7 insertions(+), 1 deletion(-) > > > > diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c > > index a052d6c..6fe6103 100644 > > --- a/lib/ovs-rcu.c > > +++ b/lib/ovs-rcu.c > > @@ -138,8 +138,14 @@ ovsrcu_quiesce_start(void) > > void > > ovsrcu_quiesce(void) > > { > > +struct ovsrcu_perthread *perthread; > > + > > ovsrcu_init_module(); > > -ovsrcu_perthread_get()->seqno = seq_read(global_seqno); > > +perthread = pthread_getspecific(perthread_key); > > +perthread->seqno = seq_read(global_seqno); > > +if (perthread->cbset) { > > +ovsrcu_flush_cbset(perthread); > > +} > > seq_change(global_seqno); > > I think that we can remove the call to ovsrcu_init_module() because > ovsrcu_perthread_get() calls it. (This was also true before your > patch.) > > Acked-by: Ben Pfaff > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ovs-rcu: Make ovsrcu_quiesce() flush the callback event set.
Applied to master with the changes mentioned in previous email. Thanks, On Fri, Sep 12, 2014 at 10:18 AM, Alex Wang wrote: > Thx for pointing it out! > > Actually, I did 'perthread = pthread_getspecific(perthread_key);' instead > of > using 'ovsrcu_perthread_get()', which is wrong. Will fix it and remote the > call to ovsrcu_init_module(). > > > > On Fri, Sep 12, 2014 at 9:47 AM, Ben Pfaff wrote: > >> On Tue, Sep 09, 2014 at 11:23:34AM -0700, Alex Wang wrote: >> > On current master, the per-thread callback event set is flushed >> > when ovsrcu_quiesce_start() is called or when the callback >> > event set is full. For threads that only call 'ovsrcu_quiesce()' >> > to indicate quiescient state, their callback event set will not >> > be flushed for execution until the set is full. And this could >> > take a very long time. >> > >> > Theoretically, this should not be an issue, since rcu postponed >> > callback events should only free the old version of objects. >> > However, current ovs does not follow this rule, and some callback >> > events include other activities like unregistering the netdev >> > from global name-netdev map. The delay of unregistering the netdev >> > (by threads that only calls ovsrcu_quiesce()) will prevent the >> > recreate of same netdev indefinitely. >> > >> > As a short-term workaround, this commit makes every call to >> > ovsrcu_quiesce() flush the callback event set. In the long run, >> > there will be a refactor of the use of ovs-rcu module, in which all >> > callback events only free the old version of objects. >> > >> > Signed-off-by: Alex Wang >> > --- >> > lib/ovs-rcu.c |8 +++- >> > 1 file changed, 7 insertions(+), 1 deletion(-) >> > >> > diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c >> > index a052d6c..6fe6103 100644 >> > --- a/lib/ovs-rcu.c >> > +++ b/lib/ovs-rcu.c >> > @@ -138,8 +138,14 @@ ovsrcu_quiesce_start(void) >> > void >> > ovsrcu_quiesce(void) >> > { >> > +struct ovsrcu_perthread *perthread; >> > + >> > ovsrcu_init_module(); >> > -ovsrcu_perthread_get()->seqno = seq_read(global_seqno); >> > +perthread = pthread_getspecific(perthread_key); >> > +perthread->seqno = seq_read(global_seqno); >> > +if (perthread->cbset) { >> > +ovsrcu_flush_cbset(perthread); >> > +} >> > seq_change(global_seqno); >> >> I think that we can remove the call to ovsrcu_init_module() because >> ovsrcu_perthread_get() calls it. (This was also true before your >> patch.) >> >> Acked-by: Ben Pfaff >> > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [dpdk patch v3 2/6] netdev: Add n_txq to 'struct netdev'.
Applied the first two patches to master. Will rebase the following ones based on the discussion and comments. On Thu, Sep 11, 2014 at 10:38 PM, Pravin Shelar wrote: > On Tue, Sep 9, 2014 at 5:00 PM, Alex Wang wrote: > > This commit adds new variable n_txq to 'struct netdev' for recording > > the number of tx queues. Correspondingly, the send_*() functions are > > extended to accept queue id as input argument. > > > > All 'netdev-*' implementation will ignore the queue id since having > > multiple tx queues is not supported. Upcomping patches will start > > using it and create multiple tx queues for dpdk netdev by default. > > > > Signed-off-by: Alex Wang > > > > LGTM > Acked-by: Pravin B Shelar > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [dpdk patch v4 1/5] netdev: Add function for configuring tx and rx queues.
This commit adds a new API to the 'struct netdev_class' which allows user to configure the number of tx queues and rx queues of 'netdev'. Upcoming patches will use this function to set multiple tx/rx queues when adding the netdev to dpif-netdev. Currently, only netdev-dpdk module implements this function. Signed-off-by: Alex Wang --- PATCH -> V4: - move this hidden patch forward. --- lib/netdev-bsd.c |1 + lib/netdev-dpdk.c | 46 +- lib/netdev-dummy.c|1 + lib/netdev-linux.c|1 + lib/netdev-provider.h | 10 ++ lib/netdev-vport.c|1 + lib/netdev.c | 25 + lib/netdev.h |1 + 8 files changed, 77 insertions(+), 9 deletions(-) diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 32c04a3..20b4718 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -1563,6 +1563,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off, NULL, /* set_config */ \ NULL, /* get_tunnel_config */\ NULL, /* get_numa_id */ \ +NULL, /* set_multiq */ \ \ netdev_bsd_send, \ netdev_bsd_send_wait,\ diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 4f9c5c2..d11c070 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -399,13 +399,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) return ENODEV; } -diag = rte_eth_dev_configure(dev->port_id, NR_QUEUE, NR_QUEUE, &port_conf); +diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->up.n_txq, + &port_conf); if (diag) { VLOG_ERR("eth dev config error %d",diag); return -diag; } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->up.n_txq; i++) { diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE, dev->socket_id, &tx_conf); if (diag) { @@ -414,7 +415,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex) } } -for (i = 0; i < NR_QUEUE; i++) { +for (i = 0; i < dev->up.n_rxq; i++) { diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE, dev->socket_id, &rx_conf, dev->dpdk_mp->mp); @@ -491,12 +492,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no) OVS_REQUIRES(dpdk goto unlock; } +netdev_->n_txq = NR_QUEUE; +netdev_->n_rxq = NR_QUEUE; err = dpdk_eth_dev_init(netdev); if (err) { goto unlock; } -netdev_->n_txq = NR_QUEUE; -netdev_->n_rxq = NR_QUEUE; list_push_back(&dpdk_list, &netdev->list_node); @@ -590,6 +591,26 @@ netdev_dpdk_get_numa_id(const struct netdev *netdev_) return netdev->socket_id; } +/* Sets the number of tx queues and rx queues for the dpdk interface. + * If the configuration fails, do not try restoring its old configuration + * and just returns the error. */ +static int +netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq, + unsigned int n_rxq) +{ +struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); +int err = 0; + +ovs_mutex_lock(&netdev->mutex); +rte_eth_dev_stop(netdev->port_id); +netdev->up.n_txq = MAX(n_txq, NR_QUEUE); +netdev->up.n_rxq = MAX(n_rxq, NR_QUEUE); +err = dpdk_eth_dev_init(netdev); +ovs_mutex_unlock(&netdev->mutex); + +return err; +} + static struct netdev_rxq * netdev_dpdk_rxq_alloc(void) { @@ -687,7 +708,11 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dpif_packet **packets, struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); int nb_rx; -dpdk_queue_flush(dev, rxq_->queue_id); +/* There is only one tx queue for this core. Do not flush other + * queueus. */ +if (rxq_->queue_id == rte_lcore_id()) { +dpdk_queue_flush(dev, rxq_->queue_id); +} nb_rx = rte_eth_rx_burst(rx->port_id, rxq_->queue_id, (struct rte_mbuf **) packets, @@ -1339,7 +1364,7 @@ unlock_dpdk: return err; } -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT) \ +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, MULTIQ) \ { \ NAME, \ INIT, /* init */\ @@ -1354,6 +1379,7 @@ unlock_dpdk: NULL, /* netdev_dp