[ovs-dev] [recirc datapath V4 4/5] datapath: add hash action
Signed-off-by: Andy Zhou V3->v4: * Rehash the hash with hash_basis * always accept dp_hash mask * add mask to ovs_nla_put_flow() API V2->V3: * rename dp_hash to ovs_flow_hash * Simplify netlink message error checking logic * other cleanups --- datapath/actions.c | 18 ++ datapath/datapath.c | 8 +--- datapath/flow.h | 1 + datapath/flow_netlink.c | 30 +- datapath/flow_netlink.h | 2 +- 5 files changed, 54 insertions(+), 5 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 82cfd2d..87a8a40 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -460,6 +460,20 @@ static int sample(struct datapath *dp, struct sk_buff *skb, nla_len(acts_list), true); } +static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) +{ + struct sw_flow_key *key = OVS_CB(skb)->pkt_key; + struct ovs_action_hash *hash_act = nla_data(attr); + u32 hash = 0; + + /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */ + hash = skb_get_rxhash(skb); + if (!hash) + hash = 0x1; + + key->ovs_flow_hash = jhash_1word(hash, hash_act->hash_basis); +} + static int execute_set_action(struct sk_buff *skb, const struct nlattr *nested_attr) { @@ -536,6 +550,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, output_userspace(dp, skb, a); break; + case OVS_ACTION_ATTR_HASH: + execute_hash(skb, a); + break; + case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if (unlikely(err)) /* skb already freed. */ diff --git a/datapath/datapath.c b/datapath/datapath.c index 6dfe69d..0fdd1d4 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -469,7 +469,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); - ovs_nla_put_flow(upcall_info->key, upcall_info->key, user_skb); + ovs_nla_put_flow(upcall_info->key, NULL, upcall_info->key, user_skb); nla_nest_end(user_skb, nla); if (upcall_info->userdata) @@ -690,7 +690,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, if (!nla) goto nla_put_failure; - err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); + err = ovs_nla_put_flow(&flow->unmasked_key, &flow->mask->key, + &flow->unmasked_key, skb); if (err) goto error; nla_nest_end(skb, nla); @@ -699,7 +700,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, if (!nla) goto nla_put_failure; - err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); + err = ovs_nla_put_flow(&flow->key, &flow->mask->key, + &flow->mask->key, skb); if (err) goto error; diff --git a/datapath/flow.h b/datapath/flow.h index 1bb6ce0..a4cb57e 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -74,6 +74,7 @@ struct sw_flow_key { u32 skb_mark; /* SKB mark. */ u16 in_port;/* Input switch port (or DP_MAX_PORTS). */ } __packed phy; /* Safe when right after 'tun_key'. */ + u32 ovs_flow_hash; /* Datapath computed hash value. */ struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 5c32cd0..6b9a120 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -128,6 +128,7 @@ static bool match_validate(const struct sw_flow_match *match, /* Always allowed mask fields. */ mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) | (1ULL << OVS_KEY_ATTR_IN_PORT) + | (1ULL << OVS_KEY_ATTR_DP_HASH) | (1ULL << OVS_KEY_ATTR_ETHERTYPE)); /* Check key attributes. */ @@ -252,6 +253,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_TUNNEL] = -1, }; @@ -455,6 +457,13 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask) { + if (*attrs & (1ULL <<
Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Don't use stack garbage
Acked-by: Jarno Rajahalme On Apr 17, 2014, at 7:19 PM, YAMAMOTO Takashi wrote: > Catched by "learning action - self-modifying flow with hard_timeout" > test case. > > The bug introduced by commit b256dc52. > ("ofproto-dpif-xlate: Cache xlate_actions() effects.") > > Signed-off-by: YAMAMOTO Takashi > --- > ofproto/ofproto-dpif-upcall.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c > index 0d7dd8e..96a53e6 100644 > --- a/ofproto/ofproto-dpif-upcall.c > +++ b/ofproto/ofproto-dpif-upcall.c > @@ -1379,7 +1379,6 @@ revalidate_ukey(struct udpif *udpif, struct > udpif_flow_dump *udump, > xoutp = NULL; > actions = NULL; > netflow = NULL; > -may_learn = push.n_packets > 0; > > /* If we don't need to revalidate, we can simply push the stats contained > * in the udump, otherwise we'll have to get the actions so we can check > @@ -1413,6 +1412,7 @@ revalidate_ukey(struct udpif *udpif, struct > udpif_flow_dump *udump, > goto exit; > } > > +may_learn = push.n_packets > 0; > if (ukey->xcache && !udump->need_revalidate) { > xlate_push_stats(ukey->xcache, may_learn, &push); > ok = true; > -- > 1.8.3.1 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/5] lib/flow: Introduce miniflow_extract().
Pushed, thanks for the prompt review! Jarno On Apr 18, 2014, at 2:32 AM, YAMAMOTO Takashi wrote: >> miniflow_extract() extracts packet headers directly to a miniflow, >> which is a compressed form of the struct flow. This does not require >> a large struct to be cleared to begin with, and accesses less memory. >> These performance benefits should allow this to be used in the DPDK >> datapath. >> >> miniflow_extract() takes a miniflow as an input/output parameter. On >> input the buffer for values to be extracted must be properly >> initialized. On output the map contains ones for all the fields that >> have been extracted. >> >> Some struct flow fields are reordered to make miniflow_extract to >> progress in the logical order. >> >> Some explicit "inline" keywords are necessary for GCC to optimize this >> properly. Also, macros are used for same reason instead of inline >> functions for pushing data to the miniflow. >> >> Signed-off-by: Jarno Rajahalme > > Reviewed-by: YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] lib/ofp-util: Restore the check for minus sign in port number strings.
Thanks for the review, pushed with this error message line (it fit to one line better :-): VLOG_WARN("Negative value %s is not a valid port number.", s); Jarno On Apr 17, 2014, at 11:11 PM, Justin Pettit wrote: > It might be useful to print the port string that was handed to it, since the > other errors in that function do that. Something like: > > VLOG_WARN("port %s is a negative value, which is not a valid port number", > s); > > Assuming that seems reasonable to you and Zoltan thinks it would prevent the > issue he saw: > > Acked-by: Justin Pettit > > --Justin > > > On April 17, 2014 at 6:37:42 PM, Jarno Rajahalme (jrajaha...@nicira.com) > wrote: >> Commit 33ab38d9 (meta-flow: Simplify mf_from_ofp_port_string()) >> inadvertently removed a check for minus sign at the beginning of a >> port number string introduced by commit 05dddba (meta-flow: Don't >> allow negative port numbers). This check is still needed, so put it >> back, but to ofputil_port_from_string() this time. >> >> Signed-off-by: Jarno Rajahalme >> --- >> lib/ofp-util.c | 6 +- >> 1 file changed, 5 insertions(+), 1 deletion(-) >> >> diff --git a/lib/ofp-util.c b/lib/ofp-util.c >> index af2d853..a990f46 100644 >> --- a/lib/ofp-util.c >> +++ b/lib/ofp-util.c >> @@ -5394,8 +5394,12 @@ ofputil_port_to_ofp11(ofp_port_t ofp10_port) >> bool >> ofputil_port_from_string(const char *s, ofp_port_t *portp) >> { >> - uint32_t port32; >> + unsigned int port32; /* int is at least 32 bits wide. */ >> >> + if (*s == '-') { >> + VLOG_WARN("Negative values are not supported as port numbers."); >> + return false; >> + } >> *portp = 0; >> if (str_to_uint(s, 10, &port32)) { >> if (port32 < ofp_to_u16(OFPP_MAX)) { >> -- >> 1.7.10.4 >> >> ___ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/5] lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().
On Apr 18, 2014, at 2:28 AM, YAMAMOTO Takashi wrote: >> Add inlined generic accessors for miniflow integer type fields, and a >> new miniflow_get_tcp_flags() usinge these. These will be used in a >> later patch. >> >> Some definitions also used in lib/packets.h had to be moved there to >> resolve circular include dependencies. Similarly, some inline >> functions using struct flow are now in lib/flow.h. IMO this is >> cleaner, since now the lib/flow.h need not be included from >> lib/packets.h. >> >> Signed-off-by: Jarno Rajahalme > > at least tests/test-sflow.c needs arpa/inet.h for inet_ntop > as it isn't indirectly included via packets.h anymore. > Added, thanks! > otherwise looks fine to me. > > Reviewed-by: YAMAMOTO Takashi Pushed to master, Jarno ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 3/5] lib/flow: Possibly faster miniflow_hash_in_minimask()
On Apr 18, 2014, at 2:32 AM, YAMAMOTO Takashi wrote: >> Upcoming patches add classifier lookups using miniflows, this is >> heavily used for it. >> >> Signed-off-by: Jarno Rajahalme > > Reviewed-by: YAMAMOTO Takashi Pushed, thanks! Jarno ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/5] classifier: Support miniflow as a key.
On Apr 18, 2014, at 2:32 AM, YAMAMOTO Takashi wrote: >> Support struct miniflow as a key for datapath flow lookup. >> >> The new classifier interface classifier_lookup_miniflow_first() takes >> a miniflow as a key and stops at the first match with no regard to >> flow prioritites. This works only if the classifier has no >> conflicting rules (as is the case with the userspace datapath >> classifier). >> >> Signed-off-by: Jarno Rajahalme > > Reviewed-by: YAMAMOTO Takashi Thanks for the review, pushed to master, Jarno ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 5/5] dpif-netdev: Use miniflow as a flow key.
On Apr 18, 2014, at 2:26 AM, YAMAMOTO Takashi wrote: >> Use miniflow as a flow key in the userspace datapath classifier. The >> miniflow is expanded for upcalls, but for existing datapath flows, the >> key need not be expanded. >> >> Signed-off-by: Jarno Rajahalme > > Reviewed-by: YAMAMOTO Takashi > Pushed to master, Jarno >> @@ -2144,8 +2157,9 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, >> >> hash_act = nl_attr_get(a); >> if (hash_act->hash_alg == OVS_HASH_ALG_L4) { >> - >> -hash = flow_hash_symmetric_l4(aux->key, hash_act->hash_bias); >> +/* Hash need not be symmetric, nor does it need to include >> + * L2 fields. */ >> +hash = miniflow_hash_5tuple(aux->key, hash_act->hash_bias); > > how do you think about ipv6? I noticed support for it was missing in the existing flow_hash_5tuple, and agree that both that and the new miniflow_hash_5tuple() need to be improved to add support for IPv6. I’ll send a new patch doing that. > > YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 3/5] openvswitch.h: rename hash action definition
On Fri, Apr 18, 2014 at 2:50 AM, Andy Zhou wrote: > Rename hash_bias to hash_basis to make it consistent with similar > usages. > > Signed-off-by: Andy Zhou Acked-by: Jesse Gross ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 4/5] datapath: add hash action
On Fri, Apr 18, 2014 at 2:51 AM, Andy Zhou wrote: > diff --git a/datapath/actions.c b/datapath/actions.c > index 82cfd2d..87a8a40 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > @@ -460,6 +460,20 @@ static int sample(struct datapath *dp, struct sk_buff > *skb, > nla_len(acts_list), true); > } > > +static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) > +{ > + struct sw_flow_key *key = OVS_CB(skb)->pkt_key; > + struct ovs_action_hash *hash_act = nla_data(attr); > + u32 hash = 0; > + > + /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */ > + hash = skb_get_rxhash(skb); > + if (!hash) > + hash = 0x1; > + > + key->ovs_flow_hash = jhash_1word(hash, hash_act->hash_basis); I think you need to rehash before the zero check - otherwise after rehashing it's possible to have zero again. I don't understand the other change about accepting a hash mask even when there isn't a hash value. Can you explain this? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/2] DPI plugin addition proposal v2
It looks like this patch still slow-paths packets until the classification is done. As I stated last time[1], this is a non-starter because of the impact it will have on performance. As such, I'm not going to review any patches or comment on future ones that do this. If you want to integrate with OVS, you must find a way to classify the packets in the datapath. If that's not possible, then I would recommend creating a local controller to which you send packets for classification. You can have it program flows that send Packet-Ins to it until the classification is done. Once you've determined the classification, you can push down a flow that applies the policy into the fast-path. If you want to work with other OpenFlow controllers, you'll need to coordinate with them or create a Flowvisor-like proxy, though. --Justin [1] - http://openvswitch.org/pipermail/dev/2014-March/038192.html On April 18, 2014 at 1:04:25 AM, Franck BAUDIN (franck.bau...@qosmos.com) wrote: > > This message and any attachments (the "message") are confidential, intended > solely > for the addressees. If you are not the intended recipient, please notify the > sender immediately > by e-mail and delete this message from your system. In this case, you are not > authorized > to use, copy this message and/or disclose the content to any other person. > E-mails are > susceptible to alteration. Neither Qosmos nor any of its subsidiaries or > affiliates > shall be liable for the message if altered, changed or falsified. > > Ce message et toutes ses pièces jointes (ci-après le "message")sont > confidentiels > et établis à l'intention exclusive de ses destinataires. Si vous avez reçu ce > message > par erreur, merci d’en informer immédiatement son émetteur par courrier > électronique > et d’effacer ce message de votre système. Dans cette hypothèse, vous n’êtes > pas autorisé > à utiliser, copier ce message et/ou en divulguer le contenu à un tiers. Tout > message électronique > est susceptible d'altération. Qosmos et ses filiales déclinent toute > responsabilité > au titre de ce message s'il a été altéré, déformé ou falsifié. > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 4/5] datapath: add hash action
On Fri, Apr 18, 2014 at 9:28 AM, Jesse Gross wrote: > On Fri, Apr 18, 2014 at 2:51 AM, Andy Zhou wrote: >> diff --git a/datapath/actions.c b/datapath/actions.c >> index 82cfd2d..87a8a40 100644 >> --- a/datapath/actions.c >> +++ b/datapath/actions.c >> @@ -460,6 +460,20 @@ static int sample(struct datapath *dp, struct sk_buff >> *skb, >> nla_len(acts_list), true); >> } >> >> +static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) >> +{ >> + struct sw_flow_key *key = OVS_CB(skb)->pkt_key; >> + struct ovs_action_hash *hash_act = nla_data(attr); >> + u32 hash = 0; >> + >> + /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */ >> + hash = skb_get_rxhash(skb); >> + if (!hash) >> + hash = 0x1; >> + >> + key->ovs_flow_hash = jhash_1word(hash, hash_act->hash_basis); > > I think you need to rehash before the zero check - otherwise after > rehashing it's possible to have zero again. Good catch. Will do. > > I don't understand the other change about accepting a hash mask even > when there isn't a hash value. Can you explain this? It is not necessary now. I was thinking of the case Pravin suggested that we do a masked action. A hash value may be zero after applying a mask. In this case, the hash value won't be exported on a post recirculation miss, but its mask can be supplied on flow installation. We can remove it if this is not the direction we want to go. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 4/5] datapath: add hash action
On Fri, Apr 18, 2014 at 10:39 AM, Andy Zhou wrote: > On Fri, Apr 18, 2014 at 9:28 AM, Jesse Gross wrote: >> I don't understand the other change about accepting a hash mask even >> when there isn't a hash value. Can you explain this? > It is not necessary now. I was thinking of the case Pravin suggested that > we do a masked action. A hash value may be zero after applying a mask. > In this case, the hash value won't be exported on a post recirculation miss, > but > its mask can be supplied on flow installation. We can remove it if this is not > the direction we want to go. I don't know if Pravin was actually suggesting a maskable action - his questions seemed more on the match side. I can't really come up with a use case either. I think that the value being special/disallowed in the hash should be entirely internal to the kernel. In this case it's fine to do it because the hash is opaque anyways, so who's so say that we just don't have a hash with this property. However, I think if we started getting into cases like this where a zero value might come up then we should probably either use a bit to indicate that the hash is valid or always send it. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 4/5] datapath: add hash action
On Fri, Apr 18, 2014 at 10:52 AM, Jesse Gross wrote: > On Fri, Apr 18, 2014 at 10:39 AM, Andy Zhou wrote: >> On Fri, Apr 18, 2014 at 9:28 AM, Jesse Gross wrote: >>> I don't understand the other change about accepting a hash mask even >>> when there isn't a hash value. Can you explain this? >> It is not necessary now. I was thinking of the case Pravin suggested that >> we do a masked action. A hash value may be zero after applying a mask. >> In this case, the hash value won't be exported on a post recirculation miss, >> but >> its mask can be supplied on flow installation. We can remove it if this is >> not >> the direction we want to go. > > I don't know if Pravin was actually suggesting a maskable action - his > questions seemed more on the match side. I can't really come up with a > use case either. > Thanks for clarifying. I will remove it from this patch since there is not a clear use case to allow dp_hash mask alone. > I think that the value being special/disallowed in the hash should be > entirely internal to the kernel. In this case it's fine to do it > because the hash is opaque anyways, so who's so say that we just don't > have a hash with this property. However, I think if we started getting > into cases like this where a zero value might come up then we should > probably either use a bit to indicate that the hash is valid or always > send it. That's not a bad idea. I some how don't like sending 'new' attributes to an older user space, even if it will not cause harm because of the compatibility layer as your previous email suggested. We could go down this road if you think this is significantly better than removing it. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 06/11] daemon-windows: Create pidfiles with option --pidfile.
In Windows, we cannot delete a file that has been opened. We use this feature to "lock" the pidfile. Signed-off-by: Gurucharan Shetty --- lib/daemon-windows.c | 79 ++ lib/daemon.h |8 - 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/lib/daemon-windows.c b/lib/daemon-windows.c index baa7520..b47b42e 100644 --- a/lib/daemon-windows.c +++ b/lib/daemon-windows.c @@ -34,6 +34,9 @@ static bool detach; /* Was --detach specified? */ static bool detached; /* Running as the child process. */ static HANDLE write_handle; /* pipe handle to write to the parent. */ +static char *pidfile; /* --pidfile: Name of pidfile (null if none). */ +static FILE *filep_pidfile; /* File pointer to access the pidfile. */ + /* Handle to the Services Manager and the created service. */ static SC_HANDLE manager, service; @@ -409,10 +412,59 @@ daemon_save_fd(int fd OVS_UNUSED) void daemonize(void) { +daemonize_start(); +daemonize_complete(); +} + +static void +unlink_pidfile(void) +{ +if (filep_pidfile) { +fclose(filep_pidfile); +} +if (pidfile) { +unlink(pidfile); +} +} + +/* If a pidfile has been configured, creates it and stores the running + * process's pid in it. Ensures that the pidfile will be deleted when the + * process exits. */ +static void +make_pidfile(void) +{ +int error; + +error = GetFileAttributes(pidfile); +if (error != INVALID_FILE_ATTRIBUTES) { +/* pidfile exists. Try to unlink() it. */ +error = unlink(pidfile); +if (error) { +VLOG_FATAL("Failed to delete existing pidfile %s (%s)", pidfile, + ovs_strerror(errno)); +} +} + +filep_pidfile = fopen(pidfile, "w"); +if (filep_pidfile == NULL) { +VLOG_FATAL("failed to open %s (%s)", pidfile, ovs_strerror(errno)); +} + +fatal_signal_add_hook(unlink_pidfile, NULL, NULL, true); + +fprintf(filep_pidfile, "%d\n", _getpid()); +if (fflush(filep_pidfile) == EOF) { +VLOG_FATAL("Failed to write into the pidfile %s", pidfile); +} + +/* Don't close the pidfile till the process exits. */ } void daemonize_start(void) { +if (pidfile) { +make_pidfile(); +} } void @@ -431,3 +483,30 @@ daemonize_complete(void) service_complete(); } + +/* Returns the file name that would be used for a pidfile if 'name' were + * provided to set_pidfile(). If 'name' is not provided, returns the default + * name based on rundir. The caller must free the returned string. */ +static char * +make_pidfile_name(const char *name) +{ +if (name && strchr(name, ':')) { +return strdup(name); +} else { +return xasprintf("%s/%s.pid", ovs_rundir(), program_name); +} +} + +/* Sets up a following call to daemonize() to create a pidfile named 'name'. + * If 'name' contains ':', then it is treated as an absolute path. + * Otherwise, it is taken relative to RUNDIR, which is $(prefix)/var/run by + * default. + * + * If 'name' is null, then program_name followed by ".pid" is used. */ +void +set_pidfile(const char *name) +{ +assert_single_threaded(); +free(pidfile); +pidfile = make_pidfile_name(name); +} diff --git a/lib/daemon.h b/lib/daemon.h index f4dced3..0b7dc5f 100644 --- a/lib/daemon.h +++ b/lib/daemon.h @@ -74,19 +74,20 @@ void set_detach(void); void daemon_set_monitor(void); -void set_pidfile(const char *name); void set_no_chdir(void); void ignore_existing_pidfile(void); pid_t read_pidfile(const char *name); #else #define DAEMON_OPTION_ENUMS\ OPT_DETACH,\ +OPT_PIDFILE, \ OPT_PIPE_HANDLE, \ OPT_SERVICE, \ OPT_SERVICE_MONITOR #define DAEMON_LONG_OPTIONS \ {"detach", no_argument, NULL, OPT_DETACH},\ +{"pidfile",optional_argument, NULL, OPT_PIDFILE}, \ {"pipe-handle",required_argument, NULL, OPT_PIPE_HANDLE}, \ {"service",no_argument, NULL, OPT_SERVICE}, \ {"service-monitor",no_argument, NULL, OPT_SERVICE_MONITOR} @@ -95,6 +96,10 @@ pid_t read_pidfile(const char *name); case OPT_DETACH:\ break; \ \ +case OPT_PIDFILE: \ +set_pidfile(optarg);\ +break; \ +\ case OPT_PIPE_HANDLE: \ set_pipe_handle(optarg);\ break; \ @@ -117,6 +122,7 @@ void daemonize_complete(void); void daemon_usage(void); void ser
[ovs-dev] [PATCH 08/11] netdev: Initialize netdev_class_mutex.
ovsdb-server on windows crashes without it. Signed-off-by: Gurucharan Shetty --- lib/netdev.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/netdev.c b/lib/netdev.c index 4736a97..71d3f83 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -70,7 +70,8 @@ static struct shash netdev_shash OVS_GUARDED_BY(netdev_mutex) * providers. For example, netdev_run() calls into provider 'run' functions, * which might reasonably want to call one of the netdev functions that takes * netdev_class_mutex. */ -static struct ovs_mutex netdev_class_mutex OVS_ACQ_BEFORE(netdev_mutex); +static struct ovs_mutex netdev_class_mutex OVS_ACQ_BEFORE(netdev_mutex) = +OVS_MUTEX_INITIALIZER; /* Contains 'struct netdev_registered_class'es. */ static struct hmap netdev_classes OVS_GUARDED_BY(netdev_class_mutex) -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 01/11] tests: Define a variable "IS_WIN32" for tests.
Signed-off-by: Gurucharan Shetty --- tests/atlocal.in |3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/atlocal.in b/tests/atlocal.in index 06e7384..8267554 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -77,4 +77,7 @@ Linux) FreeBSD|NetBSD) LOOPBACK_INTERFACE=lo0 ;; +MINGW*) +IS_WIN32="yes" +;; esac -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 07/11] daemon-windows: Recognize --no-chdir option for windows.
The option won't have any effect on the running of the daemon. Recognizing the option lets us avoid if else conditions in unit tests. Signed-off-by: Gurucharan Shetty --- lib/daemon.h |5 + 1 file changed, 5 insertions(+) diff --git a/lib/daemon.h b/lib/daemon.h index 0b7dc5f..8ac777f 100644 --- a/lib/daemon.h +++ b/lib/daemon.h @@ -80,6 +80,7 @@ pid_t read_pidfile(const char *name); #else #define DAEMON_OPTION_ENUMS\ OPT_DETACH,\ +OPT_NO_CHDIR, \ OPT_PIDFILE, \ OPT_PIPE_HANDLE, \ OPT_SERVICE, \ @@ -87,6 +88,7 @@ pid_t read_pidfile(const char *name); #define DAEMON_LONG_OPTIONS \ {"detach", no_argument, NULL, OPT_DETACH},\ +{"no-chdir", no_argument, NULL, OPT_NO_CHDIR}, \ {"pidfile",optional_argument, NULL, OPT_PIDFILE}, \ {"pipe-handle",required_argument, NULL, OPT_PIPE_HANDLE}, \ {"service",no_argument, NULL, OPT_SERVICE}, \ @@ -96,6 +98,9 @@ pid_t read_pidfile(const char *name); case OPT_DETACH:\ break; \ \ +case OPT_NO_CHDIR: \ +break; \ +\ case OPT_PIDFILE: \ set_pidfile(optarg);\ break; \ -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 04/11] daemon-windows: Rename service variables.
Sa far, we are using variable 'detach' to indicate whether the option "--service" has been set. We were using variable 'detached' to indicate that the daemon is being called from the Windows services manager. An upcoming commit introduces command line option "--detach" for daemons running on Windows. This will cause confusion with variable names. Therefore, rename the variables. Signed-off-by: Gurucharan Shetty --- lib/daemon-windows.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/daemon-windows.c b/lib/daemon-windows.c index da0bd11..a00278e 100644 --- a/lib/daemon-windows.c +++ b/lib/daemon-windows.c @@ -23,8 +23,8 @@ VLOG_DEFINE_THIS_MODULE(daemon); -static bool detach; /* Was --service specified? */ -static bool detached; /* Have we already detached? */ +static bool service_create; /* Was --service specified? */ +static bool service_started; /* Have we dispatched service to start? */ /* --service-monitor: Should the service be restarted if it dies * unexpectedly? */ @@ -77,10 +77,10 @@ service_start(int *argcp, char **argvp[]) {NULL, NULL} }; -/* 'detached' is 'false' when service_start() is called the first time. - * It is 'true', when it is called the second time by the Windows services - * manager. */ -if (detached) { +/* 'service_started' is 'false' when service_start() is called the first + * time. It is 'true', when it is called the second time by the Windows + * services manager. */ +if (service_started) { init_service_status(); wevent = CreateEvent(NULL, TRUE, FALSE, NULL); @@ -133,14 +133,14 @@ service_start(int *argcp, char **argvp[]) * options before the call-back from the service control manager. */ for (i = 0; i < argc; i ++) { if (!strcmp(argv[i], "--service")) { -detach = true; +service_create = true; } else if (!strcmp(argv[i], "--service-monitor")) { monitor = true; } } /* If '--service' is not a command line option, run in foreground. */ -if (!detach) { +if (!service_create) { return; } @@ -149,7 +149,7 @@ service_start(int *argcp, char **argvp[]) * script. */ check_service(); -detached = true; +service_started = true; /* StartServiceCtrlDispatcher blocks and returns after the service is * stopped. */ @@ -184,7 +184,7 @@ control_handler(DWORD request) bool should_service_stop(void) { -if (detached) { +if (service_started) { if (service_status.dwCurrentState != SERVICE_RUNNING) { return true; } else { -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 05/11] daemon-windows: Implement --detach option for Windows.
When "--detach" is specified, a daemon will create a new process with the same command line options as the parent. Additionally, an undocumented command line option "--pipe-handle" is passed to child. Once the child is ready to handle external commands, it communicates to the parent that it is ready (using the pipe handle). The parent exits. This lets us run the daemons in background. This will also help the unit tests because currently most of the unit tests pass the '--detach' option to the daemons. Signed-off-by: Gurucharan Shetty --- lib/daemon-windows.c | 107 +- lib/daemon.h | 26 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/lib/daemon-windows.c b/lib/daemon-windows.c index a00278e..baa7520 100644 --- a/lib/daemon-windows.c +++ b/lib/daemon-windows.c @@ -30,6 +30,10 @@ static bool service_started; /* Have we dispatched service to start? */ * unexpectedly? */ static bool monitor; +static bool detach; /* Was --detach specified? */ +static bool detached; /* Running as the child process. */ +static HANDLE write_handle; /* pipe handle to write to the parent. */ + /* Handle to the Services Manager and the created service. */ static SC_HANDLE manager, service; @@ -51,6 +55,8 @@ static void handle_scm_callback(void); static void init_service_status(void); static void set_config_failure_actions(void); +static bool detach_process(int argc, char *argv[]); + extern int main(int argc, char *argv[]); void @@ -77,6 +83,14 @@ service_start(int *argcp, char **argvp[]) {NULL, NULL} }; +/* If one of the command line option is "--detach", we create + * a new process in case of parent, wait for child to start and exit. + * In case of the child, we just return. We should not be creating a + * service in either case. */ +if (detach_process(argc, argv)) { +return; +} + /* 'service_started' is 'false' when service_start() is called the first * time. It is 'true', when it is called the second time by the Windows * services manager. */ @@ -301,8 +315,86 @@ set_config_failure_actions() } } - -/* Stub functions to handle daemonize related calls in non-windows platform. */ +/* When a daemon is passed the --detach option, we create a new + * process and pass an additional non-documented option called --pipe-handle. + * Through this option, the parent passes one end of a pipe handle. */ +void +set_pipe_handle(const char *pipe_handle) +{ +write_handle = (HANDLE) atoi(pipe_handle); +} + +/* If one of the command line option is "--detach", creates + * a new process in case of parent, waits for child to start and exits. + * In case of the child, returns. */ +static bool +detach_process(int argc, char *argv[]) +{ +SECURITY_ATTRIBUTES sa; +STARTUPINFO si; +PROCESS_INFORMATION pi; +HANDLE read_pipe, write_pipe; +char *buffer; +int error, i; +char ch; + +/* We are only interested in the '--detach' and '--pipe-handle'. */ +for (i = 0; i < argc; i ++) { +if (!strcmp(argv[i], "--detach")) { +detach = true; +} else if (!strncmp(argv[i], "--pipe-handle", 13)) { +/* If running as a child, return. */ +detached = true; +return true; +} +} + +/* Nothing to do if the option --detach is not set. */ +if (!detach) { +return false; +} + +/* We are running as a parent. */ + +/* Set the security attribute such that a process created will + * inherit the pipe handles. */ +sa.nLength = sizeof(sa); +sa.lpSecurityDescriptor = NULL; +sa.bInheritHandle = TRUE; + +/* Create an anonymous pipe to communicate with the child. */ +error = CreatePipe(&read_pipe, &write_pipe, &sa, 0); +if (!error) { +VLOG_FATAL("CreatePipe failed (%s)", ovs_lasterror_to_string()); +} + +GetStartupInfo(&si); + +/* To the child, we pass an extra argument '--pipe-handle=write_pipe' */ +buffer = xasprintf("%s %s=%ld", GetCommandLine(), "--pipe-handle", + write_pipe); + +/* Create a detached child */ +error = CreateProcess(NULL, buffer, NULL, NULL, TRUE, DETACHED_PROCESS, + NULL, NULL, &si, &pi); +if (!error) { +VLOG_FATAL("CreateProcess failed (%s)", ovs_lasterror_to_string()); +} + +/* Close one end of the pipe in the parent. */ +CloseHandle(write_pipe); + +/* Block and wait for child to say it is ready. */ +error = ReadFile(read_pipe, &ch, 1, NULL, NULL); +if (!error) { +VLOG_FATAL("Failed to read from child (%s)", + ovs_lasterror_to_string()); +} +/* The child has successfully started and is ready. */ +exit(0); +} + +/* Will daemonize() really detach? */ bool get_detach() { @@ -326,5 +418,16 @@ void daemonize_start(void) void daemonize_complete(void) { +/* If runnin
[ovs-dev] [PATCH 02/11] stream: Introduce [p]windows_[p]stream_class.
On Linux, we heavily use --remote=punix:* to listen for connections through unix domain sockets. We also use, unix:* to connect to a daemon that is listening on unix domain sockets. Many times, we create default unix domain sockets for listening and many utilities connect to these sockets by default. Windows does not have unix domain sockets. So far, we could just use ptcp:* and tcp:* for listening and initiating connections respectively. The drawback here is that one has to provide a specific TCP port. For unit tests, it looks useful to let kernel choose that port. As such, we can let that chosen kernel port be stored in the file specified with punix:* and unix:*. For this purpose, introduce a new [p]windows_[p]stream_class. Since it is just a wrapper around [p]tcp_[p]stream_class, add it to stream-tcp.c. commit cb54a8c (unixctl: Add support for Windows.) used the above concept for only control channel connections (i.e., --unixctl for daemons and its interaction with ovs-appctl). This commit adds the same support for all unix domain sockets. Signed-off-by: Gurucharan Shetty --- lib/stream-provider.h|5 +++ lib/stream-tcp.c | 111 ++ lib/stream.c |4 ++ lib/vconn-active.man |4 +- lib/vconn-passive.man|5 ++- ovsdb/remote-active.man |5 ++- ovsdb/remote-passive.man |5 ++- 7 files changed, 135 insertions(+), 4 deletions(-) diff --git a/lib/stream-provider.h b/lib/stream-provider.h index 44d75d3..8347ac6 100644 --- a/lib/stream-provider.h +++ b/lib/stream-provider.h @@ -191,8 +191,13 @@ struct pstream_class { /* Active and passive stream classes. */ extern const struct stream_class tcp_stream_class; extern const struct pstream_class ptcp_pstream_class; +#ifndef _WIN32 extern const struct stream_class unix_stream_class; extern const struct pstream_class punix_pstream_class; +#else +extern const struct stream_class windows_stream_class; +extern const struct pstream_class pwindows_pstream_class; +#endif #ifdef HAVE_OPENSSL extern const struct stream_class ssl_stream_class; extern const struct pstream_class pssl_pstream_class; diff --git a/lib/stream-tcp.c b/lib/stream-tcp.c index d62e9c3..19d7314 100644 --- a/lib/stream-tcp.c +++ b/lib/stream-tcp.c @@ -91,6 +91,62 @@ const struct stream_class tcp_stream_class = { NULL, /* run_wait */ NULL, /* wait */ }; + +#ifdef _WIN32 +static int +windows_open(const char *name, char *suffix, struct stream **streamp, + uint8_t dscp) +{ +int error, port; +FILE *file; +char *suffix_new, *path; + +/* If the path does not contain a ':', assume it is relative to + * OVS_RUNDIR. */ +if (!strchr(suffix, ':')) { +path = xasprintf("%s/%s", ovs_rundir(), suffix); +} else { +path = strdup(suffix); +} + +file = fopen(path, "r"); +if (!file) { +error = errno; +VLOG_DBG("%s: could not open %s (%s)", name, path, + ovs_strerror(error)); +return error; +} + +error = fscanf(file, "%d", &port); +if (error != 1) { +VLOG_ERR("failed to read port from %s", suffix); +fclose(file); +return EINVAL; +} +fclose(file); + +suffix_new = xasprintf("127.0.0.1:%d", port); + +error = tcp_open(name, suffix_new, streamp, dscp); + +free(suffix_new); +free(path); +return error; +} + +const struct stream_class windows_stream_class = { +"unix", /* name */ +false, /* needs_probes */ +windows_open, /* open */ +NULL, /* close */ +NULL, /* connect */ +NULL, /* recv */ +NULL, /* send */ +NULL, /* run */ +NULL, /* run_wait */ +NULL, /* wait */ +}; +#endif /* Passive TCP. */ @@ -148,3 +204,58 @@ const struct pstream_class ptcp_pstream_class = { NULL, }; +#ifdef _WIN32 +static int +pwindows_open(const char *name OVS_UNUSED, char *suffix, + struct pstream **pstreamp, uint8_t dscp) +{ +int error; +char *suffix_new, *path; +FILE *file; +struct pstream *listener; + +suffix_new = xstrdup("0:127.0.0.1"); +error = ptcp_open(name, suffix_new, pstreamp, dscp); +if (error) { +goto exit; +} +listener = *pstreamp; + +/* If the path does not contain a ':', assume it is relative to + * OVS_RUNDIR. */ +if (!strchr(suffix, ':')) { +path = xasprintf("%s/%s", ovs_rundir(), suffix); +} else { +path = strdup(suffix); +} + +file = fopen(path, "w"); +if (!file) { +error = errno; +VLOG_DBG("could not open %s (%s)", path, ovs_strerror(error)); +goto exit; +} + +fprintf(file, "%d\n", ntohs(listener->bound_port)); +
[ovs-dev] [PATCH 11/11] testsuite.at: Workaround for carriage returns on windows.
In unit tests, we compare text written in logs or stdout/stderr to figure out the success or failure of tests. In Windows, since new line is represented by CR+LF, autoconf tests run in MinGW environment fail. Asking diff to ignore trailing carriage returns is one way to solve the problem Suggested-by: Ben Pfaff Signed-off-by: Gurucharan Shetty --- tests/testsuite.at |4 1 file changed, 4 insertions(+) diff --git a/tests/testsuite.at b/tests/testsuite.at index 95e53d2..4b1d13f 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -47,6 +47,10 @@ pwd () { command pwd -W "$@" } +diff () { +command diff --strip-trailing-cr "$@" +} + kill () { case "$1" in -0) -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 10/11] testsuite.at: kill for windows.
We use kill to cleanup processes from pidfiles. Windows has a 'taskkill' which does something similar. We can check if the process with a PID exists with 'tasklist'. Both tasklist and taskkill return 0 for both success and failure. So, we will have to grep to see if there is a o/p. A typical o/p of tasklist is: $ tasklist | grep ovs ovsdb-server.exe 3228 RDP-Tcp#0 2 6,132 K ovs-vswitchd.exe 2080 RDP-Tcp#0 2 5,808 K $ tasklist //fi "PID eq 3228" Image Name PID Session NameSession#Mem Usage = === ovsdb-server.exe 3228 RDP-Tcp#0 2 6,132 K Signed-off-by: Gurucharan Shetty --- tests/testsuite.at | 29 + 1 file changed, 29 insertions(+) diff --git a/tests/testsuite.at b/tests/testsuite.at index 9cd90a3..95e53d2 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -46,6 +46,35 @@ if test "$IS_WIN32" = "yes"; then pwd () { command pwd -W "$@" } + +kill () { +case "$1" in +-0) +shift +for i in $*; do +# tasklist will always have return code 0. +# If pid does exist, there will be a line with the pid. +if tasklist //fi "PID eq $i" | grep $i; then +: +else +return 1 +fi +done +return 0 +;; +-[1-9]*) +shift +for i in $*; do +taskkill //F //PID $i +done +;; +[1-9][1-9]*) +for i in $*; do +taskkill //F //PID $i +done +;; +esac +} fi ] m4_divert_pop([PREPARE_TESTS]) -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 03/11] unixctl: Use [p]windows_[p]stream_class for windows.
Now that we have a separate [p]stream_class for hiding kernel assigned TCP port inside a file meant for unix domain sockets in windows, use the new infrastructure provided. Signed-off-by: Gurucharan Shetty --- lib/unixctl.c | 67 +++-- 1 file changed, 12 insertions(+), 55 deletions(-) diff --git a/lib/unixctl.c b/lib/unixctl.c index 20acc3c..c982214 100644 --- a/lib/unixctl.c +++ b/lib/unixctl.c @@ -211,28 +211,31 @@ unixctl_server_create(const char *path, struct unixctl_server **serverp) { struct unixctl_server *server; struct pstream *listener; -char *punix_path, *abs_path = NULL; +char *punix_path; int error; -#ifdef _WIN32 -FILE *file; -#endif *serverp = NULL; if (path && !strcmp(path, "none")) { return 0; } -#ifndef _WIN32 if (path) { +char *abs_path; +#ifndef _WIN32 abs_path = abs_file_name(ovs_rundir(), path); +#else +abs_path = strdup(path); +#endif punix_path = xasprintf("punix:%s", abs_path); +free(abs_path); } else { +#ifndef _WIN32 punix_path = xasprintf("punix:%s/%s.%ld.ctl", ovs_rundir(), program_name, (long int) getpid()); -} #else -punix_path = xstrdup("ptcp:0:127.0.0.1"); +punix_path = xasprintf("punix:%s/%s.ctl", ovs_rundir(), program_name); #endif +} error = pstream_open(punix_path, &listener, 0); if (error) { @@ -240,30 +243,6 @@ unixctl_server_create(const char *path, struct unixctl_server **serverp) goto exit; } -#ifdef _WIN32 -if (path) { -abs_path = xstrdup(path); -} else { -abs_path = xasprintf("%s/%s.ctl", ovs_rundir(), program_name); -} - -file = fopen(abs_path, "w"); -if (!file) { -error = errno; -ovs_error(error, "could not open %s", abs_path); -goto exit; -} - -fprintf(file, "%d\n", ntohs(listener->bound_port)); -if (fflush(file) == EOF) { -error = EIO; -ovs_error(error, "write failed for %s", abs_path); -fclose(file); -goto exit; -} -fclose(file); -#endif - unixctl_command_register("help", "", 0, 0, unixctl_help, NULL); unixctl_command_register("version", "", 0, 0, unixctl_version, NULL); @@ -273,9 +252,6 @@ unixctl_server_create(const char *path, struct unixctl_server **serverp) *serverp = server; exit: -if (abs_path) { -free(abs_path); -} free(punix_path); return error; } @@ -460,32 +436,13 @@ unixctl_client_create(const char *path, struct jsonrpc **client) char *abs_path, *unix_path; struct stream *stream; int error; -#ifdef _WIN32 -FILE *file; -int port; +#ifdef _WIN32 abs_path = strdup(path); -file = fopen(abs_path, "r"); -if (!file) { -int error = errno; -ovs_error(error, "could not open %s", abs_path); -free(abs_path); -return error; -} - -error = fscanf(file, "%d", &port); -if (error != 1) { -ovs_error(errno, "failed to read port from %s", abs_path); -free(abs_path); -return EINVAL; -} -fclose(file); - -unix_path = xasprintf("tcp:127.0.0.1:%d", port); #else abs_path = abs_file_name(ovs_rundir(), path); -unix_path = xasprintf("unix:%s", abs_path); #endif +unix_path = xasprintf("unix:%s", abs_path); *client = NULL; -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 09/11] testsuite.at: pwd for windows.
On MinGW, "pwd -W" gives the present working directory in the form of windows path (i.e C:/temp instead of /c/temp). When we pass the directory path to daemons as arguments, we should be passing it in the form of windows path. Suggested-by: Ben Pfaff Signed-off-by: Gurucharan Shetty --- tests/testsuite.at |6 ++ 1 file changed, 6 insertions(+) diff --git a/tests/testsuite.at b/tests/testsuite.at index a569436..9cd90a3 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -41,6 +41,12 @@ seq () { set `expr $1 + ${3-1}` $2 $3 done } + +if test "$IS_WIN32" = "yes"; then +pwd () { +command pwd -W "$@" +} +fi ] m4_divert_pop([PREPARE_TESTS]) -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 4/5] datapath: add hash action
The incremental change follows: diff --git a/datapath/actions.c b/datapath/actions.c index fdcd576..921310a 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -468,10 +468,11 @@ static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */ hash = skb_get_rxhash(skb); +hash = jhash_1word(hash, hash_act->hash_basis); if (!hash) hash = 0x1; -key->ovs_flow_hash = jhash_1word(hash, hash_act->hash_basis); +key->ovs_flow_hash = hash; } static int execute_set_action(struct sk_buff *skb, diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 3bee04d..873c42b 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -128,7 +128,6 @@ static bool match_validate(const struct sw_flow_match *match, /* Always allowed mask fields. */ mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) | (1ULL << OVS_KEY_ATTR_IN_PORT) - | (1ULL << OVS_KEY_ATTR_DP_HASH) | (1ULL << OVS_KEY_ATTR_RECIRC_ID) | (1ULL << OVS_KEY_ATTR_ETHERTYPE)); On Fri, Apr 18, 2014 at 11:42 AM, Andy Zhou wrote: > On Fri, Apr 18, 2014 at 10:52 AM, Jesse Gross wrote: >> On Fri, Apr 18, 2014 at 10:39 AM, Andy Zhou wrote: >>> On Fri, Apr 18, 2014 at 9:28 AM, Jesse Gross wrote: I don't understand the other change about accepting a hash mask even when there isn't a hash value. Can you explain this? >>> It is not necessary now. I was thinking of the case Pravin suggested that >>> we do a masked action. A hash value may be zero after applying a mask. >>> In this case, the hash value won't be exported on a post recirculation >>> miss, but >>> its mask can be supplied on flow installation. We can remove it if this is >>> not >>> the direction we want to go. >> >> I don't know if Pravin was actually suggesting a maskable action - his >> questions seemed more on the match side. I can't really come up with a >> use case either. >> > Thanks for clarifying. I will remove it from this patch since there is > not a clear > use case to allow dp_hash mask alone. > >> I think that the value being special/disallowed in the hash should be >> entirely internal to the kernel. In this case it's fine to do it >> because the hash is opaque anyways, so who's so say that we just don't >> have a hash with this property. However, I think if we started getting >> into cases like this where a zero value might come up then we should >> probably either use a bit to indicate that the hash is valid or always >> send it. > > That's not a bad idea. I some how don't like sending 'new' attributes > to an older > user space, even if it will not cause harm because of the > compatibility layer as your > previous email suggested. We could go down this road if you think this > is significantly > better than removing it. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 03/10] ofproto: Use classifer cursor API to collect vlan usage.
This was the only place in OVS code that accessed classifier internal data structures directly. Use the classifier cursor API instead, so that following patches can hide classifier internal data structures. Note: There seems to be no test case to verify that this vlan usage collection is implemented correctly. Signed-off-by: Jarno Rajahalme --- ofproto/ofproto.c | 25 +++-- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index f16005c..e2a593e 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -6984,25 +6984,30 @@ ofproto_unixctl_init(void) void ofproto_get_vlan_usage(struct ofproto *ofproto, unsigned long int *vlan_bitmap) { +struct match match; +struct cls_rule target; const struct oftable *oftable; +match_init_catchall(&match); +match_set_vlan_vid_masked(&match, htons(VLAN_CFI), htons(VLAN_CFI)); +cls_rule_init(&target, &match, 0); + free(ofproto->vlan_bitmap); ofproto->vlan_bitmap = bitmap_allocate(4096); ofproto->vlans_changed = false; OFPROTO_FOR_EACH_TABLE (oftable, ofproto) { -const struct cls_subtable *table; +struct cls_cursor cursor; +struct rule *rule; fat_rwlock_rdlock(&oftable->cls.rwlock); -HMAP_FOR_EACH (table, hmap_node, &oftable->cls.subtables) { -if (minimask_get_vid_mask(&table->mask) == VLAN_VID_MASK) { -const struct cls_rule *rule; - -HMAP_FOR_EACH (rule, hmap_node, &table->rules) { -uint16_t vid = miniflow_get_vid(&rule->match.flow); -bitmap_set1(vlan_bitmap, vid); -bitmap_set1(ofproto->vlan_bitmap, vid); -} +cls_cursor_init(&cursor, &oftable->cls, &target); +CLS_CURSOR_FOR_EACH (rule, cr, &cursor) { +if (minimask_get_vid_mask(&rule->cr.match.mask) == VLAN_VID_MASK) { +uint16_t vid = miniflow_get_vid(&rule->cr.match.flow); + +bitmap_set1(vlan_bitmap, vid); +bitmap_set1(ofproto->vlan_bitmap, vid); } } fat_rwlock_unlock(&oftable->cls.rwlock); -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 04/10] lib/classifier: Hide more of the internal data structures.
It is better not to expose definitions not needed by users. Signed-off-by: Jarno Rajahalme --- lib/classifier.c| 134 --- lib/classifier.h| 65 --- tests/test-classifier.c |6 +-- 3 files changed, 115 insertions(+), 90 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 8ab1f9c..e48f0a1 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -30,18 +30,70 @@ VLOG_DEFINE_THIS_MODULE(classifier); +struct trie_node; + +/* Prefix trie for a 'field' */ +struct cls_trie { +const struct mf_field *field; /* Trie field, or NULL. */ +struct trie_node *root; /* NULL if none. */ +}; + +struct cls_classifier { +int n_rules;/* Total number of rules. */ +uint8_t n_flow_segments; +uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use + * for staged lookup. */ +struct hmap subtables; /* Contains "struct cls_subtable"s. */ +struct list subtables_priority; /* Subtables in descending priority order. + */ +struct hmap partitions; /* Contains "struct cls_partition"s. */ +struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */ +unsigned int n_tries; +}; + +/* A set of rules that all have the same fields wildcarded. */ +struct cls_subtable { +struct hmap_node hmap_node; /* Within struct cls_classifier 'subtables' + * hmap. */ +struct list list_node; /* Within classifier 'subtables_priority' list. + */ +struct hmap rules; /* Contains "struct cls_rule"s. */ +struct minimask mask; /* Wildcards for fields. */ +int n_rules;/* Number of rules, including duplicates. */ +unsigned int max_priority; /* Max priority of any rule in the subtable. */ +unsigned int max_count; /* Count of max_priority rules. */ +tag_type tag; /* Tag generated from mask for partitioning. */ +uint8_t n_indices; /* How many indices to use. */ +uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */ +struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ +unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */ +}; + +/* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata + * field) with tags for the "cls_subtable"s that contain rules that match that + * metadata value. */ +struct cls_partition { +struct hmap_node hmap_node; /* In struct cls_classifier's 'partitions' + * hmap. */ +ovs_be64 metadata; /* metadata value for this partition. */ +tag_type tags; /* OR of each flow's cls_subtable tag. */ +struct tag_tracker tracker; /* Tracks the bits in 'tags'. */ +}; + + + struct trie_ctx; -static struct cls_subtable *find_subtable(const struct classifier *, +static struct cls_subtable *find_subtable(const struct cls_classifier *, const struct minimask *); -static struct cls_subtable *insert_subtable(struct classifier *, +static struct cls_subtable *insert_subtable(struct cls_classifier *, const struct minimask *); -static void destroy_subtable(struct classifier *, struct cls_subtable *); +static void destroy_subtable(struct cls_classifier *, struct cls_subtable *); -static void update_subtables_after_insertion(struct classifier *, +static void update_subtables_after_insertion(struct cls_classifier *, struct cls_subtable *, unsigned int new_priority); -static void update_subtables_after_removal(struct classifier *, +static void update_subtables_after_removal(struct cls_classifier *, struct cls_subtable *, unsigned int del_priority); @@ -51,7 +103,7 @@ static struct cls_rule *find_match_wc(const struct cls_subtable *, struct flow_wildcards *); static struct cls_rule *find_equal(struct cls_subtable *, const struct miniflow *, uint32_t hash); -static struct cls_rule *insert_rule(struct classifier *, +static struct cls_rule *insert_rule(struct cls_classifier *, struct cls_subtable *, struct cls_rule *); /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ @@ -67,7 +119,7 @@ static struct cls_rule *next_rule_in_list(struct cls_rule *); static unsigned int minimask_get_prefix_len(const struct minimask *, const struct mf_field *); -static void trie_init(struct classifier *, int trie_idx, +static void trie_init(struct cls_classifier *, i
[ovs-dev] [PATCH 05/10] lib/flow: Optimize minimask_has_extra() and minimask_is_catchall()
We only need to iterate over the bits masked by the 'b' in minimask_has_extra(), since for zeroes in 'b' there can be no 'extra' wildcards in 'a', as 'b' has already wildcarded all the bits. minimask_is_catchall() can be simplified by the invariant that mask's map never has 1-bits for all-zero values. Signed-off-by: Jarno Rajahalme --- lib/flow.c | 31 ++- lib/flow.h | 12 +++- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index a227f73..22e95d2 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1862,19 +1862,17 @@ minimask_equal(const struct minimask *a, const struct minimask *b) return miniflow_equal(&a->masks, &b->masks); } -/* Returns true if at least one bit is wildcarded in 'a_' but not in 'b_', +/* Returns true if at least one bit matched by 'b' is wildcarded by 'a', * false otherwise. */ bool -minimask_has_extra(const struct minimask *a_, const struct minimask *b_) +minimask_has_extra(const struct minimask *a, const struct minimask *b) { -const struct miniflow *a = &a_->masks; -const struct miniflow *b = &b_->masks; +const uint32_t *p = b->masks.values; uint64_t map; -for (map = a->map | b->map; map; map = zero_rightmost_1bit(map)) { -int ofs = raw_ctz(map); -uint32_t a_u32 = miniflow_get(a, ofs); -uint32_t b_u32 = miniflow_get(b, ofs); +for (map = b->masks.map; map; map = zero_rightmost_1bit(map)) { +uint32_t a_u32 = minimask_get(a, raw_ctz(map)); +uint32_t b_u32 = *p++; if ((a_u32 & b_u32) != b_u32) { return true; @@ -1883,20 +1881,3 @@ minimask_has_extra(const struct minimask *a_, const struct minimask *b_) return false; } - -/* Returns true if 'mask' matches every packet, false if 'mask' fixes any bits - * or fields. */ -bool -minimask_is_catchall(const struct minimask *mask_) -{ -const struct miniflow *mask = &mask_->masks; -const uint32_t *p = mask->values; -uint64_t map; - -for (map = mask->map; map; map = zero_rightmost_1bit(map)) { -if (*p++) { -return false; -} -} -return true; -} diff --git a/lib/flow.h b/lib/flow.h index 0ed4f6a..47b 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -484,7 +484,17 @@ static inline ovs_be64 minimask_get_metadata_mask(const struct minimask *); bool minimask_equal(const struct minimask *a, const struct minimask *b); bool minimask_has_extra(const struct minimask *, const struct minimask *); -bool minimask_is_catchall(const struct minimask *); + +/* Returns true if 'mask' matches every packet, false if 'mask' fixes any bits + * or fields. */ +static inline bool +minimask_is_catchall(const struct minimask *mask) +{ +/* For every 1-bit in mask's map, the corresponding value is non-zero, + * so the only way the mask can not fix any bits or fields is for the + * map the be zero. */ +return mask->masks.map == 0; +} -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 10/10] lib/classifier: Support variable sized miniflows.
Change the classifier to allocate variable sized miniflows and minimasks in cls_match and cls_subtable, respectively. Do not duplicate the mask in cls_rule any more. miniflow_clone and miniflow_move can now take variably sized miniflows as source. The destination is assumed to be regularly sized miniflow. Inlining miniflow and mask values reduces memory indirection and helps reduce cache misses. Signed-off-by: Jarno Rajahalme --- lib/classifier.c | 82 +++--- lib/flow.c | 55 +++- lib/flow.h | 30 ++-- 3 files changed, 134 insertions(+), 33 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 75befad..1198a76 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -40,7 +40,6 @@ struct cls_trie { struct cls_subtable_entry { struct cls_subtable *subtable; -const uint32_t *mask_values; tag_type tag; unsigned int max_priority; }; @@ -72,7 +71,6 @@ struct cls_subtable { struct hmap_node hmap_node; /* Within struct cls_classifier 'subtables' * hmap. */ struct hmap rules; /* Contains "struct cls_rule"s. */ -struct minimask mask; /* Wildcards for fields. */ int n_rules;/* Number of rules, including duplicates. */ unsigned int max_priority; /* Max priority of any rule in the subtable. */ unsigned int max_count; /* Count of max_priority rules. */ @@ -81,6 +79,8 @@ struct cls_subtable { uint8_t index_ofs[CLS_MAX_INDICES]; /* u32 flow segment boundaries. */ struct hindex indices[CLS_MAX_INDICES]; /* Staged lookup indices. */ unsigned int trie_plen[CLS_MAX_TRIES]; /* Trie prefix length in 'mask'. */ +struct minimask mask; /* Wildcards for fields. */ +/* 'mask' must be the last field. */ }; /* Associates a metadata value (that is, a value of the OpenFlow 1.1+ metadata @@ -103,16 +103,21 @@ struct cls_match { unsigned int priority; /* Larger numbers are higher priorities. */ struct cls_partition *partition; struct list list; /* List of identical, lower-priority rules. */ -struct minimatch match; /* Matching rule. */ +struct miniflow flow; /* Matching rule. Mask is in the subtable. */ +/* 'flow' must be the last field. */ }; static struct cls_match * cls_match_alloc(struct cls_rule *rule) { -struct cls_match *cls_match = xmalloc(sizeof *cls_match); +int count = count_1bits(rule->match.flow.map); + +struct cls_match *cls_match += xmalloc(sizeof *cls_match - sizeof cls_match->flow.inline_values + + MINIFLOW_VALUES_SIZE(count)); cls_match->cls_rule = rule; -minimatch_clone(&cls_match->match, &rule->match); +miniflow_clone_inline(&cls_match->flow, &rule->match.flow, count); cls_match->priority = rule->priority; rule->cls_match = cls_match; @@ -875,7 +880,6 @@ static inline void lookahead_subtable(const struct cls_subtable_entry *subtables) { ovs_prefetch_range(subtables->subtable, sizeof *subtables->subtable); -ovs_prefetch_range(subtables->mask_values, 1); } /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'. @@ -969,16 +973,19 @@ classifier_lookup(const struct classifier *cls_, const struct flow *flow, } /* Returns true if 'target' satisifies 'match', that is, if each bit for which - * 'match' specifies a particular value has the correct value in 'target'. */ + * 'match' specifies a particular value has the correct value in 'target'. + * + * 'flow' and 'mask' have the same mask! */ static bool -minimatch_matches_miniflow(const struct minimatch *match, - const struct miniflow *target) +miniflow_and_mask_matches_miniflow(const struct miniflow *flow, + const struct minimask *mask, + const struct miniflow *target) { -const uint32_t *flowp = miniflow_get_u32_values(&match->flow); -const uint32_t *maskp = miniflow_get_u32_values(&match->mask.masks); +const uint32_t *flowp = miniflow_get_u32_values(flow); +const uint32_t *maskp = miniflow_get_u32_values(&mask->masks); uint32_t target_u32; -MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, match->mask.masks.map) { +MINIFLOW_FOR_EACH_IN_MAP(target_u32, target, mask->masks.map) { if ((*flowp++ ^ target_u32) & *maskp++) { return false; } @@ -995,7 +1002,8 @@ find_match_miniflow(const struct cls_subtable *subtable, struct cls_match *rule; HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &subtable->rules) { -if (minimatch_matches_miniflow(&rule->match, flow)) { +if (miniflow_and_mask_matches_miniflow(&rule->flow, &subtable->mask, + flow)) { return rule; } } @@ -1113,7 +1121,7 @@ classif
[ovs-dev] [PATCH 07/10] classifier: Use array for subtables instead of a list.
Using a linear array allows more efficient memory access for lookups. Signed-off-by: Jarno Rajahalme --- lib/classifier.c | 239 +- 1 file changed, 182 insertions(+), 57 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index e48f0a1..0e67b7f 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -38,14 +38,26 @@ struct cls_trie { struct trie_node *root; /* NULL if none. */ }; +struct cls_subtable_entry { +struct cls_subtable *subtable; +uint32_t *mask_values; +tag_type tag; +unsigned int max_priority; +}; + +struct cls_subtable_cache { +struct cls_subtable_entry *subtables; +size_t alloc_size; /* Number of allocated elements. */ +size_t size; /* One past last valid array element. */ +}; + struct cls_classifier { int n_rules;/* Total number of rules. */ uint8_t n_flow_segments; uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use * for staged lookup. */ struct hmap subtables; /* Contains "struct cls_subtable"s. */ -struct list subtables_priority; /* Subtables in descending priority order. - */ +struct cls_subtable_cache subtables_priority; struct hmap partitions; /* Contains "struct cls_partition"s. */ struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. */ unsigned int n_tries; @@ -55,8 +67,6 @@ struct cls_classifier { struct cls_subtable { struct hmap_node hmap_node; /* Within struct cls_classifier 'subtables' * hmap. */ -struct list list_node; /* Within classifier 'subtables_priority' list. - */ struct hmap rules; /* Contains "struct cls_rule"s. */ struct minimask mask; /* Wildcards for fields. */ int n_rules;/* Number of rules, including duplicates. */ @@ -131,6 +141,84 @@ static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, unsigned int nbits); static bool mask_prefix_bits_set(const struct flow_wildcards *, uint8_t be32ofs, unsigned int nbits); + +static void +cls_subtable_cache_init(struct cls_subtable_cache *array) +{ +memset(array, 0, sizeof *array); +} + +static void +cls_subtable_cache_destroy(struct cls_subtable_cache *array) +{ +free(array->subtables); +memset(array, 0, sizeof *array); +} + +/* Array insertion. */ +static void +cls_subtable_cache_push_back(struct cls_subtable_cache *array, + struct cls_subtable_entry a) +{ +if (array->size == array->alloc_size) { +array->alloc_size = (array->alloc_size + 1) * 2; +array->subtables = xrealloc(array->subtables, +array->alloc_size * sizeof a); +} + +array->subtables[array->size++] = a; +} + +/* Only for rearranging entries in the same cache. */ +static inline void +cls_subtable_cache_splice(struct cls_subtable_entry *to, + struct cls_subtable_entry *start, + struct cls_subtable_entry *end) +{ +if (to > end) { +/* Same as splicing entries to (start) from [end, to). */ +struct cls_subtable_entry *temp = to; +to = start; start = end; end = temp; +} +if (to < start) { +while (start != end) { +struct cls_subtable_entry temp = *start; + +memmove(to + 1, to, (start - to) * sizeof *to); +*to = temp; +start++; +} +} /* Else nothing to be done. */ +} + +/* Array removal. */ +static inline void +cls_subtable_cache_remove(struct cls_subtable_cache *array, + struct cls_subtable_entry *elem) +{ +ssize_t size = (&array->subtables[array->size] +- (elem + 1)) * sizeof *elem; +if (size > 0) { +memmove(elem, elem + 1, size); +} +array->size--; +} + +#define CLS_SUBTABLE_CACHE_FOR_EACH(SUBTABLE, ITER, ARRAY) \ +for (ITER = (ARRAY)->subtables; \ + ITER < &(ARRAY)->subtables[(ARRAY)->size] \ + && OVS_LIKELY(SUBTABLE = ITER->subtable); \ + ++ITER) +#define CLS_SUBTABLE_CACHE_FOR_EACH_CONTINUE(SUBTABLE, ITER, ARRAY) \ +for (++ITER;\ + ITER < &(ARRAY)->subtables[(ARRAY)->size] \ + && OVS_LIKELY(SUBTABLE = ITER->subtable); \ + ++ITER) +#define CLS_SUBTABLE_CACHE_FOR_EACH_REVERSE(SUBTABLE, ITER, ARRAY) \ +for (ITER = &(ARRAY)->subtables[(ARRAY)->size]; \ + ITER > (ARRAY)->subtables \ + && OVS_LIKELY(SUBTABLE = (--ITER)->subtable);) + /* flow/miniflow/minimask/minimatch u
[ovs-dev] [PATCH 08/10] lib/classifier: Separate cls_rule internals from the API.
Keep an internal representation of a rule separate from the one embedded into user's structs. This allows for further memory optimization in the classifier. Signed-off-by: Jarno Rajahalme --- lib/classifier.c| 211 +-- lib/classifier.h| 22 ++--- tests/test-classifier.c | 18 ++-- 3 files changed, 149 insertions(+), 102 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 0e67b7f..0517aa7 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -51,6 +51,10 @@ struct cls_subtable_cache { size_t size; /* One past last valid array element. */ }; +enum { +CLS_MAX_INDICES = 3 /* Maximum number of lookup indices per subtable. */ +}; + struct cls_classifier { int n_rules;/* Total number of rules. */ uint8_t n_flow_segments; @@ -90,7 +94,30 @@ struct cls_partition { struct tag_tracker tracker; /* Tracks the bits in 'tags'. */ }; +/* Internal representation of a rule in a "struct cls_subtable". */ +struct cls_match { +struct cls_rule *cls_rule; +struct hindex_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's + * 'indices'. */ +struct hmap_node hmap_node; /* Within struct cls_subtable 'rules'. */ +unsigned int priority; /* Larger numbers are higher priorities. */ +struct cls_partition *partition; +struct list list; /* List of identical, lower-priority rules. */ +struct minimatch match; /* Matching rule. */ +}; +static struct cls_match * +cls_match_alloc(struct cls_rule *rule) +{ +struct cls_match *cls_match = xmalloc(sizeof *cls_match); + +cls_match->cls_rule = rule; +minimatch_clone(&cls_match->match, &rule->match); +cls_match->priority = rule->priority; +rule->cls_match = cls_match; + +return cls_match; +} struct trie_ctx; static struct cls_subtable *find_subtable(const struct cls_classifier *, @@ -107,14 +134,14 @@ static void update_subtables_after_removal(struct cls_classifier *, struct cls_subtable *, unsigned int del_priority); -static struct cls_rule *find_match_wc(const struct cls_subtable *, - const struct flow *, struct trie_ctx *, - unsigned int n_tries, - struct flow_wildcards *); -static struct cls_rule *find_equal(struct cls_subtable *, - const struct miniflow *, uint32_t hash); -static struct cls_rule *insert_rule(struct cls_classifier *, -struct cls_subtable *, struct cls_rule *); +static struct cls_match *find_match_wc(const struct cls_subtable *, + const struct flow *, struct trie_ctx *, + unsigned int n_tries, + struct flow_wildcards *); +static struct cls_match *find_equal(struct cls_subtable *, +const struct miniflow *, uint32_t hash); +static struct cls_match *insert_rule(struct cls_classifier *, + struct cls_subtable *, struct cls_rule *); /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ @@ -124,8 +151,8 @@ static struct cls_rule *insert_rule(struct cls_classifier *, (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true);\ (RULE) = (NEXT)) -static struct cls_rule *next_rule_in_list__(struct cls_rule *); -static struct cls_rule *next_rule_in_list(struct cls_rule *); +static struct cls_match *next_rule_in_list__(struct cls_match *); +static struct cls_match *next_rule_in_list(struct cls_match *); static unsigned int minimask_get_prefix_len(const struct minimask *, const struct mf_field *); @@ -415,6 +442,7 @@ cls_rule_init(struct cls_rule *rule, { minimatch_init(&rule->match, match); rule->priority = priority; +rule->cls_match = NULL; } /* Same as cls_rule_init() for initialization from a "struct minimatch". */ @@ -425,6 +453,7 @@ cls_rule_init_from_minimatch(struct cls_rule *rule, { minimatch_clone(&rule->match, match); rule->priority = priority; +rule->cls_match = NULL; } /* Initializes 'dst' as a copy of 'src'. @@ -435,6 +464,7 @@ cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) { minimatch_clone(&dst->match, &src->match); dst->priority = src->priority; +dst->cls_match = NULL; } /* Initializes 'dst' with the data in 'src', destroying 'src'. @@ -445,6 +475,7 @@ cls_rule_move(struct cls_rule *dst, struct cls_rule *src) { minimatch_move(&dst->match, &src->match); dst->priority = src->priority; +dst->cls_match
[ovs-dev] [PATCH 02/10] lib: Inline functions used in classifier_lookup
Signed-off-by: Jarno Rajahalme --- lib/classifier.c| 181 ++- lib/flow.c | 155 lib/flow.h | 38 -- lib/hindex.c| 13 lib/hindex.h| 13 +++- lib/match.c | 33 - lib/match.h |4 -- tests/test-classifier.c |6 +- 8 files changed, 212 insertions(+), 231 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 31a0e8b..8ab1f9c 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -80,6 +80,185 @@ static void mask_set_prefix_bits(struct flow_wildcards *, uint8_t be32ofs, static bool mask_prefix_bits_set(const struct flow_wildcards *, uint8_t be32ofs, unsigned int nbits); +/* flow/miniflow/minimask/minimatch utilities. + * These are only used by the classifier, so place them here to allow + * for better optimization. */ + +static inline uint64_t +miniflow_get_map_in_range(const struct miniflow *miniflow, + uint8_t start, uint8_t end, unsigned int *offset) +{ +uint64_t map = miniflow->map; +*offset = 0; + +if (start > 0) { +uint64_t msk = (UINT64_C(1) << start) - 1; /* 'start' LSBs set */ +*offset = count_1bits(map & msk); +map &= ~msk; +} +if (end < FLOW_U32S) { +uint64_t msk = (UINT64_C(1) << end) - 1; /* 'end' LSBs set */ +map &= msk; +} +return map; +} + +/* Returns a hash value for the bits of 'flow' where there are 1-bits in + * 'mask', given 'basis'. + * + * The hash values returned by this function are the same as those returned by + * miniflow_hash_in_minimask(), only the form of the arguments differ. */ +static inline uint32_t +flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, + uint32_t basis) +{ +const uint32_t *flow_u32 = (const uint32_t *)flow; +const uint32_t *p = mask->masks.values; +uint32_t hash; +uint64_t map; + +hash = basis; +for (map = mask->masks.map; map; map = zero_rightmost_1bit(map)) { +hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++); +} + +return mhash_finish(hash, (p - mask->masks.values) * 4); +} + +/* Returns a hash value for the bits of 'flow' where there are 1-bits in + * 'mask', given 'basis'. + * + * The hash values returned by this function are the same as those returned by + * flow_hash_in_minimask(), only the form of the arguments differ. */ +static inline uint32_t +miniflow_hash_in_minimask(const struct miniflow *flow, + const struct minimask *mask, uint32_t basis) +{ +const uint32_t *p = mask->masks.values; +uint32_t hash = basis; +uint32_t flow_u32; + +MINIFLOW_FOR_EACH_IN_MAP(flow_u32, flow, mask->masks.map) { +hash = mhash_add(hash, flow_u32 & *p++); +} + +return mhash_finish(hash, (p - mask->masks.values) * 4); +} + +/* Returns a hash value for the bits of range [start, end) in 'flow', + * where there are 1-bits in 'mask', given 'hash'. + * + * The hash values returned by this function are the same as those returned by + * minimatch_hash_range(), only the form of the arguments differ. */ +static inline uint32_t +flow_hash_in_minimask_range(const struct flow *flow, +const struct minimask *mask, +uint8_t start, uint8_t end, uint32_t *basis) +{ +const uint32_t *flow_u32 = (const uint32_t *)flow; +unsigned int offset; +uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, + &offset); +const uint32_t *p = mask->masks.values + offset; +uint32_t hash = *basis; + +for (; map; map = zero_rightmost_1bit(map)) { +hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++); +} + +*basis = hash; /* Allow continuation from the unfinished value. */ +return mhash_finish(hash, (p - mask->masks.values) * 4); +} + +/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */ +static inline void +flow_wildcards_fold_minimask(struct flow_wildcards *wc, + const struct minimask *mask) +{ +flow_union_with_miniflow(&wc->masks, &mask->masks); +} + +/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask + * in range [start, end). */ +static inline void +flow_wildcards_fold_minimask_range(struct flow_wildcards *wc, + const struct minimask *mask, + uint8_t start, uint8_t end) +{ +uint32_t *dst_u32 = (uint32_t *)&wc->masks; +unsigned int offset; +uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, + &offset); +const uint32_t *p = mask->masks.values + offset; + +for (; map; map = zero_rightmost_1bit(map)) { +dst_u32[raw_ctz(map)] |= *p++; +} +} + +/* Re
[ovs-dev] [PATCH 06/10] lib: Add prefetch support (for GCC)
Define OVS_PREFETCH() and OVS_PREFETCH_WRITE() using builtin prefetch for GCC, and ovs_prefetch_range() for prefetching a range of addresses. Signed-off-by: Jarno Rajahalme --- lib/compiler.h |8 lib/util.h | 14 ++ 2 files changed, 22 insertions(+) diff --git a/lib/compiler.h b/lib/compiler.h index 3b59813..981605c 100644 --- a/lib/compiler.h +++ b/lib/compiler.h @@ -206,4 +206,12 @@ static void f(void) #endif +#if __GNUC__ +#define OVS_PREFETCH(addr) __builtin_prefetch((addr)) +#define OVS_PREFETCH_WRITE(addr) __builtin_prefetch((addr), 1) +#else +#define OVS_PREFETCH(addr) +#define OVS_PREFETCH_WRITE(addr) +#endif + #endif /* compiler.h */ diff --git a/lib/util.h b/lib/util.h index aff17a5..5c3668a 100644 --- a/lib/util.h +++ b/lib/util.h @@ -151,6 +151,19 @@ is_pow2(uintmax_t x) #define CACHE_LINE_SIZE 64 BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE)); +#define CACHE_LINE_SIZE 64 /* Correct for most CPUs. */ + +static inline void +ovs_prefetch_range(const void *start, size_t size) +{ +const char *addr = (const char *)start; +size_t ofs; + +for (ofs = 0; ofs < size; ofs += CACHE_LINE_SIZE) { +OVS_PREFETCH(addr + ofs); +} +} + #ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #endif @@ -503,6 +516,7 @@ uint64_t bitwise_get(const void *src, unsigned int src_len, unsigned int src_ofs, unsigned int n_bits); void xsleep(unsigned int seconds); + #ifdef _WIN32 char *ovs_format_message(int error); -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 01/10] lib/flow: Simplify miniflow accessors, add ipv6 support.
Add new macro MINIFLOW_MAP(FIELD) that returns the map covering the given struct flow field. Change the miniflow accessors to macros so that they can take the field name directly. Use these to add ipv6 suppoort to miniflow_hash_5tuple(). Add ipv6 support to flow_hash_5tuple() as well so that these two functions continue to return the same hash value for the corresponding flows. Also, simplify miniflow_get_metadata(). Signed-off-by: Jarno Rajahalme --- lib/flow.c | 79 + lib/flow.h | 99 +++ tests/test-classifier.c |3 +- 3 files changed, 96 insertions(+), 85 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index 093dec8..171393e 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -355,7 +355,7 @@ flow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, } /* Caller is responsible for initializing 'dst->values' with enough storage - * (FLOW_U64S * 8 bytes is enough). */ + * for FLOW_U32S * 4 bytes. */ void miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, struct miniflow *dst) @@ -943,26 +943,40 @@ flow_wildcards_set_reg_mask(struct flow_wildcards *wc, int idx, uint32_t mask) wc->masks.regs[idx] = mask; } -/* Calculates the 5-tuple hash from the given flow. */ +/* Calculates the 5-tuple hash from the given miniflow. + * This returns the same value as flow_hash_5tuple for the corresponding + * flow. */ uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis) { -uint32_t hash = 0; +uint32_t hash = basis; -if (!flow) { -return 0; -} +if (flow) { +ovs_be16 dl_type = MINIFLOW_GET_BE16(flow, dl_type); + +hash = mhash_add(hash, MINIFLOW_GET_U8(flow, nw_proto)); -hash = mhash_add(basis, - miniflow_get_u32(flow, offsetof(struct flow, nw_src))); -hash = mhash_add(hash, - miniflow_get_u32(flow, offsetof(struct flow, nw_dst))); -hash = mhash_add(hash, - miniflow_get_u32(flow, offsetof(struct flow, tp_src))); -hash = mhash_add(hash, - miniflow_get_u8(flow, offsetof(struct flow, nw_proto))); +/* Separate loops for better optimization. */ +if (dl_type == htons(ETH_TYPE_IPV6)) { +uint64_t map = MINIFLOW_MAP(ipv6_src) | MINIFLOW_MAP(ipv6_dst) +| MINIFLOW_MAP(tp_src); /* Covers both ports */ +uint32_t value; -return mhash_finish(hash, 13); +MINIFLOW_FOR_EACH_IN_MAP(value, flow, map) { +hash = mhash_add(hash, value); +} +} else { +uint64_t map = MINIFLOW_MAP(nw_src) | MINIFLOW_MAP(nw_dst) +| MINIFLOW_MAP(tp_src); /* Covers both ports */ +uint32_t value; + +MINIFLOW_FOR_EACH_IN_MAP(value, flow, map) { +hash = mhash_add(hash, value); +} +} +hash = mhash_finish(hash, 13); /* No need to match byte length here. */ +} +return hash; } BUILD_ASSERT_DECL(offsetof(struct flow, tp_src) + 2 @@ -974,19 +988,34 @@ BUILD_ASSERT_DECL(offsetof(struct flow, tp_src) + 2 uint32_t flow_hash_5tuple(const struct flow *flow, uint32_t basis) { -const uint32_t *flow_u32 = (const uint32_t *)flow; -uint32_t hash = 0; +uint32_t hash = basis; -if (!flow) { -return 0; -} +if (flow) { +const uint32_t *flow_u32 = (const uint32_t *)flow; -hash = mhash_add(basis, (OVS_FORCE uint32_t) flow->nw_src); -hash = mhash_add(hash, (OVS_FORCE uint32_t) flow->nw_dst); -hash = mhash_add(hash, flow_u32[offsetof(struct flow, tp_src) / 4]); -hash = mhash_add(hash, flow->nw_proto); +hash = mhash_add(hash, flow->nw_proto); -return mhash_finish(hash, 13); +if (flow->dl_type == htons(ETH_TYPE_IPV6)) { +uint32_t *saddr = (OVS_FORCE uint32_t *) &flow->ipv6_src; +uint32_t *daddr = (OVS_FORCE uint32_t *) &flow->ipv6_dst; + +hash = mhash_add(hash, saddr[0]); +hash = mhash_add(hash, saddr[1]); +hash = mhash_add(hash, saddr[2]); +hash = mhash_add(hash, saddr[3]); +hash = mhash_add(hash, daddr[0]); +hash = mhash_add(hash, daddr[1]); +hash = mhash_add(hash, daddr[2]); +hash = mhash_add(hash, daddr[3]); +} else { +hash = mhash_add(hash, (OVS_FORCE uint32_t) flow->nw_src); +hash = mhash_add(hash, (OVS_FORCE uint32_t) flow->nw_dst); +} +hash = mhash_add(hash, flow_u32[offsetof(struct flow, tp_src) / 4]); + +hash = mhash_finish(hash, 13); /* No need to match byte length here. */ +} +return hash; } /* Hashes 'flow' based on its L2 through L4 protocol information. */ diff --git a/lib/flow.h b/lib/flow.h index 413ecb5..6558756 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -402,
[ovs-dev] [PATCH 09/10] lib/flow: Maintain miniflow offline values explicitly.
This allows use of miniflows that have all of their values inline. Signed-off-by: Jarno Rajahalme --- lib/classifier.c | 36 +++-- lib/dpif-netdev.c | 32 ++- lib/flow.c| 91 ++--- lib/flow.h| 70 + lib/match.c |4 +-- 5 files changed, 127 insertions(+), 106 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 0517aa7..75befad 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -40,7 +40,7 @@ struct cls_trie { struct cls_subtable_entry { struct cls_subtable *subtable; -uint32_t *mask_values; +const uint32_t *mask_values; tag_type tag; unsigned int max_priority; }; @@ -279,8 +279,9 @@ static inline uint32_t flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, uint32_t basis) { +const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); const uint32_t *flow_u32 = (const uint32_t *)flow; -const uint32_t *p = mask->masks.values; +const uint32_t *p = mask_values; uint32_t hash; uint64_t map; @@ -289,7 +290,7 @@ flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask, hash = mhash_add(hash, flow_u32[raw_ctz(map)] & *p++); } -return mhash_finish(hash, (p - mask->masks.values) * 4); +return mhash_finish(hash, (p - mask_values) * 4); } /* Returns a hash value for the bits of 'flow' where there are 1-bits in @@ -301,7 +302,8 @@ static inline uint32_t miniflow_hash_in_minimask(const struct miniflow *flow, const struct minimask *mask, uint32_t basis) { -const uint32_t *p = mask->masks.values; +const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); +const uint32_t *p = mask_values; uint32_t hash = basis; uint32_t flow_u32; @@ -309,7 +311,7 @@ miniflow_hash_in_minimask(const struct miniflow *flow, hash = mhash_add(hash, flow_u32 & *p++); } -return mhash_finish(hash, (p - mask->masks.values) * 4); +return mhash_finish(hash, (p - mask_values) * 4); } /* Returns a hash value for the bits of range [start, end) in 'flow', @@ -322,11 +324,12 @@ flow_hash_in_minimask_range(const struct flow *flow, const struct minimask *mask, uint8_t start, uint8_t end, uint32_t *basis) { +const uint32_t *mask_values = miniflow_get_u32_values(&mask->masks); const uint32_t *flow_u32 = (const uint32_t *)flow; unsigned int offset; uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, &offset); -const uint32_t *p = mask->masks.values + offset; +const uint32_t *p = mask_values + offset; uint32_t hash = *basis; for (; map; map = zero_rightmost_1bit(map)) { @@ -334,7 +337,7 @@ flow_hash_in_minimask_range(const struct flow *flow, } *basis = hash; /* Allow continuation from the unfinished value. */ -return mhash_finish(hash, (p - mask->masks.values) * 4); +return mhash_finish(hash, (p - mask_values) * 4); } /* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask. */ @@ -356,7 +359,7 @@ flow_wildcards_fold_minimask_range(struct flow_wildcards *wc, unsigned int offset; uint64_t map = miniflow_get_map_in_range(&mask->masks, start, end, &offset); -const uint32_t *p = mask->masks.values + offset; +const uint32_t *p = miniflow_get_u32_values(&mask->masks) + offset; for (; map; map = zero_rightmost_1bit(map)) { dst_u32[raw_ctz(map)] |= *p++; @@ -367,7 +370,8 @@ flow_wildcards_fold_minimask_range(struct flow_wildcards *wc, static inline uint32_t miniflow_hash(const struct miniflow *flow, uint32_t basis) { -const uint32_t *p = flow->values; +const uint32_t *values = miniflow_get_u32_values(flow); +const uint32_t *p = values; uint32_t hash = basis; uint64_t hash_map = 0; uint64_t map; @@ -382,7 +386,7 @@ miniflow_hash(const struct miniflow *flow, uint32_t basis) hash = mhash_add(hash, hash_map); hash = mhash_add(hash, hash_map >> 32); -return mhash_finish(hash, p - flow->values); +return mhash_finish(hash, p - values); } /* Returns a hash value for 'mask', given 'basis'. */ @@ -415,8 +419,8 @@ minimatch_hash_range(const struct minimatch *match, uint8_t start, uint8_t end, n = count_1bits(miniflow_get_map_in_range(&match->mask.masks, start, end, &offset)); -q = match->mask.masks.values + offset; -p = match->flow.values + offset; +q = miniflow_get_u32_values(&match->mask.masks) + offset; +p = miniflow_get_u32_values(&match->flow) + offset; for (i = 0; i < n; i++) { hash = mhash_add(hash, p[i] & q[i]); @@ -970,8 +974,8 @@ static bool
[ovs-dev] [PATCH 00/10] classifier: Optimize memory access.
This series: - Inlines critical functions that are only used by the classifier. - Hides classifier internals from the API, adding a 'cls_match' that contains the private parts of 'cls_rule'. - Changes the subtable list to a linear array that enables prefetching. - Always inlines the miniflows and minimasks in the classifier, reducing memory indirection. Jarno Rajahalme (10): lib/flow: Simplify miniflow accessors, add ipv6 support. lib: Inline functions used in classifier_lookup ofproto: Use classifer cursor API to collect vlan usage. lib/classifier: Hide more of the internal data structures. lib/flow: Optimize minimask_has_extra() and minimask_is_catchall() lib: Add prefetch support (for GCC) classifier: Use array for subtables instead of a list. lib/classifier: Separate cls_rule internals from the API. lib/flow: Maintain miniflow offline values explicitly. lib/classifier: Support variable sized miniflows. lib/classifier.c| 825 --- lib/classifier.h| 73 + lib/compiler.h |8 + lib/dpif-netdev.c | 32 +- lib/flow.c | 393 -- lib/flow.h | 233 +++-- lib/hindex.c| 13 - lib/hindex.h| 13 +- lib/match.c | 37 +-- lib/match.h |4 - lib/util.h | 14 + ofproto/ofproto.c | 25 +- tests/test-classifier.c | 33 +- 13 files changed, 1016 insertions(+), 687 deletions(-) -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [recirc datapath V4 1/5] ofproto-dpif: Rule lookup starts from table zero for non-recirc datapath
On Apr 18, 2014, at 2:50 AM, Andy Zhou wrote: > Currently, all packet lookup starts from internal table for possible > matching of post recirculation rules. This is not necessary for > datapath that does not support recirculation. > > This patch adds the ability to steering rule lookup starting table > based on whether datapath supports recirculation. > > Signed-off-by: Andy Zhou ¬ > --- > ofproto/ofproto-dpif.c | 83 +- > 1 file changed, 41 insertions(+), 42 deletions(-) > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > index 3648dd7..1e51ff2 100644 > --- a/ofproto/ofproto-dpif.c > +++ b/ofproto/ofproto-dpif.c > @@ -3171,41 +3171,57 @@ rule_dpif_get_actions(const struct rule_dpif *rule) > return rule_get_actions(&rule->up); > } > > -static uint8_t > -rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow, > -struct flow_wildcards *wc, struct rule_dpif **rule) > +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. > + * If 'wc' is non-null, sets the fields that were relevant as part of > + * the lookup. Returns the table_id where a match or miss occurred. > + * > + * The return value will be zero unless there was a miss and > + * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables > + * where misses occur. */ > +uint8_t > +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, > + struct flow_wildcards *wc, struct rule_dpif **rule) > { > enum rule_dpif_lookup_verdict verdict; > enum ofputil_port_config config = 0; > -uint8_t table_id = TBL_INTERNAL; > +uint8_t table_id; > + > +if (ofproto_dpif_get_enable_recirc(ofproto)) { Would this test be faster as “if (flow->recirc_id)”? > +/* Set metadata to the value of recirc_id to speed up internal > + * rule lookup. */ > +flow->metadata = htonll(flow->recirc_id); Since we currently have only one kind of a match pattern in the internal table, this will actually make the lookup slightly slower. > +table_id = TBL_INTERNAL; > +} else { > +table_id = 0; > +} > > verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, > &table_id, rule); > > switch (verdict) { > -case RULE_DPIF_LOOKUP_VERDICT_MATCH: > -return table_id; > -case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { > -struct ofport_dpif *port; > - > -port = get_ofp_port(ofproto, flow->in_port.ofp_port); > -if (!port) { > -VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, > - flow->in_port.ofp_port); > +case RULE_DPIF_LOOKUP_VERDICT_MATCH: > +return table_id; > +case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { > +struct ofport_dpif *port; > + > +port = get_ofp_port(ofproto, flow->in_port.ofp_port); > +if (!port) { > +VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port > %"PRIu16, > + flow->in_port.ofp_port); > +} > +config = port ? port->up.pp.config : 0; > +break; > } > -config = port ? port->up.pp.config : 0; > -break; > -} > -case RULE_DPIF_LOOKUP_VERDICT_DROP: > -config = OFPUTIL_PC_NO_PACKET_IN; > -break; > -case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: > -if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { > +case RULE_DPIF_LOOKUP_VERDICT_DROP: > config = OFPUTIL_PC_NO_PACKET_IN; > -} > -break; > -default: > -OVS_NOT_REACHED(); > +break; > +case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: > +if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { > +config = OFPUTIL_PC_NO_PACKET_IN; > +} > +break; > +default: > +OVS_NOT_REACHED(); > } > The previous indentation was according to the CodingStyle. It seems to me nothing changed here? > choose_miss_rule(config, ofproto->miss_rule, > @@ -3213,23 +3229,6 @@ rule_dpif_lookup__ (struct ofproto_dpif *ofproto, > const struct flow *flow, > return table_id; > } > > -/* Lookup 'flow' in table 0 of 'ofproto''s classifier. > - * If 'wc' is non-null, sets the fields that were relevant as part of > - * the lookup. Returns the table_id where a match or miss occurred. > - * > - * The return value will be zero unless there was a miss and > - * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables > - * where misses occur. */ > -uint8_t > -rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, > - struct flow_wildcards *wc, struct rule_dpif **rule) > -{ > -/* Set metadata to the value of recirc_id to speed up internal > - * rule lookup. */ > -flow->metadata = htonll(flow->recirc_id); > -return rule_dpif_
Re: [ovs-dev] [recirc datapath V4 1/5] ofproto-dpif: Rule lookup starts from table zero for non-recirc datapath
On Fri, Apr 18, 2014 at 12:59 PM, Jarno Rajahalme wrote: > > On Apr 18, 2014, at 2:50 AM, Andy Zhou wrote: > >> Currently, all packet lookup starts from internal table for possible >> matching of post recirculation rules. This is not necessary for >> datapath that does not support recirculation. >> >> This patch adds the ability to steering rule lookup starting table >> based on whether datapath supports recirculation. >> >> Signed-off-by: Andy Zhou ¬ >> --- >> ofproto/ofproto-dpif.c | 83 >> +- >> 1 file changed, 41 insertions(+), 42 deletions(-) >> >> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c >> index 3648dd7..1e51ff2 100644 >> --- a/ofproto/ofproto-dpif.c >> +++ b/ofproto/ofproto-dpif.c >> @@ -3171,41 +3171,57 @@ rule_dpif_get_actions(const struct rule_dpif *rule) >> return rule_get_actions(&rule->up); >> } >> >> -static uint8_t >> -rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow, >> -struct flow_wildcards *wc, struct rule_dpif **rule) >> +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. >> + * If 'wc' is non-null, sets the fields that were relevant as part of >> + * the lookup. Returns the table_id where a match or miss occurred. >> + * >> + * The return value will be zero unless there was a miss and >> + * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables >> + * where misses occur. */ >> +uint8_t >> +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, >> + struct flow_wildcards *wc, struct rule_dpif **rule) >> { >> enum rule_dpif_lookup_verdict verdict; >> enum ofputil_port_config config = 0; >> -uint8_t table_id = TBL_INTERNAL; >> +uint8_t table_id; >> + >> +if (ofproto_dpif_get_enable_recirc(ofproto)) { > > Would this test be faster as “if (flow->recirc_id)”? Yes. However the intention is to for non-recirc flows to hit the recirc_id=0, actions=resubmit(table 0) rule to indicate recirc_id is significant. the > >> +/* Set metadata to the value of recirc_id to speed up internal >> + * rule lookup. */ >> +flow->metadata = htonll(flow->recirc_id); > > Since we currently have only one kind of a match pattern in the internal > table, this will actually make the lookup slightly slower. This helps if we have multiple bond ports, each one has its own recirc_id. MPLS recirc may also benefit from this. > >> +table_id = TBL_INTERNAL; >> +} else { >> +table_id = 0; >> +} >> >> verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, >> &table_id, rule); >> >> switch (verdict) { >> -case RULE_DPIF_LOOKUP_VERDICT_MATCH: >> -return table_id; >> -case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { >> -struct ofport_dpif *port; >> - >> -port = get_ofp_port(ofproto, flow->in_port.ofp_port); >> -if (!port) { >> -VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, >> - flow->in_port.ofp_port); >> +case RULE_DPIF_LOOKUP_VERDICT_MATCH: >> +return table_id; >> +case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { >> +struct ofport_dpif *port; >> + >> +port = get_ofp_port(ofproto, flow->in_port.ofp_port); >> +if (!port) { >> +VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port >> %"PRIu16, >> + flow->in_port.ofp_port); >> +} >> +config = port ? port->up.pp.config : 0; >> +break; >> } >> -config = port ? port->up.pp.config : 0; >> -break; >> -} >> -case RULE_DPIF_LOOKUP_VERDICT_DROP: >> -config = OFPUTIL_PC_NO_PACKET_IN; >> -break; >> -case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: >> -if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { >> +case RULE_DPIF_LOOKUP_VERDICT_DROP: >> config = OFPUTIL_PC_NO_PACKET_IN; >> -} >> -break; >> -default: >> -OVS_NOT_REACHED(); >> +break; >> +case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: >> +if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { >> +config = OFPUTIL_PC_NO_PACKET_IN; >> +} >> +break; >> +default: >> +OVS_NOT_REACHED(); >> } >> > > The previous indentation was according to the CodingStyle. It seems to me > nothing changed here? I combined the underscored version function. The diffs are showing because the code being moved around. Nothing is changed. > >> choose_miss_rule(config, ofproto->miss_rule, >> @@ -3213,23 +3229,6 @@ rule_dpif_lookup__ (struct ofproto_dpif *ofproto, >> const struct flow *flow, >> return table_id; >> } >> >> -/* Lookup 'flow' in table 0 of 'ofproto''s classifier. >> - * If 'wc' is non-null, sets the fields that were relevant as part of
Re: [ovs-dev] [recirc datapath V4 2/5] odp-util: Always generate key/mask pair in netlink for recirc_id
On Apr 18, 2014, at 2:50 AM, Andy Zhou wrote: ... > @@ -2681,9 +2681,9 @@ unencap: > * capable of being expanded to allow for that much space. */ > void > odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, > - odp_port_t odp_in_port) > + const struct flow *mask, odp_port_t odp_in_port) > { > -odp_flow_key_from_flow__(buf, flow, flow, odp_in_port, SIZE_MAX); > +odp_flow_key_from_flow__(buf, flow, flow, mask, odp_in_port, SIZE_MAX); > } > This looks odd, how about passing in both flow and mask, and a boolean “is_mask” instead of “flow, flow, mask” or “mask, flow, mask” below? > /* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to > @@ -2699,8 +2699,8 @@ odp_flow_key_from_mask(struct ofpbuf *buf, const struct > flow *mask, >const struct flow *flow, uint32_t odp_in_port_mask, >size_t max_mpls_depth) > { > -odp_flow_key_from_flow__(buf, mask, flow, u32_to_odp(odp_in_port_mask), > - max_mpls_depth); > +odp_flow_key_from_flow__(buf, mask, flow, mask, > + u32_to_odp(odp_in_port_mask), max_mpls_depth); > } > Otherwise, Acked-by: Jarno Rajahalme Jarno ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH V9 1/3] dpif-linux: Implement the API functions to allow multiple handler threads read upcall.
Acked-by: Ethan Jackson On Thu, Apr 17, 2014 at 5:41 PM, Alex Wang wrote: > Signed-off-by: Alex Wang > > --- > V8 -> v9: > - move the fat-lock acquisition inside the if-statement in dpif_linux_run(). > - fix an error: dpif_linux_recv_purge() should acquire wrlock. > > V7 -> V8: > - rebase. > > V6 -> V7: > - rebase. > - set n_handlers to 0 in destroy_all_channels(). > - fix the indexing error in dpif_linux_recv_purge(). > > V5 -> V6: > - move the 'struct dpif_epoll' in 'struct dpif_handler' > - fix typos based on review. > - refine refresh_channels() comments. > - rename 'n_pids' of 'struct dpif_linux_vport' to 'n_upcall_pids'. > > V4 -> V5: > - rebase. > > V3 -> V4: > - add check of handler_id range. > > V2 -> V3: > - use OVS_DP_ATTR_USER_FEATURES to inform datapath about the type of > OVS_VPORT_ATTR_UPCALL_PID attribute. > - close epoll fd when destroying all channels. > > PATCH -> V2: > - rebase. > > major changes since RFC: > - free the 'upcall_pids' pointer in dpif_linux_refresh_channels(). > - for cache access efficiency, in 'recv()' index the handlers > array first and then port channels array. > --- > lib/dpif-linux.c | 549 > +++--- > lib/dpif-linux.h |3 +- > 2 files changed, 361 insertions(+), 191 deletions(-) > > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > index e8efdac..c119c12 100644 > --- a/lib/dpif-linux.c > +++ b/lib/dpif-linux.c > @@ -36,6 +36,7 @@ > #include "dpif-provider.h" > #include "dynamic-string.h" > #include "flow.h" > +#include "fat-rwlock.h" > #include "netdev.h" > #include "netdev-linux.h" > #include "netdev-vport.h" > @@ -132,7 +133,13 @@ struct dpif_channel { > long long int last_poll;/* Last time this channel was polled. */ > }; > > -static void report_loss(struct dpif *, struct dpif_channel *); > +struct dpif_handler { > +struct dpif_channel *channels;/* Array of channels for each handler. */ > +struct epoll_event *epoll_events; > +int epoll_fd; /* epoll fd that includes channel socks. */ > +int n_events; /* Num events returned by epoll_wait(). */ > +int event_offset; /* Offset into 'epoll_events'. */ > +}; > > /* Datapath interface for the openvswitch Linux kernel module. */ > struct dpif_linux { > @@ -140,19 +147,20 @@ struct dpif_linux { > int dp_ifindex; > > /* Upcall messages. */ > -struct ovs_mutex upcall_lock; > -int uc_array_size; /* Size of 'channels' and 'epoll_events'. */ > -struct dpif_channel *channels; > -struct epoll_event *epoll_events; > -int epoll_fd; /* epoll fd that includes channel socks. */ > -int n_events; /* Num events returned by epoll_wait(). */ > -int event_offset; /* Offset into 'epoll_events'. */ > +struct fat_rwlock upcall_lock; > +struct dpif_handler *handlers; > +uint32_t n_handlers; /* Num of upcall handlers. */ > +int uc_array_size; /* Size of 'handler->channels' and */ > + /* 'handler->epoll_events'. */ > > /* Change notification. */ > struct nl_sock *port_notifier; /* vport multicast group subscriber. */ > bool refresh_channels; > }; > > +static void report_loss(struct dpif *, struct dpif_channel *, uint32_t > ch_idx, > +uint32_t handler_id); > + > static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(, 5); > > /* Generic Netlink family numbers for OVS. > @@ -172,8 +180,7 @@ static int dpif_linux_init(void); > static int open_dpif(const struct dpif_linux_dp *, struct dpif **); > static uint32_t dpif_linux_port_get_pid(const struct dpif *, > odp_port_t port_no, uint32_t hash); > -static int dpif_linux_refresh_channels(struct dpif *); > - > +static int dpif_linux_refresh_channels(struct dpif *, uint32_t n_handlers); > static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, > struct ofpbuf *); > static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *, > @@ -238,6 +245,7 @@ dpif_linux_open(const struct dpif_class *class > OVS_UNUSED, const char *name, > } > dp_request.name = name; > dp_request.user_features |= OVS_DP_F_UNALIGNED; > +dp_request.user_features |= OVS_DP_F_VPORT_PIDS; > error = dpif_linux_dp_transact(&dp_request, &dp, &buf); > if (error) { > return error; > @@ -255,8 +263,7 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif > **dpifp) > > dpif = xzalloc(sizeof *dpif); > dpif->port_notifier = NULL; > -ovs_mutex_init(&dpif->upcall_lock); > -dpif->epoll_fd = -1; > +fat_rwlock_init(&dpif->upcall_lock); > > dpif_init(&dpif->dpif, &dpif_linux_class, dp->name, >dp->dp_ifindex, dp->dp_ifindex); > @@ -267,64 +274,110 @@ open_dpif(const struct dpif_linux_dp *dp, struct dpif > **dpifp)
Re: [ovs-dev] [PATCH V9 2/3] dpif-linux: Pass 'struct dpif_linux *' to internal static functions.
Acked-by: Ethan Jackson On Thu, Apr 17, 2014 at 5:41 PM, Alex Wang wrote: > This commit reformats the dpif-linux module so that all internal > static functions take 'struct dpif_linux *' as input argument. > This will allow the adding of thread-safety annotations. > > Signed-off-by: Alex Wang > > --- > V9: > - necessary changes for adding thread-safety annotations. > --- > lib/dpif-linux.c | 113 > +++--- > 1 file changed, 56 insertions(+), 57 deletions(-) > > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > index c119c12..b70ddef 100644 > --- a/lib/dpif-linux.c > +++ b/lib/dpif-linux.c > @@ -158,8 +158,8 @@ struct dpif_linux { > bool refresh_channels; > }; > > -static void report_loss(struct dpif *, struct dpif_channel *, uint32_t > ch_idx, > -uint32_t handler_id); > +static void report_loss(struct dpif_linux *, struct dpif_channel *, > +uint32_t ch_idx, uint32_t handler_id); > > static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(, 5); > > @@ -180,7 +180,7 @@ static int dpif_linux_init(void); > static int open_dpif(const struct dpif_linux_dp *, struct dpif **); > static uint32_t dpif_linux_port_get_pid(const struct dpif *, > odp_port_t port_no, uint32_t hash); > -static int dpif_linux_refresh_channels(struct dpif *, uint32_t n_handlers); > +static int dpif_linux_refresh_channels(struct dpif_linux *, uint32_t > n_handlers); > static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, > struct ofpbuf *); > static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *, > @@ -538,7 +538,7 @@ dpif_linux_run(struct dpif *dpif_) > if (dpif->refresh_channels) { > dpif->refresh_channels = false; > fat_rwlock_wrlock(&dpif->upcall_lock); > -dpif_linux_refresh_channels(dpif_, dpif->n_handlers); > +dpif_linux_refresh_channels(dpif, dpif->n_handlers); > fat_rwlock_unlock(&dpif->upcall_lock); > } > } > @@ -623,10 +623,9 @@ netdev_to_ovs_vport_type(const struct netdev *netdev) > } > > static int > -dpif_linux_port_add__(struct dpif *dpif_, struct netdev *netdev, > +dpif_linux_port_add__(struct dpif_linux *dpif, struct netdev *netdev, >odp_port_t *port_nop) > { > -struct dpif_linux *dpif = dpif_linux_cast(dpif_); > const struct netdev_tunnel_config *tnl_cfg; > char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; > const char *name = netdev_vport_get_dpif_port(netdev, > @@ -654,7 +653,7 @@ dpif_linux_port_add__(struct dpif *dpif_, struct netdev > *netdev, > if (request.type == OVS_VPORT_TYPE_UNSPEC) { > VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has " > "unsupported type `%s'", > - dpif_name(dpif_), name, type); > + dpif_name(&dpif->dpif), name, type); > vport_del_socksp(socksp, dpif->n_handlers); > return EINVAL; > } > @@ -684,7 +683,7 @@ dpif_linux_port_add__(struct dpif *dpif_, struct netdev > *netdev, > } else { > if (error == EBUSY && *port_nop != ODPP_NONE) { > VLOG_INFO("%s: requested port %"PRIu32" is in use", > - dpif_name(dpif_), *port_nop); > + dpif_name(&dpif->dpif), *port_nop); > } > > vport_del_socksp(socksp, dpif->n_handlers); > @@ -695,7 +694,7 @@ dpif_linux_port_add__(struct dpif *dpif_, struct netdev > *netdev, > error = vport_add_channels(dpif, *port_nop, socksp); > if (error) { > VLOG_INFO("%s: could not add channel for port %s", > - dpif_name(dpif_), name); > + dpif_name(&dpif->dpif), name); > > /* Delete the port. */ > dpif_linux_vport_init(&request); > @@ -724,16 +723,15 @@ dpif_linux_port_add(struct dpif *dpif_, struct netdev > *netdev, > int error; > > fat_rwlock_wrlock(&dpif->upcall_lock); > -error = dpif_linux_port_add__(dpif_, netdev, port_nop); > +error = dpif_linux_port_add__(dpif, netdev, port_nop); > fat_rwlock_unlock(&dpif->upcall_lock); > > return error; > } > > static int > -dpif_linux_port_del__(struct dpif *dpif_, odp_port_t port_no) > +dpif_linux_port_del__(struct dpif_linux *dpif, odp_port_t port_no) > { > -struct dpif_linux *dpif = dpif_linux_cast(dpif_); > struct dpif_linux_vport vport; > int error; > > @@ -755,14 +753,14 @@ dpif_linux_port_del(struct dpif *dpif_, odp_port_t > port_no) > int error; > > fat_rwlock_wrlock(&dpif->upcall_lock); > -error = dpif_linux_port_del__(dpif_, port_no); > +error = dpif_linux_port_del__(dpif, port_no); > fat_rwlock_unlock(&dpif->upcall_lock); > > return error; > } > > static int > -dpif_linux_port_query__(const struct dpif *dpif, odp_port_t port_no, > +dp
Re: [ovs-dev] [PATCH V9 3/3] dpif-linux: Add thread-safety annotations.
Acked-by: Ethan Jackson On Thu, Apr 17, 2014 at 5:41 PM, Alex Wang wrote: > Signed-off-by: Alex Wang > > --- > V9: > - add thread-safety annotations. > --- > lib/dpif-linux.c | 60 > +- > 1 file changed, 46 insertions(+), 14 deletions(-) > > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > index b70ddef..a575b78 100644 > --- a/lib/dpif-linux.c > +++ b/lib/dpif-linux.c > @@ -180,7 +180,8 @@ static int dpif_linux_init(void); > static int open_dpif(const struct dpif_linux_dp *, struct dpif **); > static uint32_t dpif_linux_port_get_pid(const struct dpif *, > odp_port_t port_no, uint32_t hash); > -static int dpif_linux_refresh_channels(struct dpif_linux *, uint32_t > n_handlers); > +static int dpif_linux_refresh_channels(struct dpif_linux *, > + uint32_t n_handlers); > static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, > struct ofpbuf *); > static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *, > @@ -459,7 +460,7 @@ vport_del_channels(struct dpif_linux *dpif, odp_port_t > port_no) > } > > static void > -destroy_all_channels(struct dpif_linux *dpif) > +destroy_all_channels(struct dpif_linux *dpif) > OVS_REQ_WRLOCK(dpif->upcall_lock) > { > unsigned int i; > > @@ -625,6 +626,7 @@ netdev_to_ovs_vport_type(const struct netdev *netdev) > static int > dpif_linux_port_add__(struct dpif_linux *dpif, struct netdev *netdev, >odp_port_t *port_nop) > +OVS_REQ_WRLOCK(dpif->upcall_lock) > { > const struct netdev_tunnel_config *tnl_cfg; > char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; > @@ -731,6 +733,7 @@ dpif_linux_port_add(struct dpif *dpif_, struct netdev > *netdev, > > static int > dpif_linux_port_del__(struct dpif_linux *dpif, odp_port_t port_no) > +OVS_REQ_WRLOCK(dpif->upcall_lock) > { > struct dpif_linux_vport vport; > int error; > @@ -809,14 +812,13 @@ dpif_linux_port_query_by_name(const struct dpif *dpif_, > const char *devname, > } > > static uint32_t > -dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no, > -uint32_t hash) > +dpif_linux_port_get_pid__(const struct dpif_linux *dpif, odp_port_t port_no, > + uint32_t hash) > +OVS_REQ_RDLOCK(dpif->upcall_lock) > { > -struct dpif_linux *dpif = dpif_linux_cast(dpif_); > uint32_t port_idx = odp_to_u32(port_no); > uint32_t pid = 0; > > -fat_rwlock_rdlock(&dpif->upcall_lock); > if (dpif->handlers) { > /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s > * channel, since it is not heavily loaded. */ > @@ -825,11 +827,24 @@ dpif_linux_port_get_pid(const struct dpif *dpif_, > odp_port_t port_no, > > pid = nl_sock_pid(h->channels[idx].sock); > } > -fat_rwlock_unlock(&dpif->upcall_lock); > > return pid; > } > > +static uint32_t > +dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no, > +uint32_t hash) > +{ > +const struct dpif_linux *dpif = dpif_linux_cast(dpif_); > +uint32_t ret; > + > +fat_rwlock_rdlock(&dpif->upcall_lock); > +ret = dpif_linux_port_get_pid__(dpif, port_no, hash); > +fat_rwlock_unlock(&dpif->upcall_lock); > + > +return ret; > +} > + > static int > dpif_linux_flow_flush(struct dpif *dpif_) > { > @@ -1461,6 +1476,7 @@ dpif_linux_operate(struct dpif *dpif_, struct dpif_op > **ops, size_t n_ops) > * backing kernel vports. */ > static int > dpif_linux_refresh_channels(struct dpif_linux *dpif, uint32_t n_handlers) > +OVS_REQ_WRLOCK(dpif->upcall_lock) > { > unsigned long int *keep_channels; > struct dpif_linux_vport vport; > @@ -1586,6 +1602,7 @@ dpif_linux_refresh_channels(struct dpif_linux *dpif, > uint32_t n_handlers) > > static int > dpif_linux_recv_set__(struct dpif_linux *dpif, bool enable) > +OVS_REQ_WRLOCK(dpif->upcall_lock) > { > if ((dpif->handlers != NULL) == enable) { > return 0; > @@ -1702,6 +1719,7 @@ parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall > *upcall, > static int > dpif_linux_recv__(struct dpif_linux *dpif, uint32_t handler_id, >struct dpif_upcall *upcall, struct ofpbuf *buf) > +OVS_REQ_RDLOCK(dpif->upcall_lock) > { > struct dpif_handler *handler; > int read_tries = 0; > @@ -1787,25 +1805,30 @@ dpif_linux_recv(struct dpif *dpif_, uint32_t > handler_id, > } > > static void > -dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id) > +dpif_linux_recv_wait__(struct dpif_linux *dpif, uint32_t handler_id) > +OVS_REQ_RDLOCK(dpif->upcall_lock) > { > -struct dpif_linux *dpif = dpif_linux_cast(dpif_); > - > -fat_rwlock_rdlock(&dpif->upcall_lock); > if (dpif->handlers && handler_id < dpif->n_handlers) { > struct dpif_handler *handler = &d
Re: [ovs-dev] [PATCH V9 3/3] dpif-linux: Add thread-safety annotations.
Thx, applied three patches, On Fri, Apr 18, 2014 at 1:55 PM, Ethan Jackson wrote: > Acked-by: Ethan Jackson > > > On Thu, Apr 17, 2014 at 5:41 PM, Alex Wang wrote: > > Signed-off-by: Alex Wang > > > > --- > > V9: > > - add thread-safety annotations. > > --- > > lib/dpif-linux.c | 60 > +- > > 1 file changed, 46 insertions(+), 14 deletions(-) > > > > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > > index b70ddef..a575b78 100644 > > --- a/lib/dpif-linux.c > > +++ b/lib/dpif-linux.c > > @@ -180,7 +180,8 @@ static int dpif_linux_init(void); > > static int open_dpif(const struct dpif_linux_dp *, struct dpif **); > > static uint32_t dpif_linux_port_get_pid(const struct dpif *, > > odp_port_t port_no, uint32_t > hash); > > -static int dpif_linux_refresh_channels(struct dpif_linux *, uint32_t > n_handlers); > > +static int dpif_linux_refresh_channels(struct dpif_linux *, > > + uint32_t n_handlers); > > static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *, > > struct ofpbuf *); > > static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *, > > @@ -459,7 +460,7 @@ vport_del_channels(struct dpif_linux *dpif, > odp_port_t port_no) > > } > > > > static void > > -destroy_all_channels(struct dpif_linux *dpif) > > +destroy_all_channels(struct dpif_linux *dpif) > OVS_REQ_WRLOCK(dpif->upcall_lock) > > { > > unsigned int i; > > > > @@ -625,6 +626,7 @@ netdev_to_ovs_vport_type(const struct netdev *netdev) > > static int > > dpif_linux_port_add__(struct dpif_linux *dpif, struct netdev *netdev, > >odp_port_t *port_nop) > > +OVS_REQ_WRLOCK(dpif->upcall_lock) > > { > > const struct netdev_tunnel_config *tnl_cfg; > > char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; > > @@ -731,6 +733,7 @@ dpif_linux_port_add(struct dpif *dpif_, struct > netdev *netdev, > > > > static int > > dpif_linux_port_del__(struct dpif_linux *dpif, odp_port_t port_no) > > +OVS_REQ_WRLOCK(dpif->upcall_lock) > > { > > struct dpif_linux_vport vport; > > int error; > > @@ -809,14 +812,13 @@ dpif_linux_port_query_by_name(const struct dpif > *dpif_, const char *devname, > > } > > > > static uint32_t > > -dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no, > > -uint32_t hash) > > +dpif_linux_port_get_pid__(const struct dpif_linux *dpif, odp_port_t > port_no, > > + uint32_t hash) > > +OVS_REQ_RDLOCK(dpif->upcall_lock) > > { > > -struct dpif_linux *dpif = dpif_linux_cast(dpif_); > > uint32_t port_idx = odp_to_u32(port_no); > > uint32_t pid = 0; > > > > -fat_rwlock_rdlock(&dpif->upcall_lock); > > if (dpif->handlers) { > > /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s > > * channel, since it is not heavily loaded. */ > > @@ -825,11 +827,24 @@ dpif_linux_port_get_pid(const struct dpif *dpif_, > odp_port_t port_no, > > > > pid = nl_sock_pid(h->channels[idx].sock); > > } > > -fat_rwlock_unlock(&dpif->upcall_lock); > > > > return pid; > > } > > > > +static uint32_t > > +dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no, > > +uint32_t hash) > > +{ > > +const struct dpif_linux *dpif = dpif_linux_cast(dpif_); > > +uint32_t ret; > > + > > +fat_rwlock_rdlock(&dpif->upcall_lock); > > +ret = dpif_linux_port_get_pid__(dpif, port_no, hash); > > +fat_rwlock_unlock(&dpif->upcall_lock); > > + > > +return ret; > > +} > > + > > static int > > dpif_linux_flow_flush(struct dpif *dpif_) > > { > > @@ -1461,6 +1476,7 @@ dpif_linux_operate(struct dpif *dpif_, struct > dpif_op **ops, size_t n_ops) > > * backing kernel vports. */ > > static int > > dpif_linux_refresh_channels(struct dpif_linux *dpif, uint32_t > n_handlers) > > +OVS_REQ_WRLOCK(dpif->upcall_lock) > > { > > unsigned long int *keep_channels; > > struct dpif_linux_vport vport; > > @@ -1586,6 +1602,7 @@ dpif_linux_refresh_channels(struct dpif_linux > *dpif, uint32_t n_handlers) > > > > static int > > dpif_linux_recv_set__(struct dpif_linux *dpif, bool enable) > > +OVS_REQ_WRLOCK(dpif->upcall_lock) > > { > > if ((dpif->handlers != NULL) == enable) { > > return 0; > > @@ -1702,6 +1719,7 @@ parse_odp_packet(struct ofpbuf *buf, struct > dpif_upcall *upcall, > > static int > > dpif_linux_recv__(struct dpif_linux *dpif, uint32_t handler_id, > >struct dpif_upcall *upcall, struct ofpbuf *buf) > > +OVS_REQ_RDLOCK(dpif->upcall_lock) > > { > > struct dpif_handler *handler; > > int read_tries = 0; > > @@ -1787,25 +1805,30 @@ dpif_linux_recv(struct dpif *dpif_, uint32_t > handler_id, > > } > > > > static void > > -dpif_linux_recv_wait(struct dpif *dpif_, uint32_t handler_id) >
Re: [ovs-dev] [recirc datapath V4 1/5] ofproto-dpif: Rule lookup starts from table zero for non-recirc datapath
> On Apr 18, 2014, at 1:09 PM, Andy Zhou wrote: > >> On Fri, Apr 18, 2014 at 12:59 PM, Jarno Rajahalme >> wrote: >> >>> On Apr 18, 2014, at 2:50 AM, Andy Zhou wrote: >>> >>> Currently, all packet lookup starts from internal table for possible >>> matching of post recirculation rules. This is not necessary for >>> datapath that does not support recirculation. >>> >>> This patch adds the ability to steering rule lookup starting table >>> based on whether datapath supports recirculation. >>> >>> Signed-off-by: Andy Zhou ¬ >>> --- >>> ofproto/ofproto-dpif.c | 83 >>> +- >>> 1 file changed, 41 insertions(+), 42 deletions(-) >>> >>> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c >>> index 3648dd7..1e51ff2 100644 >>> --- a/ofproto/ofproto-dpif.c >>> +++ b/ofproto/ofproto-dpif.c >>> @@ -3171,41 +3171,57 @@ rule_dpif_get_actions(const struct rule_dpif *rule) >>>return rule_get_actions(&rule->up); >>> } >>> >>> -static uint8_t >>> -rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow, >>> -struct flow_wildcards *wc, struct rule_dpif **rule) >>> +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. >>> + * If 'wc' is non-null, sets the fields that were relevant as part of >>> + * the lookup. Returns the table_id where a match or miss occurred. >>> + * >>> + * The return value will be zero unless there was a miss and >>> + * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables >>> + * where misses occur. */ >>> +uint8_t >>> +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, >>> + struct flow_wildcards *wc, struct rule_dpif **rule) >>> { >>>enum rule_dpif_lookup_verdict verdict; >>>enum ofputil_port_config config = 0; >>> -uint8_t table_id = TBL_INTERNAL; >>> +uint8_t table_id; >>> + >>> +if (ofproto_dpif_get_enable_recirc(ofproto)) { >> >> Would this test be faster as “if (flow->recirc_id)”? > Yes. However the intention is to for non-recirc flows to hit the > recirc_id=0, actions=resubmit(table 0) rule to indicate recirc_id is > significant. > the You could also mark the mask bits manually, and save the cost of the extra classifier lookup. >> >>> +/* Set metadata to the value of recirc_id to speed up internal >>> + * rule lookup. */ >>> +flow->metadata = htonll(flow->recirc_id); >> >> Since we currently have only one kind of a match pattern in the internal >> table, this will actually make the lookup slightly slower. > This helps if we have multiple bond ports, each one has its own > recirc_id. MPLS recirc may also benefit from this. They still have the same mask, so partitioning has no benefit. >> >>> +table_id = TBL_INTERNAL; >>> +} else { >>> +table_id = 0; >>> +} >>> >>>verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, >>> &table_id, rule); >>> >>>switch (verdict) { >>> -case RULE_DPIF_LOOKUP_VERDICT_MATCH: >>> -return table_id; >>> -case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { >>> -struct ofport_dpif *port; >>> - >>> -port = get_ofp_port(ofproto, flow->in_port.ofp_port); >>> -if (!port) { >>> -VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, >>> - flow->in_port.ofp_port); >>> +case RULE_DPIF_LOOKUP_VERDICT_MATCH: >>> +return table_id; >>> +case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { >>> +struct ofport_dpif *port; >>> + >>> +port = get_ofp_port(ofproto, flow->in_port.ofp_port); >>> +if (!port) { >>> +VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port >>> %"PRIu16, >>> + flow->in_port.ofp_port); >>> +} >>> +config = port ? port->up.pp.config : 0; >>> +break; >>>} >>> -config = port ? port->up.pp.config : 0; >>> -break; >>> -} >>> -case RULE_DPIF_LOOKUP_VERDICT_DROP: >>> -config = OFPUTIL_PC_NO_PACKET_IN; >>> -break; >>> -case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: >>> -if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { >>> +case RULE_DPIF_LOOKUP_VERDICT_DROP: >>>config = OFPUTIL_PC_NO_PACKET_IN; >>> -} >>> -break; >>> -default: >>> -OVS_NOT_REACHED(); >>> +break; >>> +case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: >>> +if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { >>> +config = OFPUTIL_PC_NO_PACKET_IN; >>> +} >>> +break; >>> +default: >>> +OVS_NOT_REACHED(); >>>} >> >> The previous indentation was according to the CodingStyle. It seems to me >> nothing changed here? > I combined the underscored version function. The diffs are showing > because the code being move
[ovs-dev] [PATCH RFC V9] ofproto-dpif-upcall: Remove the dispatcher thread.
With the foundation laid in previous commits, this commit removes the 'dispatcher' thread by allowing 'handler' threads to read upcalls directly from dpif. This commit significantly simplifies the flow miss handling code and brings slight improvement to flow setup rate. Signed-off-by: Alex Wang --- NOTE: - somehow this patch bring back the race in unittest. few tests failed due to the "WARN|dummy@ovs-dummy: failed to flow_get (No such file or directory)" "WARN|dummy@ovs-dummy: failed to flow_del (No such file or directory)" So, I'm looking for high level review while keep figuring out the race. --- V8 -> V9: - remove un-comment like comment. - move the code of destroy_upcall() to calling line. - add a new function read_upcalls() for reading and classifying upcalls from dpif. V7 -> V8: - rebase. V6 -> V7: - rebase. V5 -> V6: - remove the input argument 'free_upcall' in upcall-destroy(). - destroy all upcalls at the end of handle_upcalls(). V4 -> V5: - rebase. V3 -> V4: - rebase. V2 -> V3: - rebase. PATCH -> V2: - rebase. --- ofproto/ofproto-dpif-upcall.c | 372 + ofproto/ofproto-dpif-xlate.c |6 +- 2 files changed, 123 insertions(+), 255 deletions(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 0d7dd8e..987f1ad 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -45,26 +45,13 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall); -COVERAGE_DEFINE(upcall_queue_overflow); - -/* A thread that processes each upcall handed to it by the dispatcher thread, - * forwards the upcall's packet, and possibly sets up a kernel flow as a - * cache. */ +/* A thread that reads upcalls from dpif, forwards each upcall's packet, + * and possibly sets up a kernel flow as a cache. */ struct handler { struct udpif *udpif; /* Parent udpif. */ pthread_t thread; /* Thread ID. */ char *name;/* Thread name. */ - -struct ovs_mutex mutex;/* Mutex guarding the following. */ - -/* Atomic queue of unprocessed upcalls. */ -struct list upcalls OVS_GUARDED; -size_t n_upcalls OVS_GUARDED; - -bool need_signal; /* Only changed by the dispatcher. */ - -pthread_cond_t wake_cond; /* Wakes 'thread' while holding - 'mutex'. */ +uint32_t handler_id; /* Handler id. */ }; /* A thread that processes each kernel flow handed to it by the flow_dumper @@ -87,14 +74,24 @@ struct revalidator { /* An upcall handler for ofproto_dpif. * - * udpif has two logically separate pieces: + * udpif keeps records of two kind of logically separate units: + * + * upcall handling + * --- + * + *- An array of 'struct handler's for upcall handling and flow + * installation. * - *- A "dispatcher" thread that reads upcalls from the kernel and dispatches - * them to one of several "handler" threads (see struct handler). + * flow revalidation + * - * - *- A "flow_dumper" thread that reads the kernel flow table and dispatches + *- An array of 'struct revalidator's for flow revalidation and + * stats collection. + * + *- "flow_dumper" thread that reads the kernel flow table and dispatches * flows to one of several "revalidator" threads (see struct - * revalidator). */ + * revalidator). + * */ struct udpif { struct list list_node; /* In all_udpifs list. */ @@ -103,7 +100,6 @@ struct udpif { uint32_t secret; /* Random seed for upcall hash. */ -pthread_t dispatcher; /* Dispatcher thread ID. */ pthread_t flow_dumper; /* Flow dumper thread ID. */ struct handler *handlers; /* Upcall handlers. */ @@ -143,7 +139,7 @@ enum upcall_type { }; struct upcall { -struct list list_node; /* For queuing upcalls. */ +bool is_valid; /* If the upcall can be used. */ struct flow_miss *flow_miss;/* This upcall's flow_miss. */ /* Raw upcall plus data for keeping track of the memory backing it. */ @@ -219,15 +215,16 @@ struct flow_miss { bool put; }; -static void upcall_destroy(struct upcall *); - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); static struct list all_udpifs = LIST_INITIALIZER(&all_udpifs); -static void recv_upcalls(struct udpif *); -static void handle_upcalls(struct handler *handler, struct list *upcalls); +static size_t read_upcalls(struct handler *, + struct upcall upcalls[FLOW_MISS_MAX_BATCH], + struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH], + struct hmap *); +static void handle_upcalls(struct handler *, struct hmap *, struct upcall *, + size_t n_upcalls); static void *udpif_flow_dumper(void *); -st
Re: [ovs-dev] [recirc datapath V4 2/5] odp-util: Always generate key/mask pair in netlink for recirc_id
Good point. I agree it looks odd. How about the following incremental: diff --git a/lib/odp-util.c b/lib/odp-util.c index 9d49198..8e95c9e 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2481,21 +2481,17 @@ ovs_to_odp_frag_mask(uint8_t nw_frag_mask) } static void -odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, - const struct flow *flow, const struct flow *mask, - odp_port_t odp_in_port, size_t max_mpls_depth) +odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, + const struct flow *mask, odp_port_t odp_in_port, + size_t max_mpls_depth, bool export_mask) { -bool is_mask; struct ovs_key_ethernet *eth_key; size_t encap; - -/* We assume that if 'data' and 'flow' are not the same, we should - * treat 'data' as a mask. */ -is_mask = (data != flow); +const struct flow *data = export_mask ? mask : flow; nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); -if (flow->tunnel.ip_dst || is_mask) { +if (flow->tunnel.ip_dst || export_mask) { tun_key_to_attr(buf, &data->tunnel); } @@ -2511,7 +2507,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, /* Add an ingress port attribute if this is a mask or 'odp_in_port' * is not the magical value "ODPP_NONE". */ -if (is_mask || odp_in_port != ODPP_NONE) { +if (export_mask || odp_in_port != ODPP_NONE) { nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } @@ -2521,7 +2517,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, memcpy(eth_key->eth_dst, data->dl_dst, ETH_ADDR_LEN); if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { -if (is_mask) { +if (export_mask) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX); } else { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_TYPE_VLAN)); @@ -2547,7 +2543,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, * 0x Any non-Ethernet II frame (except valid *802.3 SNAP packet with valid eth_type). */ -if (is_mask) { +if (export_mask) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX); } goto unencap; @@ -2565,7 +2561,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, ipv4_key->ipv4_proto = data->nw_proto; ipv4_key->ipv4_tos = data->nw_tos; ipv4_key->ipv4_ttl = data->nw_ttl; -ipv4_key->ipv4_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag) +ipv4_key->ipv4_frag = export_mask ? ovs_to_odp_frag_mask(data->nw_frag) : ovs_to_odp_frag(data->nw_frag); } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { struct ovs_key_ipv6 *ipv6_key; @@ -2578,7 +2574,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, ipv6_key->ipv6_proto = data->nw_proto; ipv6_key->ipv6_tclass = data->nw_tos; ipv6_key->ipv6_hlimit = data->nw_ttl; -ipv6_key->ipv6_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag) +ipv6_key->ipv6_frag = export_mask ? ovs_to_odp_frag_mask(data->nw_frag) : ovs_to_odp_frag(data->nw_frag); } else if (flow->dl_type == htons(ETH_TYPE_ARP) || flow->dl_type == htons(ETH_TYPE_RARP)) { @@ -2650,7 +2646,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, if (flow->tp_dst == htons(0) && (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT) || flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) && -(!is_mask || (data->tp_src == htons(0x) && +(!export_mask || (data->tp_src == htons(0x) && data->tp_dst == htons(0x { struct ovs_key_nd *nd_key; @@ -2683,7 +2679,7 @@ void odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, const struct flow *mask, odp_port_t odp_in_port) { -odp_flow_key_from_flow__(buf, flow, flow, mask, odp_in_port, SIZE_MAX); +odp_flow_key_from_flow__(buf, flow, mask, odp_in_port, SIZE_MAX, false); } /* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to @@ -2699,8 +2695,8 @@ odp_flow_key_from_mask(struct ofpbuf *buf, const struct flow *mask, const struct flow *flow, uint32_t odp_in_port_mask, size_t max_mpls_depth) { -odp_flow_key_from_flow__(buf, mask, flow, mask, - u32_to_odp(odp_in_port_mask), max_mpls_depth); +odp_flow_key_from_flow__(buf, flow, mask, + u32_to_odp(odp_in_port_mask), max_mpls_depth, true); } /* Generate ODP flow key
Re: [ovs-dev] [recirc datapath V4 2/5] odp-util: Always generate key/mask pair in netlink for recirc_id
LGTM, Acked-by: Jarno Rajahalme > On Apr 18, 2014, at 2:32 PM, Andy Zhou wrote: > > Good point. I agree it looks odd. How about the following incremental: > > diff --git a/lib/odp-util.c b/lib/odp-util.c > index 9d49198..8e95c9e 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -2481,21 +2481,17 @@ ovs_to_odp_frag_mask(uint8_t nw_frag_mask) > } > > static void > -odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, > - const struct flow *flow, const struct flow *mask, > - odp_port_t odp_in_port, size_t max_mpls_depth) > +odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, > + const struct flow *mask, odp_port_t odp_in_port, > + size_t max_mpls_depth, bool export_mask) > { > -bool is_mask; > struct ovs_key_ethernet *eth_key; > size_t encap; > - > -/* We assume that if 'data' and 'flow' are not the same, we should > - * treat 'data' as a mask. */ > -is_mask = (data != flow); > +const struct flow *data = export_mask ? mask : flow; > > nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); > > -if (flow->tunnel.ip_dst || is_mask) { > +if (flow->tunnel.ip_dst || export_mask) { > tun_key_to_attr(buf, &data->tunnel); > } > > @@ -2511,7 +2507,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, > const struct flow *data, > > /* Add an ingress port attribute if this is a mask or 'odp_in_port' > * is not the magical value "ODPP_NONE". */ > -if (is_mask || odp_in_port != ODPP_NONE) { > +if (export_mask || odp_in_port != ODPP_NONE) { > nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); > } > > @@ -2521,7 +2517,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, > const struct flow *data, > memcpy(eth_key->eth_dst, data->dl_dst, ETH_ADDR_LEN); > > if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { > -if (is_mask) { > +if (export_mask) { > nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX); > } else { > nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, > htons(ETH_TYPE_VLAN)); > @@ -2547,7 +2543,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, > const struct flow *data, > * 0x Any non-Ethernet II frame (except valid > *802.3 SNAP packet with valid eth_type). > */ > -if (is_mask) { > +if (export_mask) { > nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, OVS_BE16_MAX); > } > goto unencap; > @@ -2565,7 +2561,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, > const struct flow *data, > ipv4_key->ipv4_proto = data->nw_proto; > ipv4_key->ipv4_tos = data->nw_tos; > ipv4_key->ipv4_ttl = data->nw_ttl; > -ipv4_key->ipv4_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag) > +ipv4_key->ipv4_frag = export_mask ? > ovs_to_odp_frag_mask(data->nw_frag) > : ovs_to_odp_frag(data->nw_frag); > } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { > struct ovs_key_ipv6 *ipv6_key; > @@ -2578,7 +2574,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, > const struct flow *data, > ipv6_key->ipv6_proto = data->nw_proto; > ipv6_key->ipv6_tclass = data->nw_tos; > ipv6_key->ipv6_hlimit = data->nw_ttl; > -ipv6_key->ipv6_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag) > +ipv6_key->ipv6_frag = export_mask ? > ovs_to_odp_frag_mask(data->nw_frag) > : ovs_to_odp_frag(data->nw_frag); > } else if (flow->dl_type == htons(ETH_TYPE_ARP) || >flow->dl_type == htons(ETH_TYPE_RARP)) { > @@ -2650,7 +2646,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, > const struct flow *data, > if (flow->tp_dst == htons(0) && > (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT) || > flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) && > -(!is_mask || (data->tp_src == htons(0x) && > +(!export_mask || (data->tp_src == htons(0x) && > data->tp_dst == htons(0x { > > struct ovs_key_nd *nd_key; > @@ -2683,7 +2679,7 @@ void > odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, >const struct flow *mask, odp_port_t odp_in_port) > { > -odp_flow_key_from_flow__(buf, flow, flow, mask, odp_in_port, SIZE_MAX); > +odp_flow_key_from_flow__(buf, flow, mask, odp_in_port, SIZE_MAX, false); > } > > /* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to > @@ -2699,8 +2695,8 @@ odp_flow_key_from_mask(struct ofpbuf *buf, const > struct flow *mask, >const struct flow *flow, uint32_t odp_in_port_mask, >size_t max_mpls_depth) > { > -odp_flow_key_from_fl
Re: [ovs-dev] [recirc datapath V4 5/5] datapath: add recirc action
On Fri, Apr 18, 2014 at 2:51 AM, Andy Zhou wrote: > diff --git a/datapath/actions.c b/datapath/actions.c > index 87a8a40..fdcd576 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > +static int execute_recirc(struct datapath *dp, struct sk_buff *skb, > +const struct nlattr *a) > +{ > + struct sw_flow_key recirc_key; > + const struct vport *p = OVS_CB(skb)->input_vport; > + uint32_t hash = OVS_CB(skb)->pkt_key->ovs_flow_hash; > + u16 port_no = p ? p->port_no : -1; I think there's still a potential problem with a NULL input_port here: when we call ovs_dp_process_packet_with_key(), it calls ovs_vport_find_upcall_portid() which assumes that the port is valid. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC V9] ofproto-dpif-upcall: Remove the dispatcher thread.
> + *- "flow_dumper" thread that reads the kernel flow table and dispatches Lost the initial "A" here. > struct upcall { > -struct list list_node; /* For queuing upcalls. */ > +bool is_valid; /* If the upcall can be used. */ > struct flow_miss *flow_miss;/* This upcall's flow_miss. */ Looks like is_valid isn't necessary anymore. I was going to suggest some other major changes, but I think it probably makes more sense to do it as a separate patch. Go ahead and merge. Acked-by: Ethan Jackson > > /* Raw upcall plus data for keeping track of the memory backing it. */ > @@ -219,15 +215,16 @@ struct flow_miss { > bool put; > }; > > -static void upcall_destroy(struct upcall *); > - > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > static struct list all_udpifs = LIST_INITIALIZER(&all_udpifs); > > -static void recv_upcalls(struct udpif *); > -static void handle_upcalls(struct handler *handler, struct list *upcalls); > +static size_t read_upcalls(struct handler *, > + struct upcall upcalls[FLOW_MISS_MAX_BATCH], > + struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH], > + struct hmap *); > +static void handle_upcalls(struct handler *, struct hmap *, struct upcall *, > + size_t n_upcalls); > static void *udpif_flow_dumper(void *); > -static void *udpif_dispatcher(void *); > static void *udpif_upcall_handler(void *); > static void *udpif_revalidator(void *); > static uint64_t udpif_get_n_flows(struct udpif *); > @@ -315,9 +312,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, > for (i = 0; i < udpif->n_handlers; i++) { > struct handler *handler = &udpif->handlers[i]; > > -ovs_mutex_lock(&handler->mutex); > -xpthread_cond_signal(&handler->wake_cond); > -ovs_mutex_unlock(&handler->mutex); > xpthread_join(handler->thread, NULL); > } > > @@ -331,7 +325,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, > } > > xpthread_join(udpif->flow_dumper, NULL); > -xpthread_join(udpif->dispatcher, NULL); > > for (i = 0; i < udpif->n_revalidators; i++) { > struct revalidator *revalidator = &udpif->revalidators[i]; > @@ -353,17 +346,7 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, > } > > for (i = 0; i < udpif->n_handlers; i++) { > -struct handler *handler = &udpif->handlers[i]; > -struct upcall *miss, *next; > - > -LIST_FOR_EACH_SAFE (miss, next, list_node, &handler->upcalls) { > -list_remove(&miss->list_node); > -upcall_destroy(miss); > -} > -ovs_mutex_destroy(&handler->mutex); > - > -xpthread_cond_destroy(&handler->wake_cond); > -free(handler->name); > +free(udpif->handlers[i].name); > } > latch_poll(&udpif->exit_latch); > > @@ -376,7 +359,7 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, > udpif->n_handlers = 0; > } > > -error = dpif_handlers_set(udpif->dpif, 1); > +error = dpif_handlers_set(udpif->dpif, n_handlers); > if (error) { > VLOG_ERR("failed to configure handlers in dpif %s: %s", > dpif_name(udpif->dpif), ovs_strerror(error)); > @@ -395,10 +378,7 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, > struct handler *handler = &udpif->handlers[i]; > > handler->udpif = udpif; > -list_init(&handler->upcalls); > -handler->need_signal = false; > -xpthread_cond_init(&handler->wake_cond, NULL); > -ovs_mutex_init(&handler->mutex); > +handler->handler_id = i; > xpthread_create(&handler->thread, NULL, udpif_upcall_handler, > handler); > } > @@ -416,7 +396,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers, > xpthread_create(&revalidator->thread, NULL, udpif_revalidator, > revalidator); > } > -xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif); > xpthread_create(&udpif->flow_dumper, NULL, udpif_flow_dumper, udpif); > } > > @@ -461,16 +440,9 @@ udpif_get_memory_usage(struct udpif *udpif, struct simap > *usage) > { > size_t i; > > -simap_increase(usage, "dispatchers", 1); > simap_increase(usage, "flow_dumpers", 1); > > simap_increase(usage, "handlers", udpif->n_handlers); > -for (i = 0; i < udpif->n_handlers; i++) { > -struct handler *handler = &udpif->handlers[i]; > -ovs_mutex_lock(&handler->mutex); > -simap_increase(usage, "handler upcalls", handler->n_upcalls); > -ovs_mutex_unlock(&handler->mutex); > -} > > simap_increase(usage, "revalidators", udpi
Re: [ovs-dev] [PATCH RFC V9] ofproto-dpif-upcall: Remove the dispatcher thread.
On Fri, Apr 18, 2014 at 4:51 PM, Ethan Jackson wrote: > > + *- "flow_dumper" thread that reads the kernel flow table and > dispatches > > Lost the initial "A" here. Thx, > struct upcall { > > -struct list list_node; /* For queuing upcalls. */ > > +bool is_valid; /* If the upcall can be used. */ > > struct flow_miss *flow_miss;/* This upcall's flow_miss. */ > > Looks like is_valid isn't necessary anymore. > Yeah, I also found it. Will remove it, > I was going to suggest some other major changes, but I think it > probably makes more sense to do it as a separate patch. Go ahead and > merge. > > Acked-by: Ethan Jackson ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH RFC V9] ofproto-dpif-upcall: Remove the dispatcher thread.
Thx, applied, Nothing better than applying 100th patch on Friday,~ At this rate, if everyone stop applying patches, it would take me 8 yrs to match your number of commits, and 40+yrs to match Ben's record. ;D On Fri, Apr 18, 2014 at 5:10 PM, Alex Wang wrote: > > > > On Fri, Apr 18, 2014 at 4:51 PM, Ethan Jackson wrote: > >> > + *- "flow_dumper" thread that reads the kernel flow table and >> dispatches >> >> Lost the initial "A" here. > > > > Thx, > > > > struct upcall { >> > -struct list list_node; /* For queuing upcalls. */ >> > +bool is_valid; /* If the upcall can be used. */ >> > struct flow_miss *flow_miss;/* This upcall's flow_miss. */ >> >> Looks like is_valid isn't necessary anymore. >> > > > Yeah, I also found it. Will remove it, > > > >> I was going to suggest some other major changes, but I think it >> probably makes more sense to do it as a separate patch. Go ahead and >> merge. >> >> Acked-by: Ethan Jackson > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] ofproto-dpif-upcall: Don't use stack garbage
Thanks for catching this. It is also used later on to set xin.may_learn, does that matter too? On 19 April 2014 03:22, Jarno Rajahalme wrote: > Acked-by: Jarno Rajahalme > > On Apr 17, 2014, at 7:19 PM, YAMAMOTO Takashi > wrote: > > > Catched by "learning action - self-modifying flow with hard_timeout" > > test case. > > > > The bug introduced by commit b256dc52. > > ("ofproto-dpif-xlate: Cache xlate_actions() effects.") > > > > Signed-off-by: YAMAMOTO Takashi > > --- > > ofproto/ofproto-dpif-upcall.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/ofproto/ofproto-dpif-upcall.c > b/ofproto/ofproto-dpif-upcall.c > > index 0d7dd8e..96a53e6 100644 > > --- a/ofproto/ofproto-dpif-upcall.c > > +++ b/ofproto/ofproto-dpif-upcall.c > > @@ -1379,7 +1379,6 @@ revalidate_ukey(struct udpif *udpif, struct > udpif_flow_dump *udump, > > xoutp = NULL; > > actions = NULL; > > netflow = NULL; > > -may_learn = push.n_packets > 0; > > > > /* If we don't need to revalidate, we can simply push the stats > contained > > * in the udump, otherwise we'll have to get the actions so we can > check > > @@ -1413,6 +1412,7 @@ revalidate_ukey(struct udpif *udpif, struct > udpif_flow_dump *udump, > > goto exit; > > } > > > > +may_learn = push.n_packets > 0; > > if (ukey->xcache && !udump->need_revalidate) { > > xlate_push_stats(ukey->xcache, may_learn, &push); > > ok = true; > > -- > > 1.8.3.1 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCHv11] ofproto-dpif-upcall: Remove the flow_dumper thread.
From: Ethan Jackson Previously, we had a separate flow_dumper thread that fetched flows from the datapath to distribute to revalidator threads. This patch takes the logic for dumping and pushes it into the revalidator threads, resulting in simpler code with similar performance to the current code. One thread, the "leader", is responsible for beginning and ending each flow dump, maintaining the flow_limit, and checking whether the revalidator threads need to exit. All revalidator threads dump, revalidate, delete datapath flows and garbage collect ukeys. Co-authored-by: Joe Stringer Signed-off-by: Joe Stringer --- Note: This requires "ofproto-dpif-upcall: Don't use stack garbage." patch, which as of posting is not yet applied to master. For convenience of review, this patch is made available at the following branch: URL: https://github.com/joestringer/openvswitch.git Branch: submit/remove_flow_dumper_v11 v11: Rebase. v10: Minor whitespace and documentation fixups. v9: Update testsuite for also printing actions on flow_dump. v8: Rebase. v7: Add back logic (present in master) that deletes all flows older than 100ms if we are currently exceeding the flow limit. Rebase. v6: Shift ukeys hmaps from revalidators into udpif. Documentation tidyups. v5: Handle ukey creation race. Style fixes. v4: Rebase. v3: First post. --- ofproto/ofproto-dpif-upcall.c | 649 +++-- tests/ofproto-dpif.at |8 +- 2 files changed, 309 insertions(+), 348 deletions(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 4ee5bf5..28a1f27 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -54,22 +54,15 @@ struct handler { uint32_t handler_id; /* Handler id. */ }; -/* A thread that processes each kernel flow handed to it by the flow_dumper - * thread, updates OpenFlow statistics, and updates or removes the kernel flow - * as necessary. */ +/* A thread that processes datapath flows, updates OpenFlow statistics, and + * updates or removes them if necessary. */ struct revalidator { struct udpif *udpif; /* Parent udpif. */ char *name;/* Thread name. */ pthread_t thread; /* Thread ID. */ -struct hmap ukeys; /* Datapath flow keys. */ - -uint64_t dump_seq; - -struct ovs_mutex mutex;/* Mutex guarding the following. */ -pthread_cond_t wake_cond; -struct list udumps OVS_GUARDED;/* Unprocessed udumps. */ -size_t n_udumps OVS_GUARDED; /* Number of unprocessed udumps. */ +struct hmap *ukeys;/* Points into udpif->ukeys for this + revalidator. Used for GC phase. */ }; /* An upcall handler for ofproto_dpif. @@ -85,13 +78,9 @@ struct revalidator { * flow revalidation * - * - *- An array of 'struct revalidator's for flow revalidation and - * stats collection. - * - *- A "flow_dumper" thread that reads the kernel flow table and dispatches - * flows to one of several "revalidator" threads (see struct - * revalidator). - * */ + *- Revalidation threads which read the datapath flow table and maintains + * them. + */ struct udpif { struct list list_node; /* In all_udpifs list. */ @@ -100,22 +89,30 @@ struct udpif { uint32_t secret; /* Random seed for upcall hash. */ -pthread_t flow_dumper; /* Flow dumper thread ID. */ - struct handler *handlers; /* Upcall handlers. */ size_t n_handlers; struct revalidator *revalidators; /* Flow revalidators. */ size_t n_revalidators; -uint64_t last_reval_seq; /* 'reval_seq' at last revalidation. */ -struct seq *reval_seq; /* Incremented to force revalidation. */ - -struct seq *dump_seq; /* Increments each dump iteration. */ - struct latch exit_latch; /* Tells child threads to exit. */ +/* Revalidation. */ +struct seq *reval_seq; /* Incremented to force revalidation. */ +bool need_revalidate; /* As indicated by 'reval_seq'. */ +bool reval_exit; /* Set by leader on 'exit_latch. */ +pthread_barrier_t reval_barrier; /* Barrier used by revalidators. */ +struct dpif_flow_dump dump;/* DPIF flow dump state. */ long long int dump_duration; /* Duration of the last flow dump. */ +struct seq *dump_seq; /* Increments each dump iteration. */ + +/* During the flow dump phase, revalidators insert into these with a random + * distribution. During the garbage collection phase, each revalidator + * takes care of garbage collecting one of these hmaps. */ +struct { +struct ovs_mutex mutex;/* Guards the following. */ +struct hmap hmap OVS_GUARDED; /
[ovs-dev] Can OVS have multiple ip addresses?
In FAQ, there is way of configuring one ip address to OVS like this. ifconfig eth0 0.0.0.0 ifconfig br0 192.168.128.5 There are some sentences. "If your only connection to the machine running OVS is through the IP address in question, then you would want to run all of these commands on a single command line, or put them into a script. If there were any additional routes assigned to eth0, then you would also want to use commands to adjust these routes to go through br0." I did not understand them and it is related to my question. Basically, my question is this. Can we assign more ip addresses to OVS(br0)?? Thanks, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] Can OVS have multiple ip addresses?
On April 18, 2014 at 7:50:46 PM, Junguk Cho (jman...@gmail.com) wrote: > In FAQ, there is way of configuring one ip address to OVS like this. > > ifconfig eth0 0.0.0.0 > ifconfig br0 192.168.128.5 > > > There are some sentences. > > "If your only connection to the machine running OVS is through the > IP address in question, then you would want to run all of these > commands on a single command line, or put them into a script. > > If there were any additional routes assigned to eth0, then you would > > also want to use commands to adjust these routes to go through br0." > > > I did not understand them and it is related to my question. > Basically, my question is this. Can we assign more ip addresses to > OVS(br0)?? You can use multiple IP addresses on an inteface using standard Linux IP aliasing: ifconfig br0 192.168.128.5 ifconfig br0:1 192.168.128.6 ifconfig br0:2 192.168.128.9 The FAQ entry above is saying that you need to have your IP addresses on your bridge and not attached interfaces. --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] lib/ofp-util: Restore the check for minus sign in port number strings.
> Commit 33ab38d9 (meta-flow: Simplify mf_from_ofp_port_string()) > inadvertently removed a check for minus sign at the beginning of a > port number string introduced by commit 05dddba (meta-flow: Don't > allow negative port numbers). This check is still needed, so put it > back, but to ofputil_port_from_string() this time. > > Signed-off-by: Jarno Rajahalme Reviewed-by: YAMAMOTO Takashi > --- > lib/ofp-util.c |6 +- > 1 file changed, 5 insertions(+), 1 deletion(-) > > diff --git a/lib/ofp-util.c b/lib/ofp-util.c > index af2d853..a990f46 100644 > --- a/lib/ofp-util.c > +++ b/lib/ofp-util.c > @@ -5394,8 +5394,12 @@ ofputil_port_to_ofp11(ofp_port_t ofp10_port) > bool > ofputil_port_from_string(const char *s, ofp_port_t *portp) > { > -uint32_t port32; > +unsigned int port32; /* int is at least 32 bits wide. */ > > +if (*s == '-') { > +VLOG_WARN("Negative values are not supported as port numbers."); > +return false; > +} > *portp = 0; > if (str_to_uint(s, 10, &port32)) { > if (port32 < ofp_to_u16(OFPP_MAX)) { > -- > 1.7.10.4 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/2] DPI plugin addition proposal v2
This message and any attachments (the "message") are confidential, intended solely for the addressees. If you are not the intended recipient, please notify the sender immediately by e-mail and delete this message from your system. In this case, you are not authorized to use, copy this message and/or disclose the content to any other person. E-mails are susceptible to alteration. Neither Qosmos nor any of its subsidiaries or affiliates shall be liable for the message if altered, changed or falsified. Ce message et toutes ses pièces jointes (ci-après le "message")sont confidentiels et établis à l'intention exclusive de ses destinataires. Si vous avez reçu ce message par erreur, merci d’en informer immédiatement son émetteur par courrier électronique et d’effacer ce message de votre système. Dans cette hypothèse, vous n’êtes pas autorisé à utiliser, copier ce message et/ou en divulguer le contenu à un tiers. Tout message électronique est susceptible d'altération. Qosmos et ses filiales déclinent toute responsabilité au titre de ce message s'il a été altéré, déformé ou falsifié. 0001-DPI-plugin-addition-proposal-v2.patch Description: 0001-DPI-plugin-addition-proposal-v2.patch ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 5/5] dpif-netdev: Use miniflow as a flow key.
> Use miniflow as a flow key in the userspace datapath classifier. The > miniflow is expanded for upcalls, but for existing datapath flows, the > key need not be expanded. > > Signed-off-by: Jarno Rajahalme Reviewed-by: YAMAMOTO Takashi > @@ -2144,8 +2157,9 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, > > hash_act = nl_attr_get(a); > if (hash_act->hash_alg == OVS_HASH_ALG_L4) { > - > -hash = flow_hash_symmetric_l4(aux->key, hash_act->hash_bias); > +/* Hash need not be symmetric, nor does it need to include > + * L2 fields. */ > +hash = miniflow_hash_5tuple(aux->key, hash_act->hash_bias); how do you think about ipv6? YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/5] lib/flow: Add miniflow accessors and miniflow_get_tcp_flags().
> Add inlined generic accessors for miniflow integer type fields, and a > new miniflow_get_tcp_flags() usinge these. These will be used in a > later patch. > > Some definitions also used in lib/packets.h had to be moved there to > resolve circular include dependencies. Similarly, some inline > functions using struct flow are now in lib/flow.h. IMO this is > cleaner, since now the lib/flow.h need not be included from > lib/packets.h. > > Signed-off-by: Jarno Rajahalme at least tests/test-sflow.c needs arpa/inet.h for inet_ntop as it isn't indirectly included via packets.h anymore. otherwise looks fine to me. Reviewed-by: YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 3/5] lib/flow: Possibly faster miniflow_hash_in_minimask()
> Upcoming patches add classifier lookups using miniflows, this is > heavily used for it. > > Signed-off-by: Jarno Rajahalme Reviewed-by: YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/5] classifier: Support miniflow as a key.
> Support struct miniflow as a key for datapath flow lookup. > > The new classifier interface classifier_lookup_miniflow_first() takes > a miniflow as a key and stops at the first match with no regard to > flow prioritites. This works only if the classifier has no > conflicting rules (as is the case with the userspace datapath > classifier). > > Signed-off-by: Jarno Rajahalme Reviewed-by: YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/5] lib/flow: Introduce miniflow_extract().
> miniflow_extract() extracts packet headers directly to a miniflow, > which is a compressed form of the struct flow. This does not require > a large struct to be cleared to begin with, and accesses less memory. > These performance benefits should allow this to be used in the DPDK > datapath. > > miniflow_extract() takes a miniflow as an input/output parameter. On > input the buffer for values to be extracted must be properly > initialized. On output the map contains ones for all the fields that > have been extracted. > > Some struct flow fields are reordered to make miniflow_extract to > progress in the logical order. > > Some explicit "inline" keywords are necessary for GCC to optimize this > properly. Also, macros are used for same reason instead of inline > functions for pushing data to the miniflow. > > Signed-off-by: Jarno Rajahalme Reviewed-by: YAMAMOTO Takashi ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [recirc datapath V4 1/5] ofproto-dpif: Rule lookup starts from table zero for non-recirc datapath
Currently, all packet lookup starts from internal table for possible matching of post recirculation rules. This is not necessary for datapath that does not support recirculation. This patch adds the ability to steering rule lookup starting table based on whether datapath supports recirculation. Signed-off-by: Andy Zhou ¬ --- ofproto/ofproto-dpif.c | 83 +- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 3648dd7..1e51ff2 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3171,41 +3171,57 @@ rule_dpif_get_actions(const struct rule_dpif *rule) return rule_get_actions(&rule->up); } -static uint8_t -rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow, -struct flow_wildcards *wc, struct rule_dpif **rule) +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. + * If 'wc' is non-null, sets the fields that were relevant as part of + * the lookup. Returns the table_id where a match or miss occurred. + * + * The return value will be zero unless there was a miss and + * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables + * where misses occur. */ +uint8_t +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, + struct flow_wildcards *wc, struct rule_dpif **rule) { enum rule_dpif_lookup_verdict verdict; enum ofputil_port_config config = 0; -uint8_t table_id = TBL_INTERNAL; +uint8_t table_id; + +if (ofproto_dpif_get_enable_recirc(ofproto)) { +/* Set metadata to the value of recirc_id to speed up internal + * rule lookup. */ +flow->metadata = htonll(flow->recirc_id); +table_id = TBL_INTERNAL; +} else { +table_id = 0; +} verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, &table_id, rule); switch (verdict) { -case RULE_DPIF_LOOKUP_VERDICT_MATCH: -return table_id; -case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { -struct ofport_dpif *port; - -port = get_ofp_port(ofproto, flow->in_port.ofp_port); -if (!port) { -VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, - flow->in_port.ofp_port); +case RULE_DPIF_LOOKUP_VERDICT_MATCH: +return table_id; +case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { +struct ofport_dpif *port; + +port = get_ofp_port(ofproto, flow->in_port.ofp_port); +if (!port) { +VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, + flow->in_port.ofp_port); +} +config = port ? port->up.pp.config : 0; +break; } -config = port ? port->up.pp.config : 0; -break; -} -case RULE_DPIF_LOOKUP_VERDICT_DROP: -config = OFPUTIL_PC_NO_PACKET_IN; -break; -case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: -if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { +case RULE_DPIF_LOOKUP_VERDICT_DROP: config = OFPUTIL_PC_NO_PACKET_IN; -} -break; -default: -OVS_NOT_REACHED(); +break; +case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: +if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { +config = OFPUTIL_PC_NO_PACKET_IN; +} +break; +default: +OVS_NOT_REACHED(); } choose_miss_rule(config, ofproto->miss_rule, @@ -3213,23 +3229,6 @@ rule_dpif_lookup__ (struct ofproto_dpif *ofproto, const struct flow *flow, return table_id; } -/* Lookup 'flow' in table 0 of 'ofproto''s classifier. - * If 'wc' is non-null, sets the fields that were relevant as part of - * the lookup. Returns the table_id where a match or miss occurred. - * - * The return value will be zero unless there was a miss and - * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables - * where misses occur. */ -uint8_t -rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, - struct flow_wildcards *wc, struct rule_dpif **rule) -{ -/* Set metadata to the value of recirc_id to speed up internal - * rule lookup. */ -flow->metadata = htonll(flow->recirc_id); -return rule_dpif_lookup__(ofproto, flow, wc, rule); -} - static struct rule_dpif * rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, const struct flow *flow, struct flow_wildcards *wc) -- 1.9.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [recirc datapath V4 3/5] openvswitch.h: rename hash action definition
Rename hash_bias to hash_basis to make it consistent with similar usages. Signed-off-by: Andy Zhou --- include/linux/openvswitch.h | 4 ++-- lib/dpif-netdev.c| 2 +- lib/odp-util.c | 2 +- ofproto/ofproto-dpif-xlate.c | 4 ++-- ofproto/ofproto-dpif-xlate.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index e17f802..1aa9158 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -553,11 +553,11 @@ enum ovs_hash_alg { /* * struct ovs_action_hash - %OVS_ACTION_ATTR_HASH action argument. * @hash_alg: Algorithm used to compute hash prior to recirculation. - * @hash_bias: bias used for computing hash. + * @hash_basis: basis used for computing hash. */ struct ovs_action_hash { uint32_t hash_alg; /* One of ovs_hash_alg. */ - uint32_t hash_bias; + uint32_t hash_basis; }; /** diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d17bc64..291fb09 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2146,7 +2146,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, hash_act = nl_attr_get(a); if (hash_act->hash_alg == OVS_HASH_ALG_L4) { -hash = flow_hash_symmetric_l4(aux->key, hash_act->hash_bias); +hash = flow_hash_symmetric_l4(aux->key, hash_act->hash_basis); if (!hash) { hash = 1; /* 0 is not valid */ } diff --git a/lib/odp-util.c b/lib/odp-util.c index 70390bf..9d49198 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -399,7 +399,7 @@ format_odp_hash_action(struct ds *ds, const struct ovs_action_hash *hash_act) ds_put_format(ds, "hash("); if (hash_act->hash_alg == OVS_HASH_ALG_L4) { -ds_put_format(ds, "hash_l4(%"PRIu32")", hash_act->hash_bias); +ds_put_format(ds, "hash_l4(%"PRIu32")", hash_act->hash_basis); } else { ds_put_format(ds, "Unknown hash algorithm(%"PRIu32")", hash_act->hash_alg); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 1ba5462..4c5a48f 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1148,7 +1148,7 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle, if (ctx->xbridge->enable_recirc) { ctx->xout->use_recirc = bond_may_recirc( -out_xbundle->bond, &xr->recirc_id, &xr->hash_bias); +out_xbundle->bond, &xr->recirc_id, &xr->hash_basis); if (ctx->xout->use_recirc) { /* Only TCP mode uses recirculation. */ @@ -1851,7 +1851,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, OVS_ACTION_ATTR_HASH, sizeof *act_hash); act_hash->hash_alg = xr->hash_alg; -act_hash->hash_bias = xr->hash_bias; +act_hash->hash_basis = xr->hash_basis; /* Recirc action. */ nl_msg_put_u32(&ctx->xout->odp_actions, OVS_ACTION_ATTR_RECIRC, diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 8b53e10..3dfa3b9 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -35,7 +35,7 @@ struct mac_learning; struct xlate_recirc { uint32_t recirc_id; /* !0 Use recirculation instead of output. */ uint8_t hash_alg; /* !0 Compute hash for recirc before. */ -uint32_t hash_bias; /* Compute hash for recirc before. */ +uint32_t hash_basis; /* Compute hash for recirc before. */ }; struct xlate_out { -- 1.9.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [recirc datapath V4 2/5] odp-util: Always generate key/mask pair in netlink for recirc_id
Currently netlink flow (and mask) recirc_id attribute is only serialized when the recirc_id value is non-zero. For this logic to work correctly, the interpretation of the missing recirc_id depends on whether the datapath supports recirculation. This patch remove the ambiguity of the meaning of missing recirc_id attribute in netlink message. When recirc_id is non-zero, or when it is not a wildcard match, both key and mask attributes are serialized. On the other hand, when recirc_id is zero, and being wildcarded, they are not serialized. A missing recirc_id key and mask attribute thus should always be interpreted as wildcard, same as other flow fields. Signed-off-by: Andy Zhou --- lib/dpif-netdev.c | 9 + lib/odp-util.c | 16 lib/odp-util.h | 4 ++-- ofproto/ofproto-dpif.c | 4 ++-- tests/ofproto-dpif.at | 20 ++-- tests/test-odp.c | 3 ++- 6 files changed, 29 insertions(+), 27 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 76141e3..d17bc64 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1442,6 +1442,7 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, struct dp_netdev_flow_state *state = state_; struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *netdev_flow; +struct flow_wildcards wc; int error; ovs_mutex_lock(&iter->mutex); @@ -1464,11 +1465,13 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, return error; } +minimask_expand(&netdev_flow->cr.match.mask, &wc); + if (key) { struct ofpbuf buf; ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf); -odp_flow_key_from_flow(&buf, &netdev_flow->flow, +odp_flow_key_from_flow(&buf, &netdev_flow->flow, &wc.masks, netdev_flow->flow.in_port.odp_port); *key = ofpbuf_data(&buf); @@ -1477,10 +1480,8 @@ dpif_netdev_flow_dump_next(const struct dpif *dpif, void *iter_, void *state_, if (key && mask) { struct ofpbuf buf; -struct flow_wildcards wc; ofpbuf_use_stack(&buf, &state->maskbuf, sizeof state->maskbuf); -minimask_expand(&netdev_flow->cr.match.mask, &wc); odp_flow_key_from_mask(&buf, &wc.masks, &netdev_flow->flow, odp_to_u32(wc.masks.in_port.odp_port), SIZE_MAX); @@ -2070,7 +2071,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, ofpbuf_init(buf, buf_size); /* Put ODP flow. */ -odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port); +odp_flow_key_from_flow(buf, flow, NULL, flow->in_port.odp_port); upcall->key = ofpbuf_data(buf); upcall->key_len = ofpbuf_size(buf); diff --git a/lib/odp-util.c b/lib/odp-util.c index 0969ce8..70390bf 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2482,8 +2482,8 @@ ovs_to_odp_frag_mask(uint8_t nw_frag_mask) static void odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, - const struct flow *flow, odp_port_t odp_in_port, - size_t max_mpls_depth) + const struct flow *flow, const struct flow *mask, + odp_port_t odp_in_port, size_t max_mpls_depth) { bool is_mask; struct ovs_key_ethernet *eth_key; @@ -2501,11 +2501,11 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark); -if (flow->recirc_id) { +if (data->recirc_id || (mask && mask->recirc_id)) { nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id); } -if (flow->dp_hash) { +if (data->dp_hash || (mask && mask->dp_hash)) { nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, data->dp_hash); } @@ -2681,9 +2681,9 @@ unencap: * capable of being expanded to allow for that much space. */ void odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, - odp_port_t odp_in_port) + const struct flow *mask, odp_port_t odp_in_port) { -odp_flow_key_from_flow__(buf, flow, flow, odp_in_port, SIZE_MAX); +odp_flow_key_from_flow__(buf, flow, flow, mask, odp_in_port, SIZE_MAX); } /* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to @@ -2699,8 +2699,8 @@ odp_flow_key_from_mask(struct ofpbuf *buf, const struct flow *mask, const struct flow *flow, uint32_t odp_in_port_mask, size_t max_mpls_depth) { -odp_flow_key_from_flow__(buf, mask, flow, u32_to_odp(odp_in_port_mask), - max_mpls_depth); +odp_flow_key_from_flow__(buf, mask, flow, mask, + u32_to_odp(odp_in_port_mask), max_mpls_depth); } /* Generate ODP flow key from the
[ovs-dev] [recirc datapath V4 5/5] datapath: add recirc action
Recirculation implementation for Linux kernel data path. Signed-off-by: Andy Zhou --- V3 -> v4: * OVS_CB input_port may be NULL for ovs_packet_cmd_execute() * always accept recirc_id mask * Always generate recirc_id netlink message for recirc enabled datapath. (with corresponding user space changes in patch 1 and 2) V2 -> v3: * save the input port in OVS_CB * Allow recirc_id to be masked like any other key attribute. * DO not force recirc_id to be exact match. * Needs corresponding user space changes I am still working. * Sending this out as RFC --- datapath/actions.c | 40 +++- datapath/datapath.c | 43 ++- datapath/datapath.h | 8 ++-- datapath/flow.h | 1 + datapath/flow_netlink.c | 18 ++ 5 files changed, 90 insertions(+), 20 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index 87a8a40..fdcd576 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -520,6 +520,27 @@ static int execute_set_action(struct sk_buff *skb, return err; } +static int execute_recirc(struct datapath *dp, struct sk_buff *skb, +const struct nlattr *a) +{ + struct sw_flow_key recirc_key; + const struct vport *p = OVS_CB(skb)->input_vport; + uint32_t hash = OVS_CB(skb)->pkt_key->ovs_flow_hash; + u16 port_no = p ? p->port_no : -1; + int err; + + err = ovs_flow_extract(skb, port_no, &recirc_key); + if (err) + return err; + + recirc_key.ovs_flow_hash = hash; + recirc_key.recirc_id = nla_get_u32(a); + + ovs_dp_process_packet_with_key(skb, &recirc_key, dp); + + return 0; +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr, int len, bool keep_skb) @@ -564,6 +585,23 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_vlan(skb); break; + case OVS_ACTION_ATTR_RECIRC: { + struct sk_buff *recirc_skb; + const bool last_action = (a->nla_len == rem); + + if (!last_action || keep_skb) + recirc_skb = skb_clone(skb, GFP_ATOMIC); + else + recirc_skb = skb; + + err = execute_recirc(dp, recirc_skb, a); + + if (last_action || err) + return err; + + break; + } + case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, nla_data(a)); break; diff --git a/datapath/datapath.c b/datapath/datapath.c index 0fdd1d4..5818686 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -240,45 +240,36 @@ void ovs_dp_detach_port(struct vport *p) ovs_vport_del(p); } -/* Must be called with rcu_read_lock. */ -void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) +void ovs_dp_process_packet_with_key(struct sk_buff *skb, + struct sw_flow_key *pkt_key, struct datapath *dp) { - struct datapath *dp = p->dp; + const struct vport *p = OVS_CB(skb)->input_vport; struct sw_flow *flow; struct dp_stats_percpu *stats; - struct sw_flow_key key; u64 *stats_counter; u32 n_mask_hit; - int error; stats = this_cpu_ptr(dp->stats_percpu); - /* Extract flow from 'skb' into 'key'. */ - error = ovs_flow_extract(skb, p->port_no, &key); - if (unlikely(error)) { - kfree_skb(skb); - return; - } - /* Look up flow. */ - flow = ovs_flow_tbl_lookup_stats(&dp->table, &key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, pkt_key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.key = &key; + upcall.key = pkt_key; upcall.userdata = NULL; - upcall.portid = ovs_vport_find_upcall_portid(p, skb); + upcall.portid = p ? ovs_vport_find_upcall_portid(p, skb) : 0; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; goto out; } + OVS_CB(skb)->pkt_key = pkt_key; OVS_CB(skb)->flow = flow; -