Thanks for comments. I posted patch according to comments except refactoring netlink-parsing. I will post separate patch for that.
--Pravin. On Tue, Jan 15, 2013 at 10:38 AM, Jesse Gross <je...@nicira.com> wrote: > On Fri, Jan 11, 2013 at 5:23 PM, Pravin B Shelar <pshe...@nicira.com> wrote: >> diff --git a/datapath/datapath.c b/datapath/datapath.c >> index 30e26a7..4ed00cf 100644 >> --- a/datapath/datapath.c >> +++ b/datapath/datapath.c >> static int validate_sample(const struct nlattr *attr, >> - const struct sw_flow_key *key, int depth) >> + const struct sw_flow_key *key, int depth, >> + struct sw_flow_actions *sfa) >> { >> const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; >> const struct nlattr *probability, *actions; >> @@ -451,7 +453,7 @@ static int validate_sample(const struct nlattr *attr, >> actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; >> if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) >> return -EINVAL; >> - return validate_actions(actions, key, depth + 1); >> + return validate_and_copy_actions(actions, key, depth + 1, sfa); > > I think this will result in the inner actions being output before the > sample attribute itself. > >> +static int ____nla_put(struct sw_flow_actions *sfa, int attrtype, void >> *data, int len) > > I would probably give this a name other than nla_put since it makes it > sound like it is part of the Netlink library itself. > >> +{ >> + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions >> + sfa->used); >> + >> + if (NLA_ALIGN(nla_attr_size(len)) > (sfa->actions_len - sfa->used)); >> + return -EMSGSIZE; > > What if we made actions_len the current length of the attributes that > we've put and then used ksize() to check for overflow? That way we > wouldn't need sfa->used and actions_len will always reflect the > correct data. > >> +static int validate_and_copy_set_tun(const struct nlattr *attr, >> + struct sw_flow_actions *sfa) >> +{ >> + struct ovs_key_ipv4_tunnel tun_key; >> + int err; >> + >> + err = ipv4_tun_from_nlattr(attr, &tun_key); >> + if (err) >> + return err; >> + >> + err = ____nla_put(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, >> sizeof(tun_key)); >> + if (err) >> + return err; > > I think this will result in only having the OVS_KEY_ATTR_IPV4_TUNNEL > and not the wrapping set attribute. > >> @@ -600,12 +653,16 @@ static int validate_actions(const struct nlattr *attr, >> }; >> const struct ovs_action_push_vlan *vlan; >> int type = nla_type(a); >> + bool set_tun; >> + >> > > Extra blank line. > >> @@ -634,13 +691,13 @@ static int validate_actions(const struct nlattr *attr, >> break; >> >> case OVS_ACTION_ATTR_SET: >> - err = validate_set(a, key); >> + err = validate_set(a, key, sfa, &set_tun); >> if (err) >> return err; >> break; >> >> case OVS_ACTION_ATTR_SAMPLE: >> - err = validate_sample(a, key, depth); >> + err = validate_sample(a, key, depth, sfa); > > I think if we have a sample attribute with a nested tunnel attribute > then we will get both the original translated tunnel key and the > original full sample attribute. > > Don't we need to translate this back the other way as well? If we > there is a query for a flow we should return the original value. > >> @@ -951,9 +1013,16 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff >> *skb, struct genl_info *info) >> >> /* Validate actions. */ >> if (a[OVS_FLOW_ATTR_ACTIONS]) { >> - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); >> - if (error) >> + acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); >> + error = PTR_ERR(acts); >> + if (IS_ERR(acts)) >> + goto error; >> + >> + error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], >> &key, 0, acts); >> + if (error) { >> + ovs_flow_deferred_free_acts(acts); > > We could just kfree() the actions here, no need to use RCU. > > Doesn't this leak the actions if we later encounter an error? > >> @@ -1026,21 +1087,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff >> *skb, struct genl_info *info) >> /* Update actions. */ >> old_acts = rcu_dereference_protected(flow->sf_acts, >> lockdep_genl_is_held()); >> - acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; >> - if (acts_attrs && >> - (old_acts->actions_len != nla_len(acts_attrs) || >> - memcmp(old_acts->actions, nla_data(acts_attrs), >> - old_acts->actions_len))) { >> - struct sw_flow_actions *new_acts; >> - >> - new_acts = ovs_flow_actions_alloc(acts_attrs); >> - error = PTR_ERR(new_acts); >> - if (IS_ERR(new_acts)) >> - goto error; >> - >> - rcu_assign_pointer(flow->sf_acts, new_acts); >> + if (acts && >> + (old_acts->actions_len != acts->actions_len || >> + memcmp(old_acts->actions, acts->actions, >> + old_acts->actions_len))) { >> + rcu_assign_pointer(flow->sf_acts, acts); >> ovs_flow_deferred_free_acts(old_acts); >> - } >> + } else >> + ovs_flow_deferred_free_acts(acts); > > I'm not sure that there's much point in this check any more. Before > it used to save an allocation but now it's just a pointer assignment. > >> diff --git a/datapath/flow.c b/datapath/flow.c >> index 63eef77..ce563f3 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -213,7 +213,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const >> struct nlattr *actions) >> return ERR_PTR(-ENOMEM); >> >> sfa->actions_len = actions_len; >> - memcpy(sfa->actions, nla_data(actions), actions_len); >> + sfa->used = 0; >> return sfa; >> } > > We should allow for the possibility that the size of fixed structure > is larger than the Netlink attributes. > >> +int ipv4_tun_from_nlattr(const struct nlattr *attr, >> + struct ovs_key_ipv4_tunnel *tun_key) >> +{ >> + struct nlattr *a; >> + int rem; >> + >> + memset(tun_key, 0, sizeof(*tun_key)); >> + >> + nla_for_each_nested(a, attr, rem) { >> + int type = nla_type(a); >> + >> + switch (type) { >> + case OVS_KEY_ATTR_TUN_ID: >> + memcpy(&tun_key->tun_id, nla_data(a), >> sizeof(__be64)); > > This should check the sizes of the attributes before copying, > otherwise we could run off the end. > > It seems like we have a bunch of this type of parsing, which is > basically the same as nla_parse_attribute() but it complains if there > are unknown attributes. Can we somehow factor it out? > >> + break; >> + case OVS_TUNNEL_IPV4_SRC: >> + memcpy(&tun_key->ipv4_src, nla_data(a), >> sizeof(__be32)); >> + break; >> + case OVS_TUNNEL_IPV4_DST: >> + memcpy(&tun_key->ipv4_dst, nla_data(a), >> sizeof(__be32)); >> + break; >> + case OVS_TUNNEL_TOS: >> + tun_key->ipv4_tos = nla_get_u8(a); >> + break; >> + case OVS_TUNNEL_TTL: >> + tun_key->ipv4_ttl = nla_get_u8(a); > > We should enforce that TTL is set since zero isn't a great default here. > >> diff --git a/datapath/flow.h b/datapath/flow.h >> index 3f3624f..ae15024 100644 >> --- a/datapath/flow.h >> +++ b/datapath/flow.h >> @@ -37,9 +37,20 @@ struct sk_buff; >> struct sw_flow_actions { >> struct rcu_head rcu; >> u32 actions_len; >> + u32 used; >> struct nlattr actions[]; >> }; >> >> +struct ovs_key_ipv4_tunnel { >> + __be64 tun_id; >> + __u32 tun_flags; >> + __be32 ipv4_src; >> + __be32 ipv4_dst; >> + __u8 ipv4_tos; >> + __u8 ipv4_ttl; >> + __u8 pad[2]; > > I'm not sure that we need to explicitly include the pad at this point > any more. Since it's no longer an interface, it should be fine to > allow the compiler to choose the padding as appropriate. > >> diff --git a/datapath/tunnel.h b/datapath/tunnel.h >> index 7705475..b7de7a9 100644 >> --- a/datapath/tunnel.h >> +++ b/datapath/tunnel.h >> @@ -201,7 +201,6 @@ static inline void tnl_tun_key_init(struct >> ovs_key_ipv4_tunnel *tun_key, >> tun_key->ipv4_tos = iph->tos; >> tun_key->ipv4_ttl = iph->ttl; >> tun_key->tun_flags = tun_flags; >> - memset(tun_key->pad, 0, sizeof(tun_key->pad)); > > If we do keep the padding, then I think we need to continue > initializing it here since otherwise we'll look up garbage data in the > flow table. > >> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h >> index 5e32965..96f8bcc 100644 >> --- a/include/linux/openvswitch.h >> +++ b/include/linux/openvswitch.h >> @@ -265,6 +265,21 @@ struct ovs_flow_stats { >> __u64 n_bytes; /* Number of matched bytes. */ >> }; >> >> + > > Extra blank line. > >> +/* Values for OVS_TUNNEL_FLAGS */ >> +#define OVS_TNL_F_DONT_FRAGMENT (1 << 0) >> +#define OVS_TNL_F_CSUM (1 << 1) >> +#define OVS_TNL_F_KEY (1 << 2) > > I think at this point it's no longer necessary to define these > specially, we can just make them Netlink flag attributes. We think we > can actually drop OVS_TNL_F_KEY now and use the presence or absence of > the corresponding attribute instead. > >> +enum ovs_tunnel { > > I would name this ovs_tunnel_attr for consistency. > >> + OVS_TUNNEL_IPV4_SRC, /* Tunnel src ip address. */ >> + OVS_TUNNEL_IPV4_DST, /* Tunnel dst ip address. */ >> + OVS_TUNNEL_TOS, /* Tunnel ip TOS. */ >> + OVS_TUNNEL_TTL, /* Tunnel ip TTL. */ >> + OVS_TUNNEL_FLAGS, /* OVS_TNL_F_* flags. */ > > Can you include the types in the comments and also use more standard > capitalization (IP, ToS)? > >> + __OVS_TUNNEL_MAX >> +}; > > Can you also define OVS_TUNNEL_MAX for completeness? > > I would also put this below the definition of ovs_key_attr so that it > is with the other related attributes. > >> enum ovs_key_attr { >> OVS_KEY_ATTR_UNSPEC, >> OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. */ >> @@ -282,8 +297,12 @@ enum ovs_key_attr { >> OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ >> OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ >> OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ >> - OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ >> + OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ >> OVS_KEY_ATTR_TUN_ID = 63, /* be64 tunnel ID */ >> + >> +#ifdef __KERNEL__ >> + OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ >> +#endif > > I think it would be better to define this before OVS_KEY_ATTR_TUN_ID > to prevent any possibility of overflowing our 64-bit data types. > >> diff --git a/lib/flow.h b/lib/flow.h >> index 8e79e62..323b248 100644 >> --- a/lib/flow.h >> +++ b/lib/flow.h >> @@ -63,9 +63,10 @@ struct flow_tnl { >> ovs_be64 tun_id; >> ovs_be32 ip_src; >> ovs_be32 ip_dst; >> - uint16_t flags; >> + uint32_t flags; >> uint8_t ip_tos; >> uint8_t ip_ttl; >> + uint8_t zeros[2]; >> }; >> >> /* >> @@ -103,7 +104,6 @@ struct flow { >> uint8_t arp_tha[6]; /* ARP/ND target hardware address. */ >> uint8_t nw_ttl; /* IP TTL/Hop Limit. */ >> uint8_t nw_frag; /* FLOW_FRAG_* flags. */ >> - uint8_t zeros[4]; > > I don't think that this will build on 32-bit systems. The compiler is > silently adding padding to the end of struct flow on 64-bit systems, > which is causing the original size to be the same and the assertion to > continue passing. However, I'm not sure that I really see what's > being achieved here in general though. > >> diff --git a/lib/odp-util.c b/lib/odp-util.c >> index e2f21da..4cb07fc 100644 >> --- a/lib/odp-util.c >> +++ b/lib/odp-util.c >> @@ -95,7 +95,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr) >> case OVS_KEY_ATTR_PRIORITY: return "skb_priority"; >> case OVS_KEY_ATTR_SKB_MARK: return "skb_mark"; >> case OVS_KEY_ATTR_TUN_ID: return "tun_id"; >> - case OVS_KEY_ATTR_IPV4_TUNNEL: return "ipv4_tunnel"; >> + case OVS_KEY_ATTR_TUNNEL: return "ipv4_tunnel"; > > I think this should now be "tunnel" > >> +static void >> +tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, bool >> odp_flags) >> +{ >> + size_t tun_key_ofs; >> + >> + tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL); >> + >> + /* layouts differ, flags has different size */ > > I'm not sure that this comment is really relevant any more. > >> + if (tun_key->tun_id) { >> + nl_msg_put_be64(a, OVS_KEY_ATTR_TUN_ID, tun_key->tun_id); >> + } >> + if (tun_key->ip_src) { >> + nl_msg_put_be32(a, OVS_TUNNEL_IPV4_SRC, tun_key->ip_src); >> + } >> + if (tun_key->ip_dst) { >> + nl_msg_put_be32(a, OVS_TUNNEL_IPV4_DST, tun_key->ip_dst); >> + } >> + if (tun_key->ip_tos) { >> + nl_msg_put_u8(a, OVS_TUNNEL_TOS, tun_key->ip_tos); >> + } >> + if (tun_key->ip_ttl) { >> + nl_msg_put_u8(a, OVS_TUNNEL_TTL, tun_key->ip_ttl); >> + } > > If we start enforcing the TTL is present then we should make sure that > we put it in on both userspace and kernel sides. > >> @@ -734,17 +840,17 @@ format_odp_key_attr(const struct nlattr *a, struct ds >> *ds) >> ds_put_format(ds, "(%#"PRIx64")", ntohll(nl_attr_get_be64(a))); >> break; >> >> - case OVS_KEY_ATTR_IPV4_TUNNEL: >> - ipv4_tun_key = nl_attr_get(a); >> + case OVS_KEY_ATTR_TUNNEL: >> + tun_key_from_attr(a, &tun_key); > > I don't think it's really right to convert this to userspace format > first. They do have a similar structure but it's not really > semantically correct. I guess this is why you expanded the size of > the flags field in struct flow but we don't really want to impose > those types of requirements for things like this. > >> @@ -974,23 +1080,23 @@ parse_odp_key_attr(const char *s, const struct simap >> *port_names, >> { >> char tun_id_s[32]; >> int tos, ttl; >> - struct ovs_key_ipv4_tunnel tun_key; >> + struct flow_tnl tun_key; >> int n = -1; >> >> if (sscanf(s, "ipv4_tunnel(tun_id=%31[x0123456789abcdefABCDEF]," >> "src="IP_SCAN_FMT",dst="IP_SCAN_FMT >> ",tos=%i,ttl=%i,flags%n", tun_id_s, >> - IP_SCAN_ARGS(&tun_key.ipv4_src), >> - IP_SCAN_ARGS(&tun_key.ipv4_dst), &tos, &ttl, >> + IP_SCAN_ARGS(&tun_key.ip_src), >> + IP_SCAN_ARGS(&tun_key.ip_dst), &tos, &ttl, > > Similarly, we're also dealing with kernel format here. > >> @@ -1905,23 +1977,17 @@ odp_flow_key_to_flow(const struct nlattr *key, >> size_t key_len, >> expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUN_ID; >> } >> >> - if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL)) { >> - const struct ovs_key_ipv4_tunnel *ipv4_tun_key; >> + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { >> >> - ipv4_tun_key = nl_attr_get(attrs[OVS_KEY_ATTR_IPV4_TUNNEL]); >> - >> - flow->tunnel.tun_id = ipv4_tun_key->tun_id; >> - flow->tunnel.ip_src = ipv4_tun_key->ipv4_src; >> - flow->tunnel.ip_dst = ipv4_tun_key->ipv4_dst; >> - flow->tunnel.flags = odp_to_flow_flags(ipv4_tun_key->tun_flags); >> - flow->tunnel.ip_tos = ipv4_tun_key->ipv4_tos; >> - flow->tunnel.ip_ttl = ipv4_tun_key->ipv4_ttl; >> + if (tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel)) >> + return ODP_FIT_ERROR; > > This needs to be able to check whether there were extra attributes, > otherwise we won't be able to recreate the kernel flow later. _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev