On 10/19/2018 1:52 AM, Jiri Pirko wrote: > Thu, Oct 18, 2018 at 08:24:44PM CEST, amritha.namb...@intel.com wrote: >> On 10/18/2018 5:17 AM, Jiri Pirko wrote: >>> Fri, Oct 12, 2018 at 03:53:30PM CEST, amritha.namb...@intel.com wrote: >>>> Added support in tc flower for filtering based on port ranges. >>>> This is a rework of the RFC patch at: >>>> https://patchwork.ozlabs.org/patch/969595/ >>>> >>>> Example: >>>> 1. Match on a port range: >>>> ------------------------- >>>> $ tc filter add dev enp4s0 protocol ip parent ffff:\ >>>> prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\ >>>> action drop >>>> >>>> $ tc -s filter show dev enp4s0 parent ffff: >>>> filter protocol ip pref 1 flower chain 0 >>>> filter protocol ip pref 1 flower chain 0 handle 0x1 >>>> eth_type ipv4 >>>> ip_proto tcp >>>> dst_port_min 20 >>>> dst_port_max 30 >>>> skip_hw >>>> not_in_hw >>>> action order 1: gact action drop >>>> random type none pass val 0 >>>> index 1 ref 1 bind 1 installed 181 sec used 5 sec >>>> Action statistics: >>>> Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) >>>> backlog 0b 0p requeues 0 >>>> >>>> 2. Match on IP address and port range: >>>> -------------------------------------- >>>> $ tc filter add dev enp4s0 protocol ip parent ffff:\ >>>> prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\ >>>> skip_hw action drop >>>> >>>> $ tc -s filter show dev enp4s0 parent ffff: >>>> filter protocol ip pref 1 flower chain 0 handle 0x2 >>>> eth_type ipv4 >>>> ip_proto tcp >>>> dst_ip 192.168.1.1 >>>> dst_port_min 100 >>>> dst_port_max 200 >>>> skip_hw >>>> not_in_hw >>>> action order 1: gact action drop >>>> random type none pass val 0 >>>> index 2 ref 1 bind 1 installed 28 sec used 6 sec >>>> Action statistics: >>>> Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) >>>> backlog 0b 0p requeues 0 >>>> >>>> Signed-off-by: Amritha Nambiar <amritha.namb...@intel.com> >>>> --- >>>> include/uapi/linux/pkt_cls.h | 5 ++ >>>> net/sched/cls_flower.c | 134 >>>> ++++++++++++++++++++++++++++++++++++++++-- >>>> 2 files changed, 132 insertions(+), 7 deletions(-) >>>> >>>> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h >>>> index 401d0c1..b569308 100644 >>>> --- a/include/uapi/linux/pkt_cls.h >>>> +++ b/include/uapi/linux/pkt_cls.h >>>> @@ -405,6 +405,11 @@ enum { >>>> TCA_FLOWER_KEY_UDP_SRC, /* be16 */ >>>> TCA_FLOWER_KEY_UDP_DST, /* be16 */ >>>> >>>> + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */ >>>> + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */ >>>> + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */ >>>> + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */ >>>> + >>>> TCA_FLOWER_FLAGS, >>>> TCA_FLOWER_KEY_VLAN_ID, /* be16 */ >>>> TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ >>>> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c >>>> index 9aada2d..5f135f0 100644 >>>> --- a/net/sched/cls_flower.c >>>> +++ b/net/sched/cls_flower.c >>>> @@ -55,6 +55,9 @@ struct fl_flow_key { >>>> struct flow_dissector_key_ip ip; >>>> struct flow_dissector_key_ip enc_ip; >>>> struct flow_dissector_key_enc_opts enc_opts; >>>> + >>>> + struct flow_dissector_key_ports tp_min; >>>> + struct flow_dissector_key_ports tp_max; >>>> } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as >>>> longs. */ >>>> >>>> struct fl_flow_mask_range { >>>> @@ -103,6 +106,11 @@ struct cls_fl_filter { >>>> struct net_device *hw_dev; >>>> }; >>>> >>>> +enum fl_endpoint { >>>> + FLOWER_ENDPOINT_DST, >>>> + FLOWER_ENDPOINT_SRC >>>> +}; >>>> + >>>> static const struct rhashtable_params mask_ht_params = { >>>> .key_offset = offsetof(struct fl_flow_mask, key), >>>> .key_len = sizeof(struct fl_flow_key), >>>> @@ -179,11 +187,86 @@ static void fl_clear_masked_range(struct fl_flow_key >>>> *key, >>>> memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); >>>> } >>>> >>>> +static int fl_range_compare_params(struct cls_fl_filter *filter, >>>> + struct fl_flow_key *key, >>>> + struct fl_flow_key *mkey, >>>> + enum fl_endpoint endpoint) >>>> +{ >>>> + __be16 min_mask, max_mask, min_val, max_val; >>>> + >>>> + if (endpoint == FLOWER_ENDPOINT_DST) { >>>> + min_mask = htons(filter->mask->key.tp_min.dst); >>>> + max_mask = htons(filter->mask->key.tp_max.dst); >>>> + min_val = htons(filter->key.tp_min.dst); >>>> + max_val = htons(filter->key.tp_max.dst); >>>> + >>>> + if (min_mask && max_mask) { >>>> + if (htons(key->tp.dst) < min_val || >>>> + htons(key->tp.dst) > max_val) >>>> + return -1; >>>> + >>>> + /* skb does not have min and max values */ >>>> + mkey->tp_min.dst = filter->mkey.tp_min.dst; >>>> + mkey->tp_max.dst = filter->mkey.tp_max.dst; >>>> + } >>>> + } else { >>>> + min_mask = htons(filter->mask->key.tp_min.src); >>>> + max_mask = htons(filter->mask->key.tp_max.src); >>>> + min_val = htons(filter->key.tp_min.src); >>>> + max_val = htons(filter->key.tp_max.src); >>>> + >>>> + if (min_mask && max_mask) { >>>> + if (htons(key->tp.src) < min_val || >>>> + htons(key->tp.src) > max_val) >>>> + return -1; >>>> + >>>> + /* skb does not have min and max values */ >>>> + mkey->tp_min.src = filter->mkey.tp_min.src; >>>> + mkey->tp_max.src = filter->mkey.tp_max.src; >>>> + } >>> >>> You basically have 2 functions in 1 here. Just have 2 functions: >>> fl_port_range_dst_cmp() >>> and >>> fl_port_range_src_cmp() >>> >>> And avoid the "endpoint enum. >>> Also, as you return -1 or 0, just make it bool. >>> >> >> Makes sense. Will do. >> >>> >>>> + } >>>> + return 0; >>>> +} >>>> + >>>> +static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask, >>>> + struct fl_flow_key *mkey, >>>> + struct fl_flow_key *key) >>>> +{ >>>> + struct cls_fl_filter *filter, *f; >>>> + int ret; >>>> + >>>> + list_for_each_entry_rcu(filter, &mask->filters, list) { >>>> + ret = fl_range_compare_params(filter, key, mkey, >>>> + FLOWER_ENDPOINT_DST); >>>> + if (ret < 0) >>>> + continue; >>>> + >>>> + ret = fl_range_compare_params(filter, key, mkey, >>>> + FLOWER_ENDPOINT_SRC); >>>> + if (ret < 0) >>>> + continue; >>>> + >>>> + f = rhashtable_lookup_fast(&mask->ht, >>>> + fl_key_get_start(mkey, mask), >>>> + mask->filter_ht_params); >>>> + if (f) >>>> + return f; >>>> + } >>>> + return NULL; >>>> +} >>>> + >>>> static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask, >>>> - struct fl_flow_key *mkey) >>>> + struct fl_flow_key *mkey, >>>> + struct fl_flow_key *key, bool is_skb) >>>> { >>>> - return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), >>>> - mask->filter_ht_params); >>>> + if ((!(mask->key.tp_min.dst && mask->key.tp_max.dst) && >>>> + !(mask->key.tp_min.src && mask->key.tp_max.src)) || !is_skb) { >>> >>> Would be probably good to have a dedicated bit to check for and decide >>> if you do normal/range lookup. This is fast path. >>> >> >> Will fix in v2. >> >>> >>>> + return rhashtable_lookup_fast(&mask->ht, >>> >>> Remove double space ^^ >>> >> >> Will fix in v2. >> >>> >>>> + fl_key_get_start(mkey, mask), >>>> + mask->filter_ht_params); >>>> + } >>>> + /* Classify based on range */ >>>> + return fl_lookup_range(mask, mkey, key); >>>> } >>>> >>>> static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, >>>> @@ -207,8 +290,8 @@ static int fl_classify(struct sk_buff *skb, const >>>> struct tcf_proto *tp, >>>> skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); >>>> >>>> fl_set_masked_key(&skb_mkey, &skb_key, mask); >>>> + f = fl_lookup(mask, &skb_mkey, &skb_key, true); >>>> >>>> - f = fl_lookup(mask, &skb_mkey); >>>> if (f && !tc_skip_sw(f->flags)) { >>>> *res = f->res; >>>> return tcf_exts_exec(skb, &f->exts, res); >>>> @@ -909,6 +992,23 @@ static int fl_set_key(struct net *net, struct nlattr >>>> **tb, >>>> sizeof(key->arp.tha)); >>>> } >>>> >>>> + if (key->basic.ip_proto == IPPROTO_TCP || >>>> + key->basic.ip_proto == IPPROTO_UDP || >>>> + key->basic.ip_proto == IPPROTO_SCTP) { >>>> + fl_set_key_val(tb, &key->tp_min.dst, >>>> + TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst, >>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst)); >>>> + fl_set_key_val(tb, &key->tp_max.dst, >>>> + TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst, >>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst)); >>>> + fl_set_key_val(tb, &key->tp_min.src, >>>> + TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src, >>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src)); >>>> + fl_set_key_val(tb, &key->tp_max.src, >>>> + TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src, >>>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src)); >>>> + } >>>> + >>>> if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || >>>> tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { >>>> key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; >>>> @@ -1026,8 +1126,7 @@ static void fl_init_dissector(struct flow_dissector >>>> *dissector, >>>> FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); >>>> FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>>> FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); >>>> - FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>>> - FLOW_DISSECTOR_KEY_PORTS, tp); >>>> + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); >>>> FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>>> FLOW_DISSECTOR_KEY_IP, ip); >>>> FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>>> @@ -1227,7 +1326,7 @@ static int fl_change(struct net *net, struct sk_buff >>>> *in_skb, >>>> goto errout_idr; >>>> >>>> if (!tc_skip_sw(fnew->flags)) { >>>> - if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { >>>> + if (!fold && fl_lookup(fnew->mask, &fnew->mkey, NULL, false)) { >>> >>> >>> I don't undestand why do you need the "is_skb" arg here. Could you >>> please explain? >>> >>> Thanks! >>> >> >> The reason to keep the 'is_skb' arg is because, fl_lookup is called in >> two cases, one for skb classification and another for checking if a >> filter exists every-time a new filter is added. In case of skb >> classification, we need to go through the range-comparator to decide if >> the skb's port-value falls within the range-filter's min and max limits. >> In case of filter validation, the range-filter that we are trying to add >> will have min and max values, and we are validating it against other >> range-filters with min and max values. So, rhashtable lookup will >> suffice here and there is no need to go through the range-comparator in >> this case. In the above code, we are validating if a range-filter >> exists, so 'is_skb' is false. > > Got it. In that case, please just have a helper: > static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask, > struct fl_flow_key *mkey) > { > return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), > mask->filter_ht_params); > } > > Call this helper from both fl_lookup() and fl_change() >
Alright. Will fix. > >> >>> >>>> err = -EEXIST; >>>> goto errout_mask; >>>> } >>>> @@ -1800,6 +1899,27 @@ static int fl_dump_key(struct sk_buff *skb, struct >>>> net *net, >>>> sizeof(key->arp.tha)))) >>>> goto nla_put_failure; >>>> >>>> + if ((key->basic.ip_proto == IPPROTO_TCP || >>>> + key->basic.ip_proto == IPPROTO_UDP || > >>>> + key->basic.ip_proto == IPPROTO_SCTP) && >>>> + (fl_dump_key_val(skb, &key->tp_min.dst, >>>> + TCA_FLOWER_KEY_PORT_DST_MIN, >>>> + &mask->tp_min.dst, TCA_FLOWER_UNSPEC, >>>> + sizeof(key->tp_min.dst)) || >>>> + fl_dump_key_val(skb, &key->tp_max.dst, >>>> + TCA_FLOWER_KEY_PORT_DST_MAX, >>>> + &mask->tp_max.dst, TCA_FLOWER_UNSPEC, >>>> + sizeof(key->tp_max.dst)) || >>>> + fl_dump_key_val(skb, &key->tp_min.src, >>>> + TCA_FLOWER_KEY_PORT_SRC_MIN, >>>> + &mask->tp_min.src, TCA_FLOWER_UNSPEC, >>>> + sizeof(key->tp_min.src)) || >>>> + fl_dump_key_val(skb, &key->tp_max.src, >>>> + TCA_FLOWER_KEY_PORT_SRC_MAX, >>>> + &mask->tp_max.src, TCA_FLOWER_UNSPEC, >>>> + sizeof(key->tp_max.src)))) >>>> + goto nla_put_failure; >>>> + >>>> if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && >>>> (fl_dump_key_val(skb, &key->enc_ipv4.src, >>>> TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, >>>>