Thu, Oct 18, 2018 at 08:24:44PM CEST, amritha.namb...@intel.com wrote: >On 10/18/2018 5:17 AM, Jiri Pirko wrote: >> Fri, Oct 12, 2018 at 03:53:30PM CEST, amritha.namb...@intel.com wrote: >>> Added support in tc flower for filtering based on port ranges. >>> This is a rework of the RFC patch at: >>> https://patchwork.ozlabs.org/patch/969595/ >>> >>> Example: >>> 1. Match on a port range: >>> ------------------------- >>> $ tc filter add dev enp4s0 protocol ip parent ffff:\ >>> prio 1 flower ip_proto tcp dst_port range 20-30 skip_hw\ >>> action drop >>> >>> $ tc -s filter show dev enp4s0 parent ffff: >>> filter protocol ip pref 1 flower chain 0 >>> filter protocol ip pref 1 flower chain 0 handle 0x1 >>> eth_type ipv4 >>> ip_proto tcp >>> dst_port_min 20 >>> dst_port_max 30 >>> skip_hw >>> not_in_hw >>> action order 1: gact action drop >>> random type none pass val 0 >>> index 1 ref 1 bind 1 installed 181 sec used 5 sec >>> Action statistics: >>> Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) >>> backlog 0b 0p requeues 0 >>> >>> 2. Match on IP address and port range: >>> -------------------------------------- >>> $ tc filter add dev enp4s0 protocol ip parent ffff:\ >>> prio 1 flower dst_ip 192.168.1.1 ip_proto tcp dst_port range 100-200\ >>> skip_hw action drop >>> >>> $ tc -s filter show dev enp4s0 parent ffff: >>> filter protocol ip pref 1 flower chain 0 handle 0x2 >>> eth_type ipv4 >>> ip_proto tcp >>> dst_ip 192.168.1.1 >>> dst_port_min 100 >>> dst_port_max 200 >>> skip_hw >>> not_in_hw >>> action order 1: gact action drop >>> random type none pass val 0 >>> index 2 ref 1 bind 1 installed 28 sec used 6 sec >>> Action statistics: >>> Sent 460 bytes 10 pkt (dropped 10, overlimits 0 requeues 0) >>> backlog 0b 0p requeues 0 >>> >>> Signed-off-by: Amritha Nambiar <amritha.namb...@intel.com> >>> --- >>> include/uapi/linux/pkt_cls.h | 5 ++ >>> net/sched/cls_flower.c | 134 >>> ++++++++++++++++++++++++++++++++++++++++-- >>> 2 files changed, 132 insertions(+), 7 deletions(-) >>> >>> diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h >>> index 401d0c1..b569308 100644 >>> --- a/include/uapi/linux/pkt_cls.h >>> +++ b/include/uapi/linux/pkt_cls.h >>> @@ -405,6 +405,11 @@ enum { >>> TCA_FLOWER_KEY_UDP_SRC, /* be16 */ >>> TCA_FLOWER_KEY_UDP_DST, /* be16 */ >>> >>> + TCA_FLOWER_KEY_PORT_SRC_MIN, /* be16 */ >>> + TCA_FLOWER_KEY_PORT_SRC_MAX, /* be16 */ >>> + TCA_FLOWER_KEY_PORT_DST_MIN, /* be16 */ >>> + TCA_FLOWER_KEY_PORT_DST_MAX, /* be16 */ >>> + >>> TCA_FLOWER_FLAGS, >>> TCA_FLOWER_KEY_VLAN_ID, /* be16 */ >>> TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ >>> diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c >>> index 9aada2d..5f135f0 100644 >>> --- a/net/sched/cls_flower.c >>> +++ b/net/sched/cls_flower.c >>> @@ -55,6 +55,9 @@ struct fl_flow_key { >>> struct flow_dissector_key_ip ip; >>> struct flow_dissector_key_ip enc_ip; >>> struct flow_dissector_key_enc_opts enc_opts; >>> + >>> + struct flow_dissector_key_ports tp_min; >>> + struct flow_dissector_key_ports tp_max; >>> } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as >>> longs. */ >>> >>> struct fl_flow_mask_range { >>> @@ -103,6 +106,11 @@ struct cls_fl_filter { >>> struct net_device *hw_dev; >>> }; >>> >>> +enum fl_endpoint { >>> + FLOWER_ENDPOINT_DST, >>> + FLOWER_ENDPOINT_SRC >>> +}; >>> + >>> static const struct rhashtable_params mask_ht_params = { >>> .key_offset = offsetof(struct fl_flow_mask, key), >>> .key_len = sizeof(struct fl_flow_key), >>> @@ -179,11 +187,86 @@ static void fl_clear_masked_range(struct fl_flow_key >>> *key, >>> memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); >>> } >>> >>> +static int fl_range_compare_params(struct cls_fl_filter *filter, >>> + struct fl_flow_key *key, >>> + struct fl_flow_key *mkey, >>> + enum fl_endpoint endpoint) >>> +{ >>> + __be16 min_mask, max_mask, min_val, max_val; >>> + >>> + if (endpoint == FLOWER_ENDPOINT_DST) { >>> + min_mask = htons(filter->mask->key.tp_min.dst); >>> + max_mask = htons(filter->mask->key.tp_max.dst); >>> + min_val = htons(filter->key.tp_min.dst); >>> + max_val = htons(filter->key.tp_max.dst); >>> + >>> + if (min_mask && max_mask) { >>> + if (htons(key->tp.dst) < min_val || >>> + htons(key->tp.dst) > max_val) >>> + return -1; >>> + >>> + /* skb does not have min and max values */ >>> + mkey->tp_min.dst = filter->mkey.tp_min.dst; >>> + mkey->tp_max.dst = filter->mkey.tp_max.dst; >>> + } >>> + } else { >>> + min_mask = htons(filter->mask->key.tp_min.src); >>> + max_mask = htons(filter->mask->key.tp_max.src); >>> + min_val = htons(filter->key.tp_min.src); >>> + max_val = htons(filter->key.tp_max.src); >>> + >>> + if (min_mask && max_mask) { >>> + if (htons(key->tp.src) < min_val || >>> + htons(key->tp.src) > max_val) >>> + return -1; >>> + >>> + /* skb does not have min and max values */ >>> + mkey->tp_min.src = filter->mkey.tp_min.src; >>> + mkey->tp_max.src = filter->mkey.tp_max.src; >>> + } >> >> You basically have 2 functions in 1 here. Just have 2 functions: >> fl_port_range_dst_cmp() >> and >> fl_port_range_src_cmp() >> >> And avoid the "endpoint enum. >> Also, as you return -1 or 0, just make it bool. >> > >Makes sense. Will do. > >> >>> + } >>> + return 0; >>> +} >>> + >>> +static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask, >>> + struct fl_flow_key *mkey, >>> + struct fl_flow_key *key) >>> +{ >>> + struct cls_fl_filter *filter, *f; >>> + int ret; >>> + >>> + list_for_each_entry_rcu(filter, &mask->filters, list) { >>> + ret = fl_range_compare_params(filter, key, mkey, >>> + FLOWER_ENDPOINT_DST); >>> + if (ret < 0) >>> + continue; >>> + >>> + ret = fl_range_compare_params(filter, key, mkey, >>> + FLOWER_ENDPOINT_SRC); >>> + if (ret < 0) >>> + continue; >>> + >>> + f = rhashtable_lookup_fast(&mask->ht, >>> + fl_key_get_start(mkey, mask), >>> + mask->filter_ht_params); >>> + if (f) >>> + return f; >>> + } >>> + return NULL; >>> +} >>> + >>> static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask, >>> - struct fl_flow_key *mkey) >>> + struct fl_flow_key *mkey, >>> + struct fl_flow_key *key, bool is_skb) >>> { >>> - return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), >>> - mask->filter_ht_params); >>> + if ((!(mask->key.tp_min.dst && mask->key.tp_max.dst) && >>> + !(mask->key.tp_min.src && mask->key.tp_max.src)) || !is_skb) { >> >> Would be probably good to have a dedicated bit to check for and decide >> if you do normal/range lookup. This is fast path. >> > >Will fix in v2. > >> >>> + return rhashtable_lookup_fast(&mask->ht, >> >> Remove double space ^^ >> > >Will fix in v2. > >> >>> + fl_key_get_start(mkey, mask), >>> + mask->filter_ht_params); >>> + } >>> + /* Classify based on range */ >>> + return fl_lookup_range(mask, mkey, key); >>> } >>> >>> static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, >>> @@ -207,8 +290,8 @@ static int fl_classify(struct sk_buff *skb, const >>> struct tcf_proto *tp, >>> skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); >>> >>> fl_set_masked_key(&skb_mkey, &skb_key, mask); >>> + f = fl_lookup(mask, &skb_mkey, &skb_key, true); >>> >>> - f = fl_lookup(mask, &skb_mkey); >>> if (f && !tc_skip_sw(f->flags)) { >>> *res = f->res; >>> return tcf_exts_exec(skb, &f->exts, res); >>> @@ -909,6 +992,23 @@ static int fl_set_key(struct net *net, struct nlattr >>> **tb, >>> sizeof(key->arp.tha)); >>> } >>> >>> + if (key->basic.ip_proto == IPPROTO_TCP || >>> + key->basic.ip_proto == IPPROTO_UDP || >>> + key->basic.ip_proto == IPPROTO_SCTP) { >>> + fl_set_key_val(tb, &key->tp_min.dst, >>> + TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst, >>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst)); >>> + fl_set_key_val(tb, &key->tp_max.dst, >>> + TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst, >>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst)); >>> + fl_set_key_val(tb, &key->tp_min.src, >>> + TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src, >>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src)); >>> + fl_set_key_val(tb, &key->tp_max.src, >>> + TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src, >>> + TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src)); >>> + } >>> + >>> if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || >>> tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { >>> key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; >>> @@ -1026,8 +1126,7 @@ static void fl_init_dissector(struct flow_dissector >>> *dissector, >>> FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); >>> FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>> FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); >>> - FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>> - FLOW_DISSECTOR_KEY_PORTS, tp); >>> + FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp); >>> FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>> FLOW_DISSECTOR_KEY_IP, ip); >>> FL_KEY_SET_IF_MASKED(mask, keys, cnt, >>> @@ -1227,7 +1326,7 @@ static int fl_change(struct net *net, struct sk_buff >>> *in_skb, >>> goto errout_idr; >>> >>> if (!tc_skip_sw(fnew->flags)) { >>> - if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) { >>> + if (!fold && fl_lookup(fnew->mask, &fnew->mkey, NULL, false)) { >> >> >> I don't undestand why do you need the "is_skb" arg here. Could you >> please explain? >> >> Thanks! >> > >The reason to keep the 'is_skb' arg is because, fl_lookup is called in >two cases, one for skb classification and another for checking if a >filter exists every-time a new filter is added. In case of skb >classification, we need to go through the range-comparator to decide if >the skb's port-value falls within the range-filter's min and max limits. >In case of filter validation, the range-filter that we are trying to add >will have min and max values, and we are validating it against other >range-filters with min and max values. So, rhashtable lookup will >suffice here and there is no need to go through the range-comparator in >this case. In the above code, we are validating if a range-filter >exists, so 'is_skb' is false.
Got it. In that case, please just have a helper: static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask, struct fl_flow_key *mkey) { return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask), mask->filter_ht_params); } Call this helper from both fl_lookup() and fl_change() > >> >>> err = -EEXIST; >>> goto errout_mask; >>> } >>> @@ -1800,6 +1899,27 @@ static int fl_dump_key(struct sk_buff *skb, struct >>> net *net, >>> sizeof(key->arp.tha)))) >>> goto nla_put_failure; >>> >>> + if ((key->basic.ip_proto == IPPROTO_TCP || >>> + key->basic.ip_proto == IPPROTO_UDP || >>> + key->basic.ip_proto == IPPROTO_SCTP) && >>> + (fl_dump_key_val(skb, &key->tp_min.dst, >>> + TCA_FLOWER_KEY_PORT_DST_MIN, >>> + &mask->tp_min.dst, TCA_FLOWER_UNSPEC, >>> + sizeof(key->tp_min.dst)) || >>> + fl_dump_key_val(skb, &key->tp_max.dst, >>> + TCA_FLOWER_KEY_PORT_DST_MAX, >>> + &mask->tp_max.dst, TCA_FLOWER_UNSPEC, >>> + sizeof(key->tp_max.dst)) || >>> + fl_dump_key_val(skb, &key->tp_min.src, >>> + TCA_FLOWER_KEY_PORT_SRC_MIN, >>> + &mask->tp_min.src, TCA_FLOWER_UNSPEC, >>> + sizeof(key->tp_min.src)) || >>> + fl_dump_key_val(skb, &key->tp_max.src, >>> + TCA_FLOWER_KEY_PORT_SRC_MAX, >>> + &mask->tp_max.src, TCA_FLOWER_UNSPEC, >>> + sizeof(key->tp_max.src)))) >>> + goto nla_put_failure; >>> + >>> if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && >>> (fl_dump_key_val(skb, &key->enc_ipv4.src, >>> TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, >>>