On 05/24/2019 06:05 PM, John Hurley wrote:
> TC hooks allow filters and actions to be applied to packets at both
> ingress and egress of the network stack. With a poor configuration this
> can produce loops, whereby an ingress hook calls a mirred egress action
> that redirects to a device whose egress hook, in turn, redirects back to
> the first ingress, and so on. The TC core classifier protects against
> loops when doing reclassifies but, as yet, there is no protection against
> a packet looping between multiple hooks. This can lead to stack overflow
> panics.
> 
> Add a per-CPU counter that tracks the recursion of packets through TC
> hooks. The packet is dropped if the recursion limit is exceeded, and the
> counter is reset for the next packet.
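
For illustration, a misconfiguration along these lines (hypothetical veth
pair; the exact commands are only a sketch) would set up such a loop:

  # clsact provides both an ingress and an egress hook on each device
  tc qdisc add dev veth0 clsact
  tc qdisc add dev veth1 clsact

  # ingress of veth0 redirects every packet to the egress of veth1 ...
  tc filter add dev veth0 ingress protocol all matchall \
          action mirred egress redirect dev veth1

  # ... whose egress hook redirects it straight back to the ingress of veth0
  tc filter add dev veth1 egress protocol all matchall \
          action mirred ingress redirect dev veth0

A packet entering veth0 then bounces between the two hooks within a single
call chain, growing the stack on each pass.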

NAK. This is quite a hack in the middle of the fast path. Such redirection
usually has a rescheduling point, as in the cls_bpf case. If, as I read your
commit message above, that is not the case for the mirred action, then fix
mirred instead if it's that broken.
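
E.g. the same per-CPU counter idea could live inside act_mirred itself, so
the check only costs anything on the mirred path rather than on every
packet traversing the TC hooks. Roughly, as a sketch only (the limit and
counter names below are illustrative, not an existing API):

#define MIRRED_NEST_LIMIT	4
static DEFINE_PER_CPU(unsigned int, mirred_nest_level);

static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
			  struct tcf_result *res)
{
	unsigned int nest = __this_cpu_inc_return(mirred_nest_level);
	int retval;

	if (unlikely(nest > MIRRED_NEST_LIMIT)) {
		net_warn_ratelimited("mirred: nesting limit reached, dropping packet\n");
		retval = TC_ACT_SHOT;	/* caller frees the skb on SHOT */
		goto out;
	}

	/* ... the existing mirror/redirect handling would run here,
	 * setting retval from the configured action ...
	 */
	retval = TC_ACT_PIPE;	/* placeholder for the sketch */

out:
	__this_cpu_dec(mirred_nest_level);
	return retval;
}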

Thanks,
Daniel

> Signed-off-by: John Hurley <john.hur...@netronome.com>
> Reviewed-by: Simon Horman <simon.hor...@netronome.com>
> ---
>  net/core/dev.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index b6b8505..a6d9ed7 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -154,6 +154,9 @@
>  /* This should be increased if a protocol with a bigger head is added. */
>  #define GRO_MAX_HEAD (MAX_HEADER + 128)
>  
> +#define SCH_RECURSION_LIMIT  4
> +static DEFINE_PER_CPU(int, sch_recursion_level);
> +
>  static DEFINE_SPINLOCK(ptype_lock);
>  static DEFINE_SPINLOCK(offload_lock);
>  struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
> @@ -3598,16 +3601,42 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
>  }
>  EXPORT_SYMBOL(dev_loopback_xmit);
>  
> +static inline int sch_check_inc_recur_level(void)
> +{
> +     int rec_level = __this_cpu_inc_return(sch_recursion_level);
> +
> +     if (rec_level >= SCH_RECURSION_LIMIT) {
> +             net_warn_ratelimited("Recursion limit reached on TC datapath, probable configuration error\n");
> +             return -ELOOP;
> +     }
> +
> +     return 0;
> +}
> +
> +static inline void sch_dec_recur_level(void)
> +{
> +     __this_cpu_dec(sch_recursion_level);
> +}
> +
>  #ifdef CONFIG_NET_EGRESS
>  static struct sk_buff *
>  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
>  {
>       struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
>       struct tcf_result cl_res;
> +     int err;
>  
>       if (!miniq)
>               return skb;
>  
> +     err = sch_check_inc_recur_level();
> +     if (err) {
> +             sch_dec_recur_level();
> +             *ret = NET_XMIT_DROP;
> +             consume_skb(skb);
> +             return NULL;
> +     }
> +
>       /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
>       mini_qdisc_bstats_cpu_update(miniq, skb);
>  
> @@ -3620,22 +3649,26 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
>               mini_qdisc_qstats_cpu_drop(miniq);
>               *ret = NET_XMIT_DROP;
>               kfree_skb(skb);
> -             return NULL;
> +             skb = NULL;
> +             break;
>       case TC_ACT_STOLEN:
>       case TC_ACT_QUEUED:
>       case TC_ACT_TRAP:
>               *ret = NET_XMIT_SUCCESS;
>               consume_skb(skb);
> -             return NULL;
> +             skb = NULL;
> +             break;
>       case TC_ACT_REDIRECT:
>               /* No need to push/pop skb's mac_header here on egress! */
>               skb_do_redirect(skb);
>               *ret = NET_XMIT_SUCCESS;
> -             return NULL;
> +             skb = NULL;
> +             break;
>       default:
>               break;
>       }
>  
> +     sch_dec_recur_level();
>       return skb;
>  }
>  #endif /* CONFIG_NET_EGRESS */
> @@ -4670,6 +4703,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>  #ifdef CONFIG_NET_CLS_ACT
>       struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
>       struct tcf_result cl_res;
> +     int err;
>  
>       /* If there's at least one ingress present somewhere (so
>        * we get here via enabled static key), remaining devices
> @@ -4679,6 +4713,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>       if (!miniq)
>               return skb;
>  
> +     err = sch_check_inc_recur_level();
> +     if (err) {
> +             sch_dec_recur_level();
> +             *ret = NET_XMIT_DROP;
> +             consume_skb(skb);
> +             return NULL;
> +     }
> +
>       if (*pt_prev) {
>               *ret = deliver_skb(skb, *pt_prev, orig_dev);
>               *pt_prev = NULL;
> @@ -4696,12 +4738,14 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>       case TC_ACT_SHOT:
>               mini_qdisc_qstats_cpu_drop(miniq);
>               kfree_skb(skb);
> -             return NULL;
> +             skb = NULL;
> +             break;
>       case TC_ACT_STOLEN:
>       case TC_ACT_QUEUED:
>       case TC_ACT_TRAP:
>               consume_skb(skb);
> -             return NULL;
> +             skb = NULL;
> +             break;
>       case TC_ACT_REDIRECT:
>               /* skb_mac_header check was done by cls/act_bpf, so
>                * we can safely push the L2 header back before
> @@ -4709,14 +4753,18 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
>                */
>               __skb_push(skb, skb->mac_len);
>               skb_do_redirect(skb);
> -             return NULL;
> +             skb = NULL;
> +             break;
>       case TC_ACT_REINSERT:
>               /* this does not scrub the packet, and updates stats on error */
>               skb_tc_reinsert(skb, &cl_res);
> -             return NULL;
> +             skb = NULL;
> +             break;
>       default:
>               break;
>       }
> +
> +     sch_dec_recur_level();
>  #endif /* CONFIG_NET_CLS_ACT */
>       return skb;
>  }
> 
