On 11/16/2018 03:28 AM, Davide Caratti wrote:
> On Thu, 2018-11-15 at 05:53 -0800, Eric Dumazet wrote:
>>
>> On 11/15/2018 03:43 AM, Davide Caratti wrote:
>>> On Wed, 2018-11-14 at 22:46 -0800, Eric Dumazet wrote:
>>>> On 09/13/2018 10:29 AM, Davide Caratti wrote:
>>>>> use RCU instead of spinlocks, to protect concurrent read/write on
>>>>> act_police configuration. This reduces the effects of contention in the
>>>>> data path, in case multiple readers are present.
>>>>>
>>>>> Signed-off-by: Davide Caratti <dcara...@redhat.com>
>>>>> ---
>>>>>  net/sched/act_police.c | 156 ++++++++++++++++++++++++-----------------
>>>>>  1 file changed, 92 insertions(+), 64 deletions(-)
>>>>>
>>>>
>>>> I must be missing something obvious with this patch.
>>>
>>> hello Eric,
>>>
>>> On the opposite, I missed something obvious when I wrote that patch: there
>>> is a race condition on tcfp_toks, tcfp_ptoks and tcfp_t_c: thank you for
>>> noticing it.
>>>
>>> These variables still need to be protected with a spinlock. I will do a
>>> patch and evaluate if 'act_police' is still faster than a version where   
>>> 2d550dbad83c ("net/sched: .... ") is reverted, and share results in the
>>> next hours.
>>>
>>> Ok?
>>>
>>
>> SGTM, thanks.
> 
> hello,
> I just finished the comparison of act_police, in the following cases:
> 
> a) revert the RCU-ification (i.e. commit 2d550dbad83c ("net/sched:
> act_police: don't use spinlock in the data path"), and leave per-cpu
> counters used by the rate estimator
> 
> b) keep RCU-ified configuration parameters, and protect read/update of
> tcfp_toks, tcfp_ptoks and tcfp_t_c with a spinlock (code at the bottom of
> this message).
> 
> ## Test setup:
> 
> $DEV is a 'dummy' with clsact qdisc; the following two commands,
> 
> # test police with 'rate'
> $TC filter add dev $DEV egress matchall \
>  action police rate 2gbit burst 100k conform-exceed pass/pass index 100
> 
> # test police with 'avrate'
> $TC filter add dev $DEV egress estimator 1s 8s matchall \
>         action police avrate 2gbit conform-exceed pass/pass index 100
> 
> are tested with the following loop:
> 
> for c in 1 2 4 8 16; do
> ./pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -v -s 64  -t $c -n 5000000 -i 
> $DEV
> done
> 
> 
> ## Test results:
> 
> using  rate  | reverted       | patched
>         $c   | act_police (a) | act_police (b)
> -------------+----------------+---------------
>          1   |       3364442  |      3345580  
>          2   |       2703030  |      2721919  
>          4   |       1130146  |      1253555
>          8   |        664238  |       658777
>         16   |        154026  |       155259
> 
> 
> using avrate | reverted       | patched
>         $c   | act_police (a) | act_police (b)
> -------------+----------------+---------------
>          1   |       3621796  |      3658472 
>          2   |       3075589  |      3548135  
>          4   |       2313314  |      3343717
>          8   |        768458  |      3260480
>         16   |        177776  |      3254128
> 
> 
> so, 'avrate' still gets a significant improvement because the 'conform/exceed'
> decision doesn't need the spinlock in this case. The estimation is probably
> less accurate, because it uses per-CPU variables: if this is not acceptable,
> then we need to revert also 93be42f9173b ("net/sched: act_police: use per-cpu
> counters").
> 
> 
> ## patch code:
> 
> -- >8 --
> diff --git a/net/sched/act_police.c b/net/sched/act_police.c
> index 052855d..42db852 100644
> --- a/net/sched/act_police.c
> +++ b/net/sched/act_police.c
> @@ -27,10 +27,7 @@ struct tcf_police_params {
>       u32                     tcfp_ewma_rate;
>       s64                     tcfp_burst;
>       u32                     tcfp_mtu;
> -     s64                     tcfp_toks;
> -     s64                     tcfp_ptoks;
>       s64                     tcfp_mtu_ptoks;
> -     s64                     tcfp_t_c;
>       struct psched_ratecfg   rate;
>       bool                    rate_present;
>       struct psched_ratecfg   peak;
> @@ -40,6 +37,9 @@ struct tcf_police_params {
>  
>  struct tcf_police {
>       struct tc_action        common;


> +     s64                     tcfp_toks;
> +     s64                     tcfp_ptoks;
> +     s64                     tcfp_t_c;

I suggest to use a single cache line with a dedicated spinlock and these three 
s64

        spinlock_t  tcfp_lock ____cacheline_aligned_in_smp;
        s64                     ...
        s64                     ...
        s64                     ...


>       struct tcf_police_params __rcu *params;

Make sure to use a different cache line for *params 

        struct tcf_police_params __rcu *params ____cacheline_aligned_in_smp;

>  };
>  
> @@ -186,12 +186,6 @@ static int tcf_police_init(struct net *net, struct 
> nlattr *nla,
>       }
>  
>       new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
> -     new->tcfp_toks = new->tcfp_burst;
> -     if (new->peak_present) {
> -             new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
> -                                                      new->tcfp_mtu);
> -             new->tcfp_ptoks = new->tcfp_mtu_ptoks;
> -     }
>  
>       if (tb[TCA_POLICE_AVRATE])
>               new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
> @@ -207,7 +201,14 @@ static int tcf_police_init(struct net *net, struct 
> nlattr *nla,
>       }
>  
>       spin_lock_bh(&police->tcf_lock);
> -     new->tcfp_t_c = ktime_get_ns();
> +     police->tcfp_t_c = ktime_get_ns();
> +     police->tcfp_toks = new->tcfp_burst;
> +     if (new->peak_present) {
> +             new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
> +                                                      new->tcfp_mtu);
> +             police->tcfp_ptoks = new->tcfp_mtu_ptoks;
> +     }
> +
>       police->tcf_action = parm->action;
>       rcu_swap_protected(police->params,
>                          new,
> @@ -255,27 +256,29 @@ static int tcf_police_act(struct sk_buff *skb, const 
> struct tc_action *a,
>                       ret = p->tcfp_result;
>                       goto end;
>               }
> -
> +             spin_lock_bh(&police->tcf_lock);
>               now = ktime_get_ns();

The ktime_get_ns() call can be done before acquiring the spinlock

> -             toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
> +             toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
>               if (p->peak_present) {
> -                     ptoks = toks + p->tcfp_ptoks;
> +                     ptoks = toks + police->tcfp_ptoks;
>                       if (ptoks > p->tcfp_mtu_ptoks)
>                               ptoks = p->tcfp_mtu_ptoks;
>                       ptoks -= (s64)psched_l2t_ns(&p->peak,
>                                                   qdisc_pkt_len(skb));
>               }
> -             toks += p->tcfp_toks;
> +             toks += police->tcfp_toks;
>               if (toks > p->tcfp_burst)
>                       toks = p->tcfp_burst;
>               toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
>               if ((toks|ptoks) >= 0) {
> -                     p->tcfp_t_c = now;
> -                     p->tcfp_toks = toks;
> -                     p->tcfp_ptoks = ptoks;
> +                     police->tcfp_t_c = now;
> +                     police->tcfp_toks = toks;
> +                     police->tcfp_ptoks = ptoks;
> +                     spin_unlock_bh(&police->tcf_lock);
>                       ret = p->tcfp_result;
>                       goto inc_drops;
>               }
> +             spin_unlock_bh(&police->tcf_lock);
>       }
>  
>  inc_overlimits:
> -- >8 --
> 
> any feedback is appreciated.
> thanks!
> 

Reply via email to