I diffed the Linux 4.14.211 and 4.15 kernels, and the changes are:
atomic_inc to refcount_inc And on other part of code in ppp_generic.c remove skb_free …. You see diff down : --- linux-4.14.211/drivers/net/ppp/ppp_generic.c 2020-12-08 09:17:35.000000000 +0000 +++ linux-4.15/drivers/net/ppp/ppp_generic.c 2018-01-28 21:20:33.000000000 +0000 @@ -51,6 +51,7 @@ #include <asm/unaligned.h> #include <net/slhc_vj.h> #include <linux/atomic.h> +#include <linux/refcount.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> @@ -84,7 +85,7 @@ struct ppp_file { struct sk_buff_head xq; /* pppd transmit queue */ struct sk_buff_head rq; /* receive queue for pppd */ wait_queue_head_t rwait; /* for poll on reading /dev/ppp */ - atomic_t refcnt; /* # refs (incl /dev/ppp attached) */ + refcount_t refcnt; /* # refs (incl /dev/ppp attached) */ int hdrlen; /* space to leave for headers */ int index; /* interface unit / channel number */ int dead; /* unit/channel has been shut down */ @@ -256,7 +257,7 @@ struct ppp_net { /* Prototypes. */ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf, struct file *file, unsigned int cmd, unsigned long arg); -static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb); +static void ppp_xmit_process(struct ppp *ppp); static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb); static void ppp_push(struct ppp *ppp); static void ppp_channel_push(struct channel *pch); @@ -389,7 +390,7 @@ static int ppp_open(struct inode *inode, /* * This could (should?) be enforced by the permissions on /dev/ppp. 
*/ - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(file->f_cred->user_ns, CAP_NET_ADMIN)) return -EPERM; return 0; } @@ -408,7 +409,7 @@ static int ppp_release(struct inode *unu unregister_netdevice(ppp->dev); rtnl_unlock(); } - if (atomic_dec_and_test(&pf->refcnt)) { + if (refcount_dec_and_test(&pf->refcnt)) { switch (pf->kind) { case INTERFACE: ppp_destroy_interface(PF_TO_PPP(pf)); @@ -512,12 +513,13 @@ static ssize_t ppp_write(struct file *fi goto out; } + skb_queue_tail(&pf->xq, skb); + switch (pf->kind) { case INTERFACE: - ppp_xmit_process(PF_TO_PPP(pf), skb); + ppp_xmit_process(PF_TO_PPP(pf)); break; case CHANNEL: - skb_queue_tail(&pf->xq, skb); ppp_channel_push(PF_TO_CHANNEL(pf)); break; } @@ -880,7 +882,7 @@ static int ppp_unattached_ioctl(struct n mutex_lock(&pn->all_ppp_mutex); ppp = ppp_find_unit(pn, unit); if (ppp) { - atomic_inc(&ppp->file.refcnt); + refcount_inc(&ppp->file.refcnt); file->private_data = &ppp->file; err = 0; } @@ -895,7 +897,7 @@ static int ppp_unattached_ioctl(struct n spin_lock_bh(&pn->all_channels_lock); chan = ppp_find_channel(pn, unit); if (chan) { - atomic_inc(&chan->file.refcnt); + refcount_inc(&chan->file.refcnt); file->private_data = &chan->file; err = 0; } @@ -960,6 +962,8 @@ static __net_exit void ppp_exit_net(stru mutex_destroy(&pn->all_ppp_mutex); idr_destroy(&pn->units_idr); + WARN_ON_ONCE(!list_empty(&pn->all_channels)); + WARN_ON_ONCE(!list_empty(&pn->new_channels)); } static struct pernet_operations ppp_net_ops = { @@ -1263,8 +1267,8 @@ ppp_start_xmit(struct sk_buff *skb, stru put_unaligned_be16(proto, pp); skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev))); - ppp_xmit_process(ppp, skb); - + skb_queue_tail(&ppp->file.xq, skb); + ppp_xmit_process(ppp); return NETDEV_TX_OK; outf: @@ -1349,7 +1353,7 @@ static int ppp_dev_init(struct net_devic * that ppp_destroy_interface() won't run before the device gets * unregistered. 
*/ - atomic_inc(&ppp->file.refcnt); + refcount_inc(&ppp->file.refcnt); return 0; } @@ -1378,7 +1382,7 @@ static void ppp_dev_priv_destructor(stru struct ppp *ppp; ppp = netdev_priv(dev); - if (atomic_dec_and_test(&ppp->file.refcnt)) + if (refcount_dec_and_test(&ppp->file.refcnt)) ppp_destroy_interface(ppp); } @@ -1416,14 +1420,13 @@ static void ppp_setup(struct net_device */ /* Called to do any work queued up on the transmit side that can now be done */ -static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) +static void __ppp_xmit_process(struct ppp *ppp) { + struct sk_buff *skb; + ppp_xmit_lock(ppp); if (!ppp->closing) { ppp_push(ppp); - - if (skb) - skb_queue_tail(&ppp->file.xq, skb); while (!ppp->xmit_pending && (skb = skb_dequeue(&ppp->file.xq))) ppp_send_frame(ppp, skb); @@ -1433,13 +1436,11 @@ static void __ppp_xmit_process(struct pp netif_wake_queue(ppp->dev); else netif_stop_queue(ppp->dev); - } else { - kfree_skb(skb); } ppp_xmit_unlock(ppp); } -static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb) +static void ppp_xmit_process(struct ppp *ppp) { local_bh_disable(); @@ -1447,7 +1448,7 @@ static void ppp_xmit_process(struct ppp goto err; (*this_cpu_ptr(ppp->xmit_recursion))++; - __ppp_xmit_process(ppp, skb); + __ppp_xmit_process(ppp); (*this_cpu_ptr(ppp->xmit_recursion))--; local_bh_enable(); @@ -1457,8 +1458,6 @@ static void ppp_xmit_process(struct ppp err: local_bh_enable(); - kfree_skb(skb); - if (net_ratelimit()) netdev_err(ppp->dev, "recursion detected\n"); } @@ -1943,7 +1942,7 @@ static void __ppp_channel_push(struct ch if (skb_queue_empty(&pch->file.xq)) { ppp = pch->ppp; if (ppp) - __ppp_xmit_process(ppp, NULL); + __ppp_xmit_process(ppp); } } @@ -2682,7 +2681,7 @@ ppp_unregister_channel(struct ppp_channe pch->file.dead = 1; wake_up_interruptible(&pch->file.rwait); - if (atomic_dec_and_test(&pch->file.refcnt)) + if (refcount_dec_and_test(&pch->file.refcnt)) ppp_destroy_channel(pch); } @@ -3052,7 +3051,7 @@ 
init_ppp_file(struct ppp_file *pf, int k pf->kind = kind; skb_queue_head_init(&pf->xq); skb_queue_head_init(&pf->rq); - atomic_set(&pf->refcnt, 1); + refcount_set(&pf->refcnt, 1); init_waitqueue_head(&pf->rwait); } @@ -3162,15 +3161,6 @@ ppp_connect_channel(struct channel *pch, goto outl; ppp_lock(ppp); - spin_lock_bh(&pch->downl); - if (!pch->chan) { - /* Don't connect unregistered channels */ - spin_unlock_bh(&pch->downl); - ppp_unlock(ppp); - ret = -ENOTCONN; - goto outl; - } - spin_unlock_bh(&pch->downl); if (pch->file.hdrlen > ppp->file.hdrlen) ppp->file.hdrlen = pch->file.hdrlen; hdrlen = pch->file.hdrlen + 2; /* for protocol bytes */ @@ -3179,7 +3169,7 @@ ppp_connect_channel(struct channel *pch, list_add_tail(&pch->clist, &ppp->channels); ++ppp->n_channels; pch->ppp = ppp; - atomic_inc(&ppp->file.refcnt); + refcount_inc(&ppp->file.refcnt); ppp_unlock(ppp); ret = 0; @@ -3210,7 +3200,7 @@ ppp_disconnect_channel(struct channel *p if (--ppp->n_channels == 0) wake_up_interruptible(&ppp->file.rwait); ppp_unlock(ppp); - if (atomic_dec_and_test(&ppp->file.refcnt)) + if (refcount_dec_and_test(&ppp->file.refcnt)) ppp_destroy_interface(ppp); err = 0; } > On 9 Dec 2020, at 18:57, Martin Zaharinov <micro...@gmail.com> wrote: > > Hi Nault > > > >> On 9 Dec 2020, at 18:40, Guillaume Nault <gna...@redhat.com> wrote: >> >> On Wed, Dec 09, 2020 at 04:47:52PM +0200, Martin Zaharinov wrote: >>> Hi All >>> >>> I have problem with latest kernel release >>> And the problem is base on this late problem : >>> >>> >>> https://www.mail-archive.com/search?l=netdev@vger.kernel.org&q=subject:%22Re%5C%3A+ppp%5C%2Fpppoe%2C+still+panic+4.15.3+in+ppp_push%22&o=newest&f=1 >>> >>> I have same problem in kernel 5.6 > now I use kernel 5.9.13 and have same >>> problem. 
>>> >>> >>> In kernel 5.9.13 now don’t have any crashes in dimes but in one moment >>> accel service stop with defunct and in log have many of this line : >>> >>> >>> error: vlan608: ioctl(PPPIOCCONNECT): Transport endpoint is not connected >>> error: vlan617: ioctl(PPPIOCCONNECT): Transport endpoint is not connected >>> error: vlan679: ioctl(PPPIOCCONNECT): Transport endpoint is not connected >>> >>> In one moment connected user bump double or triple and after that service >>> defunct and need wait to drop all session to start . >>> >>> I talk with accel-ppp team and they said this is kernel related problem and >>> to back to kernel 4.14 there is not this problem. >>> >>> Problem is come after kernel 4.15 > and not have solution to this moment. >> >> I'm sorry, I don't understand. >> Do you mean that v4.14 worked fine (no crash, no ioctl() error)? >> Did the problem start appearing in v4.15? Or did v4.15 work and the >> problem appeared in v4.16? > > In Telegram group I talk with Sergey and Dimka and told my the problem is > come after changes from 4.14 to 4.15 > Sergey write this : "as I know, there was a similar issue in kernel 4.15 so > maybe it is still not fixed” > > I don’t have options to test with this old kernel 4.14.xxx i don’t have > support for them. > > >> >>> Please help to find the problem. >>> >>> Last time in link I see is make changes in ppp_generic.c >>> >>> ppp_lock(ppp); >>> spin_lock_bh(&pch->downl); >>> if (!pch->chan) { >>> /* Don't connect unregistered channels */ >>> spin_unlock_bh(&pch->downl); >>> ppp_unlock(ppp); >>> ret = -ENOTCONN; >>> goto outl; >>> } >>> spin_unlock_bh(&pch->downl); >>> >>> >>> But this fix only to don’t display error and freeze system >>> The problem is stay and is to big. >> >> Do you use accel-ppp's unit-cache option? Does the problem go away if >> you stop using it? >> > > No I don’t use unit-cache , if I set unit-cache accel-ppp defunct same but > user Is connect and disconnet more fast. 
> > The problem is same with unit and without . > Only after this patch I don’t see error in dimes but this is not solution. > In network have customer what have power cut problem, when drop 600 user and > back Is normal but in this moment kernel is locking and start to make this : > sessions: > starting: 4235 > active: 3882 > finishing: 378 > > The problem is starting session is not real user normal user in this server > is ~4k customers . > > I use pppd_compat . > > Any idea ? > >>> >>> Please help to fix. > Martin