this makes ifq_start try to wait for 4 packets before calling if->if_qstart.
this is based on work sephe did in dragonflybsd, and described in a comment in their sys/net/if.c. there's a link to it here: https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/sys/net/if.c#L2976-L2987 the tests we've done generally show a performance bump, but otherwise no degradation in performance. the mechanism for bundling packets is to but schedule a task to service the queue later. if 4 packets get accumulated before the task runs, it's cancelled and the code runs the start routine directly. the most significant difference this implementation has to the dfly one is that our ifqs dont (currently) track the number of bytes on the q. dfly sends if it can bundle 4 packets, or up to mtu bytes worth of packets. this implementation only looks at the number of packets. the taskq the ifq uses is one of the softnets, which is assigned when the ifq is initted and unconditionally used during the ifq's lifetime. because ifq work could now be pending in a softnet taskq, ifq_barrier also needs to put a barrier in the taskq. this is implemented using taskq_barrier, which i wrote ages ago but didn't have a use case for at the time. tests? ok? Index: share/man/man9/task_add.9 =================================================================== RCS file: /cvs/src/share/man/man9/task_add.9,v retrieving revision 1.16 diff -u -p -r1.16 task_add.9 --- share/man/man9/task_add.9 14 Sep 2015 15:14:55 -0000 1.16 +++ share/man/man9/task_add.9 10 Nov 2017 00:45:41 -0000 @@ -20,6 +20,7 @@ .Sh NAME .Nm taskq_create , .Nm taskq_destroy , +.Nm taskq_barrier , .Nm task_set , .Nm task_add , .Nm task_del , @@ -37,6 +38,8 @@ .Ft void .Fn taskq_destroy "struct taskq *tq" .Ft void +.Fn taskq_barrier "struct taskq *tq" +.Ft void .Fn task_set "struct task *t" "void (*fn)(void *)" "void *arg" .Ft int .Fn task_add "struct taskq *tq" "struct task *t" @@ -88,6 +91,15 @@ Calling against the system taskq is an error and will lead to undefined behaviour or a system fault. .Pp +.Fn taskq_barrier +guarantees that any task that was running on the +.Fa tq +taskq when the barrier was called has finished by the time the barrier +returns. +.Fn taskq_barrier +is only supported on taskqs serviced by 1 thread, +and may not be called by a task running in the specified taskq. +.Pp It is the responsibility of the caller to provide the .Fn task_set , .Fn task_add , @@ -163,6 +175,8 @@ argument given in and .Fn taskq_destroy can be called during autoconf, or from process context. +.Fn taskq_barrier +can be called from process context. .Fn task_set , .Fn task_add , and Index: sys/sys/task.h =================================================================== RCS file: /cvs/src/sys/sys/task.h,v retrieving revision 1.11 diff -u -p -r1.11 task.h --- sys/sys/task.h 7 Jun 2016 07:53:33 -0000 1.11 +++ sys/sys/task.h 10 Nov 2017 00:45:41 -0000 @@ -43,6 +43,7 @@ extern struct taskq *const systqmp; struct taskq *taskq_create(const char *, unsigned int, int, unsigned int); void taskq_destroy(struct taskq *); +void taskq_barrier(struct taskq *); void task_set(struct task *, void (*)(void *), void *); int task_add(struct taskq *, struct task *); Index: sys/kern/kern_task.c =================================================================== RCS file: /cvs/src/sys/kern/kern_task.c,v retrieving revision 1.20 diff -u -p -r1.20 kern_task.c --- sys/kern/kern_task.c 30 Oct 2017 14:01:42 -0000 1.20 +++ sys/kern/kern_task.c 10 Nov 2017 00:45:41 -0000 @@ -22,6 +22,7 @@ #include <sys/mutex.h> #include <sys/kthread.h> #include <sys/task.h> +#include <sys/proc.h> #define TASK_ONQUEUE 1 @@ -68,6 +69,7 @@ struct taskq *const systqmp = &taskq_sys void taskq_init(void); /* called in init_main.c */ void taskq_create_thread(void *); +void taskq_barrier_task(void *); int taskq_sleep(const volatile void *, struct mutex *, int, const char *, int); int taskq_next_work(struct taskq *, struct task *, sleepfn); @@ -176,6 +178,30 @@ taskq_create_thread(void *arg) } while (tq->tq_running < tq->tq_nthreads); mtx_leave(&tq->tq_mtx); +} + +void +taskq_barrier(struct taskq *tq) +{ + struct sleep_state sls; + unsigned int notdone = 1; + struct task t = TASK_INITIALIZER(taskq_barrier_task, ¬done); + + task_add(tq, &t); + + while (notdone) { + sleep_setup(&sls, ¬done, PWAIT, "tqbar"); + sleep_finish(&sls, notdone); + } +} + +void +taskq_barrier_task(void *p) +{ + unsigned int *notdone = p; + + *notdone = 0; + wakeup_one(notdone); } void Index: sys/net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.12 diff -u -p -r1.12 ifq.c --- sys/net/ifq.c 2 Jun 2017 00:07:12 -0000 1.12 +++ sys/net/ifq.c 10 Nov 2017 00:45:41 -0000 @@ -64,9 +64,16 @@ struct priq { void ifq_start_task(void *); void ifq_restart_task(void *); void ifq_barrier_task(void *); +void ifq_bundle_task(void *); #define TASK_ONQUEUE 0x1 +static inline void +ifq_run_start(struct ifqueue *ifq) +{ + ifq_serialize(ifq, &ifq->ifq_start); +} + void ifq_serialize(struct ifqueue *ifq, struct task *t) { @@ -108,6 +115,16 @@ ifq_is_serialized(struct ifqueue *ifq) } void +ifq_start(struct ifqueue *ifq) +{ + if (ifq_len(ifq) >= 4) { + task_del(ifq->ifq_softnet, &ifq->ifq_bundle); + ifq_run_start(ifq); + } else + task_add(ifq->ifq_softnet, &ifq->ifq_bundle); +} + +void ifq_start_task(void *p) { struct ifqueue *ifq = p; @@ -131,6 +148,14 @@ ifq_restart_task(void *p) } void +ifq_bundle_task(void *p) +{ + struct ifqueue *ifq = p; + + ifq_run_start(ifq); +} + +void ifq_barrier(struct ifqueue *ifq) { struct sleep_state sls; @@ -140,6 +165,9 @@ ifq_barrier(struct ifqueue *ifq) /* this should only be called from converted drivers */ KASSERT(ISSET(ifq->ifq_if->if_xflags, IFXF_MPSAFE)); + if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle)) + taskq_barrier(ifq->ifq_softnet); + if (ifq->ifq_serializer == NULL) return; @@ -168,6 +196,7 @@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; + ifq->ifq_softnet = net_tq(ifp->if_index + idx); ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -189,6 +218,7 @@ ifq_init(struct ifqueue *ifq, struct ifn mtx_init(&ifq->ifq_task_mtx, IPL_NET); TAILQ_INIT(&ifq->ifq_task_list); ifq->ifq_serializer = NULL; + task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq); task_set(&ifq->ifq_start, ifq_start_task, ifq); task_set(&ifq->ifq_restart, ifq_restart_task, ifq); @@ -239,6 +269,8 @@ void ifq_destroy(struct ifqueue *ifq) { struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + + ifq_barrier(ifq); /* ensure nothing is running with the ifq */ /* don't need to lock because this is the last use of the ifq */ Index: sys/net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.13 diff -u -p -r1.13 ifq.h --- sys/net/ifq.h 3 May 2017 20:55:29 -0000 1.13 +++ sys/net/ifq.h 10 Nov 2017 00:45:41 -0000 @@ -25,6 +25,7 @@ struct ifq_ops; struct ifqueue { struct ifnet *ifq_if; + struct taskq *ifq_softnet; union { void *_ifq_softc; /* @@ -57,6 +58,7 @@ struct ifqueue { struct mutex ifq_task_mtx; struct task_list ifq_task_list; void *ifq_serializer; + struct task ifq_bundle; /* work to be serialised */ struct task ifq_start; @@ -378,6 +380,7 @@ void ifq_init(struct ifqueue *, struct void ifq_attach(struct ifqueue *, const struct ifq_ops *, void *); void ifq_destroy(struct ifqueue *); int ifq_enqueue(struct ifqueue *, struct mbuf *); +void ifq_start(struct ifqueue *); struct mbuf *ifq_deq_begin(struct ifqueue *); void ifq_deq_commit(struct ifqueue *, struct mbuf *); void ifq_deq_rollback(struct ifqueue *, struct mbuf *); @@ -411,12 +414,6 @@ static inline unsigned int ifq_is_oactive(struct ifqueue *ifq) { return (ifq->ifq_oactive); -} - -static inline void -ifq_start(struct ifqueue *ifq) -{ - ifq_serialize(ifq, &ifq->ifq_start); } static inline void
