There was a previous attempt to use xmit_more, but the change had to be reverted because under load sometimes a transmit timeout occurred [0]. Maybe this was caused by a missing memory barrier, the new attempt keeps the memory barrier before the call to netif_stop_queue like it is used by the driver as of today. The new attempt also changes the order of some calls as suggested by Eric.
[0] https://lkml.org/lkml/2019/2/10/39 Signed-off-by: Heiner Kallweit <hkallwe...@gmail.com> --- drivers/net/ethernet/realtek/r8169_main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 864ca529d..d9261e68f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5637,6 +5637,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct device *d = tp_to_dev(tp); dma_addr_t mapping; u32 opts[2], len; + bool stop_queue; + bool door_bell; int frags; if (unlikely(!rtl_tx_slots_avail(tp, skb_shinfo(skb)->nr_frags))) { @@ -5680,13 +5682,13 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); - netdev_sent_queue(dev, skb->len); - skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ dma_wmb(); + door_bell = __netdev_sent_queue(dev, skb->len, netdev_xmit_more()); + txd->opts1 = rtl8169_get_txd_opts1(opts[0], len, entry); /* Force all memory writes to complete before notifying device */ @@ -5694,14 +5696,19 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, tp->cur_tx += frags + 1; - RTL_W8(tp, TxPoll, NPQ); - - if (!rtl_tx_slots_avail(tp, MAX_SKB_FRAGS)) { + stop_queue = !rtl_tx_slots_avail(tp, MAX_SKB_FRAGS); + if (unlikely(stop_queue)) { /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must * not miss a ring update when it notices a stopped queue. */ smp_wmb(); netif_stop_queue(dev); + } + + if (door_bell) + RTL_W8(tp, TxPoll, NPQ); + + if (unlikely(stop_queue)) { /* Sync with rtl_tx: * - publish queue status and cur_tx ring index (write barrier) * - refresh dirty_tx ring index (read barrier). -- 2.22.0