Hello,

I discovered that the Lantiq xrx200 lacks support for interrupts on the secondary VPE, and I've managed to add this functionality to the kernel. The second ICU controller lives at base address 0x1f880300 and works in the same way as the first one. Tested with the 4.14.93 kernel; the patches (otherwise untested) are in the attachment and on the OpenWrt forum:

https://forum.openwrt.org/t/xrx200-irq-balancing-between-vpes/29732/11

My second submission is a backport of the xrx200 ethernet driver from the vanilla v5 kernel, patched with the switch and PHY functions. Using kernel NAPI polling seems to increase the network throughput considerably. While first trying to increase the throughput I found an inefficient DMA burst size setting; my patch raises this value. These two patches were tested with the 4.14.93 kernel as well; there is a forum thread:

https://forum.openwrt.org/t/how-can-we-make-the-lantiq-xrx200-devices-faster/9724/30

RFC for any parallel development info (will the next version of the ethernet driver be based on the vanilla kernel version?), ideas, testing ...

best regards,
Petr Cvek
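PS: For clarity, a minimal sketch of the ICU layout this series assumes. The 0x1f880300 base comes from the mail above and the 0x28 per-module stride from the vr9.dtsi hunk below; the macro and helper names are illustrative only, not part of the patches:

/* Sketch only: the two VR9 ICU banks as the patches assume them.
 * ICU0 at 0x1f880200 serves VPE0, ICU1 at 0x1f880300 serves VPE1;
 * each bank has five interrupt modules (IM0..IM4), 0x28 bytes apart.
 */
#define EXAMPLE_ICU0_BASE	0x1f880200UL	/* existing controller */
#define EXAMPLE_ICU1_BASE	0x1f880300UL	/* added by this series */
#define EXAMPLE_IM_STRIDE	0x28UL		/* one register block per IM */

static inline unsigned long example_icu_im_base(int vpe, int im)
{
	unsigned long base = vpe ? EXAMPLE_ICU1_BASE : EXAMPLE_ICU0_BASE;

	return base + im * EXAMPLE_IM_STRIDE;	/* im = 0..4 */
}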
--- ./a/arch/mips/lantiq/irq.c	2019-01-30 02:20:35.739994259 +0100
+++ ./b/arch/mips/lantiq/irq.c	2019-01-30 04:30:31.152538191 +0100
@@ -49,8 +49,8 @@
  */
 #define LTQ_ICU_EBU_IRQ		22
 
-#define ltq_icu_w32(m, x, y)	ltq_w32((x), ltq_icu_membase[m] + (y))
-#define ltq_icu_r32(m, x)	ltq_r32(ltq_icu_membase[m] + (x))
+#define ltq_icu_w32(vpe, m, x, y)	ltq_w32((x), ltq_icu_membase[vpe][m] + (y))
+#define ltq_icu_r32(vpe, m, x)	ltq_r32(ltq_icu_membase[vpe][m] + (x))
 
 #define ltq_eiu_w32(x, y)	ltq_w32((x), ltq_eiu_membase + (y))
 #define ltq_eiu_r32(x)		ltq_r32(ltq_eiu_membase + (x))
@@ -62,11 +62,50 @@
 /* we have a cascade of 8 irqs */
 #define MIPS_CPU_IRQ_CASCADE		8
 
+#define MAX_VPES 2
+
+/*
+ * Convenience Macro. Should be somewhere generic.
+ */
+#define get_current_vpe() \
+	((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE)
+
+
+#if 1 // TODO debug? SMP cores can access at the same time
+#if defined(CONFIG_SMP)
+#define LOCK_VPE() \
+	local_irq_save(flags); \
+	mtflags = dmt()
+
+#define UNLOCK_VPE() \
+	emt(mtflags); \
+	local_irq_restore(flags)
+
+#define LOCK_CORE() \
+	local_irq_save(flags); \
+	mtflags = dvpe()
+
+#define UNLOCK_CORE() \
+	evpe(mtflags); \
+	local_irq_restore(flags)
+#else /* CONFIG_SMP*/
+#define LOCK_VPE()
+#define UNLOCK_VPE()
+#endif /* CONFIG_SMP */
+
+#else // TODO debug future delete
+#define LOCK_VPE() (void)flags;(void)mtflags
+#define UNLOCK_VPE()
+#define LOCK_CORE() (void)flags;(void)mtflags
+#define UNLOCK_CORE()
+#endif
+
 static int exin_avail;
 static u32 ltq_eiu_irq[MAX_EIU];
-static void __iomem *ltq_icu_membase[MAX_IM];
+static void __iomem *ltq_icu_membase[MAX_VPES][MAX_IM];
 static void __iomem *ltq_eiu_membase;
 static struct irq_domain *ltq_domain;
+static DEFINE_SPINLOCK(ltq_eiu_lock);
 static int ltq_perfcount_irq;
 
 int ltq_eiu_get_irq(int exin)
@@ -81,9 +120,14 @@
 	u32 ier = LTQ_ICU_IM0_IER;
 	int offset = d->hwirq - MIPS_CPU_IRQ_CASCADE;
 	int im = offset / INT_NUM_IM_OFFSET;
-
+	int vpe = get_current_vpe();
+#if defined(CONFIG_SMP)
+	unsigned long flags, mtflags;
+#endif
 	offset %= INT_NUM_IM_OFFSET;
-	ltq_icu_w32(im, ltq_icu_r32(im, ier) & ~BIT(offset), ier);
+	LOCK_VPE();
+	ltq_icu_w32(vpe, im, ltq_icu_r32(vpe, im, ier) & ~BIT(offset), ier);
+	UNLOCK_VPE();
 }
 
 void ltq_mask_and_ack_irq(struct irq_data *d)
@@ -92,10 +136,16 @@
 	u32 isr = LTQ_ICU_IM0_ISR;
 	int offset = d->hwirq - MIPS_CPU_IRQ_CASCADE;
 	int im = offset / INT_NUM_IM_OFFSET;
+	int vpe = get_current_vpe();
+#if defined(CONFIG_SMP)
+	unsigned long flags, mtflags;
+#endif
 
 	offset %= INT_NUM_IM_OFFSET;
-	ltq_icu_w32(im, ltq_icu_r32(im, ier) & ~BIT(offset), ier);
-	ltq_icu_w32(im, BIT(offset), isr);
+	LOCK_VPE();
+	ltq_icu_w32(vpe, im, ltq_icu_r32(vpe, im, ier) & ~BIT(offset), ier);
+	ltq_icu_w32(vpe, im, BIT(offset), isr);
+	UNLOCK_VPE();
 }
 EXPORT_SYMBOL(ltq_mask_and_ack_irq);
 
@@ -104,24 +154,43 @@
 	u32 isr = LTQ_ICU_IM0_ISR;
 	int offset = d->hwirq - MIPS_CPU_IRQ_CASCADE;
 	int im = offset / INT_NUM_IM_OFFSET;
+	int vpe = get_current_vpe();
+#if defined(CONFIG_SMP)
+	unsigned long flags, mtflags;
+#endif
 
 	offset %= INT_NUM_IM_OFFSET;
-	ltq_icu_w32(im, BIT(offset), isr);
+	LOCK_VPE();
+	ltq_icu_w32(vpe, im, BIT(offset), isr);
+	UNLOCK_VPE();
 }
 
 void ltq_enable_irq(struct irq_data *d)
 {
 	u32 ier = LTQ_ICU_IM0_IER;
+//	u32 isr = LTQ_ICU_IM0_ISR;
 	int offset = d->hwirq - MIPS_CPU_IRQ_CASCADE;
 	int im = offset / INT_NUM_IM_OFFSET;
+	int vpe = get_current_vpe();
+#if defined(CONFIG_SMP)
+	unsigned long flags, mtflags;
+#endif
 
 	offset %= INT_NUM_IM_OFFSET;
-	ltq_icu_w32(im, ltq_icu_r32(im, ier) | BIT(offset), ier);
+	LOCK_VPE();
+
+	// TODO present in the v3.10 kernel, system is OK without it
+	/* Bug fix for fake interrupt */
+	//ltq_icu_w32(vpe, im, BIT(offset), isr);
+
+	ltq_icu_w32(vpe, im, ltq_icu_r32(vpe, im, ier) | BIT(offset), ier);
+	UNLOCK_VPE();
 }
 
 static int ltq_eiu_settype(struct irq_data *d, unsigned int type)
 {
 	int i;
+	unsigned long flags;
 
 	for (i = 0; i < exin_avail; i++) {
 		if (d->hwirq == ltq_eiu_irq[i]) {
@@ -158,8 +227,11 @@
 			if (edge)
 				irq_set_handler(d->hwirq, handle_edge_irq);
 
-			ltq_eiu_w32(ltq_eiu_r32(LTQ_EIU_EXIN_C) |
-				(val << (i * 4)), LTQ_EIU_EXIN_C);
+			// v3.10 kernel has this atomic for SMP
+			spin_lock_irqsave(&ltq_eiu_lock, flags);
+			ltq_eiu_w32((ltq_eiu_r32(LTQ_EIU_EXIN_C) & (~ (val << (i * 4)))) |
+				(val << (i * 4)), LTQ_EIU_EXIN_C);
+			spin_unlock_irqrestore(&ltq_eiu_lock, flags);
 		}
 	}
@@ -203,6 +275,36 @@
 	}
 }
 
+#if defined(CONFIG_MIPS_MT_SMP)
+static int ltq_icu_irq_set_affinity(struct irq_data *d,
+				    const struct cpumask *cpumask, bool force)
+{
+	int cpu;
+	unsigned long flags;
+	unsigned int mtflags;
+	u32 ier = LTQ_ICU_IM0_IER;
+	int offset = d->hwirq - MIPS_CPU_IRQ_CASCADE;
+	int im = offset / INT_NUM_IM_OFFSET;
+
+	LOCK_CORE();
+
+	offset %= INT_NUM_IM_OFFSET;
+
+	for_each_online_cpu(cpu) {
+		if (!cpumask_test_cpu(cpu, cpumask))
+			ltq_icu_w32(cpu, im, ltq_icu_r32(cpu, im, ier) & ~BIT(offset), ier);
+		else
+			ltq_icu_w32(cpu, im, ltq_icu_r32(cpu, im, ier) | BIT(offset), ier);
+	}
+
+	//v4 kernel requires this, taken from some other SMP board
+	irq_data_update_effective_affinity(d, cpumask);
+
+	UNLOCK_CORE();
+	return IRQ_SET_MASK_OK;
+}
+#endif
+
 static struct irq_chip ltq_irq_type = {
 	.name = "icu",
 	.irq_enable = ltq_enable_irq,
@@ -211,6 +313,9 @@
 	.irq_ack = ltq_ack_irq,
 	.irq_mask = ltq_disable_irq,
 	.irq_mask_ack = ltq_mask_and_ack_irq,
+#if defined(CONFIG_MIPS_MT_SMP)
+	.irq_set_affinity = ltq_icu_irq_set_affinity,
+#endif
 };
 
 static struct irq_chip ltq_eiu_type = {
@@ -231,8 +336,10 @@
 	int module = irq_desc_get_irq(desc) - 2;
 	u32 irq;
 	int hwirq;
+	int vpe = get_current_vpe();
 
-	irq = ltq_icu_r32(module, LTQ_ICU_IM0_IOSR);
+	//v3.10 has lock_vpe around this, is it really necessary?
+	irq = ltq_icu_r32(vpe, module, LTQ_ICU_IM0_IOSR);
 	if (irq == 0)
 		return;
@@ -275,29 +382,60 @@
 int __init icu_of_init(struct device_node *node, struct device_node *parent)
 {
 	struct device_node *eiu_node;
+#if defined(CONFIG_MIPS_MT_SMP)
+	struct device_node *icu1_node;
+#endif
 	struct resource res;
 	int i, ret;
 
 	for (i = 0; i < MAX_IM; i++) {
 		if (of_address_to_resource(node, i, &res))
-			panic("Failed to get icu memory range");
+			panic("Failed to get icu0 memory range");
 
 		if (!request_mem_region(res.start, resource_size(&res),
					res.name))
-			pr_err("Failed to request icu memory");
+			pr_err("Failed to request icu0 memory");
+
+		if (of_node_cmp(node->name, "icu0") == 0) {
+			ltq_icu_membase[0][i] = ioremap_nocache(res.start,
+						resource_size(&res));
+		}
+		if (!ltq_icu_membase[0][i])
+			panic("Failed to remap icu0 memory");
+	}
+
+#if defined(CONFIG_MIPS_MT_SMP)
+	// TODO add to the icu0 reg array or new node, what if requires a new node, TODO merge with icu0 node?
+
+	icu1_node = of_find_compatible_node(NULL, NULL, "lantiq,icu1");
+	for (i = 0; i < MAX_IM; i++) {
+		if (of_address_to_resource(icu1_node, i, &res))
+			panic("Failed to get icu1 memory range");
 
-		ltq_icu_membase[i] = ioremap_nocache(res.start,
+		if (request_mem_region(res.start, resource_size(&res),
+				       res.name) < 0)
+			pr_err("Failed to request icu1 memory");
+
+		if (of_node_cmp(icu1_node->name, "icu1") == 0){
+			ltq_icu_membase[1][i] = ioremap_nocache(res.start,
 						resource_size(&res));
-		if (!ltq_icu_membase[i])
-			panic("Failed to remap icu memory");
+		}
+
+		if (!ltq_icu_membase[1][i])
+			panic("Failed to remap icu1 memory");
 	}
+#endif
 
 	/* turn off all irqs by default */
 	for (i = 0; i < MAX_IM; i++) {
 		/* make sure all irqs are turned off by default */
-		ltq_icu_w32(i, 0, LTQ_ICU_IM0_IER);
+		ltq_icu_w32(0, i, 0, LTQ_ICU_IM0_IER);
 		/* clear all possibly pending interrupts */
-		ltq_icu_w32(i, ~0, LTQ_ICU_IM0_ISR);
+		ltq_icu_w32(0, i, ~0, LTQ_ICU_IM0_ISR);
+#if defined(CONFIG_MIPS_MT_SMP)
+		ltq_icu_w32(1, i, 0, LTQ_ICU_IM0_IER);
+		ltq_icu_w32(1, i, ~0, LTQ_ICU_IM0_ISR);
+#endif
 	}
 
 	mips_cpu_irq_init();
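A note on usage: with the .irq_set_affinity callback above in place, an IRQ can be moved to VPE1 from userspace (e.g. echo 2 > /proc/irq/<n>/smp_affinity) or pinned from driver code with the stock kernel helper. A hedged sketch; nothing below is part of the patch, and the IRQ arguments are made up:

#include <linux/interrupt.h>
#include <linux/cpumask.h>

/* Sketch: spread two hypothetical DMA IRQs across the VPEs once the
 * ICU driver implements .irq_set_affinity.
 */
static int example_spread_irqs(unsigned int rx_irq, unsigned int tx_irq)
{
	int ret;

	/* keep RX processing on VPE0 */
	ret = irq_set_affinity_hint(rx_irq, cpumask_of(0));
	if (ret)
		return ret;

	/* move TX completions to VPE1 */
	return irq_set_affinity_hint(tx_irq, cpumask_of(1));
}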
--- ./a/arch/mips/kernel/smp-mt.c	2019-01-26 09:37:07.000000000 +0100
+++ ./b/arch/mips/kernel/smp-mt.c	2019-01-30 04:41:49.864053129 +0100
@@ -125,6 +125,8 @@
 					 STATUSF_IP6 | STATUSF_IP7);
 	else
 		change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 |
+				 STATUSF_IP2 | STATUSF_IP3 |
+				 STATUSF_IP4 | STATUSF_IP5 |
 				 STATUSF_IP6 | STATUSF_IP7);
 }
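For context: on this SoC the five ICU interrupt modules cascade into the MIPS CPU IRQ lines 2..6 (the dispatch handler in irq.c computes module = irq - 2), IP0/IP1 carry the SMP IPIs and IP7 the CP0 timer. The secondary-VPE init path previously unmasked only IP0/IP1/IP6/IP7, so interrupts routed to IM0..IM3 could never fire on VPE1. A commented restatement of the new mask (the IP mapping is my reading of irq.c, not quoted from any header):

/* Assumed IP-bit usage on the xrx200:
 *   IP0, IP1   software interrupts, used as SMP IPIs
 *   IP2..IP6   ICU cascade inputs, interrupt modules IM0..IM4
 *   IP7        CP0 count/compare timer
 */
change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 |
			 STATUSF_IP2 | STATUSF_IP3 |
			 STATUSF_IP4 | STATUSF_IP5 |
			 STATUSF_IP6 | STATUSF_IP7);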
--- ./a/arch/mips/boot/dts/vr9.dtsi	2019-01-30 01:23:31.159807402 +0100
+++ ./b/arch/mips/boot/dts/vr9.dtsi	2019-01-30 10:51:43.737591528 +0100
@@ -34,7 +34,7 @@
 		reg = <0x1f800000 0x800000>;
 		ranges = <0x0 0x1f800000 0x7fffff>;
 
-		icu0: icu@80200 {
+		icu0: icu0@80200 {
 			#interrupt-cells = <1>;
 			interrupt-controller;
 			compatible = "lantiq,icu";
@@ -44,6 +44,18 @@
 				0x80278 0x28
 				0x802a0 0x28>;
 		};
+
+//pc2005 irq for VPE1
+		icu1: icu1@80300 {
+			#interrupt-cells = <1>;
+//			interrupt-controller;
+			compatible = "lantiq,icu1";
+			reg = <0x80300 0x28
+				0x80328 0x28
+				0x80350 0x28
+				0x80378 0x28
+				0x803a0 0x28>;
+		};
 
 		watchdog@803f0 {
 			compatible = "lantiq,xrx100-wdt",
				"lantiq,xrx100-wdt";
--- ./a/drivers/net/ethernet/lantiq_xrx200.c	2019-01-30 02:20:35.780993746 +0100
+++ ./b/drivers/net/ethernet/lantiq_xrx200.c	2019-01-30 08:53:38.080174436 +0100
@@ -39,12 +39,6 @@
 #define SW_POLLING
 #define SW_ROUTING
 
-#ifdef SW_ROUTING
-#define XRX200_MAX_DEV		2
-#else
-#define XRX200_MAX_DEV		1
-#endif
-
 #define XRX200_MAX_VLAN		64
 #define XRX200_PCE_ACTVLAN_IDX	0x01
 #define XRX200_PCE_VLANMAP_IDX	0x02
@@ -207,46 +201,42 @@
 	int refcount;
 	int tx_free;
 
-	struct net_device dummy_dev;
-	struct net_device *devs[XRX200_MAX_DEV];
-	struct tasklet_struct tasklet;
 	struct napi_struct napi;
 	struct ltq_dma_channel dma;
 	struct sk_buff *skb[LTQ_DESC_NUM];
 
+	struct xrx200_priv *priv;
 	spinlock_t lock;
 };
 
-struct xrx200_hw {
-	struct clk *clk;
-	struct mii_bus *mii_bus;
-
-	struct xrx200_chan chan[XRX200_MAX_DMA];
-	u16 vlan_vid[XRX200_MAX_VLAN];
-	u16 vlan_port_map[XRX200_MAX_VLAN];
-
-	struct net_device *devs[XRX200_MAX_DEV];
-	int num_devs;
-
-	int port_map[XRX200_MAX_PORT];
-	unsigned short wan_map;
-
-	struct switch_dev swdev;
-};
-
 struct xrx200_priv {
 	struct net_device_stats stats;
 	int id;
+
+	struct clk *clk;
+	struct xrx200_chan chan_tx;
+	struct xrx200_chan chan_rx;
+	struct net_device *net_dev;
+	struct device *dev;
+
 	struct xrx200_port port[XRX200_MAX_PORT];
 	int num_port;
 	bool wan;
 	bool sw;
-	unsigned short port_map;
+	unsigned short d_port_map;
 	unsigned char mac[6];
 
-	struct xrx200_hw *hw;
+	struct mii_bus *mii_bus;
+
+	u16 vlan_vid[XRX200_MAX_VLAN];
+	u16 vlan_port_map[XRX200_MAX_VLAN];
+
+	int port_map[XRX200_MAX_PORT];
+	unsigned short wan_map;
+
+	struct switch_dev swdev;
 };
 
 static __iomem void *xrx200_switch_membase;
@@ -470,14 +460,14 @@
 }
 
 // swconfig interface
-static void xrx200_hw_init(struct xrx200_hw *hw);
+static void xrx200_hw_init(struct xrx200_priv *priv);
 
 // global
 static int xrx200sw_reset_switch(struct switch_dev *dev)
 {
-	struct xrx200_hw *hw = container_of(dev, struct xrx200_hw, swdev);
+	struct xrx200_priv *priv = container_of(dev, struct xrx200_priv, swdev);
 
-	xrx200_hw_init(hw);
+	xrx200_hw_init(priv);
 
 	return 0;
 }
@@ -523,7 +513,7 @@
 static int xrx200sw_set_vlan_vid(struct switch_dev *dev, const struct switch_attr *attr,
				 struct switch_val *val)
 {
-	struct xrx200_hw *hw = container_of(dev, struct xrx200_hw, swdev);
+	struct xrx200_priv *priv = container_of(dev, struct xrx200_priv, swdev);
 	int i;
 	struct xrx200_pce_table_entry tev;
 	struct xrx200_pce_table_entry tem;
@@ -538,7 +528,7 @@
 			return -EINVAL;
 	}
 
-	hw->vlan_vid[val->port_vlan] = val->value.i;
+	priv->vlan_vid[val->port_vlan] = val->value.i;
 
 	tev.index = val->port_vlan;
 	xrx200_pce_table_entry_read(&tev);
@@ -571,7 +561,7 @@
 
 static int xrx200sw_set_vlan_ports(struct switch_dev *dev, struct switch_val *val)
 {
-	struct xrx200_hw *hw = container_of(dev, struct xrx200_hw, swdev);
+	struct xrx200_priv *priv = container_of(dev, struct xrx200_priv, swdev);
 	int i, portmap, tagmap, untagged;
 	struct xrx200_pce_table_entry tem;
@@ -624,7 +614,7 @@
 	ltq_switch_w32_mask(0, portmap, PCE_PMAP2);
 	ltq_switch_w32_mask(0, portmap, PCE_PMAP3);
 
-	hw->vlan_port_map[val->port_vlan] = portmap;
+	priv->vlan_port_map[val->port_vlan] = portmap;
 
 	xrx200sw_fixup_pvids();
@@ -834,19 +824,16 @@
 //	.get_port_stats = xrx200sw_get_port_stats, //TODO
 };
 
-static int xrx200sw_init(struct xrx200_hw *hw)
+static int xrx200sw_init(struct xrx200_priv *priv)
 {
-	int netdev_num;
-
-	for (netdev_num = 0; netdev_num < hw->num_devs; netdev_num++)
-	{
 	struct switch_dev *swdev;
-	struct net_device *dev = hw->devs[netdev_num];
-	struct xrx200_priv *priv = netdev_priv(dev);
-	if (!priv->sw)
-		continue;
+	if (!priv->sw) {
+		pr_info("!!!! no switch\n");
+		return -ENODEV;
+	}
 
-	swdev = &hw->swdev;
+	swdev = &priv->swdev;
 	swdev->name = "Lantiq XRX200 Switch";
 	swdev->vlans = XRX200_MAX_VLAN;
@@ -854,32 +841,49 @@
 	swdev->cpu_port = 6;
 	swdev->ops = &xrx200sw_ops;
 
-	register_switch(swdev, dev);
+	register_switch(swdev, priv->net_dev);
 	return 0; // enough switches
+}
+
+/* drop all the packets from the DMA ring */
+static void xrx200_flush_dma(struct xrx200_chan *ch)
+{
+	int i;
+
+	for (i = 0; i < LTQ_DESC_NUM; i++) {
+		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
+
+		if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) != LTQ_DMA_C)
+			break;
+
+		desc->ctl = LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
+			    XRX200_DMA_DATA_LEN;
+		ch->dma.desc++;
+		ch->dma.desc %= LTQ_DESC_NUM;
 	}
-	return 0;
 }
 
 static int xrx200_open(struct net_device *dev)
 {
 	struct xrx200_priv *priv = netdev_priv(dev);
-	int i;
 
-	for (i = 0; i < XRX200_MAX_DMA; i++) {
-		if (!priv->hw->chan[i].dma.irq)
-			continue;
-		spin_lock_bh(&priv->hw->chan[i].lock);
-		if (!priv->hw->chan[i].refcount) {
-			if (XRX200_DMA_IS_RX(i))
-				napi_enable(&priv->hw->chan[i].napi);
-			ltq_dma_open(&priv->hw->chan[i].dma);
-		}
-		priv->hw->chan[i].refcount++;
-		spin_unlock_bh(&priv->hw->chan[i].lock);
-	}
-	for (i = 0; i < priv->num_port; i++)
-		if (priv->port[i].phydev)
-			phy_start(priv->port[i].phydev);
+	napi_enable(&priv->chan_tx.napi);
+	ltq_dma_open(&priv->chan_tx.dma);
+	ltq_dma_enable_irq(&priv->chan_tx.dma);
+
+	napi_enable(&priv->chan_rx.napi);
+	ltq_dma_open(&priv->chan_rx.dma);
+	/* The boot loader does not always deactivate the receiving of frames
+	 * on the ports and then some packets queue up in the PPE buffers.
+	 * They already passed the PMAC so they do not have the tags
+	 * configured here. Read the these packets here and drop them.
+	 * The HW should have written them into memory after 10us
+	 */
+	usleep_range(20, 40);
+	xrx200_flush_dma(&priv->chan_rx);
+
+	ltq_dma_enable_irq(&priv->chan_rx.dma);
+
 	netif_wake_queue(dev);
 
 	return 0;
@@ -896,19 +900,11 @@
 		if (priv->port[i].phydev)
 			phy_stop(priv->port[i].phydev);
 
-	for (i = 0; i < XRX200_MAX_DMA; i++) {
-		if (!priv->hw->chan[i].dma.irq)
-			continue;
+	napi_disable(&priv->chan_rx.napi);
+	ltq_dma_close(&priv->chan_rx.dma);
 
-		priv->hw->chan[i].refcount--;
-		if (!priv->hw->chan[i].refcount) {
-			if (XRX200_DMA_IS_RX(i))
-				napi_disable(&priv->hw->chan[i].napi);
-			spin_lock_bh(&priv->hw->chan[i].lock);
-			ltq_dma_close(&priv->hw->chan[XRX200_DMA_RX].dma);
-			spin_unlock_bh(&priv->hw->chan[i].lock);
-		}
-	}
+	napi_disable(&priv->chan_tx.napi);
+	ltq_dma_close(&priv->chan_tx.dma);
 
 	return 0;
 }
@@ -938,8 +934,8 @@
 
 static void xrx200_hw_receive(struct xrx200_chan *ch, int id)
 {
-	struct net_device *dev = ch->devs[id];
-	struct xrx200_priv *priv = netdev_priv(dev);
+	struct xrx200_priv *priv = ch->priv;
+	struct net_device *dev = priv->net_dev;
 	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
 	struct sk_buff *skb = ch->skb[ch->dma.desc];
 	int len = (desc->ctl & LTQ_DMA_SIZE_MASK);
@@ -963,15 +959,15 @@
 	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	netif_receive_skb(skb);
-	priv->stats.rx_packets++;
-	priv->stats.rx_bytes+=len;
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes+=len;
 }
 
 static int xrx200_poll_rx(struct napi_struct *napi, int budget)
 {
 	struct xrx200_chan *ch = container_of(napi,
				struct xrx200_chan, napi);
-	struct xrx200_priv *priv = netdev_priv(ch->devs[0]);
+	struct xrx200_priv *priv = ch->priv;
 	int rx = 0;
 	int complete = 0;
@@ -982,7 +978,7 @@
 			struct sk_buff *skb = ch->skb[ch->dma.desc];
 			u8 *special_tag = (u8*)skb->data;
 			int port = (special_tag[7] >> SPPID_SHIFT) & SPPID_MASK;
-			xrx200_hw_receive(ch, priv->hw->port_map[port]);
+			xrx200_hw_receive(ch, priv->port_map[port]);
 #else
 			xrx200_hw_receive(ch, 0);
 #endif
@@ -993,47 +989,63 @@
 	}
 
 	if (complete || !rx) {
-		napi_complete(&ch->napi);
+
+		if (napi_complete_done(&ch->napi,rx)) {
+			ltq_dma_ack_irq(&ch->dma);
 			ltq_dma_enable_irq(&ch->dma);
+		}
 	}
 
 	return rx;
 }
 
-static void xrx200_tx_housekeeping(unsigned long ptr)
+
+static struct net_device_stats *xrx200_get_stats (struct net_device *dev)
 {
-	struct xrx200_chan *ch = (struct xrx200_chan *) ptr;
+	struct xrx200_priv *priv = netdev_priv(dev);
+
+	return &priv->stats;
+}
+
+
+static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget)
+{
+	struct xrx200_chan *ch = container_of(napi,
				struct xrx200_chan, napi);
+	struct net_device *net_dev = ch->priv->net_dev;
 	int pkts = 0;
-	int i;
+	int bytes = 0;
 
-	spin_lock_bh(&ch->lock);
-	ltq_dma_ack_irq(&ch->dma);
-	while ((ch->dma.desc_base[ch->tx_free].ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) == LTQ_DMA_C) {
-		struct sk_buff *skb = ch->skb[ch->tx_free];
+	while (pkts < budget) {
+		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->tx_free];
+
+		if ((desc->ctl & (LTQ_DMA_OWN | LTQ_DMA_C)) == LTQ_DMA_C) {
+			struct sk_buff *skb = ch->skb[ch->tx_free];
 
-		pkts++;
-		ch->skb[ch->tx_free] = NULL;
-		dev_kfree_skb(skb);
-		memset(&ch->dma.desc_base[ch->tx_free], 0,
-			sizeof(struct ltq_dma_desc));
-		ch->tx_free++;
-		ch->tx_free %= LTQ_DESC_NUM;
+			pkts++;
+			bytes += skb->len;
+			ch->skb[ch->tx_free] = NULL;
+			consume_skb(skb);
+			memset(&ch->dma.desc_base[ch->tx_free], 0,
+			       sizeof(struct ltq_dma_desc));
+			ch->tx_free++;
+			ch->tx_free %= LTQ_DESC_NUM;
+		} else {
+			break;
+		}
 	}
-	ltq_dma_enable_irq(&ch->dma);
-	spin_unlock_bh(&ch->lock);
 
-	if (!pkts)
-		return;
+	net_dev->stats.tx_packets += pkts;
+	net_dev->stats.tx_bytes += bytes;
 
-	for (i = 0; i < XRX200_MAX_DEV && ch->devs[i]; i++)
-		netif_wake_queue(ch->devs[i]);
-}
-
-static struct net_device_stats *xrx200_get_stats (struct net_device *dev)
-{
-	struct xrx200_priv *priv = netdev_priv(dev);
+	if (pkts < budget) {
+		if (napi_complete_done(&ch->napi, pkts)) {
+			ltq_dma_ack_irq(&ch->dma);
+			ltq_dma_enable_irq(&ch->dma);
+		}
+	}
 
-	return &priv->stats;
+	return pkts;
 }
 
 static void xrx200_tx_timeout(struct net_device *dev)
@@ -1043,13 +1055,17 @@
 
 	printk(KERN_ERR "%s: transmit timed out, disable the dma channel irq\n", dev->name);
 	priv->stats.tx_errors++;
+
+	ltq_dma_enable_irq(&priv->chan_tx.dma); //TODO necessary?
+
 	netif_wake_queue(dev);
 }
 
 static int xrx200_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xrx200_priv *priv = netdev_priv(dev);
-	struct xrx200_chan *ch;
+	struct xrx200_chan *ch = &priv->chan_tx;
+
 	struct ltq_dma_desc *desc;
 	u32 byte_offset;
 	int ret = NETDEV_TX_OK;
@@ -1057,10 +1073,6 @@
 #ifdef SW_ROUTING
 	u32 special_tag = (SPID_CPU_PORT << SPID_SHIFT) | DPID_ENABLE;
 #endif
-	if(priv->id)
-		ch = &priv->hw->chan[XRX200_DMA_TX_2];
-	else
-		ch = &priv->hw->chan[XRX200_DMA_TX];
 
 	desc = &ch->dma.desc_base[ch->dma.desc];
@@ -1069,7 +1081,7 @@
 
 #ifdef SW_ROUTING
 	if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
-		u16 port_map = priv->port_map;
+		u16 port_map = priv->d_port_map;
 
 		if (priv->sw && skb->protocol == htons(ETH_P_8021Q)) {
 			u16 vid;
@@ -1078,9 +1090,9 @@
 			port_map = 0;
 			if (!__vlan_get_tag(skb, &vid)) {
 				for (i = 0; i < XRX200_MAX_VLAN; i++) {
-					if (priv->hw->vlan_vid[i] != vid)
+					if (priv->vlan_vid[i] != vid)
 						continue;
-					port_map = priv->hw->vlan_port_map[i];
+					port_map = priv->vlan_port_map[i];
 					break;
 				}
 			}
@@ -1114,9 +1126,7 @@
 
 	ch->skb[ch->dma.desc] = skb;
 
-	netif_trans_update(dev);
-
-	desc->addr = ((unsigned int) dma_map_single(NULL, skb->data, len,
+	desc->addr = ((unsigned int) dma_map_single(priv->dev, skb->data, len,
						DMA_TO_DEVICE)) - byte_offset;
 	wmb();
 	desc->ctl = LTQ_DMA_OWN | LTQ_DMA_SOP | LTQ_DMA_EOP |
@@ -1126,71 +1136,81 @@
 
 	if (ch->dma.desc == ch->tx_free)
 		netif_stop_queue(dev);
-
-	priv->stats.tx_packets++;
-	priv->stats.tx_bytes+=len;
+	skb_tx_timestamp(skb);
+
 out:
 	spin_unlock_bh(&ch->lock);
 
 	return ret;
 }
 
-static irqreturn_t xrx200_dma_irq(int irq, void *priv)
+static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
 {
-	struct xrx200_hw *hw = priv;
-	int chnr = irq - XRX200_DMA_IRQ;
-	struct xrx200_chan *ch = &hw->chan[chnr];
+	struct xrx200_chan *ch = ptr;
 
 	ltq_dma_disable_irq(&ch->dma);
 	ltq_dma_ack_irq(&ch->dma);
-
-	if (chnr % 2)
-		tasklet_schedule(&ch->tasklet);
-	else
-		napi_schedule(&ch->napi);
+	napi_schedule(&ch->napi);
 
 	return IRQ_HANDLED;
 }
 
-static int xrx200_dma_init(struct xrx200_hw *hw)
+static int xrx200_dma_init(struct xrx200_priv *priv)
 {
-	int i, err = 0;
+	int i;
+	struct xrx200_chan *ch_rx = &priv->chan_rx;
+	struct xrx200_chan *ch_tx = &priv->chan_tx;
+	int ret;
 
 	ltq_dma_init_port(DMA_PORT_ETOP);
 
-	for (i = 0; i < 8 && !err; i++) {
-		int irq = XRX200_DMA_IRQ + i;
-		struct xrx200_chan *ch = &hw->chan[i];
-
-		spin_lock_init(&ch->lock);
-
-		ch->idx = ch->dma.nr = i;
-
-		if (i == XRX200_DMA_TX) {
-			ltq_dma_alloc_tx(&ch->dma);
-			err = request_irq(irq, xrx200_dma_irq, 0, "vrx200_tx", hw);
-		} else if (i == XRX200_DMA_TX_2) {
-			ltq_dma_alloc_tx(&ch->dma);
-			err = request_irq(irq, xrx200_dma_irq, 0, "vrx200_tx_2", hw);
-		} else if (i == XRX200_DMA_RX) {
-			ltq_dma_alloc_rx(&ch->dma);
-			for (ch->dma.desc = 0; ch->dma.desc < LTQ_DESC_NUM;
-					ch->dma.desc++)
-				if (xrx200_alloc_skb(ch))
-					err = -ENOMEM;
-			ch->dma.desc = 0;
-			err = request_irq(irq, xrx200_dma_irq, 0, "vrx200_rx", hw);
-		} else
-			continue;
+	ch_rx->dma.nr = XRX200_DMA_RX;
+	ch_rx->priv = priv;
+
+	ltq_dma_alloc_rx(&ch_rx->dma);
+	for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
	     ch_rx->dma.desc++) {
+		ret = xrx200_alloc_skb(ch_rx);
+		if (ret)
+			goto rx_free;
+	}
+	ch_rx->dma.desc = 0;
+
+	ret = devm_request_irq(priv->dev, ch_rx->dma.irq, xrx200_dma_irq, 0,
			       "vrx200_rx", &priv->chan_rx);
+	if (ret) {
+		dev_err(priv->dev, "failed to request RX irq %d\n",
			ch_rx->dma.irq);
+		goto rx_ring_free;
+	}
+
+	ch_tx->dma.nr = XRX200_DMA_TX;
+	ch_tx->priv = priv;
+
+	ltq_dma_alloc_tx(&ch_tx->dma);
+	ret = devm_request_irq(priv->dev, ch_tx->dma.irq, xrx200_dma_irq, 0,
			       "vrx200_tx", &priv->chan_tx);
+	if (ret) {
+		dev_err(priv->dev, "failed to request TX irq %d\n",
			ch_tx->dma.irq);
+		goto tx_free;
+	}
 
-		if (!err)
-			ch->dma.irq = irq;
-		else
-			pr_err("net-xrx200: failed to request irq %d\n", irq);
+	return ret;
+
+tx_free:
+	ltq_dma_free(&ch_tx->dma);
+
+rx_ring_free:
+	/* free the allocated RX ring */
+	for (i = 0; i < LTQ_DESC_NUM; i++) {
+		if (priv->chan_rx.skb[i])
+			dev_kfree_skb_any(priv->chan_rx.skb[i]);
 	}
 
-	return err;
+rx_free:
+	ltq_dma_free(&ch_rx->dma);
+	return ret;
 }
 
 #ifdef SW_POLLING
@@ -1328,11 +1348,12 @@
 {
 	struct net_device *netdev = phydev->attached_dev;
 
-	if (do_carrier)
+	if (do_carrier) {
 		if (up)
 			netif_carrier_on(netdev);
 		else if (!xrx200_phy_has_link(netdev))
 			netif_carrier_off(netdev);
+	}
 
 	phydev->adjust_link(netdev);
 }
@@ -1343,7 +1364,7 @@
 	struct phy_device *phydev = NULL;
 	unsigned val;
 
-	phydev = mdiobus_get_phy(priv->hw->mii_bus, port->phy_addr);
+	phydev = mdiobus_get_phy(priv->mii_bus, port->phy_addr);
 
 	if (!phydev) {
 		netdev_err(dev, "no PHY found\n");
@@ -1376,10 +1397,10 @@
 #ifdef SW_POLLING
 	phy_read_status(phydev);
 
-	val = xrx200_mdio_rd(priv->hw->mii_bus, MDIO_DEVAD_NONE, MII_CTRL1000);
+	val = xrx200_mdio_rd(priv->mii_bus, MDIO_DEVAD_NONE, MII_CTRL1000);
 	val |= ADVERTIZE_MPD;
-	xrx200_mdio_wr(priv->hw->mii_bus, MDIO_DEVAD_NONE, MII_CTRL1000, val);
-	xrx200_mdio_wr(priv->hw->mii_bus, 0, 0, 0x1040);
+	xrx200_mdio_wr(priv->mii_bus, MDIO_DEVAD_NONE, MII_CTRL1000, val);
+	xrx200_mdio_wr(priv->mii_bus, 0, 0, 0x1040);
 
 	phy_start_aneg(phydev);
 #endif
@@ -1522,12 +1543,12 @@
 	ltq_switch_w32_mask(0, BIT(3), PCE_GCTRL_REG(0));
 }
 
-static void xrx200_hw_init(struct xrx200_hw *hw)
+static void xrx200_hw_init(struct xrx200_priv *priv)
 {
 	int i;
 
 	/* enable clock gate */
-	clk_enable(hw->clk);
+	clk_enable(priv->clk);
 
 	ltq_switch_w32(1, 0);
 	mdelay(100);
@@ -1595,49 +1616,45 @@
 	xrx200sw_write_x(1, XRX200_BM_QUEUE_GCTRL_GL_MOD, 0);
 
 	for (i = 0; i < XRX200_MAX_VLAN; i++)
-		hw->vlan_vid[i] = i;
+		priv->vlan_vid[i] = i;
 }
 
-static void xrx200_hw_cleanup(struct xrx200_hw *hw)
+static void xrx200_hw_cleanup(struct xrx200_priv *priv)
 {
 	int i;
 
 	/* disable the switch */
 	ltq_mdio_w32_mask(MDIO_GLOB_ENABLE, 0, MDIO_GLOB);
 
-	/* free the channels and IRQs */
-	for (i = 0; i < 2; i++) {
-		ltq_dma_free(&hw->chan[i].dma);
-		if (hw->chan[i].dma.irq)
-			free_irq(hw->chan[i].dma.irq, hw);
-	}
+	ltq_dma_free(&priv->chan_tx.dma);
+	ltq_dma_free(&priv->chan_rx.dma);
 
 	/* free the allocated RX ring */
 	for (i = 0; i < LTQ_DESC_NUM; i++)
-		dev_kfree_skb_any(hw->chan[XRX200_DMA_RX].skb[i]);
+		dev_kfree_skb_any(priv->chan_rx.skb[i]);
 
 	/* clear the mdio bus */
-	mdiobus_unregister(hw->mii_bus);
-	mdiobus_free(hw->mii_bus);
+	mdiobus_unregister(priv->mii_bus);
+	mdiobus_free(priv->mii_bus);
 
 	/* release the clock */
-	clk_disable(hw->clk);
-	clk_put(hw->clk);
+	clk_disable(priv->clk);
+	clk_put(priv->clk);
 }
 
-static int xrx200_of_mdio(struct xrx200_hw *hw, struct device_node *np)
+static int xrx200_of_mdio(struct xrx200_priv *priv, struct device_node *np)
 {
-	hw->mii_bus = mdiobus_alloc();
-	if (!hw->mii_bus)
+	priv->mii_bus = mdiobus_alloc();
+	if (!priv->mii_bus)
 		return -ENOMEM;
 
-	hw->mii_bus->read = xrx200_mdio_rd;
-	hw->mii_bus->write = xrx200_mdio_wr;
-	hw->mii_bus->name = "lantiq,xrx200-mdio";
-	snprintf(hw->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0);
+	priv->mii_bus->read = xrx200_mdio_rd;
+	priv->mii_bus->write = xrx200_mdio_wr;
+	priv->mii_bus->name = "lantiq,xrx200-mdio";
+	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0);
 
-	if (of_mdiobus_register(hw->mii_bus, np)) {
-		mdiobus_free(hw->mii_bus);
+	if (of_mdiobus_register(priv->mii_bus, np)) {
+		mdiobus_free(priv->mii_bus);
 		return -ENXIO;
 	}
@@ -1677,12 +1694,12 @@
 	}
 	/* is this port a wan port ? */
 	if (priv->wan)
-		priv->hw->wan_map |= BIT(p->num);
+		priv->wan_map |= BIT(p->num);
 
-	priv->port_map |= BIT(p->num);
+	priv->d_port_map |= BIT(p->num);
 
 	/* store the port id in the hw struct so we can map ports -> devices */
-	priv->hw->port_map[p->num] = priv->hw->num_devs;
+	priv->port_map[p->num] = 0;
 }
 
 static const struct net_device_ops xrx200_netdev_ops = {
@@ -1696,29 +1713,21 @@
 	.ndo_tx_timeout = xrx200_tx_timeout,
 };
 
-static void xrx200_of_iface(struct xrx200_hw *hw, struct device_node *iface, struct device *dev)
+static void xrx200_of_iface(struct xrx200_priv *priv, struct device_node *iface, struct device *dev)
 {
-	struct xrx200_priv *priv;
 	struct device_node *port;
 	const __be32 *wan;
 	const u8 *mac;
 
-	/* alloc the network device */
-	hw->devs[hw->num_devs] = alloc_etherdev(sizeof(struct xrx200_priv));
-	if (!hw->devs[hw->num_devs])
-		return;
-
 	/* setup the network device */
-	strcpy(hw->devs[hw->num_devs]->name, "eth%d");
-	hw->devs[hw->num_devs]->netdev_ops = &xrx200_netdev_ops;
-	hw->devs[hw->num_devs]->watchdog_timeo = XRX200_TX_TIMEOUT;
-	hw->devs[hw->num_devs]->needed_headroom = XRX200_HEADROOM;
-	SET_NETDEV_DEV(hw->devs[hw->num_devs], dev);
+	strcpy(priv->net_dev->name, "eth%d");
+	priv->net_dev->netdev_ops = &xrx200_netdev_ops;
+	priv->net_dev->watchdog_timeo = XRX200_TX_TIMEOUT;
+	priv->net_dev->needed_headroom = XRX200_HEADROOM;
+	SET_NETDEV_DEV(priv->net_dev, dev);
 
 	/* setup our private data */
-	priv = netdev_priv(hw->devs[hw->num_devs]);
-	priv->hw = hw;
-	priv->id = hw->num_devs;
+	priv->id = 0;
 
 	mac = of_get_mac_address(iface);
 	if (mac)
@@ -1738,20 +1747,33 @@
 		if (of_device_is_compatible(port, "lantiq,xrx200-pdi-port"))
 			xrx200_of_port(priv, port);
-
-	/* register the actual device */
-	if (!register_netdev(hw->devs[hw->num_devs]))
-		hw->num_devs++;
 }
 
-static struct xrx200_hw xrx200_hw;
-
 static int xrx200_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
 	struct resource *res[4];
 	struct device_node *mdio_np, *iface_np, *phy_np;
 	struct of_phandle_iterator it;
 	int err;
 	int i;
+	struct xrx200_priv *priv;
+	struct net_device *net_dev;
+
+
+	/* alloc the network device */
+	net_dev = devm_alloc_etherdev(dev, sizeof(struct xrx200_priv));
+	if (!net_dev)
+		return -ENOMEM;
+
+	priv = netdev_priv(net_dev);
+	priv->net_dev = net_dev;
+	priv->dev = dev;
+
+	net_dev->netdev_ops = &xrx200_netdev_ops;
+	SET_NETDEV_DEV(net_dev, dev);
+	net_dev->min_mtu = ETH_ZLEN;
+	net_dev->max_mtu = XRX200_DMA_DATA_LEN;
 
 	/* load the memory ranges */
 	for (i = 0; i < 4; i++) {
@@ -1781,85 +1803,90 @@
 			return -EPROBE_DEFER;
 		}
 	}
+
+	priv->chan_rx.dma.irq = XRX200_DMA_IRQ + XRX200_DMA_RX;
+	priv->chan_tx.dma.irq = XRX200_DMA_IRQ + XRX200_DMA_TX;
+	priv->chan_rx.priv = priv;
+	priv->chan_tx.priv = priv;
 
 	/* get the clock */
-	xrx200_hw.clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(xrx200_hw.clk)) {
+	priv->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
 		dev_err(&pdev->dev, "failed to get clock\n");
-		return PTR_ERR(xrx200_hw.clk);
+		return PTR_ERR(priv->clk);
 	}
 
 	/* bring up the dma engine and IP core */
-	xrx200_dma_init(&xrx200_hw);
-	xrx200_hw_init(&xrx200_hw);
-	tasklet_init(&xrx200_hw.chan[XRX200_DMA_TX].tasklet, xrx200_tx_housekeeping, (u32) &xrx200_hw.chan[XRX200_DMA_TX]);
-	tasklet_init(&xrx200_hw.chan[XRX200_DMA_TX_2].tasklet, xrx200_tx_housekeeping, (u32) &xrx200_hw.chan[XRX200_DMA_TX_2]);
+	err = xrx200_dma_init(priv);
+	if (err)
+		return err;
+
+	/* enable clock gate */
+	err = clk_prepare_enable(priv->clk);
+	if (err)
+		goto err_uninit_dma;
+
+	xrx200_hw_init(priv);
 
 	/* bring up the mdio bus */
 	mdio_np = of_find_compatible_node(pdev->dev.of_node, NULL,
				"lantiq,xrx200-mdio");
 	if (mdio_np)
-		if (xrx200_of_mdio(&xrx200_hw, mdio_np))
+		if (xrx200_of_mdio(priv, mdio_np))
 			dev_err(&pdev->dev, "mdio probe failed\n");
 
 	/* load the interfaces */
 	for_each_child_of_node(pdev->dev.of_node, iface_np)
-		if (of_device_is_compatible(iface_np, "lantiq,xrx200-pdi")) {
-			if (xrx200_hw.num_devs < XRX200_MAX_DEV)
-				xrx200_of_iface(&xrx200_hw, iface_np, &pdev->dev);
-			else
-				dev_err(&pdev->dev,
-					"only %d interfaces allowed\n",
-					XRX200_MAX_DEV);
-		}
-
-	if (!xrx200_hw.num_devs) {
-		xrx200_hw_cleanup(&xrx200_hw);
-		dev_err(&pdev->dev, "failed to load interfaces\n");
-		return -ENOENT;
-	}
-
-	xrx200sw_init(&xrx200_hw);
+		if (of_device_is_compatible(iface_np, "lantiq,xrx200-pdi")) {
+			xrx200_of_iface(priv, iface_np, &pdev->dev);
+			break; //hack
+		}
+
+	xrx200sw_init(priv);
 
 	/* set wan port mask */
-	ltq_pmac_w32(xrx200_hw.wan_map, PMAC_EWAN);
-
-	for (i = 0; i < xrx200_hw.num_devs; i++) {
-		xrx200_hw.chan[XRX200_DMA_RX].devs[i] = xrx200_hw.devs[i];
-		xrx200_hw.chan[XRX200_DMA_TX].devs[i] = xrx200_hw.devs[i];
-		xrx200_hw.chan[XRX200_DMA_TX_2].devs[i] = xrx200_hw.devs[i];
-	}
+	ltq_pmac_w32(priv->wan_map, PMAC_EWAN);
 
 	/* setup NAPI */
-	init_dummy_netdev(&xrx200_hw.chan[XRX200_DMA_RX].dummy_dev);
-	netif_napi_add(&xrx200_hw.chan[XRX200_DMA_RX].dummy_dev,
-			&xrx200_hw.chan[XRX200_DMA_RX].napi, xrx200_poll_rx, 32);
+	netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32); //32
+	netif_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32);
+
+	platform_set_drvdata(pdev, priv);
 
-	platform_set_drvdata(pdev, &xrx200_hw);
+	err = register_netdev(net_dev);
+	if (err)
+		goto err_unprepare_clk;
 
 	return 0;
+
+err_unprepare_clk:
+	clk_disable_unprepare(priv->clk);
+
+err_uninit_dma:
+	xrx200_hw_cleanup(priv);
+
+	return err;
 }
 
 static int xrx200_remove(struct platform_device *pdev)
 {
-	struct net_device *dev = platform_get_drvdata(pdev);
-	struct xrx200_priv *priv;
-
-	if (!dev)
-		return 0;
-	priv = netdev_priv(dev);
+	struct xrx200_priv *priv = platform_get_drvdata(pdev);
+	struct net_device *net_dev = priv->net_dev;
 
 	/* free stack related instances */
-	netif_stop_queue(dev);
-	netif_napi_del(&xrx200_hw.chan[XRX200_DMA_RX].napi);
-
-	/* shut down hardware */
-	xrx200_hw_cleanup(&xrx200_hw);
+	netif_stop_queue(net_dev);
+	netif_napi_del(&priv->chan_tx.napi);
+	netif_napi_del(&priv->chan_rx.napi);
 
 	/* remove the actual device */
-	unregister_netdev(dev);
-	free_netdev(dev);
+	unregister_netdev(net_dev);
+
+	/* release the clock */
+	clk_disable_unprepare(priv->clk);
+
+	/* shut down hardware */
+	xrx200_hw_cleanup(priv);
 
 	return 0;
 }
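The throughput gain comes mainly from handling TX completions in their own NAPI context instead of a tasklet. The pattern both poll functions above follow, reduced to a generic skeleton (stock kernel API only; the lantiq re-enable calls are shown as comments because they are specific to this driver):

#include <linux/netdevice.h>

/* Generic NAPI poll skeleton matching xrx200_tx_housekeeping() above:
 * the channel interrupt stays disabled while polling and is re-armed
 * only if napi_complete_done() accepted the completion, so a racing
 * napi_schedule() from the ISR cannot be lost.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	int done = 0;

	/* ... reap up to budget completed descriptors, counting them in done ... */

	if (done < budget) {
		if (napi_complete_done(napi, done)) {
			/* re-arm the channel interrupt here, in this driver:
			 *   ltq_dma_ack_irq(&ch->dma);
			 *   ltq_dma_enable_irq(&ch->dma);
			 */
		}
	}

	return done;
}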
--- ./a/arch/mips/lantiq/xway/dma.c	2019-01-26 09:37:07.000000000 +0100
+++ ./b/arch/mips/lantiq/xway/dma.c	2019-01-30 05:04:42.750889682 +0100
@@ -194,6 +194,11 @@
 	ltq_dma_w32(p, LTQ_DMA_PS);
 	switch (p) {
 	case DMA_PORT_ETOP:
+
+		//TODO test without when ethernet driver is stable
+		ltq_dma_w32_mask(0x3c, (DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),
+			LTQ_DMA_PCTRL);
+
 		/*
 		 * Tell the DMA engine to swap the endianness of data frames and
 		 * drop packets if the channel arbitration fails.
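Spelled out, the masked write above does the following (the field split is my reading of the 0x3c mask and the shifts used in the patch; the per-direction assignment is an assumption, not taken from the header):

/* ltq_dma_w32_mask(clear, set, reg) first clears the 'clear' bits,
 * then ORs in 'set'. Mask 0x3c = 0b111100 covers two 2-bit
 * burst-length fields of LTQ_DMA_PCTRL:
 *   bits [3:2] <- DMA_2W_BURST << 2  (one transfer direction)
 *   bits [5:4] <- DMA_2W_BURST << 4  (the other direction)
 * so both burst fields of the ETOP port are forced to the 2-word
 * setting regardless of the reset/bootloader value.
 */
ltq_dma_w32_mask(0x3c, (DMA_2W_BURST << 4) | (DMA_2W_BURST << 2),
		 LTQ_DMA_PCTRL);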
_______________________________________________ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/mailman/listinfo/openwrt-devel