[RFC V1 net-next 0/1] Introduce xdp to ena
From: Sameeh Jubran This patch set has one patch which implements xdp drop support in the ena driver. Sameeh Jubran (1): net: ena: implement XDP drop support drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++- drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++ 2 files changed, 111 insertions(+), 1 deletion(-) -- 2.17.1
[RFC V1 net-next 1/1] net: ena: implement XDP drop support
From: Sameeh Jubran This commit implements the basic functionality of drop/pass logic in the ena driver. Signed-off-by: Sameeh Jubran --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++- drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++ 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 20ec8ff03..3d65c0771 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -33,10 +33,10 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #ifdef CONFIG_RFS_ACCEL +#include #include #endif /* CONFIG_RFS_ACCEL */ #include -#include #include #include #include @@ -105,11 +105,82 @@ static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) adapter->rx_ring[i].mtu = mtu; } +static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + struct bpf_prog *xdp_prog = rx_ring->xdp_bpf_prog; + u32 verdict = XDP_PASS; + + rcu_read_lock(); + + if (!xdp_prog) + goto out; + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + if (unlikely(verdict == XDP_ABORTED)) + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + else if (unlikely(verdict > XDP_REDIRECT)) + bpf_warn_invalid_xdp_action(verdict); +out: + rcu_read_unlock(); + return verdict; +} + +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *old_bpf_prog; + int i; + + if (ena_xdp_allowed(adapter)) { + old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + for (i = 0; i < adapter->num_queues; i++) + xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); + + } else { + netif_err(adapter, drv, adapter->netdev, "Failed to set xdp program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + return -EFAULT; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. + */ +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf->prog); + case XDP_QUERY_PROG: + bpf->prog_id = adapter->xdp_bpf_prog ? + adapter->xdp_bpf_prog->aux->id : 0; + break; + default: + return -EINVAL; + } + return 0; +} + static int ena_change_mtu(struct net_device *dev, int new_mtu) { struct ena_adapter *adapter = netdev_priv(dev); int ret; + if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) { + netif_err(adapter, drv, dev, + "Requested MTU value is not valid while xdp is enabled new_mtu: %d max mtu: %lu min mtu: %d\n", + new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU); + return -EINVAL; + } ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); if (!ret) { netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, va = page_address(rx_info->page) + rx_info->page_offset; prefetch(va + NET_IP_ALIGN); + if (ena_xdp_present_ring(rx_ring)) { + rx_ring->xdp.data = va; + rx_ring->xdp.data_meta = rx_ring->xdp.data; + rx_ring->xdp.data_hard_start = rx_ring->xdp.data - + rx_info->page_offset; + rx_ring->xdp.data_end = rx_ring->xdp.data + len; + if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS) + return NULL; + } if (len <= rx_ring->rx_copybreak) { skb = ena_alloc_skb(rx_ring, false); if (unlikely(!skb)) @@ -2549,6 +2629,7 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address= NULL, .ndo_validate_addr = eth_validate_addr, + .ndo_bpf= ena_xdp, }; static int ena_device_validate_params(struct ena_adapter *adapter, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index f2b6e2e05..e17965f7a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -35,6 +35,7 @@ #include #include
Re: [RFC net-next] net: dsa: add support for MC_DISABLED attribute
On Fri, Jun 21, 2019 at 05:29:52PM -0400, Vivien Didelot wrote: > On Thu, 20 Jun 2019 19:24:47 -0700, Florian Fainelli > wrote: > > > This patch adds support for enabling or disabling the flooding of > > > unknown multicast traffic on the CPU ports, depending on the value > > > of the switchdev SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED attribute. > > > > > > This allows the user to prevent the CPU to be flooded with a lot of > > > undesirable traffic that the network stack needs to filter in software. > > > > > > The bridge has multicast snooping enabled by default, hence CPU ports > > > aren't bottlenecked with arbitrary network applications anymore. > > > But this can be an issue in some scenarios such as pinging the bridge's > > > IPv6 address. Setting /sys/class/net/br0/bridge/multicast_snooping to > > > 0 would restore unknown multicast flooding and thus fix ICMPv6. As > > > an alternative, enabling multicast_querier would program the bridge > > > address into the switch. > > From what I can read from mlxsw, we should probably also implement the > > SWITCHDEV_ATTR_ID_PORT_MROUTER attribute in order to be consistent. > > > > Since the attribute MC_DISABLED is on the bridge master, we should also > > iterate over the list of switch ports being a member of that bridge and > > change their flooding attribute, taking into account whether > > BR_MCAST_FLOOD is set on that particular port or not. Just paraphrasing > > what mlxsw does here again... > > Ouch, doesn't sound like what a driver should be doing :-( > > Ido, I cannot find documentation for multicast_snooping or MC_DISABLED > and the naming isn't clear. Can this be considered as an equivalent > of mcast_flood but targeting the bridge device itself, describing > whether the bridge is interested or not in unknown multicast traffic? Not really, you're only looking at one aspect of this, but there is more. For example, when multicast snooping is disabled, the MDB is not considered. > > Once you act on the user-facing ports, you might be able to leave the > > CPU port flooding unconditionally, since it would only "flood" the CPU > > port either because an user-facing port has BR_MCAST_FLOOD set, or > > because this is known MC traffic that got programmed via the bridge's > > MDB. Would that work? > > You may want the machine or network connected behind a bridge port > to be flooded with unknown multicast traffic, without having the > CPU conduit clogged up with this traffic. So these are two distinct > settings for me. > > The only scenario I can think of needing the CPU to be flooded is if > there's a non-DSA port in the bridge maybe. But IMHO this should be > handled by the bridge, offloading or not the appropriate attribute. > > > On a higher level, I really wish we did not have to re-implement a lot > > of identical or similar logic in each switch drivers and had a more > > central model of what is behaviorally expected. > > I couldn't agree more, ethernet switch drivers should only offload > the notified bridge configuration, not noodling their own logic... Please see my comment to Florian. The driver is doing what needs to be done in order to comply with the behavior of the bridge driver. As I wrote to Florian, we can probably move some logic up the stack and simplify drivers. > Russell, Ido, Florian, so far I understand that a multicast-unaware > bridge must flood unknown traffic everywhere (CPU included); > and a multicast-aware bridge must only flood its ports if their > mcast_flood is on, and known traffic targeting the bridge must be > offloaded accordingly. Do you guys agree with this? When multicast snooping is enabled unregistered multicast traffic should only be flooded to mrouter ports.
[PATCH V1 net-next] net: ena: Fix bug where ring allocation backoff stopped too late
From: Sameeh Jubran The current code of create_queues_with_size_backoff() allows the ring size to become as small as ENA_MIN_RING_SIZE/2. This is a bug since we don't want the queue ring to be smaller than ENA_MIN_RING_SIZE In this commit we change the loop's termination condition to look at the queue size of the next iteration instead of that of the current one, so that the minimal queue size again becomes ENA_MIN_RING_SIZE. Fixes: eece4d2ab9d2 ("net: ena: add ethtool function for changing io queue sizes") Signed-off-by: Arthur Kiyanovski Signed-off-by: Sameeh Jubran --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index b7865ee1d..20ec8ff03 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1839,8 +1839,8 @@ err_setup_tx: if (cur_rx_ring_size >= cur_tx_ring_size) new_rx_ring_size = cur_rx_ring_size / 2; - if (cur_tx_ring_size < ENA_MIN_RING_SIZE || - cur_rx_ring_size < ENA_MIN_RING_SIZE) { + if (new_tx_ring_size < ENA_MIN_RING_SIZE || + new_rx_ring_size < ENA_MIN_RING_SIZE) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", ENA_MIN_RING_SIZE); -- 2.17.1
Re: [RFC net-next] net: dsa: add support for MC_DISABLED attribute
On Sun, Jun 23, 2019 at 07:09:52AM +, Ido Schimmel wrote: > When multicast snooping is enabled unregistered multicast traffic should > only be flooded to mrouter ports. Given that IPv6 relies upon multicast working, and multicast snooping is a kernel configuration option, and MLD messages will only be sent when whenever the configuration on the target changes, and there may not be a multicast querier in the system, who does that ensure that IPv6 can work on a bridge where the kernel configured and built with multicast snooping enabled? -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up According to speedtest.net: 11.9Mbps down 500kbps up
Re: [RFC net-next] net: dsa: add support for MC_DISABLED attribute
On Sun, Jun 23, 2019 at 08:26:05AM +0100, Russell King - ARM Linux admin wrote: > On Sun, Jun 23, 2019 at 07:09:52AM +, Ido Schimmel wrote: > > When multicast snooping is enabled unregistered multicast traffic should > > only be flooded to mrouter ports. > > Given that IPv6 relies upon multicast working, and multicast snooping > is a kernel configuration option, and MLD messages will only be sent > when whenever the configuration on the target changes, and there may > not be a multicast querier in the system, who does that ensure that > IPv6 can work on a bridge where the kernel configured and built with > multicast snooping enabled? See commit b00589af3b04 ("bridge: disable snooping if there is no querier"). I think that's unfortunate behavior that we need because multicast snooping is enabled by default. If it weren't enabled by default, then anyone enabling it would also make sure there's a querier in the network.
[PATCH] sis900: increment revision number
Increment revision number to 1.08.11 (TX completion fix). Signed-off-by: Sergej Benilov --- drivers/net/ethernet/sis/sis900.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index abb9b42e..09b4e1c5 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1,6 +1,6 @@ /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux. Copyright 1999 Silicon Integrated System Corporation - Revision: 1.08.10 Apr. 2 2006 + Revision: 1.08.11 Jun. 23 2019 Modified from the driver which is originally written by Donald Becker. @@ -16,7 +16,8 @@ preliminary Rev. 1.0 Nov. 10, 1998 SiS 7014 Single Chip 100BASE-TX/10BASE-T Physical Layer Solution, preliminary Rev. 1.0 Jan. 18, 1998 - + + Rev 1.08.11 Jun. 23 2019 Sergej Benilov TX completion fix Rev 1.08.10 Apr. 2 2006 Daniele Venzano add vlan (jumbo packets) support Rev 1.08.09 Sep. 19 2005 Daniele Venzano add Wake on LAN support Rev 1.08.08 Jan. 22 2005 Daniele Venzano use netif_msg for debugging messages @@ -79,7 +80,7 @@ #include "sis900.h" #define SIS900_MODULE_NAME "sis900" -#define SIS900_DRV_VERSION "v1.08.10 Apr. 2 2006" +#define SIS900_DRV_VERSION "v1.08.11 Jun. 23 2019" static const char version[] = KERN_INFO "sis900.c: " SIS900_DRV_VERSION "\n"; -- 2.17.1
Re: [PATCH] sis900: increment revision number
Sorry to be late in replying, I am ok also with the previous changes. Signed-off-by: Daniele Venzano On 23/06/2019 09:47, Sergej Benilov wrote: Increment revision number to 1.08.11 (TX completion fix). Signed-off-by: Sergej Benilov --- drivers/net/ethernet/sis/sis900.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index abb9b42e..09b4e1c5 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1,6 +1,6 @@ /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux. Copyright 1999 Silicon Integrated System Corporation - Revision: 1.08.10 Apr. 2 2006 + Revision: 1.08.11 Jun. 23 2019 Modified from the driver which is originally written by Donald Becker. @@ -16,7 +16,8 @@ preliminary Rev. 1.0 Nov. 10, 1998 SiS 7014 Single Chip 100BASE-TX/10BASE-T Physical Layer Solution, preliminary Rev. 1.0 Jan. 18, 1998 - + + Rev 1.08.11 Jun. 23 2019 Sergej Benilov TX completion fix Rev 1.08.10 Apr. 2 2006 Daniele Venzano add vlan (jumbo packets) support Rev 1.08.09 Sep. 19 2005 Daniele Venzano add Wake on LAN support Rev 1.08.08 Jan. 22 2005 Daniele Venzano use netif_msg for debugging messages @@ -79,7 +80,7 @@ #include "sis900.h" #define SIS900_MODULE_NAME "sis900" -#define SIS900_DRV_VERSION "v1.08.10 Apr. 2 2006" +#define SIS900_DRV_VERSION "v1.08.11 Jun. 23 2019" static const char version[] = KERN_INFO "sis900.c: " SIS900_DRV_VERSION "\n";
Re: [PATCH] sis900: increment revision number
On Sun, 2019-06-23 at 09:47 +0200, Sergej Benilov wrote: > Increment revision number to 1.08.11 (TX completion fix). Better not to bother as the last increment was in 2006. The driver version gets the kernel version in any case. > diff --git a/drivers/net/ethernet/sis/sis900.c > b/drivers/net/ethernet/sis/sis900.c [] > @@ -1,6 +1,6 @@ > /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux. > Copyright 1999 Silicon Integrated System Corporation > - Revision: 1.08.10 Apr. 2 2006 > + Revision: 1.08.11 Jun. 23 2019 etc...
Re: [PATCH] sis900: increment revision number
Hello, I think it is good to know just by looking at the sources that the driver is still kept up-to-date, so I am in favor of this patch. Daniele On 23/06/2019 11:10, Joe Perches wrote: On Sun, 2019-06-23 at 09:47 +0200, Sergej Benilov wrote: Increment revision number to 1.08.11 (TX completion fix). Better not to bother as the last increment was in 2006. The driver version gets the kernel version in any case. diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c [] @@ -1,6 +1,6 @@ /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux. Copyright 1999 Silicon Integrated System Corporation - Revision: 1.08.10 Apr. 2 2006 + Revision: 1.08.11 Jun. 23 2019 etc...
[no subject]
unsubscribe netdev
Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET
On Sat, Jun 22, 2019 at 10:12:46PM -0400, Willem de Bruijn wrote: > On Sat, Jun 22, 2019 at 1:42 PM Neil Horman wrote: > > > > When an application is run that: > > a) Sets its scheduler to be SCHED_FIFO > > and > > b) Opens a memory mapped AF_PACKET socket, and sends frames with the > > MSG_DONTWAIT flag cleared, its possible for the application to hang > > forever in the kernel. This occurs because when waiting, the code in > > tpacket_snd calls schedule, which under normal circumstances allows > > other tasks to run, including ksoftirqd, which in some cases is > > responsible for freeing the transmitted skb (which in AF_PACKET calls a > > destructor that flips the status bit of the transmitted frame back to > > available, allowing the transmitting task to complete). > > > > However, when the calling application is SCHED_FIFO, its priority is > > such that the schedule call immediately places the task back on the cpu, > > preventing ksoftirqd from freeing the skb, which in turn prevents the > > transmitting task from detecting that the transmission is complete. > > > > We can fix this by converting the schedule call to a completion > > mechanism. By using a completion queue, we force the calling task, when > > it detects there are no more frames to send, to schedule itself off the > > cpu until such time as the last transmitted skb is freed, allowing > > forward progress to be made. > > > > Tested by myself and the reporter, with good results > > > > Appies to the net tree > > > > Signed-off-by: Neil Horman > > Reported-by: Matteo Croce > > CC: "David S. Miller" > > CC: Willem de Bruijn > > > > Change Notes: > > > > V1->V2: > > Enhance the sleep logic to support being interruptible and > > allowing for honoring to SK_SNDTIMEO (Willem de Bruijn) > > --- > > net/packet/af_packet.c | 60 -- > > net/packet/internal.h | 2 ++ > > 2 files changed, 48 insertions(+), 14 deletions(-) > > > > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c > > index a29d66da7394..8ddb2f7aebb4 100644 > > --- a/net/packet/af_packet.c > > +++ b/net/packet/af_packet.c > > @@ -358,7 +358,8 @@ static inline struct page * __pure pgv_to_page(void > > *addr) > > return virt_to_page(addr); > > } > > > > -static void __packet_set_status(struct packet_sock *po, void *frame, int > > status) > > +static void __packet_set_status(struct packet_sock *po, void *frame, int > > status, > > + bool call_complete) > > { > > union tpacket_uhdr h; > > > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct packet_sock *po, > > void *frame, int status) > > BUG(); > > } > > > > + if (po->wait_on_complete && call_complete) > > + complete(&po->skb_completion); > > This wake need not happen before the barrier. Only one caller of > __packet_set_status passes call_complete (tpacket_destruct_skb). > Moving this branch to the caller avoids a lot of code churn. > Yeah, thats a fair point, I'll adjust that. > Also, multiple packets may be released before the process is awoken. > The process will block until packet_read_pending drops to zero. Can > defer the wait_on_complete to that one instance. > Also true, we can gate the complete call on wait_on_complete and pack_read_pending(...) == 0 > > smp_wmb(); > > } > > > > @@ -1148,6 +1151,14 @@ static void *packet_previous_frame(struct > > packet_sock *po, > > return packet_lookup_frame(po, rb, previous, status); > > } > > > > +static void *packet_next_frame(struct packet_sock *po, > > + struct packet_ring_buffer *rb, > > + int status) > > +{ > > + unsigned int next = rb->head != rb->frame_max ? rb->head+1 : 0; > > + return packet_lookup_frame(po, rb, next, status); > > +} > > + > > static void packet_increment_head(struct packet_ring_buffer *buff) > > { > > buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; > > @@ -2360,7 +2371,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct > > net_device *dev, > > #endif > > > > if (po->tp_version <= TPACKET_V2) { > > - __packet_set_status(po, h.raw, status); > > + __packet_set_status(po, h.raw, status, false); > > sk->sk_data_ready(sk); > > } else { > > prb_clear_blk_fill_status(&po->rx_ring); > > @@ -2400,7 +2411,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb) > > packet_dec_pending(&po->tx_ring); > > > > ts = __packet_set_timestamp(po, ph, skb); > > - __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); > > + __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts, true); > > } > > > > sock_wfree(skb); > > @@ -2585,13 +2596,13 @@ static int tpacket_parse_header(struct packet_sock > > *po, void *frame, > > > > static int tpacket_snd(struct packet_sock *po, s
Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET
On Sat, Jun 22, 2019 at 10:21:31PM -0400, Willem de Bruijn wrote: > > > -static void __packet_set_status(struct packet_sock *po, void *frame, int > > > status) > > > +static void __packet_set_status(struct packet_sock *po, void *frame, int > > > status, > > > + bool call_complete) > > > { > > > union tpacket_uhdr h; > > > > > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct packet_sock > > > *po, void *frame, int status) > > > BUG(); > > > } > > > > > > + if (po->wait_on_complete && call_complete) > > > + complete(&po->skb_completion); > > > > This wake need not happen before the barrier. Only one caller of > > __packet_set_status passes call_complete (tpacket_destruct_skb). > > Moving this branch to the caller avoids a lot of code churn. > > > > Also, multiple packets may be released before the process is awoken. > > The process will block until packet_read_pending drops to zero. Can > > defer the wait_on_complete to that one instance. > > Eh no. The point of having this sleep in the send loop is that > additional slots may be released for transmission (flipped to > TP_STATUS_SEND_REQUEST) from another thread while this thread is > waiting. > Thats incorrect. The entirety of tpacket_snd is protected by a mutex. No other thread can alter the state of the frames in the vector from the kernel send path while this thread is waiting. > Else, it would have been much simpler to move the wait below the send > loop: send as many packets as possible, then wait for all of them > having been released. Much clearer control flow. > Thats (almost) what happens now. The only difference is that with this implementation, the waiting thread has the opportunity to see if userspace has queued more frames for transmission during the wait period. We could potentially change that, but thats outside the scope of this fix. > Where to set and clear the wait_on_complete boolean remains. Integer > assignment is fragile, as the compiler and processor may optimize or > move simple seemingly independent operations. As complete() takes a > spinlock, avoiding that in the DONTWAIT case is worthwhile. But probably > still preferable to set when beginning waiting and clear when calling > complete. We avoid any call to wait_for_complete or complete already, based on the gating of the need_wait variable in tpacket_snd. If the transmitting thread doesn't set MSG_DONTWAIT in the flags of the msg structure, we will never set wait_for_complete, and so we will never manipulate the completion queue. Neil >
Re: [PATCH bpf-next v5 00/16] AF_XDP infrastructure improvements and mlx5e support
On 6/21/2019 10:52 PM, Saeed Mahameed wrote: > On Thu, Jun 20, 2019 at 2:13 AM Björn Töpel wrote: >> >> On Tue, 18 Jun 2019 at 14:00, Maxim Mikityanskiy >> wrote: >>> >>> This series contains improvements to the AF_XDP kernel infrastructure >>> and AF_XDP support in mlx5e. The infrastructure improvements are >>> required for mlx5e, but also some of them benefit to all drivers, and >>> some can be useful for other drivers that want to implement AF_XDP. >>> >>> The performance testing was performed on a machine with the following >>> configuration: >>> >>> - 24 cores of Intel Xeon E5-2620 v3 @ 2.40 GHz >>> - Mellanox ConnectX-5 Ex with 100 Gbit/s link >>> >>> The results with retpoline disabled, single stream: >>> >>> txonly: 33.3 Mpps (21.5 Mpps with queue and app pinned to the same CPU) >>> rxdrop: 12.2 Mpps >>> l2fwd: 9.4 Mpps >>> >>> The results with retpoline enabled, single stream: >>> >>> txonly: 21.3 Mpps (14.1 Mpps with queue and app pinned to the same CPU) >>> rxdrop: 9.9 Mpps >>> l2fwd: 6.8 Mpps >>> >>> v2 changes: >>> >>> Added patches for mlx5e and addressed the comments for v1. Rebased for >>> bpf-next. >>> >>> v3 changes: >>> >>> Rebased for the newer bpf-next, resolved conflicts in libbpf. Addressed >>> Björn's comments for coding style. Fixed a bug in error handling flow in >>> mlx5e_open_xsk. >>> >>> v4 changes: >>> >>> UAPI is not changed, XSK RX queues are exposed to the kernel. The lower >>> half of the available amount of RX queues are regular queues, and the >>> upper half are XSK RX queues. The patch "xsk: Extend channels to support >>> combined XSK/non-XSK traffic" was dropped. The final patch was reworked >>> accordingly. >>> >>> Added "net/mlx5e: Attach/detach XDP program safely", as the changes >>> introduced in the XSK patch base on the stuff from this one. >>> >>> Added "libbpf: Support drivers with non-combined channels", which aligns >>> the condition in libbpf with the condition in the kernel. >>> >>> Rebased over the newer bpf-next. >>> >>> v5 changes: >>> >>> In v4, ethtool reports the number of channels as 'combined' and the >>> number of XSK RX queues as 'rx' for mlx5e. It was changed, so that 'rx' >>> is 0, and 'combined' reports the double amount of channels if there is >>> an active UMEM - to make libbpf happy. >>> >>> The patch for libbpf was dropped. Although it's still useful and fixes >>> things, it raises some disagreement, so I'm dropping it - it's no longer >>> useful for mlx5e anymore after the change above. >>> >> >> Just a heads-up: There are some checkpatch warnings (>80 chars/line) > > Thanks Bjorn for your comment, in mlx5 we allow up to 95 chars per line, > otherwise it is going to be an ugly zigzags. > >> for the mlnx5 driver parts, and the series didn't apply cleanly on >> bpf-next for me. >> >> I haven't been able to test the mlnx5 parts. >> >> Parts of the series are unrelated/orthogonal, and could be submitted >> as separate series, e.g. patches {1,7} and patches {3,4}. No blockers >> for me, though. >> >> Thanks for the hard work! >> >> For the series: >> Acked-by: Björn Töpel Just wanted to make sure we're on the same page, so we don't miss this kernel. AIU, currently no action is needed from Maxim's side, as Saeed is fine with the mlx5 part, and series is still marked as 'New' in patchworks with no requested changes. Regards, Tariq
[PATCH] net: dsa: microchip: Use gpiod_set_value_cansleep()
Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly fine for the kernel to sleep for a bit in ksz_switch_register(). Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Linus Walleij Cc: Tristram Ha Cc: Woojung Huh --- drivers/net/dsa/microchip/ksz_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 4f6648d5ac8b..bc81806dd75e 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev, return PTR_ERR(dev->reset_gpio); if (dev->reset_gpio) { - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); mdelay(10); - gpiod_set_value(dev->reset_gpio, 0); + gpiod_set_value_cansleep(dev->reset_gpio, 0); } mutex_init(&dev->dev_mutex); -- 2.20.1
[PATCH] net: ethernet: ti: cpsw: Assign OF node to slave devices
Assign OF node to CPSW slave devices, otherwise it is not possible to bind e.g. DSA switch to them. Without this patch, the DSA code tries to find the ethernet device by OF match, but fails to do so because the slave device has NULL OF node. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Ivan Khoronzhuk --- drivers/net/ethernet/ti/cpsw.c | 3 +++ drivers/net/ethernet/ti/cpsw_priv.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 7bdd287074fc..c39790e21276 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2179,6 +2179,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, return ret; } + slave_data->slave_node = slave_node; slave_data->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); @@ -2329,6 +2330,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) /* register the network device */ SET_NETDEV_DEV(ndev, cpsw->dev); + ndev->dev.of_node = cpsw->slaves[1].data->slave_node; ret = register_netdev(ndev); if (ret) dev_err(cpsw->dev, "cpsw: error registering net device\n"); @@ -2506,6 +2508,7 @@ static int cpsw_probe(struct platform_device *pdev) /* register the network device */ SET_NETDEV_DEV(ndev, dev); + ndev->dev.of_node = cpsw->slaves[0].data->slave_node; ret = register_netdev(ndev); if (ret) { dev_err(dev, "error registering net device\n"); diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 04795b97ee71..e32f11da2dce 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -272,6 +272,7 @@ struct cpsw_host_regs { }; struct cpsw_slave_data { + struct device_node *slave_node; struct device_node *phy_node; charphy_id[MII_BUS_ID_SIZE]; int phy_if; -- 2.20.1
[PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout
From: Vadim Pasternak Extend macros MLXSW_REG_MTMP_TEMP_TO_MC() to allow support of negative temperature readout, since chip and others thermal components are capable of operating within the negative temperature. With no such support negative temperature will be consider as very high temperature and it will cause wrong readout and thermal shutdown. For negative values 2`s complement is used. Tested in chamber. Example of chip ambient temperature readout with chamber temperature: -10 Celsius: temp1: -6.0C (highest = -5.0C) -5 Celsius: temp1: -1.0C (highest = -1.0C) Signed-off-by: Vadim Pasternak Signed-off-by: Ido Schimmel --- drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c | 12 +--- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 14 +++--- drivers/net/ethernet/mellanox/mlxsw/reg.h | 12 +++- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 056e3f55ae6c..fb1fd334a8d4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; - int index; + int temp, index; int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, @@ -76,8 +75,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp_max; - int index; + int temp_max, index; int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, @@ -89,7 +87,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); - return sprintf(buf, "%u\n", temp_max); + return sprintf(buf, "%d\n", temp_max); } static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; u8 module; + int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 504a34d240f7..35a1dc89c28a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -312,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false); @@ -327,7 +327,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, temp); - *p_temp = (int) temp; + *p_temp = temp; return 0; } @@ -503,7 +503,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tz->parent; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; /* Read module temperature. */ @@ -519,14 +519,14 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, return 0; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - *p_temp = (int) temp; + *p_temp = temp; if (!temp) return 0; /* Update trip points. */ err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); - if (!err) + if (!err && temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); return 0; @@ -612,8
[PATCH net-next 2/3] mlxsw: core: Add the hottest thermal zone detection
From: Vadim Pasternak When multiple sensors are mapped to the same cooling device, the cooling device should be set according the worst sensor from the sensors associated with this cooling device. Provide the hottest thermal zone detection and enforce cooling device to follow the temperature trends of the hottest zone only. Prevent competition for the cooling device control from others zones, by "stable trend" indication. A cooling device will not perform any actions associated with a zone with a "stable trend". When other thermal zone is detected as a hottest, a cooling device is to be switched to following temperature trends of new hottest zone. Thermal zone score is represented by 32 bits unsigned integer and calculated according to the next formula: For T < TZ, where t from {normal trip = 0, high trip = 1, hot trip = 2, critical = 3}: TZ score = (T + (TZ - T) / 2) / (TZ - T) * 256 ** j; Highest thermal zone score s is set as MAX(TZscore); Following this formula, if TZ is in trip point higher than TZ, the higher score is to be always assigned to TZ. For two thermal zones located at the same kind of trip point, the higher score will be assigned to the zone which is closer to the next trip point. Thus, the highest score will always be assigned objectively to the hottest thermal zone. All the thermal zones initially are to be configured with mode "enabled" with the "step_wise" governor. Signed-off-by: Vadim Pasternak Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel --- .../ethernet/mellanox/mlxsw/core_thermal.c| 75 +++ 1 file changed, 62 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 88f43ad2cc4f..504a34d240f7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -23,6 +23,7 @@ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000/* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT(MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME16 +#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values @@ -113,6 +114,8 @@ struct mlxsw_thermal { struct mlxsw_thermal_module *tz_module_arr; struct mlxsw_thermal_module *tz_gearbox_arr; u8 tz_gearbox_num; + unsigned int tz_highest_score; + struct thermal_zone_device *tz_highest_dev; }; static inline u8 mlxsw_state_to_duty(int state) @@ -197,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, return 0; } +static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, + struct thermal_zone_device *tzdev, + struct mlxsw_thermal_trip *trips, + int temp) +{ + struct mlxsw_thermal_trip *trip = trips; + unsigned int score, delta, i, shift = 1; + + /* Calculate thermal zone score, if temperature is above the critical +* threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. +*/ + score = MLXSW_THERMAL_TEMP_SCORE_MAX; + for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; +i++, trip++) { + if (temp < trip->temp) { + delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); + score = delta * shift; + break; + } + shift *= 256; + } + + if (score > thermal->tz_highest_score) { + thermal->tz_highest_score = score; + thermal->tz_highest_dev = tzdev; + } +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -292,6 +323,9 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, + temp); *p_temp = (int) temp; return 0; @@ -353,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, return 0; } +static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, + int trip, enum thermal_trend *trend) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + if (tzdev == thermal->tz_highest_dev) + return 1; + + *trend = THERMAL_TREND_STABLE; + return 0; +} +
[PATCH net-next 1/3] mlxsw: core: Extend thermal core with per inter-connect device thermal zones
From: Vadim Pasternak Add a dedicated thermal zone for each inter-connect device. The current temperature is obtained from inter-connect temperature sensor and the default trip points are set to the same values as default ASIC trip points. These settings could be changed from the user space. A cooling device (fan) is bound to all inter-connect devices. Signed-off-by: Vadim Pasternak Signed-off-by: Ido Schimmel --- .../ethernet/mellanox/mlxsw/core_thermal.c| 137 +- 1 file changed, 136 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index cfab0e330a47..88f43ad2cc4f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -98,7 +98,7 @@ struct mlxsw_thermal_module { struct thermal_zone_device *tzdev; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; - int module; + int module; /* Module or gearbox number */ }; struct mlxsw_thermal { @@ -111,6 +111,8 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + struct mlxsw_thermal_module *tz_gearbox_arr; + u8 tz_gearbox_num; }; static inline u8 mlxsw_state_to_duty(int state) @@ -554,6 +556,46 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, }; +static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp; + u16 index; + int err; + + index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + + *p_temp = (int) temp; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_gearbox_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, +}; + +static struct thermal_zone_params mlxsw_thermal_gearbox_params = { + .governor_name = "user_space", +}; + static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, unsigned long *p_state) { @@ -779,6 +821,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) kfree(thermal->tz_module_arr); } +static int +mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d", +gearbox_tz->module + 1); + gearbox_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + gearbox_tz, + &mlxsw_thermal_gearbox_ops, + &mlxsw_thermal_gearbox_params, + 0, 0); + if (IS_ERR(gearbox_tz->tzdev)) + return PTR_ERR(gearbox_tz->tzdev); + + return 0; +} + +static void +mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz) +{ + thermal_zone_device_unregister(gearbox_tz->tzdev); +} + +static int +mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, +struct mlxsw_thermal *thermal) +{ + struct mlxsw_thermal_module *gearbox_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int i; + int err; + + if (!mlxsw_core_res_query_enabled(core)) + return 0; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + if (!thermal->tz_gearbox_num) + return 0
[PATCH net-next 0/3] mlxsw: Thermal and hwmon extensions
From: Ido Schimmel This patchset from Vadim includes various enhancements to thermal and hwmon code in mlxsw. Patch #1 adds a thermal zone for each inter-connect device (gearbox). These devices are present in SN3800 systems and code to expose their temperature via hwmon was added in commit 2e265a8b6c09 ("mlxsw: core: Extend hwmon interface with inter-connect temperature attributes"). Currently, there are multiple thermal zones in mlxsw and only a few cooling devices. Patch #2 detects the hottest thermal zone and the cooling devices are switched to follow its trends. RFC was sent last month [1]. Patch #3 allows to read and report negative temperature of the sensors mlxsw exposes via hwmon and thermal subsystems. [1] https://patchwork.ozlabs.org/patch/1107161/ Vadim Pasternak (3): mlxsw: core: Extend thermal core with per inter-connect device thermal zones mlxsw: core: Add the hottest thermal zone detection mlxsw: core: Add support for negative temperature readout .../net/ethernet/mellanox/mlxsw/core_hwmon.c | 12 +- .../ethernet/mellanox/mlxsw/core_thermal.c| 208 +- drivers/net/ethernet/mellanox/mlxsw/reg.h | 12 +- 3 files changed, 208 insertions(+), 24 deletions(-) -- 2.20.1
Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
I'm very happy to see progress with XDP for the ENA driver. On Sun, 23 Jun 2019 10:06:49 +0300 wrote: > @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring > *rx_ring, > va = page_address(rx_info->page) + rx_info->page_offset; > prefetch(va + NET_IP_ALIGN); > > + if (ena_xdp_present_ring(rx_ring)) { > + rx_ring->xdp.data = va; > + rx_ring->xdp.data_meta = rx_ring->xdp.data; > + rx_ring->xdp.data_hard_start = rx_ring->xdp.data - > + rx_info->page_offset; > + rx_ring->xdp.data_end = rx_ring->xdp.data + len; > + if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS) > + return NULL; > + } Looks like you forgot to init xdp.rxq pointer. You can use the samples/bpf/xdp_rxq_info* to access this... and this driver will likely crash if you use it... -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer
Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
On Sun, 23 Jun 2019 10:06:49 +0300 wrote: > This commit implements the basic functionality of drop/pass logic in the > ena driver. Usually we require a driver to implement all the XDP return codes, before we accept it. But as Daniel and I discussed with Zorik during NetConf[1], we are going to make an exception and accept the driver if you also implement XDP_TX. As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT later, given he/you wants AF_XDP support which requires XDP_REDIRECT. [1] http://vger.kernel.org/netconf2019.html -- Best regards, Jesper Dangaard Brouer MSc.CS, Principal Kernel Engineer at Red Hat LinkedIn: http://www.linkedin.com/in/brouer
Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
On 6/23/19 1:06 AM, same...@amazon.com wrote: > +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog) > +{ > + struct ena_adapter *adapter = netdev_priv(netdev); > + struct bpf_prog *old_bpf_prog; > + int i; > + > + if (ena_xdp_allowed(adapter)) { > + old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); > + > + for (i = 0; i < adapter->num_queues; i++) > + xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog); > + > + if (old_bpf_prog) > + bpf_prog_put(old_bpf_prog); > + > + } else { > + netif_err(adapter, drv, adapter->netdev, "Failed to set xdp > program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) > while xdp is on", > + netdev->mtu, ENA_XDP_MAX_MTU); > + return -EFAULT; unfortunate that extack has not been plumbed to ndo_bpf handler so that message is passed to the user. And EFAULT seems like the wrong return code. > + } > + > + return 0; > +} > + > +/* This is the main xdp callback, it's used by the kernel to set/unset the > xdp > + * program as well as to query the current xdp program id. > + */ > +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) > +{ > + struct ena_adapter *adapter = netdev_priv(netdev); > + > + switch (bpf->command) { > + case XDP_SETUP_PROG: > + return ena_xdp_set(netdev, bpf->prog); > + case XDP_QUERY_PROG: > + bpf->prog_id = adapter->xdp_bpf_prog ? > + adapter->xdp_bpf_prog->aux->id : 0; > + break; > + default: > + return -EINVAL; > + } > + return 0; > +} > + > static int ena_change_mtu(struct net_device *dev, int new_mtu) > { > struct ena_adapter *adapter = netdev_priv(dev); > int ret; > > + if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) { > + netif_err(adapter, drv, dev, > + "Requested MTU value is not valid while xdp is > enabled new_mtu: %d max mtu: %lu min mtu: %d\n", > + new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU); > + return -EINVAL; > + } If you manage dev->max_mtu as an XDP program is installed / removed this check is not needed. Instead it is handled by the core change_mtu logic and has better user semanitcs: link list shows the new MTU and trying to change it above the new max a message is sent to the user as opposed to kernel log. > ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); > if (!ret) { > netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET
On Sun, Jun 23, 2019 at 7:40 AM Neil Horman wrote: > > On Sat, Jun 22, 2019 at 10:21:31PM -0400, Willem de Bruijn wrote: > > > > -static void __packet_set_status(struct packet_sock *po, void *frame, > > > > int status) > > > > +static void __packet_set_status(struct packet_sock *po, void *frame, > > > > int status, > > > > + bool call_complete) > > > > { > > > > union tpacket_uhdr h; > > > > > > > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct packet_sock > > > > *po, void *frame, int status) > > > > BUG(); > > > > } > > > > > > > > + if (po->wait_on_complete && call_complete) > > > > + complete(&po->skb_completion); > > > > > > This wake need not happen before the barrier. Only one caller of > > > __packet_set_status passes call_complete (tpacket_destruct_skb). > > > Moving this branch to the caller avoids a lot of code churn. > > > > > > Also, multiple packets may be released before the process is awoken. > > > The process will block until packet_read_pending drops to zero. Can > > > defer the wait_on_complete to that one instance. > > > > Eh no. The point of having this sleep in the send loop is that > > additional slots may be released for transmission (flipped to > > TP_STATUS_SEND_REQUEST) from another thread while this thread is > > waiting. > > > Thats incorrect. The entirety of tpacket_snd is protected by a mutex. No > other > thread can alter the state of the frames in the vector from the kernel send > path > while this thread is waiting. I meant another user thread updating the memory mapped ring contents. > > Else, it would have been much simpler to move the wait below the send > > loop: send as many packets as possible, then wait for all of them > > having been released. Much clearer control flow. > > > Thats (almost) what happens now. The only difference is that with this > implementation, the waiting thread has the opportunity to see if userspace has > queued more frames for transmission during the wait period. We could > potentially change that, but thats outside the scope of this fix. Agreed. I think the current, more complex, behavior was intentional. We could still restructure to move it out of the loop and jump back. But, yes, definitely out of scope for a fix. > > Where to set and clear the wait_on_complete boolean remains. Integer > > assignment is fragile, as the compiler and processor may optimize or > > move simple seemingly independent operations. As complete() takes a > > spinlock, avoiding that in the DONTWAIT case is worthwhile. But probably > > still preferable to set when beginning waiting and clear when calling > > complete. > We avoid any call to wait_for_complete or complete already, based on the > gating > of the need_wait variable in tpacket_snd. If the transmitting thread doesn't > set MSG_DONTWAIT in the flags of the msg structure, we will never set > wait_for_complete, and so we will never manipulate the completion queue. But we don't know the state of this at tpacket_destruct_skb time without wait_for_completion?
Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support
On Sun, 23 Jun 2019 10:06:49 +0300 wrote: > From: Sameeh Jubran > > This commit implements the basic functionality of drop/pass logic in the > ena driver. > > Signed-off-by: Sameeh Jubran > --- > drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++- > drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++ > 2 files changed, 111 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c > b/drivers/net/ethernet/amazon/ena/ena_netdev.c > index 20ec8ff03..3d65c0771 100644 > --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c > +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c > @@ -33,10 +33,10 @@ > #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > > #ifdef CONFIG_RFS_ACCEL > +#include > #include > #endif /* CONFIG_RFS_ACCEL */ > #include > -#include > #include > #include > #include > @@ -105,11 +105,82 @@ static void update_rx_ring_mtu(struct ena_adapter > *adapter, int mtu) > adapter->rx_ring[i].mtu = mtu; > } > > +static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) > +{ > + struct bpf_prog *xdp_prog = rx_ring->xdp_bpf_prog; > + u32 verdict = XDP_PASS; > + > + rcu_read_lock(); > + > + if (!xdp_prog) > + goto out; > + > + verdict = bpf_prog_run_xdp(xdp_prog, xdp); > + > + if (unlikely(verdict == XDP_ABORTED)) > + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); > + else if (unlikely(verdict > XDP_REDIRECT)) Shouldn't this be verdict >= XDP_TX at this point? You're not providing XDP TX resources in this patch and you're saying that only drop/pass actions are supported. > + bpf_warn_invalid_xdp_action(verdict); > +out: > + rcu_read_unlock(); > + return verdict; > +} > + > +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog) > +{ > + struct ena_adapter *adapter = netdev_priv(netdev); > + struct bpf_prog *old_bpf_prog; > + int i; > + > + if (ena_xdp_allowed(adapter)) { > + old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); > + > + for (i = 0; i < adapter->num_queues; i++) > + xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog); > + > + if (old_bpf_prog) > + bpf_prog_put(old_bpf_prog); > + > + } else { > + netif_err(adapter, drv, adapter->netdev, "Failed to set xdp > program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) > while xdp is on", > + netdev->mtu, ENA_XDP_MAX_MTU); Consider using netlink's extack mechanism. Downside of this approach is that you can't use format specifiers, so you won't tell user about the max allowed mtu size for XDP case, but OTOH the message is printed right in front of user's face in console, no need to look into dmesg. > + return -EFAULT; > + } > + > + return 0; > +} > + > +/* This is the main xdp callback, it's used by the kernel to set/unset the > xdp > + * program as well as to query the current xdp program id. > + */ > +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) > +{ > + struct ena_adapter *adapter = netdev_priv(netdev); > + > + switch (bpf->command) { > + case XDP_SETUP_PROG: > + return ena_xdp_set(netdev, bpf->prog); > + case XDP_QUERY_PROG: > + bpf->prog_id = adapter->xdp_bpf_prog ? > + adapter->xdp_bpf_prog->aux->id : 0; > + break; > + default: Again, consider the following: NL_SET_ERR_MSG_MOD(bpf->extack, "Unsupported XDP command"); > + return -EINVAL; > + } > + return 0; > +} > + > static int ena_change_mtu(struct net_device *dev, int new_mtu) > { > struct ena_adapter *adapter = netdev_priv(dev); > int ret; > > + if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) { > + netif_err(adapter, drv, dev, > + "Requested MTU value is not valid while xdp is > enabled new_mtu: %d max mtu: %lu min mtu: %d\n", > + new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU); > + return -EINVAL; > + } > ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); > if (!ret) { > netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); > @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring > *rx_ring, > va = page_address(rx_info->page) + rx_info->page_offset; > prefetch(va + NET_IP_ALIGN); > > + if (ena_xdp_present_ring(rx_ring)) { > + rx_ring->xdp.data = va; > + rx_ring->xdp.data_meta = rx_ring->xdp.data; > + rx_ring->xdp.data_hard_start = rx_ring->xdp.data - > + rx_info->page_offset; > + rx_ring->xdp.data_end = rx_ring->xdp.data + len; > + if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS) > + return NULL; > + } Hmm running XDP program wit
Re: [PATCH] net: dsa: microchip: Use gpiod_set_value_cansleep()
On Sun, Jun 23, 2019 at 02:10:36PM +0200, Marek Vasut wrote: > Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch > reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly > fine for the kernel to sleep for a bit in ksz_switch_register(). > > Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Andrew
Re: [PATCH] net: dsa: microchip: Use gpiod_set_value_cansleep()
On 6/23/19 5:09 PM, Andrew Lunn wrote: > On Sun, Jun 23, 2019 at 02:10:36PM +0200, Marek Vasut wrote: >> Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch >> reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly >> fine for the kernel to sleep for a bit in ksz_switch_register(). >> >> Signed-off-by: Marek Vasut > > Reviewed-by: Andrew Lunn Actually, no, I missed a change in .remove , so I'll send a V2 with this RB if you don't mind. -- Best regards, Marek Vasut
[PATCH V2] net: dsa: microchip: Use gpiod_set_value_cansleep()
Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly fine for the kernel to sleep for a bit in ksz_switch_register(). Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Linus Walleij Cc: Tristram Ha Cc: Woojung Huh Reviewed-by: Andrew Lunn --- V2: use _cansleep in .remove as well --- drivers/net/dsa/microchip/ksz_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 4f6648d5ac8b..978c59aa8efb 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev, return PTR_ERR(dev->reset_gpio); if (dev->reset_gpio) { - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); mdelay(10); - gpiod_set_value(dev->reset_gpio, 0); + gpiod_set_value_cansleep(dev->reset_gpio, 0); } mutex_init(&dev->dev_mutex); @@ -489,7 +489,7 @@ void ksz_switch_remove(struct ksz_device *dev) dsa_unregister_switch(dev->ds); if (dev->reset_gpio) - gpiod_set_value(dev->reset_gpio, 1); + gpiod_set_value_cansleep(dev->reset_gpio, 1); } EXPORT_SYMBOL(ksz_switch_remove); -- 2.20.1
Re: [PATCH] sis900: increment revision number
From: Sergej Benilov Date: Sun, 23 Jun 2019 09:47:07 +0200 > Increment revision number to 1.08.11 (TX completion fix). > > Signed-off-by: Sergej Benilov These are useless, really... People are going to backport the TX completion fix all by itself and not this change if I were to merge it. I really want to heavily discourage these kinds of things, sorry. Code is code, the fix is there or it isn't.
Re: [PATCH] sis900: increment revision number
From: Daniele Venzano Date: Sun, 23 Jun 2019 11:13:28 +0200 > Hello, > > I think it is good to know just by looking at the sources that the > driver is still kept up-to-date, so I am in favor of this patch. I absolutely, strongly, disagree. These are pointless.
Re: [PATCH V1 net-next] net: ena: Fix bug where ring allocation backoff stopped too late
From: Date: Sun, 23 Jun 2019 10:11:10 +0300 > From: Sameeh Jubran > > The current code of create_queues_with_size_backoff() allows the ring size > to become as small as ENA_MIN_RING_SIZE/2. This is a bug since we don't > want the queue ring to be smaller than ENA_MIN_RING_SIZE > > In this commit we change the loop's termination condition to look at the > queue size of the next iteration instead of that of the current one, > so that the minimal queue size again becomes ENA_MIN_RING_SIZE. > > Fixes: eece4d2ab9d2 ("net: ena: add ethtool function for changing io queue > sizes") > > Signed-off-by: Arthur Kiyanovski > Signed-off-by: Sameeh Jubran Applied, thank you.
Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout
> --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c > +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c > @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, > container_of(attr, struct mlxsw_hwmon_attr, dev_attr); > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; > char mtmp_pl[MLXSW_REG_MTMP_LEN]; > - unsigned int temp; > - int index; > + int temp, index; > int err; > > index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, > @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, > return err; > } > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); > - return sprintf(buf, "%u\n", temp); > + return sprintf(buf, "%d\n", temp); > } If you had used the hwmon core, rather than implementing it yourself, you could of avoided this part of the bug. > static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, > @@ -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device > *dev, > container_of(attr, struct mlxsw_hwmon_attr, dev_attr); > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; > char mtmp_pl[MLXSW_REG_MTMP_LEN]; > - unsigned int temp; > u8 module; > + int temp; > int err; > > module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; I think you missed changing the %u to %d in this function. > @@ -519,14 +519,14 @@ static int mlxsw_thermal_module_temp_get(struct > thermal_zone_device *tzdev, > return 0; > } > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); > - *p_temp = (int) temp; > + *p_temp = temp; > > if (!temp) > return 0; > > /* Update trip points. */ > err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); > - if (!err) > + if (!err && temp > 0) > mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); Why the > 0? Andrew
RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout
> -Original Message- > From: Andrew Lunn > Sent: Sunday, June 23, 2019 6:44 PM > To: Ido Schimmel > Cc: netdev@vger.kernel.org; da...@davemloft.net; Jiri Pirko > ; mlxsw ; Vadim Pasternak > ; Ido Schimmel > Subject: Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative > temperature readout > > > --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c > > +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c > > @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device > *dev, > > container_of(attr, struct mlxsw_hwmon_attr, > dev_attr); > > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; > > char mtmp_pl[MLXSW_REG_MTMP_LEN]; > > - unsigned int temp; > > - int index; > > + int temp, index; > > int err; > > > > index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr- > >type_index, > > @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device > *dev, > > return err; > > } > > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); > > - return sprintf(buf, "%u\n", temp); > > + return sprintf(buf, "%d\n", temp); > > } > > If you had used the hwmon core, rather than implementing it yourself, you > could > of avoided this part of the bug. > Hi Andrew. Yes. But before we handle only positive temperature. And currently support for the negative readouts has been added. > > static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ > > -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct > device *dev, > > container_of(attr, struct mlxsw_hwmon_attr, > dev_attr); > > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; > > char mtmp_pl[MLXSW_REG_MTMP_LEN]; > > - unsigned int temp; > > u8 module; > > + int temp; > > int err; > > > > module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon- > >sensor_count; > > I think you missed changing the %u to %d in this function. If I am not wrong, I think you refer to mlxsw_hwmon_fan_rpm_show(), where it should be %u. > > > @@ -519,14 +519,14 @@ static int mlxsw_thermal_module_temp_get(struct > thermal_zone_device *tzdev, > > return 0; > > } > > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); > > - *p_temp = (int) temp; > > + *p_temp = temp; > > > > if (!temp) > > return 0; > > > > /* Update trip points. */ > > err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); > > - if (!err) > > + if (!err && temp > 0) > > mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, > temp); > > Why the > 0? We don't consider negative temperature for thermal control. > > Andrew
RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout
> -Original Message- > From: Vadim Pasternak > Sent: Sunday, June 23, 2019 7:01 PM > To: Andrew Lunn ; Ido Schimmel > Cc: netdev@vger.kernel.org; da...@davemloft.net; Jiri Pirko > ; mlxsw ; Ido Schimmel > > Subject: RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative > temperature readout > > > > > -Original Message- > > From: Andrew Lunn > > Sent: Sunday, June 23, 2019 6:44 PM > > To: Ido Schimmel > > Cc: netdev@vger.kernel.org; da...@davemloft.net; Jiri Pirko > > ; mlxsw ; Vadim Pasternak > > ; Ido Schimmel > > Subject: Re: [PATCH net-next 3/3] mlxsw: core: Add support for > > negative temperature readout > > > > > --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c > > > +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c > > > @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device > > *dev, > > > container_of(attr, struct mlxsw_hwmon_attr, > > dev_attr); > > > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; > > > char mtmp_pl[MLXSW_REG_MTMP_LEN]; > > > - unsigned int temp; > > > - int index; > > > + int temp, index; > > > int err; > > > > > > index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr- > > >type_index, > > > @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device > > *dev, > > > return err; > > > } > > > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); > > > - return sprintf(buf, "%u\n", temp); > > > + return sprintf(buf, "%d\n", temp); > > > } > > > > If you had used the hwmon core, rather than implementing it yourself, > > you could of avoided this part of the bug. > > > > Hi Andrew. > > Yes. > But before we handle only positive temperature. > And currently support for the negative readouts has been added. > > > > static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ > > > -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct > > device *dev, > > > container_of(attr, struct mlxsw_hwmon_attr, > > dev_attr); > > > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; > > > char mtmp_pl[MLXSW_REG_MTMP_LEN]; > > > - unsigned int temp; > > > u8 module; > > > + int temp; > > > int err; > > > > > > module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon- > sensor_count; > > > > I think you missed changing the %u to %d in this function. > > If I am not wrong, I think you refer to mlxsw_hwmon_fan_rpm_show(), where it > should be %u. > O, I see what you mentioned. This is mlxsw_hwmon_module_temp_show(). Yes, right it should be %d. Thank you. > > > > > @@ -519,14 +519,14 @@ static int > > > mlxsw_thermal_module_temp_get(struct > > thermal_zone_device *tzdev, > > > return 0; > > > } > > > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); > > > - *p_temp = (int) temp; > > > + *p_temp = temp; > > > > > > if (!temp) > > > return 0; > > > > > > /* Update trip points. */ > > > err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); > > > - if (!err) > > > + if (!err && temp > 0) > > > mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, > > temp); > > > > Why the > 0? > > We don't consider negative temperature for thermal control. > > > > > Andrew
Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout
> > Why the > 0? > > We don't consider negative temperature for thermal control. Is this because the thermal control is also broken and does not support negative values? This is just a workaround papering over the cracks? I've worked on some systems where the thermal subsystem has controller a heater. Mostly industrial systems, extended temperature range, and you have to make sure the hardware is kept above -25C, otherwise the DRAM timing goes to pot and the system crashed and froze. Andrew
Re: [PATCH] sis900: increment revision number
On Sun, 2019-06-23 at 08:37 -0700, David Miller wrote: > From: Daniele Venzano > Date: Sun, 23 Jun 2019 11:13:28 +0200 > > > Hello, > > > > I think it is good to know just by looking at the sources that the > > driver is still kept up-to-date, so I am in favor of this patch. > > I absolutely, strongly, disagree. > > These are pointless. Perhaps (most)? all the .get_drvinfo function calls where the driver version is returned should use the default release. This is similar to net/wireless/ethtool.c Maybe: --- net/core/ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4d1011b2e24f..644a2043714c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -806,6 +807,8 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, devlink_compat_running_version(dev, info.fw_version, sizeof(info.fw_version)); + strlcpy(info.version, init_utsname()->release, sizeof(info.version)); + if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; return 0;
RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout
> -Original Message- > From: Andrew Lunn > Sent: Sunday, June 23, 2019 7:26 PM > To: Vadim Pasternak > Cc: Ido Schimmel ; netdev@vger.kernel.org; > da...@davemloft.net; Jiri Pirko ; mlxsw > ; Ido Schimmel > Subject: Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative > temperature readout > > > > Why the > 0? > > > > We don't consider negative temperature for thermal control. > > Is this because the thermal control is also broken and does not support > negative > values? This is just a workaround papering over the cracks? We just have system hardware requirements for minimal speed for system PWM. It could not be less than 20%. So for temperature ~40C or below it PWM will set to this speed. > > I've worked on some systems where the thermal subsystem has controller a > heater. Mostly industrial systems, extended temperature range, and you have to > make sure the hardware is kept above -25C, otherwise the DRAM timing goes to > pot and the system crashed and froze. Interesting input. I didn't know about such feature. We don't have heaters within our systems. Maybe we should think about it for the next generatin systems. > > Andrew
Re: [PATCH net-next 10/16] qlge: Factor out duplicated expression
From: Benjamin Poirier Date: Mon, 17 Jun 2019 16:48:52 +0900 > Signed-off-by: Benjamin Poirier > --- > drivers/net/ethernet/qlogic/qlge/qlge.h | 6 ++ > drivers/net/ethernet/qlogic/qlge/qlge_main.c | 18 ++ > 2 files changed, 12 insertions(+), 12 deletions(-) > > diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h > b/drivers/net/ethernet/qlogic/qlge/qlge.h > index 5a4b2520cd2a..0bb7ccdca6a7 100644 > --- a/drivers/net/ethernet/qlogic/qlge/qlge.h > +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h > @@ -77,6 +77,12 @@ > #define LSD(x) ((u32)((u64)(x))) > #define MSD(x) ((u32)u64)(x)) >> 32))) > > +#define QLGE_FIT16(value) \ > +({ \ > + typeof(value) _value = value; \ > + (_value) == 65536 ? 0 : (u16)(_value); \ > +}) > + "(u16) 65536" is zero and the range of these values is 0 -- 65536. This whole expression is way overdone.
Re: [PATCH net-next 10/16] qlge: Factor out duplicated expression
From: David Miller Date: Sun, 23 Jun 2019 10:59:35 -0700 (PDT) > "(u16) 65536" is zero and the range of these values is 0 -- 65536. > > This whole expression is way overdone. Also, when you post the next revision of this patch series, please provide a proper "[PATCH net-next 00/16]" header posting explaining what this patch series does logically at the high level, how it is doing it, and why it is doing it that way. Thank you.
Re: [PATCH net] vxlan: do not destroy fdb if register_netdevice() is failed
From: Taehee Yoo Date: Thu, 20 Jun 2019 20:51:08 +0900 > __vxlan_dev_create() destroys FDB using specific pointer which indicates > a fdb when error occurs. > But that pointer should not be used when register_netdevice() fails because > register_netdevice() internally destroys fdb when error occurs. > > In order to avoid un-registered dev's notification, fdb destroying routine > checks dev's register status before notification. Simply pass do_notify as false in this failure code path of __vxlan_dev_create(), thank you.
Re: network unstable on odroid-c1/meson8b.
Le 20/06/2019 à 22:54, Aymeric a écrit : > Le 20/06/2019 à 17:53, Heiner Kallweit a écrit : >> On 20.06.2019 09:55, Aymeric wrote: >>> Hi, >>> On 2019-06-20 00:14, Heiner Kallweit wrote: On 19.06.2019 22:18, Aymeric wrote: > Hello all, > Kernel 3.10 didn't have a dedicated RTL8211F PHY driver yet, therefore I assume the genphy driver was used. Do you have a line with "attached PHY driver" in dmesg output of the vendor kernel? >>> No. >>> Here is the full output of the dmesg from vendor kernel [¹]. >>> >>> I've also noticed something strange, it might be linked, but mac address of >>> the board is set to a random value when using mainline kernel and I've to >>> set it manually but not when using vendor kernel. >>> The dedicated PHY driver takes care of the tx delay, if the genphy driver is used we have to rely on what uboot configured. But if we indeed had an issue with a misconfigured delay, I think the connection shouldn't be fine with just another link partner. Just to have it tested you could make rtl8211f_config_init() in drivers/net/phy/realtek.c a no-op (in current kernels). >>> I'm not an expert here, just adding a "return 0;" here[²] would be enough? >>> And you could compare at least the basic PHY registers 0x00 - 0x30 with both kernel versions, e.g. with phytool. >>> They are not the same but I don't know what I'm looking for, so for kernel >>> 3.10 [³] and for kernel 5.1.12 [⁴]. >>> >>> Aymeric >>> >>> [¹]: >>> https://paste.aplu.fr/?38ef95b44ebdbfc3#G666/YbhgU+O+tdC/2HaimUCigm8ZTB44qvQip/HJ5A= >>> [²]: >>> https://github.com/torvalds/linux/blob/241e39004581475b2802cd63c111fec43bb0123e/drivers/net/phy/realtek.c#L164 >>> [³]: >>> https://paste.aplu.fr/?2dde1c32d5c68f4c#6xIa8MjTm6jpI6citEJAqFTLMMHDjFZRet/M00/EwjU= >>> [⁴]: >>> https://paste.aplu.fr/?32130e9bcb05dde7#N/xdnvb5GklcJtiOxMpTCm+9gsUliRwH8X3dcwSV+ng= >>> >> The vendor kernel has some, but not really much magic: >> https://github.com/hardkernel/linux/blob/odroidc-3.10.y/drivers/amlogic/ethernet/phy/am_rtl8211f.c >> The write to RTL8211F_PHYCR2 is overwritten later, therefore we don't have >> to consider it. >> >> The following should make the current Realtek PHY driver behave like in the >> vendor driver. >> Could you test it? > (sending again for mailing list, sorry, I forgot to force it in plaintext…) > > I've applied your patch and tried but it doesn't change anything. > > Here is dmesg output and phytool results. > > https://paste.aplu.fr/?9735c99907528929#SeCgwR45cgnbDA1tXIVBHCBT8RNct2r41jU6vsguLVc= > Hello all, I had some news from a friend who have the same issue than me, his board is connected to an "intelligent" switch a Ubiquiti EdgeSwitch. Also, when he force the link to 100 it is stable. Aymeric. -- Aymeric
Re: [PATCH v3 net-next 0/5] ipv6: avoid taking refcnt on dst during route lookup
From: Wei Wang Date: Thu, 20 Jun 2019 17:36:36 -0700 > v2->v3: > - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not > configured in patch 03 (suggested by David Ahern) > - Removed the renaming of l3mdev_link_scope_lookup() in patch 05 > (suggested by David Ahern) > - Moved definition of ip6_route_output_flags() from an inline function > in /net/ipv6/route.c to net/ipv6/route.c in order to address kbuild > error in patch 05 I'll give David A. a chance to review this before applying.
Re: [PATCH net-next 1/4] cxgb4: Re-work the logic for mps refcounting
From: Raju Rangoju Date: Fri, 21 Jun 2019 20:06:33 +0530 > +struct mps_entries_ref { > + struct list_head list; > + u8 addr[ETH_ALEN]; > + u8 mask[ETH_ALEN]; > + u16 idx; > + atomic_t refcnt; > +}; Since you're making this change, please use refcnt_t.
Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET
On Sun, Jun 23, 2019 at 10:39:12AM -0400, Willem de Bruijn wrote: > On Sun, Jun 23, 2019 at 7:40 AM Neil Horman wrote: > > > > On Sat, Jun 22, 2019 at 10:21:31PM -0400, Willem de Bruijn wrote: > > > > > -static void __packet_set_status(struct packet_sock *po, void *frame, > > > > > int status) > > > > > +static void __packet_set_status(struct packet_sock *po, void *frame, > > > > > int status, > > > > > + bool call_complete) > > > > > { > > > > > union tpacket_uhdr h; > > > > > > > > > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct > > > > > packet_sock *po, void *frame, int status) > > > > > BUG(); > > > > > } > > > > > > > > > > + if (po->wait_on_complete && call_complete) > > > > > + complete(&po->skb_completion); > > > > > > > > This wake need not happen before the barrier. Only one caller of > > > > __packet_set_status passes call_complete (tpacket_destruct_skb). > > > > Moving this branch to the caller avoids a lot of code churn. > > > > > > > > Also, multiple packets may be released before the process is awoken. > > > > The process will block until packet_read_pending drops to zero. Can > > > > defer the wait_on_complete to that one instance. > > > > > > Eh no. The point of having this sleep in the send loop is that > > > additional slots may be released for transmission (flipped to > > > TP_STATUS_SEND_REQUEST) from another thread while this thread is > > > waiting. > > > > > Thats incorrect. The entirety of tpacket_snd is protected by a mutex. No > > other > > thread can alter the state of the frames in the vector from the kernel send > > path > > while this thread is waiting. > > I meant another user thread updating the memory mapped ring contents. > Yes, thats true, and if that happens, we will loop through this path again (the do..while section, picking up the next frame for transmit) > > > Else, it would have been much simpler to move the wait below the send > > > loop: send as many packets as possible, then wait for all of them > > > having been released. Much clearer control flow. > > > > > Thats (almost) what happens now. The only difference is that with this > > implementation, the waiting thread has the opportunity to see if userspace > > has > > queued more frames for transmission during the wait period. We could > > potentially change that, but thats outside the scope of this fix. > > Agreed. I think the current, more complex, behavior was intentional. > We could still restructure to move it out of the loop and jump back. > But, yes, definitely out of scope for a fix. > Yes, it was, though based on your comments I've moved the wait_for_completion call to the bottom of the loop, so its only checked after we are guaranteed to have sent at least one frame. I think that makes the code a bit more legible. > > > Where to set and clear the wait_on_complete boolean remains. Integer > > > assignment is fragile, as the compiler and processor may optimize or > > > move simple seemingly independent operations. As complete() takes a > > > spinlock, avoiding that in the DONTWAIT case is worthwhile. But probably > > > still preferable to set when beginning waiting and clear when calling > > > complete. > > We avoid any call to wait_for_complete or complete already, based on the > > gating > > of the need_wait variable in tpacket_snd. If the transmitting thread > > doesn't > > set MSG_DONTWAIT in the flags of the msg structure, we will never set > > wait_for_complete, and so we will never manipulate the completion queue. > > But we don't know the state of this at tpacket_destruct_skb time without > wait_for_completion? > Sure we do, wait_for_complete is stored in the packet_sock structure, which is available and stable at the time tpacket_destruct_skb is called. po->wait_for_complete is set in tpacket_snd iff: 1) The MSG_DONTWAIT flag is clear and 2) We have detected that the next frame in the memory mapped buffer does not have its status set to TP_STATUS_SEND_REQUEST. If those two conditions are true, we set po->wait_for_complete to 1, which indicates that tpacket_destruct_skb should call complete, when all the frames we've sent to the physical layer have been freed (i.e. when packet_read_pending is zero). If wait_for_complete is non-zero, we also can be confident that the calling task is either: a) Already blocking on wait_for_completion_interruptible_timeout or b) Will be waiting on it shortly In case (a) the blocking/transmitting task will be woken up, and continue on its way In case (b) the transmitting task will call wait_for_completion_interruptible_timeout, see that the completion has already been called (based on the completion structs done variable being positive), and return immediately. I've made a slight update to the logic/comments in my next version to make that a little more clear Neil
Re: [PATCH v3 net-next 0/5] ipv6: avoid taking refcnt on dst during route lookup
On 6/23/19 12:27 PM, David Miller wrote: > From: Wei Wang > Date: Thu, 20 Jun 2019 17:36:36 -0700 > >> v2->v3: >> - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not >> configured in patch 03 (suggested by David Ahern) >> - Removed the renaming of l3mdev_link_scope_lookup() in patch 05 >> (suggested by David Ahern) >> - Moved definition of ip6_route_output_flags() from an inline function >> in /net/ipv6/route.c to net/ipv6/route.c in order to address kbuild >> error in patch 05 > > I'll give David A. a chance to review this before applying. > Hey Dave: I responded to the cover-letter on Friday.
Re: [PATCH v3 net-next 0/5] ipv6: avoid taking refcnt on dst during route lookup
From: David Ahern Date: Sun, 23 Jun 2019 13:29:27 -0600 > On 6/23/19 12:27 PM, David Miller wrote: >> From: Wei Wang >> Date: Thu, 20 Jun 2019 17:36:36 -0700 >> >>> v2->v3: >>> - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not >>> configured in patch 03 (suggested by David Ahern) >>> - Removed the renaming of l3mdev_link_scope_lookup() in patch 05 >>> (suggested by David Ahern) >>> - Moved definition of ip6_route_output_flags() from an inline function >>> in /net/ipv6/route.c to net/ipv6/route.c in order to address kbuild >>> error in patch 05 >> >> I'll give David A. a chance to review this before applying. >> > > Hey Dave: I responded to the cover-letter on Friday. Indeed, and so did tractor man. Series applied, thanks everyone.
Re: [EXT] [PATCH] bnx2x: Prevent ptp_task to be rescheduled indefinitely
On Sun, Jun 23, 2019 at 2:13 AM Sudarsana Reddy Kalluru wrote: > > Thanks for uncovering this issue, and the fix. > With the proposed fix, if HW latches the timestamp after 3 iterations then it > would lead to erroneous PTP functionality. When driver receives the next PTP > packet, driver sees that timestamp is available in the register and would > assign this value (which corresponds to last/skipped ptp packet) to the PTP > packet. In general, FW takes around 1ms to 100ms to perform the timestamp and > may take more. Hence the promising solution for this issue would be to wait > for timestamp for particular period of time. If Tx timestamp is not available > for a pre-determined max period, then declare it as an error scenario and > terminate the thread. > > Just for the reference, similar fix was added for Marvell qede driver > recently. Patch details are, > 9adebac37e7d: (qede: Handle infinite driver spinning for Tx timestamp.) > https://www.spinics.net/lists/netdev/msg572838.html > Thanks a lot for the quick review and good suggestion Sudarsana! I'll rework the fix based on your reference, and resubmit. Cheers, Guilherme
Re: [PATCH v2 0/3] fix bugs when enable route_localnet
> From: Zhiqiang Liu > Date: Sat, 22 Jun 2019 16:41:49 +0800 > >> Friendly ping ... > > I'm not applying this patch series without someone reviewing it. > Of course, all patches should be reviewd before deciding whether to apply. In v2, we add a couple of test for enabling route_localnet in selftests suggested by David Ahern. In additon, another similar bugfix is added for arp_ignore = 3. We would appreciate David Ahern or someone could help review the patch series.
Re: 6c409a3aee: kernel_selftests.bpf.test_verifier.fail
On 6/21/19 11:52 PM, Alexei Starovoitov wrote: On Fri, Jun 21, 2019 at 1:36 AM kernel test robot wrote: # #340/p direct packet access: test22 (x += pkt_ptr, 3) OK # #341/p direct packet access: test23 (x += pkt_ptr, 4) FAIL # Unexpected success to load! # verification time 17 usec # stack depth 8 # processed 18 insns (limit 100) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 0 # #342/p direct packet access: test24 (x += pkt_ptr, 5) OK # #343/p direct packet access: test25 (marking on <, good access) OK .. # #673/p meta access, test9 OK # #674/p meta access, test10 FAIL # Unexpected success to load! # verification time 29 usec # stack depth 8 # processed 19 insns (limit 100) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 0 # #675/p meta access, test11 OK Hi Rong, the patch quoted is not in bpf-next/net-next. This patch is work-in-progress that I posted to mailing list and pushed into my own git branch on kernel.org. It's awesome that build bot does this early testing. I really like it. Would be great if the bot can add a tag to email subject that it's testing this not yet merged patch. Right now since the email says commit: 6c409a3aee945e50c6dd4109689f52 it felt that this is real commit and my initial reaction was that 'ohh something is broken in the merge code' which wasn't the case :) Hi Alexei, Thanks for the advice, we'll improve the email subject. Best Regards, Rong Chen
[PATCH v3 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET
When an application is run that: a) Sets its scheduler to be SCHED_FIFO and b) Opens a memory mapped AF_PACKET socket, and sends frames with the MSG_DONTWAIT flag cleared, its possible for the application to hang forever in the kernel. This occurs because when waiting, the code in tpacket_snd calls schedule, which under normal circumstances allows other tasks to run, including ksoftirqd, which in some cases is responsible for freeing the transmitted skb (which in AF_PACKET calls a destructor that flips the status bit of the transmitted frame back to available, allowing the transmitting task to complete). However, when the calling application is SCHED_FIFO, its priority is such that the schedule call immediately places the task back on the cpu, preventing ksoftirqd from freeing the skb, which in turn prevents the transmitting task from detecting that the transmission is complete. We can fix this by converting the schedule call to a completion mechanism. By using a completion queue, we force the calling task, when it detects there are no more frames to send, to schedule itself off the cpu until such time as the last transmitted skb is freed, allowing forward progress to be made. Tested by myself and the reporter, with good results Appies to the net tree Signed-off-by: Neil Horman Reported-by: Matteo Croce CC: "David S. Miller" CC: Willem de Bruijn Change Notes: V1->V2: Enhance the sleep logic to support being interruptible and allowing for honoring to SK_SNDTIMEO (Willem de Bruijn) V2->V3: Rearrage the point at which we wait for the completion queue, to avoid needing to check for ph/skb being null at the end of the loop. Also move the complete call to the skb destructor to avoid needing to modify __packet_set_status. Also gate calling complete on packet_read_pending returning zero to avoid multiple calls to complete. (Willem de Bruijn) Move timeo computation within loop, to re-fetch the socket timeout since we also use the timeo variable to record the return code from the wait_for_complete call (Neil Horman) --- net/packet/af_packet.c | 59 +- net/packet/internal.h | 2 ++ 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a29d66da7394..5c48bb7a4fa5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -380,7 +380,6 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status) WARN(1, "TPACKET version not supported.\n"); BUG(); } - smp_wmb(); } @@ -1148,6 +1147,14 @@ static void *packet_previous_frame(struct packet_sock *po, return packet_lookup_frame(po, rb, previous, status); } +static void *packet_next_frame(struct packet_sock *po, + struct packet_ring_buffer *rb, + int status) +{ + unsigned int next = rb->head != rb->frame_max ? rb->head+1 : 0; + return packet_lookup_frame(po, rb, next, status); +} + static void packet_increment_head(struct packet_ring_buffer *buff) { buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; @@ -2401,6 +2408,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); + + if (po->wait_on_complete && !packet_read_pending(&po->tx_ring)) + complete(&po->skb_completion); } sock_wfree(skb); @@ -2600,9 +2610,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; + long timeo = 0; mutex_lock(&po->pg_vec_lock); + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; @@ -2647,16 +2659,16 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) size_max = dev->mtu + reserve + VLAN_HLEN; do { + ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); - if (unlikely(ph == NULL)) { - if (need_wait && need_resched()) - schedule(); - continue; - } + + if (unlikely(ph == NULL)) + break; skb = NULL; tp_len = tpacket_parse_header(po, ph, size_max, &data); + if (tp_len < 0) goto tpacket_error; @@ -2720,6 +2732,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) skb->destructor = tpacket_destruct_skb; __packet_set_status(po, ph, TP_STATUS_SENDING); + + /* +* If we need to wait and we've sent the last f
RE: [iproute2-next v5] tipc: support interface name when activating UDP bearer
Thanks David. I will update code change as your comments. For the item: > + /* remove from cache */ > + ll_drop_by_index(ifr.ifr_ifindex); why the call to ll_drop_by_index? doing so means that ifindex is looked up again. [Hoang] > + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); This function stored an entry ll_cache in hash map table. We have to call this function to prevent memory leaked. Regards, Hoang -Original Message- From: David Ahern Sent: Saturday, June 22, 2019 5:50 AM To: Hoang Le ; dsah...@gmail.com; jon.ma...@ericsson.com; ma...@donjonn.com; ying@windriver.com; netdev@vger.kernel.org; tipc-discuss...@lists.sourceforge.net Subject: Re: [iproute2-next v5] tipc: support interface name when activating UDP bearer On 6/13/19 2:07 AM, Hoang Le wrote: > @@ -119,6 +121,74 @@ static int generate_multicast(short af, char *buf, int > bufsize) > return 0; > } > > +static struct ifreq ifr = {}; you don't need to initialize globals, but you could pass a a struct as the arg to the filter here which is both the addr buffer and the ifindex of interest. > +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg) > +{ > + struct ifaddrmsg *ifa = NLMSG_DATA(nlh); > + char *r_addr = (char *)arg; > + int len = nlh->nlmsg_len; > + struct rtattr *addr_attr; > + > + if (ifr.ifr_ifindex != ifa->ifa_index) > + return 0; > + > + if (strlen(r_addr) > 0) > + return 1; > + > + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa), > + len - NLMSG_LENGTH(sizeof(*ifa))); > + if (!addr_attr) > + return 0; > + > + if (ifa->ifa_family == AF_INET) { > + struct sockaddr_in ip4addr; > + memcpy(&ip4addr.sin_addr, RTA_DATA(addr_attr), > +sizeof(struct in_addr)); > + if (inet_ntop(AF_INET, &ip4addr.sin_addr, r_addr, > + INET_ADDRSTRLEN) == NULL) > + return 0; > + } else if (ifa->ifa_family == AF_INET6) { > + struct sockaddr_in6 ip6addr; > + memcpy(&ip6addr.sin6_addr, RTA_DATA(addr_attr), > +sizeof(struct in6_addr)); > + if (inet_ntop(AF_INET6, &ip6addr.sin6_addr, r_addr, > + INET6_ADDRSTRLEN) == NULL) > + return 0; > + } > + return 1; > +} > + > +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr) > +{ > + struct rtnl_handle rth ={ .fd = -1 }; space between '={' > + > + memset(&ifr, 0, sizeof(ifr)); > + if (!name || !r_addr || get_ifname(ifr.ifr_name, name)) > + return 0; > + > + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name); > + if (!ifr.ifr_ifindex) > + return 0; > + > + /* remove from cache */ > + ll_drop_by_index(ifr.ifr_ifindex); why the call to ll_drop_by_index? doing so means that ifindex is looked up again. > + > + if (rtnl_open(&rth, 0) < 0) > + return 0; > + > + if (rtnl_addrdump_req(&rth, AF_UNSPEC, 0) < 0) { If you pass a filter here to set ifa_index, this command on newer kernels will be much more efficient. See ipaddr_dump_filter. > + rtnl_close(&rth); > + return 0; > + } > + > + if (rtnl_dump_filter(&rth, nl_dump_addr_filter, r_addr) < 0) { > + rtnl_close(&rth); > + return 0; > + } > + rtnl_close(&rth); > + return 1; > +} it would better to have 1 exit with the rtnl_close and return rc based on above.
Re: [PATCH net] vxlan: do not destroy fdb if register_netdevice() is failed
On Mon, 24 Jun 2019 at 03:07, David Miller wrote: > Hi David, Thank you for the review! > From: Taehee Yoo > Date: Thu, 20 Jun 2019 20:51:08 +0900 > > > __vxlan_dev_create() destroys FDB using specific pointer which indicates > > a fdb when error occurs. > > But that pointer should not be used when register_netdevice() fails because > > register_netdevice() internally destroys fdb when error occurs. > > > > In order to avoid un-registered dev's notification, fdb destroying routine > > checks dev's register status before notification. > > Simply pass do_notify as false in this failure code path of > __vxlan_dev_create(), > thank you. Failure path of __vxlan_dev_create() can't handle do_notify in that case because if register_netdevice() fails it internally calls ->ndo_uninit() which is vxlan_uninit(). vxlan_uninit() internally calls vxlan_fdb_delete_default() and it callls vxlan_fdb_destroy(). do_notify of vxlan_fdb_destroy() in vxlan_fdb_delete_default() is always true. So, failure path of __vxlan_dev_create() doesn't have any opportunity to handle do_notify. Thank you!
Re: [PATCH next 0/3] blackhole device to invalidate dst
On Sat, Jun 22, 2019 at 8:35 AM David Ahern wrote: > > On 6/21/19 6:45 PM, Mahesh Bandewar wrote: > > When we invalidate dst or mark it "dead", we assign 'lo' to > > dst->dev. First of all this assignment is racy and more over, > > it has MTU implications. > > > > The standard dev MTU is 1500 while the Loopback MTU is 64k. TCP > > code when dereferencing the dst don't check if the dst is valid > > or not. TCP when dereferencing a dead-dst while negotiating a > > new connection, may use dst device which is 'lo' instead of > > using the correct device. Consider the following scenario: > > > > Why doesn't the TCP code (or any code) check if a cached dst is valid? > That's the whole point of marking it dead - to tell users not to rely on > it. Well, I'm not an expert in TCP, but I could guess. Eric, please help me being honest with my guess. The code that marks it dead (control path) and the code that need to check the validity (data-path) need to have some sort of synchronization and that could be costly in the data-path. Also what is the worst case scenario? ... that packet is going to get dropped since the under-lying device or route has issues (minus this corner case). May be that was the reason. As I mentioned in the log, we could remove the racy-ness with barriers or locks but then that would be an additional cost in the data-path. having a dummy/backhole dev is the cheapest solution with no changes to the data-path.
Re: [PATCH V3 01/10] net: dsa: microchip: Remove ksz_{read,write}24()
On Mon, Jun 24, 2019 at 12:34:59AM +0200, Marek Vasut wrote: > These functions and callbacks are never used, remove them. > > Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Andrew
Re: [PATCH V3 02/10] net: dsa: microchip: Remove ksz_{get,set}()
On Mon, Jun 24, 2019 at 12:35:00AM +0200, Marek Vasut wrote: > These functions and callbacks are never used, remove them. > > Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Andrew
Re: [PATCH V3 03/10] net: dsa: microchip: Inline ksz_spi.h
On Mon, Jun 24, 2019 at 12:35:01AM +0200, Marek Vasut wrote: > The functions in the header file are static, and the header file is > included from single C file, just inline the code into the C file. > The bonus is that it's easier to spot further content to clean up. > > Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Andrew
Re: [PATCH V3 04/10] net: dsa: microchip: Move ksz_cfg and ksz_port_cfg to ksz9477.c
On Mon, Jun 24, 2019 at 12:35:02AM +0200, Marek Vasut wrote: > These functions are only used by the KSZ9477 code, move them from > the header into that code. Note that these functions will be soon > replaced by regmap equivalents. > > Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Andrew
Re: [PATCH V3 05/10] net: dsa: microchip: Use PORT_CTRL_ADDR() instead of indirect function call
On Mon, Jun 24, 2019 at 12:35:03AM +0200, Marek Vasut wrote: > The indirect function call to dev->dev_ops->get_port_addr() is expensive > especially if called for every single register access, and only returns > the value of PORT_CTRL_ADDR() macro. Use PORT_CTRL_ADDR() macro directly > instead. Hi Marek Rather than change just one instance, it would be better to change them all. And then remove dev_ops->get_port_addr(). Andrew
Re: [PATCH V3 06/10] net: dsa: microchip: Factor out register access opcode generation
On Mon, Jun 24, 2019 at 12:35:04AM +0200, Marek Vasut wrote: > Factor out the code which sends out the register read/write opcodes > to the switch, since the code differs in single bit between read and > write. > > Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Andrew
[PATCH V3 06/10] net: dsa: microchip: Factor out register access opcode generation
Factor out the code which sends out the register read/write opcodes to the switch, since the code differs in single bit between read and write. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: New patch V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477_spi.c | 22 -- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index a34e66eccbcd..49aeb92d36fc 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -25,19 +25,24 @@ /* Enough to read all switch port registers. */ #define SPI_TX_BUF_LEN 0x100 -static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val, - unsigned int len) +static u32 ksz9477_spi_cmd(u32 reg, bool read) { u32 txbuf; - int ret; txbuf = reg & SPI_ADDR_MASK; - txbuf |= KS_SPIOP_RD << SPI_ADDR_SHIFT; + txbuf |= (read ? KS_SPIOP_RD : KS_SPIOP_WR) << SPI_ADDR_SHIFT; txbuf <<= SPI_TURNAROUND_SHIFT; txbuf = cpu_to_be32(txbuf); - ret = spi_write_then_read(spi, &txbuf, 4, val, len); - return ret; + return txbuf; +} + +static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val, + unsigned int len) +{ + u32 txbuf = ksz9477_spi_cmd(reg, true); + + return spi_write_then_read(spi, &txbuf, 4, val, len); } static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val, @@ -45,10 +50,7 @@ static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val, { u32 *txbuf = (u32 *)val; - *txbuf = reg & SPI_ADDR_MASK; - *txbuf |= (KS_SPIOP_WR << SPI_ADDR_SHIFT); - *txbuf <<= SPI_TURNAROUND_SHIFT; - *txbuf = cpu_to_be32(*txbuf); + *txbuf = ksz9477_spi_cmd(reg, false); return spi_write(spi, txbuf, 4 + len); } -- 2.20.1
[PATCH V3 09/10] net: dsa: microchip: Factor out regmap config generation into common header
The regmap config tables are rather similar for various generations of the KSZ8xxx/KSZ9xxx switches. Introduce a macro which allows generating those tables without duplication. Note that $regalign parameter is not used right now, but will be used in KSZ87xx series switches. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: New patch V3: - Rebase on next/master - Test on KSZ9477EVB - Increase regmap max register, to cover all switch registers - Make register swabbing configurable, to allow handling switches with only 16bit registers as well as switches with some 32bit ones --- drivers/net/dsa/microchip/ksz9477_spi.c | 29 +++--- drivers/net/dsa/microchip/ksz_common.h | 32 + 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 8c8bf3237013..5a9e27b337a8 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -14,37 +14,14 @@ #include #include "ksz_priv.h" +#include "ksz_common.h" #define SPI_ADDR_SHIFT 24 #define SPI_ADDR_ALIGN 3 #define SPI_TURNAROUND_SHIFT 5 -/* SPI frame opcodes */ -#define KS_SPIOP_RD3 -#define KS_SPIOP_WR2 - -#define KS_SPIOP_FLAG_MASK(opcode) \ - cpu_to_be32((opcode) << (SPI_ADDR_SHIFT + SPI_TURNAROUND_SHIFT)) - -#define KSZ_REGMAP_COMMON(width) \ - { \ - .val_bits = (width),\ - .reg_stride = (width) / 8, \ - .reg_bits = SPI_ADDR_SHIFT + SPI_ADDR_ALIGN,\ - .pad_bits = SPI_TURNAROUND_SHIFT, \ - .max_register = BIT(SPI_ADDR_SHIFT) - 1,\ - .cache_type = REGCACHE_NONE,\ - .read_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_RD), \ - .write_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_WR), \ - .reg_format_endian = REGMAP_ENDIAN_BIG, \ - .val_format_endian = REGMAP_ENDIAN_BIG \ - } - -static const struct regmap_config ksz9477_regmap_config[] = { - KSZ_REGMAP_COMMON(8), - KSZ_REGMAP_COMMON(16), - KSZ_REGMAP_COMMON(32), -}; +KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT, +SPI_TURNAROUND_SHIFT, SPI_ADDR_ALIGN); static int ksz9477_spi_probe(struct spi_device *spi) { diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index c3871ed9b097..78b5ab7db403 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -133,4 +133,36 @@ static inline u32 ksz_pread32_poll(struct ksz_poll_ctx *ctx) return data; } +/* Regmap tables generation */ +#define KSZ_SPI_OP_RD 3 +#define KSZ_SPI_OP_WR 2 + +#define KSZ_SPI_OP_FLAG_MASK(opcode, swp, regbits, regpad) \ + cpu_to_be##swp((opcode) << ((regbits) + (regpad))) + +#define KSZ_REGMAP_ENTRY(width, swp, regbits, regpad, regalign) \ + { \ + .val_bits = (width),\ + .reg_stride = (width) / 8, \ + .reg_bits = (regbits) + (regalign), \ + .pad_bits = (regpad), \ + .max_register = BIT(regbits) - 1, \ + .cache_type = REGCACHE_NONE,\ + .read_flag_mask = \ + KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_RD, swp,\ +regbits, regpad), \ + .write_flag_mask = \ + KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp,\ +regbits, regpad), \ + .reg_format_endian = REGMAP_ENDIAN_BIG, \ + .val_format_endian = REGMAP_ENDIAN_BIG \ + } + +#define KSZ_REGMAP_TABLE(ksz, swp, regbits, regpad, regalign) \ + static const struct regmap_config ksz##_regmap_config[] = { \ + KSZ_REGMAP_ENTRY(8, swp, (regbits), (regpad), (regalign)), \ + KSZ_REGMAP_ENTRY(16, swp, (regbits), (regpad), (regalign)), \ + KSZ_REGMAP_ENTRY(32, swp, (regbits), (regpad), (regalign)), \ + } + #endif -- 2.20.1
[PATCH V3 01/10] net: dsa: microchip: Remove ksz_{read,write}24()
These functions and callbacks are never used, remove them. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: No change V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477_spi.c | 25 - drivers/net/dsa/microchip/ksz_common.h | 22 -- drivers/net/dsa/microchip/ksz_priv.h| 2 -- 3 files changed, 49 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 75178624d3f5..e7118319c192 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -73,37 +73,12 @@ static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data, return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len); } -static int ksz_spi_read24(struct ksz_device *dev, u32 reg, u32 *val) -{ - int ret; - - *val = 0; - ret = ksz_spi_read(dev, reg, (u8 *)val, 3); - if (!ret) { - *val = be32_to_cpu(*val); - /* convert to 24bit */ - *val >>= 8; - } - - return ret; -} - -static int ksz_spi_write24(struct ksz_device *dev, u32 reg, u32 value) -{ - /* make it to big endian 24bit from MSB */ - value <<= 8; - value = cpu_to_be32(value); - return ksz_spi_write(dev, reg, &value, 3); -} - static const struct ksz_io_ops ksz9477_spi_ops = { .read8 = ksz_spi_read8, .read16 = ksz_spi_read16, - .read24 = ksz_spi_read24, .read32 = ksz_spi_read32, .write8 = ksz_spi_write8, .write16 = ksz_spi_write16, - .write24 = ksz_spi_write24, .write32 = ksz_spi_write32, .get = ksz_spi_get, .set = ksz_spi_set, diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 21cd794e18f1..1781539c3a81 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -61,17 +61,6 @@ static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val) return ret; } -static inline int ksz_read24(struct ksz_device *dev, u32 reg, u32 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read24(dev, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val) { int ret; @@ -105,17 +94,6 @@ static inline int ksz_write16(struct ksz_device *dev, u32 reg, u16 value) return ret; } -static inline int ksz_write24(struct ksz_device *dev, u32 reg, u32 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write24(dev, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value) { int ret; diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h index c615d2a81dd5..5ef6153bd2cc 100644 --- a/drivers/net/dsa/microchip/ksz_priv.h +++ b/drivers/net/dsa/microchip/ksz_priv.h @@ -105,11 +105,9 @@ struct ksz_device { struct ksz_io_ops { int (*read8)(struct ksz_device *dev, u32 reg, u8 *value); int (*read16)(struct ksz_device *dev, u32 reg, u16 *value); - int (*read24)(struct ksz_device *dev, u32 reg, u32 *value); int (*read32)(struct ksz_device *dev, u32 reg, u32 *value); int (*write8)(struct ksz_device *dev, u32 reg, u8 value); int (*write16)(struct ksz_device *dev, u32 reg, u16 value); - int (*write24)(struct ksz_device *dev, u32 reg, u32 value); int (*write32)(struct ksz_device *dev, u32 reg, u32 value); int (*get)(struct ksz_device *dev, u32 reg, void *data, size_t len); int (*set)(struct ksz_device *dev, u32 reg, void *data, size_t len); -- 2.20.1
[PATCH V3 08/10] net: dsa: microchip: Dispose of ksz_io_ops
Since the driver now uses regmap , get rid of ad-hoc ksz_io_ops abstraction, which no longer has any meaning. Moreover, since regmap has it's own locking, get rid of the register access mutex. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: Use separate regmaps for 8/16/32bit registers V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477_spi.c | 57 + drivers/net/dsa/microchip/ksz_common.c | 6 +-- drivers/net/dsa/microchip/ksz_common.h | 50 ++ drivers/net/dsa/microchip/ksz_priv.h| 16 +-- 4 files changed, 17 insertions(+), 112 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 77e3cb100eae..8c8bf3237013 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -46,67 +46,12 @@ static const struct regmap_config ksz9477_regmap_config[] = { KSZ_REGMAP_COMMON(32), }; -static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val) -{ - unsigned int value; - int ret = regmap_read(dev->regmap, reg, &value); - - *val = value; - return ret; -} - -static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val) -{ - int ret = regmap_bulk_read(dev->regmap, reg, val, 2); - - if (!ret) - *val = be16_to_cpu(*val); - - return ret; -} - -static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val) -{ - int ret = regmap_bulk_read(dev->regmap, reg, val, 4); - - if (!ret) - *val = be32_to_cpu(*val); - - return ret; -} - -static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value) -{ - return regmap_write(dev->regmap, reg, value); -} - -static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value) -{ - value = cpu_to_be16(value); - return regmap_bulk_write(dev->regmap, reg, &value, 2); -} - -static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value) -{ - value = cpu_to_be32(value); - return regmap_bulk_write(dev->regmap, reg, &value, 4); -} - -static const struct ksz_io_ops ksz9477_spi_ops = { - .read8 = ksz_spi_read8, - .read16 = ksz_spi_read16, - .read32 = ksz_spi_read32, - .write8 = ksz_spi_write8, - .write16 = ksz_spi_write16, - .write32 = ksz_spi_write32, -}; - static int ksz9477_spi_probe(struct spi_device *spi) { struct ksz_device *dev; int i, ret; - dev = ksz_switch_alloc(&spi->dev, &ksz9477_spi_ops, spi); + dev = ksz_switch_alloc(&spi->dev, spi); if (!dev) return -ENOMEM; diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 978c59aa8efb..a3d2d67894bd 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -396,9 +396,7 @@ void ksz_disable_port(struct dsa_switch *ds, int port) } EXPORT_SYMBOL_GPL(ksz_disable_port); -struct ksz_device *ksz_switch_alloc(struct device *base, - const struct ksz_io_ops *ops, - void *priv) +struct ksz_device *ksz_switch_alloc(struct device *base, void *priv) { struct dsa_switch *ds; struct ksz_device *swdev; @@ -416,7 +414,6 @@ struct ksz_device *ksz_switch_alloc(struct device *base, swdev->ds = ds; swdev->priv = priv; - swdev->ops = ops; return swdev; } @@ -442,7 +439,6 @@ int ksz_switch_register(struct ksz_device *dev, } mutex_init(&dev->dev_mutex); - mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex); mutex_init(&dev->alu_mutex); mutex_init(&dev->vlan_mutex); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index fe576a00facf..c3871ed9b097 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -7,6 +7,8 @@ #ifndef __KSZ_COMMON_H #define __KSZ_COMMON_H +#include + void ksz_port_cleanup(struct ksz_device *dev, int port); void ksz_update_port_member(struct ksz_device *dev, int port); void ksz_init_mib_timer(struct ksz_device *dev); @@ -41,68 +43,44 @@ void ksz_disable_port(struct dsa_switch *ds, int port); static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val) { - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read8(dev, reg, val); - mutex_unlock(&dev->reg_mutex); + unsigned int value; + int ret = regmap_read(dev->regmap[0], reg, &value); + *val = value; return ret; } static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val) { - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read16(dev, reg, val); - mutex_unlock(&dev->reg_mutex); + unsigned int value; +
[PATCH V3 10/10] net: dsa: microchip: Replace ad-hoc bit manipulation with regmap
Regmap provides bit manipulation functions to set/clear bits, use those insted of reimplementing them. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: New patch V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477.c | 46 - 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 7d209fd9f26f..8f13dcc05a10 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -67,60 +67,26 @@ static const struct { static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) { - u8 data; - - ksz_read8(dev, addr, &data); - if (set) - data |= bits; - else - data &= ~bits; - ksz_write8(dev, addr, data); + regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0); } static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, bool set) { - u32 addr; - u8 data; - - addr = PORT_CTRL_ADDR(port, offset); - ksz_read8(dev, addr, &data); - - if (set) - data |= bits; - else - data &= ~bits; - - ksz_write8(dev, addr, data); + regmap_update_bits(dev->regmap[0], PORT_CTRL_ADDR(port, offset), + bits, set ? bits : 0); } static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set) { - u32 data; - - ksz_read32(dev, addr, &data); - if (set) - data |= bits; - else - data &= ~bits; - ksz_write32(dev, addr, data); + regmap_update_bits(dev->regmap[2], addr, bits, set ? bits : 0); } static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset, u32 bits, bool set) { - u32 addr; - u32 data; - - addr = PORT_CTRL_ADDR(port, offset); - ksz_read32(dev, addr, &data); - - if (set) - data |= bits; - else - data &= ~bits; - - ksz_write32(dev, addr, data); + regmap_update_bits(dev->regmap[2], PORT_CTRL_ADDR(port, offset), + bits, set ? bits : 0); } static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev, u32 waiton, -- 2.20.1
[PATCH V3 07/10] net: dsa: microchip: Initial SPI regmap support
Add basic SPI regmap support into the driver. Previous patches unconver that ksz_spi_write() is always ever called with len = 1, 2 or 4. We can thus drop the if (len > SPI_TX_BUF_LEN) check and we can also drop the allocation of the txbuf which is part of the driver data and wastes 256 bytes for no reason. Regmap covers the whole thing now. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: - Squash with "net: dsa: microchip: Remove dev->txbuf" - Use separate regmaps for 8/16/32bit registers - Increase the regmap size to 0xd00 to cover the entire register area V3: - Rebase on next/master - Test on KSZ9477EVB - Increase regmap max register, to cover all switch registers - Correct regmap reg_bits value to match the hardware - Use cpu_to_be32() instead of cpu_to_le16() in register masks, since the KSZ9477 has some 32bit registers. --- drivers/net/dsa/microchip/Kconfig | 1 + drivers/net/dsa/microchip/ksz9477_spi.c | 114 +++- drivers/net/dsa/microchip/ksz_priv.h| 3 +- 3 files changed, 52 insertions(+), 66 deletions(-) diff --git a/drivers/net/dsa/microchip/Kconfig b/drivers/net/dsa/microchip/Kconfig index 2c3a6751bdaf..56790d544eb2 100644 --- a/drivers/net/dsa/microchip/Kconfig +++ b/drivers/net/dsa/microchip/Kconfig @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config NET_DSA_MICROCHIP_KSZ_COMMON + select REGMAP_SPI tristate menuconfig NET_DSA_MICROCHIP_KSZ9477 diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 49aeb92d36fc..77e3cb100eae 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -10,78 +10,54 @@ #include #include #include +#include #include #include "ksz_priv.h" -/* SPI frame opcodes */ -#define KS_SPIOP_RD3 -#define KS_SPIOP_WR2 - #define SPI_ADDR_SHIFT 24 -#define SPI_ADDR_MASK (BIT(SPI_ADDR_SHIFT) - 1) +#define SPI_ADDR_ALIGN 3 #define SPI_TURNAROUND_SHIFT 5 -/* Enough to read all switch port registers. */ -#define SPI_TX_BUF_LEN 0x100 - -static u32 ksz9477_spi_cmd(u32 reg, bool read) -{ - u32 txbuf; - - txbuf = reg & SPI_ADDR_MASK; - txbuf |= (read ? KS_SPIOP_RD : KS_SPIOP_WR) << SPI_ADDR_SHIFT; - txbuf <<= SPI_TURNAROUND_SHIFT; - txbuf = cpu_to_be32(txbuf); - - return txbuf; -} - -static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val, - unsigned int len) -{ - u32 txbuf = ksz9477_spi_cmd(reg, true); - - return spi_write_then_read(spi, &txbuf, 4, val, len); -} - -static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val, -unsigned int len) -{ - u32 *txbuf = (u32 *)val; - - *txbuf = ksz9477_spi_cmd(reg, false); - - return spi_write(spi, txbuf, 4 + len); -} - -static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data, - unsigned int len) -{ - struct spi_device *spi = dev->priv; - - return ksz9477_spi_read_reg(spi, reg, data, len); -} - -static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data, -unsigned int len) -{ - struct spi_device *spi = dev->priv; +/* SPI frame opcodes */ +#define KS_SPIOP_RD3 +#define KS_SPIOP_WR2 - if (len > SPI_TX_BUF_LEN) - len = SPI_TX_BUF_LEN; - memcpy(&dev->txbuf[4], data, len); - return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len); -} +#define KS_SPIOP_FLAG_MASK(opcode) \ + cpu_to_be32((opcode) << (SPI_ADDR_SHIFT + SPI_TURNAROUND_SHIFT)) + +#define KSZ_REGMAP_COMMON(width) \ + { \ + .val_bits = (width),\ + .reg_stride = (width) / 8, \ + .reg_bits = SPI_ADDR_SHIFT + SPI_ADDR_ALIGN,\ + .pad_bits = SPI_TURNAROUND_SHIFT, \ + .max_register = BIT(SPI_ADDR_SHIFT) - 1,\ + .cache_type = REGCACHE_NONE,\ + .read_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_RD), \ + .write_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_WR), \ + .reg_format_endian = REGMAP_ENDIAN_BIG, \ + .val_format_endian = REGMAP_ENDIAN_BIG \ + } + +static const struct regmap_config ksz9477_regmap_config[] = { + KSZ_REGMAP_COMMON(8), + KSZ_REGMAP_COMMON(16), + KSZ_REGMAP_COMMON(32), +}; static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
[PATCH V3 04/10] net: dsa: microchip: Move ksz_cfg and ksz_port_cfg to ksz9477.c
These functions are only used by the KSZ9477 code, move them from the header into that code. Note that these functions will be soon replaced by regmap equivalents. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: New patch V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477.c| 29 ++ drivers/net/dsa/microchip/ksz_common.h | 29 -- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 508380f80875..e8b96566abd9 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -65,6 +65,35 @@ static const struct { { 0x83, "tx_discards" }, }; +static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) +{ + u8 data; + + ksz_read8(dev, addr, &data); + if (set) + data |= bits; + else + data &= ~bits; + ksz_write8(dev, addr, data); +} + +static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, +bool set) +{ + u32 addr; + u8 data; + + addr = dev->dev_ops->get_port_addr(port, offset); + ksz_read8(dev, addr, &data); + + if (set) + data |= bits; + else + data &= ~bits; + + ksz_write8(dev, addr, data); +} + static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set) { u32 data; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index c15b49528bad..fe576a00facf 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -141,35 +141,6 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset, ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data); } -static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) -{ - u8 data; - - ksz_read8(dev, addr, &data); - if (set) - data |= bits; - else - data &= ~bits; - ksz_write8(dev, addr, data); -} - -static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, -bool set) -{ - u32 addr; - u8 data; - - addr = dev->dev_ops->get_port_addr(port, offset); - ksz_read8(dev, addr, &data); - - if (set) - data |= bits; - else - data &= ~bits; - - ksz_write8(dev, addr, data); -} - struct ksz_poll_ctx { struct ksz_device *dev; int port; -- 2.20.1
[PATCH V3 00/10] net: dsa: microchip: Convert to regmap
This patchset converts KSZ9477 switch driver to regmap. This was tested with extra patches on KSZ8795. This was also tested on KSZ9477 on Microchip KSZ9477EVB board, which I now have. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh Marek Vasut (10): net: dsa: microchip: Remove ksz_{read,write}24() net: dsa: microchip: Remove ksz_{get,set}() net: dsa: microchip: Inline ksz_spi.h net: dsa: microchip: Move ksz_cfg and ksz_port_cfg to ksz9477.c net: dsa: microchip: Use PORT_CTRL_ADDR() instead of indirect function call net: dsa: microchip: Factor out register access opcode generation net: dsa: microchip: Initial SPI regmap support net: dsa: microchip: Dispose of ksz_io_ops net: dsa: microchip: Factor out regmap config generation into common header net: dsa: microchip: Replace ad-hoc bit manipulation with regmap drivers/net/dsa/microchip/Kconfig | 1 + drivers/net/dsa/microchip/ksz9477.c | 35 +++--- drivers/net/dsa/microchip/ksz9477_spi.c | 114 +++-- drivers/net/dsa/microchip/ksz_common.c | 6 +- drivers/net/dsa/microchip/ksz_common.h | 157 +++- drivers/net/dsa/microchip/ksz_priv.h| 23 +--- drivers/net/dsa/microchip/ksz_spi.h | 69 --- 7 files changed, 84 insertions(+), 321 deletions(-) delete mode 100644 drivers/net/dsa/microchip/ksz_spi.h -- 2.20.1
[PATCH V3 03/10] net: dsa: microchip: Inline ksz_spi.h
The functions in the header file are static, and the header file is included from single C file, just inline the code into the C file. The bonus is that it's easier to spot further content to clean up. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: No change V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477_spi.c | 43 +- drivers/net/dsa/microchip/ksz_spi.h | 59 - 2 files changed, 42 insertions(+), 60 deletions(-) delete mode 100644 drivers/net/dsa/microchip/ksz_spi.h diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index 86d12d48a2a9..a34e66eccbcd 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -13,7 +13,6 @@ #include #include "ksz_priv.h" -#include "ksz_spi.h" /* SPI frame opcodes */ #define KS_SPIOP_RD3 @@ -73,6 +72,48 @@ static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data, return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len); } +static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val) +{ + return ksz_spi_read(dev, reg, val, 1); +} + +static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val) +{ + int ret = ksz_spi_read(dev, reg, (u8 *)val, 2); + + if (!ret) + *val = be16_to_cpu(*val); + + return ret; +} + +static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val) +{ + int ret = ksz_spi_read(dev, reg, (u8 *)val, 4); + + if (!ret) + *val = be32_to_cpu(*val); + + return ret; +} + +static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value) +{ + return ksz_spi_write(dev, reg, &value, 1); +} + +static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value) +{ + value = cpu_to_be16(value); + return ksz_spi_write(dev, reg, &value, 2); +} + +static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value) +{ + value = cpu_to_be32(value); + return ksz_spi_write(dev, reg, &value, 4); +} + static const struct ksz_io_ops ksz9477_spi_ops = { .read8 = ksz_spi_read8, .read16 = ksz_spi_read16, diff --git a/drivers/net/dsa/microchip/ksz_spi.h b/drivers/net/dsa/microchip/ksz_spi.h deleted file mode 100644 index 976bace31f37.. --- a/drivers/net/dsa/microchip/ksz_spi.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Microchip KSZ series SPI access common header - * - * Copyright (C) 2017-2018 Microchip Technology Inc. - * Tristram Ha - */ - -#ifndef __KSZ_SPI_H -#define __KSZ_SPI_H - -/* Chip dependent SPI access */ -static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data, - unsigned int len); -static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data, -unsigned int len); - -static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val) -{ - return ksz_spi_read(dev, reg, val, 1); -} - -static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val) -{ - int ret = ksz_spi_read(dev, reg, (u8 *)val, 2); - - if (!ret) - *val = be16_to_cpu(*val); - - return ret; -} - -static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val) -{ - int ret = ksz_spi_read(dev, reg, (u8 *)val, 4); - - if (!ret) - *val = be32_to_cpu(*val); - - return ret; -} - -static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value) -{ - return ksz_spi_write(dev, reg, &value, 1); -} - -static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value) -{ - value = cpu_to_be16(value); - return ksz_spi_write(dev, reg, &value, 2); -} - -static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value) -{ - value = cpu_to_be32(value); - return ksz_spi_write(dev, reg, &value, 4); -} - -#endif -- 2.20.1
[PATCH V3 02/10] net: dsa: microchip: Remove ksz_{get,set}()
These functions and callbacks are never used, remove them. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: No change V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477_spi.c | 2 -- drivers/net/dsa/microchip/ksz_common.h | 24 drivers/net/dsa/microchip/ksz_priv.h| 2 -- drivers/net/dsa/microchip/ksz_spi.h | 10 -- 4 files changed, 38 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index e7118319c192..86d12d48a2a9 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -80,8 +80,6 @@ static const struct ksz_io_ops ksz9477_spi_ops = { .write8 = ksz_spi_write8, .write16 = ksz_spi_write16, .write32 = ksz_spi_write32, - .get = ksz_spi_get, - .set = ksz_spi_set, }; static int ksz9477_spi_probe(struct spi_device *spi) diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 1781539c3a81..c15b49528bad 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -105,30 +105,6 @@ static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value) return ret; } -static inline int ksz_get(struct ksz_device *dev, u32 reg, void *data, - size_t len) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->get(dev, reg, data, len); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int ksz_set(struct ksz_device *dev, u32 reg, void *data, - size_t len) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->set(dev, reg, data, len); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - static inline void ksz_pread8(struct ksz_device *dev, int port, int offset, u8 *data) { diff --git a/drivers/net/dsa/microchip/ksz_priv.h b/drivers/net/dsa/microchip/ksz_priv.h index 5ef6153bd2cc..d3ddf98156bb 100644 --- a/drivers/net/dsa/microchip/ksz_priv.h +++ b/drivers/net/dsa/microchip/ksz_priv.h @@ -109,8 +109,6 @@ struct ksz_io_ops { int (*write8)(struct ksz_device *dev, u32 reg, u8 value); int (*write16)(struct ksz_device *dev, u32 reg, u16 value); int (*write32)(struct ksz_device *dev, u32 reg, u32 value); - int (*get)(struct ksz_device *dev, u32 reg, void *data, size_t len); - int (*set)(struct ksz_device *dev, u32 reg, void *data, size_t len); }; struct alu_struct { diff --git a/drivers/net/dsa/microchip/ksz_spi.h b/drivers/net/dsa/microchip/ksz_spi.h index 427811bd60b3..976bace31f37 100644 --- a/drivers/net/dsa/microchip/ksz_spi.h +++ b/drivers/net/dsa/microchip/ksz_spi.h @@ -56,14 +56,4 @@ static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value) return ksz_spi_write(dev, reg, &value, 4); } -static int ksz_spi_get(struct ksz_device *dev, u32 reg, void *data, size_t len) -{ - return ksz_spi_read(dev, reg, data, len); -} - -static int ksz_spi_set(struct ksz_device *dev, u32 reg, void *data, size_t len) -{ - return ksz_spi_write(dev, reg, data, len); -} - #endif -- 2.20.1
[PATCH V3 05/10] net: dsa: microchip: Use PORT_CTRL_ADDR() instead of indirect function call
The indirect function call to dev->dev_ops->get_port_addr() is expensive especially if called for every single register access, and only returns the value of PORT_CTRL_ADDR() macro. Use PORT_CTRL_ADDR() macro directly instead. Signed-off-by: Marek Vasut Cc: Andrew Lunn Cc: Florian Fainelli Cc: Tristram Ha Cc: Woojung Huh --- V2: New patch V3: - Rebase on next/master - Test on KSZ9477EVB --- drivers/net/dsa/microchip/ksz9477.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index e8b96566abd9..7d209fd9f26f 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -83,7 +83,7 @@ static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits, u32 addr; u8 data; - addr = dev->dev_ops->get_port_addr(port, offset); + addr = PORT_CTRL_ADDR(port, offset); ksz_read8(dev, addr, &data); if (set) -- 2.20.1
Re: [PATCH v2 0/3] fix bugs when enable route_localnet
On 6/22/19 6:46 AM, David Miller wrote: > From: Zhiqiang Liu > Date: Sat, 22 Jun 2019 16:41:49 +0800 > >> Friendly ping ... > > I'm not applying this patch series without someone reviewing it. > I have stared at it a few times since the patches were sent and can not find anything obviously wrong about it. The fallout seems limited to users of route_localnet which I have to believe is small (I only know of 2 other users of 127/8 for non-loopback and those were almost 10 years ago). Putting in net-next is the safest.
Re: [PATCH net-next 1/7] net: aquantia: replace internal driver version code with uts
On Sat, 22 Jun 2019 17:05:14 +0200, Andrew Lunn wrote: > On Sat, Jun 22, 2019 at 01:45:12PM +, Igor Russkikh wrote: > > As it was discussed some time previously, driver is better to > > report kernel version string, as it in a best way identifies > > the codebase. > > > > Signed-off-by: Igor Russkikh > > Nice. Indeed! > Devlink has just gained something similar to ethtool -i. Maybe we > should get the devlink core to also report the kernel version? I don't think we have the driver version at all there, my usual inclination being to not duplicate information across APIs. Do we have non-hypothetical instances of users reporting ethtool -i without uname output? Admittedly I may work with above-average Linux-trained engineers :S Would it be okay to just get devlink user space to use uname() to get the info?
[PATCH net] net/tls: fix page double free on TX cleanup
From: Dirk van der Merwe With commit 94850257cf0f ("tls: Fix tls_device handling of partial records") a new path was introduced to cleanup partial records during sk_proto_close. This path does not handle the SW KTLS tx_list cleanup. This is unnecessary though since the free_resources calls for both SW and offload paths will cleanup a partial record. The visible effect is the following warning, but this bug also causes a page double free. WARNING: CPU: 7 PID: 4000 at net/core/stream.c:206 sk_stream_kill_queues+0x103/0x110 RIP: 0010:sk_stream_kill_queues+0x103/0x110 RSP: 0018:b6df87e07bd0 EFLAGS: 00010206 RAX: RBX: 8c21db4971c0 RCX: 0007 RDX: ffa0 RSI: 001d RDI: 8c21db497270 RBP: 8c21db497270 R08: 8c29f4748600 R09: 00010020001a R10: b6df87e07aa0 R11: 9a445600 R12: 0007 R13: R14: 8c21f03f2900 R15: 8c21f03b8df0 Call Trace: inet_csk_destroy_sock+0x55/0x100 tcp_close+0x25d/0x400 ? tcp_check_oom+0x120/0x120 tls_sk_proto_close+0x127/0x1c0 inet_release+0x3c/0x60 __sock_release+0x3d/0xb0 sock_close+0x11/0x20 __fput+0xd8/0x210 task_work_run+0x84/0xa0 do_exit+0x2dc/0xb90 ? release_sock+0x43/0x90 do_group_exit+0x3a/0xa0 get_signal+0x295/0x720 do_signal+0x36/0x610 ? SYSC_recvfrom+0x11d/0x130 exit_to_usermode_loop+0x69/0xb0 do_syscall_64+0x173/0x180 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x7fe9b9abc10d RSP: 002b:7fe9b19a1d48 EFLAGS: 0246 ORIG_RAX: 00ca RAX: fe00 RBX: 0006 RCX: 7fe9b9abc10d RDX: 0002 RSI: 0080 RDI: 7fe948003430 RBP: 7fe948003410 R08: 7fe948003430 R09: R10: R11: 0246 R12: 5603739d9080 R13: 7fe9b9ab9f90 R14: 7fe948003430 R15: Fixes: 94850257cf0f ("tls: Fix tls_device handling of partial records") Signed-off-by: Dirk van der Merwe Signed-off-by: Jakub Kicinski --- include/net/tls.h | 15 --- net/tls/tls_main.c | 3 ++- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 4a55ce6a303f..53d96bca220d 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -373,21 +373,6 @@ static inline bool tls_is_partially_sent_record(struct tls_context *ctx) return !!ctx->partially_sent_record; } -static inline int tls_complete_pending_work(struct sock *sk, - struct tls_context *ctx, - int flags, long *timeo) -{ - int rc = 0; - - if (unlikely(sk->sk_write_pending)) - rc = wait_on_pending_writer(sk, timeo); - - if (!rc && tls_is_partially_sent_record(ctx)) - rc = tls_push_partial_record(sk, ctx, flags); - - return rc; -} - static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) { return tls_ctx->pending_open_record_frags; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fc81ae18cc44..e2b69e805d46 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) goto skip_tx_cleanup; } - if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) + if (unlikely(sk->sk_write_pending) && + !wait_on_pending_writer(sk, &timeo)) tls_handle_open_record(sk, 0); /* We need these for tls_sw_fallback handling of other packets */ -- 2.21.0
[net-next v2] tipc: add loopback device tracking
Since node internal messages are passed directly to socket it is not possible to observe this message exchange via tcpdump or wireshark. We now remedy this by making it possible to clone such messages and send the clones to the loopback interface. The clones are dropped at reception and have no functional role except making the traffic visible. The feature is turned on/off by enabling/disabling the loopback "bearer" "eth:lo". Acked-by: Jon Maloy Signed-off-by: John Rutherford --- net/tipc/bcast.c | 4 +++- net/tipc/bearer.c | 67 +++ net/tipc/bearer.h | 3 +++ net/tipc/core.c | 5 - net/tipc/core.h | 12 ++ net/tipc/node.c | 1 + net/tipc/topsrv.c | 2 ++ 7 files changed, 92 insertions(+), 2 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 6c997d4..235331d 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -406,8 +406,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } - if (dests->local) + if (dests->local) { + tipc_loopback_trace(net, &localq); tipc_sk_mcast_rcv(net, &localq, &inputq); + } exit: /* This queue should normally be empty by now */ __skb_queue_purge(pkts); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 2bed658..27b4fd7 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -836,6 +836,12 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + if (!strcmp(name, "eth:lo")) { + tipc_net(net)->loopback_trace = false; + pr_info("Disabled packet tracing on loopback interface\n"); + return 0; + } + bearer = tipc_bearer_find(net, name); if (!bearer) return -EINVAL; @@ -881,6 +887,12 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + if (!strcmp(bearer, "eth:lo")) { + tipc_net(net)->loopback_trace = true; + pr_info("Enabled packet tracing on loopback interface\n"); + return 0; + } + if (attrs[TIPC_NLA_BEARER_DOMAIN]) domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]); @@ -1021,6 +1033,61 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) return err; } +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *xmitq) +{ + struct net_device *dev = net->loopback_dev; + struct sk_buff *skb, *_skb; + int exp; + + skb_queue_walk(xmitq, _skb) { + skb = pskb_copy(_skb, GFP_ATOMIC); + if (!skb) + continue; + exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { + kfree_skb(skb); + continue; + } + skb_reset_network_header(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_TIPC); + dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, + dev->dev_addr, skb->len); + dev_queue_xmit(skb); + } +} + +static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, +struct packet_type *pt, struct net_device *od) +{ + consume_skb(skb); + return NET_RX_SUCCESS; +} + +int tipc_attach_loopback(struct net *net) +{ + struct net_device *dev = net->loopback_dev; + struct tipc_net *tn = tipc_net(net); + + if (!dev) + return -ENODEV; + dev_hold(dev); + tn->loopback_pt.dev = dev; + tn->loopback_pt.type = htons(ETH_P_TIPC); + tn->loopback_pt.func = tipc_loopback_rcv_pkt; + tn->loopback_trace = false; + dev_add_pack(&tn->loopback_pt); + return 0; +} + +void tipc_detach_loopback(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + dev_remove_pack(&tn->loopback_pt); + dev_put(net->loopback_dev); +} + static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media, int nlflags) { diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 7f4c569..ef7fad9 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -232,6 +232,9 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct tipc_media_addr *dst); void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *xmitq); +int tipc_attach_loopback(struct net *net); +void tipc_detach_loopback(struct net *net); /* check if device MTU is too low for ti
[net-next] tipc: fix missing indentation in source code
Fix misalignment of policy statement in netlink.c due to automatic spatch code transformation. Fixes: 3b0f31f2b8c9 ("genetlink: make policy common to family") Acked-by: Jon Maloy Signed-off-by: John Rutherford --- net/tipc/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 99bd166..d6165ad 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -261,7 +261,7 @@ struct genl_family tipc_genl_family __ro_after_init = { .version= TIPC_GENL_V2_VERSION, .hdrsize= 0, .maxattr= TIPC_NLA_MAX, - .policy = tipc_nl_policy, + .policy = tipc_nl_policy, .netnsok= true, .module = THIS_MODULE, .ops= tipc_genl_v2_ops, -- 2.11.0