date:20190623

[RFC V1 net-next 0/1] Introduce xdp to ena

2019-06-23 Thread sameehj

From: Sameeh Jubran 

This patch set has one patch which implements xdp drop support in the
ena driver.

Sameeh Jubran (1):
  net: ena: implement XDP drop support

 drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++-
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++
 2 files changed, 111 insertions(+), 1 deletion(-)

-- 
2.17.1

[RFC V1 net-next 1/1] net: ena: implement XDP drop support

2019-06-23 Thread sameehj

From: Sameeh Jubran 

This commit implements the basic functionality of drop/pass logic in the
ena driver.

Signed-off-by: Sameeh Jubran 
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++-
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++
 2 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c 
b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 20ec8ff03..3d65c0771 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -33,10 +33,10 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #ifdef CONFIG_RFS_ACCEL
+#include 
 #include 
 #endif /* CONFIG_RFS_ACCEL */
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -105,11 +105,82 @@ static void update_rx_ring_mtu(struct ena_adapter 
*adapter, int mtu)
adapter->rx_ring[i].mtu = mtu;
 }
 
+static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+{
+   struct bpf_prog *xdp_prog = rx_ring->xdp_bpf_prog;
+   u32 verdict = XDP_PASS;
+
+   rcu_read_lock();
+
+   if (!xdp_prog)
+   goto out;
+
+   verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+
+   if (unlikely(verdict == XDP_ABORTED))
+   trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+   else if (unlikely(verdict > XDP_REDIRECT))
+   bpf_warn_invalid_xdp_action(verdict);
+out:
+   rcu_read_unlock();
+   return verdict;
+}
+
+static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+   struct ena_adapter *adapter = netdev_priv(netdev);
+   struct bpf_prog *old_bpf_prog;
+   int i;
+
+   if (ena_xdp_allowed(adapter)) {
+   old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+
+   for (i = 0; i < adapter->num_queues; i++)
+   xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog);
+
+   if (old_bpf_prog)
+   bpf_prog_put(old_bpf_prog);
+
+   } else {
+   netif_err(adapter, drv, adapter->netdev, "Failed to set xdp 
program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) 
while xdp is on",
+ netdev->mtu, ENA_XDP_MAX_MTU);
+   return -EFAULT;
+   }
+
+   return 0;
+}
+
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+ * program as well as to query the current xdp program id.
+ */
+static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+   struct ena_adapter *adapter = netdev_priv(netdev);
+
+   switch (bpf->command) {
+   case XDP_SETUP_PROG:
+   return ena_xdp_set(netdev, bpf->prog);
+   case XDP_QUERY_PROG:
+   bpf->prog_id = adapter->xdp_bpf_prog ?
+   adapter->xdp_bpf_prog->aux->id : 0;
+   break;
+   default:
+   return -EINVAL;
+   }
+   return 0;
+}
+
 static int ena_change_mtu(struct net_device *dev, int new_mtu)
 {
struct ena_adapter *adapter = netdev_priv(dev);
int ret;
 
+   if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) {
+   netif_err(adapter, drv, dev,
+ "Requested MTU value is not valid while xdp is 
enabled new_mtu: %d max mtu: %lu min mtu: %d\n",
+ new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU);
+   return -EINVAL;
+   }
ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
if (!ret) {
netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
@@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
va = page_address(rx_info->page) + rx_info->page_offset;
prefetch(va + NET_IP_ALIGN);
 
+   if (ena_xdp_present_ring(rx_ring)) {
+   rx_ring->xdp.data = va;
+   rx_ring->xdp.data_meta = rx_ring->xdp.data;
+   rx_ring->xdp.data_hard_start = rx_ring->xdp.data -
+   rx_info->page_offset;
+   rx_ring->xdp.data_end = rx_ring->xdp.data + len;
+   if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS)
+   return NULL;
+   }
if (len <= rx_ring->rx_copybreak) {
skb = ena_alloc_skb(rx_ring, false);
if (unlikely(!skb))
@@ -2549,6 +2629,7 @@ static const struct net_device_ops ena_netdev_ops = {
.ndo_change_mtu = ena_change_mtu,
.ndo_set_mac_address= NULL,
.ndo_validate_addr  = eth_validate_addr,
+   .ndo_bpf= ena_xdp,
 };
 
 static int ena_device_validate_params(struct ena_adapter *adapter,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h 
b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index f2b6e2e05..e17965f7a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -35,6 +35,7 @@
 
 #include 
 #include

Re: [RFC net-next] net: dsa: add support for MC_DISABLED attribute

2019-06-23 Thread Ido Schimmel

On Fri, Jun 21, 2019 at 05:29:52PM -0400, Vivien Didelot wrote:
> On Thu, 20 Jun 2019 19:24:47 -0700, Florian Fainelli  
> wrote:
> > > This patch adds support for enabling or disabling the flooding of
> > > unknown multicast traffic on the CPU ports, depending on the value
> > > of the switchdev SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED attribute.
> > > 
> > > This allows the user to prevent the CPU to be flooded with a lot of
> > > undesirable traffic that the network stack needs to filter in software.
> > > 
> > > The bridge has multicast snooping enabled by default, hence CPU ports
> > > aren't bottlenecked with arbitrary network applications anymore.
> > > But this can be an issue in some scenarios such as pinging the bridge's
> > > IPv6 address. Setting /sys/class/net/br0/bridge/multicast_snooping to
> > > 0 would restore unknown multicast flooding and thus fix ICMPv6. As
> > > an alternative, enabling multicast_querier would program the bridge
> > > address into the switch.
> > From what I can read from mlxsw, we should probably also implement the
> > SWITCHDEV_ATTR_ID_PORT_MROUTER attribute in order to be consistent.
> > 
> > Since the attribute MC_DISABLED is on the bridge master, we should also
> > iterate over the list of switch ports being a member of that bridge and
> > change their flooding attribute, taking into account whether
> > BR_MCAST_FLOOD is set on that particular port or not. Just paraphrasing
> > what mlxsw does here again...
> 
> Ouch, doesn't sound like what a driver should be doing :-(
> 
> Ido, I cannot find documentation for multicast_snooping or MC_DISABLED
> and the naming isn't clear. Can this be considered as an equivalent
> of mcast_flood but targeting the bridge device itself, describing
> whether the bridge is interested or not in unknown multicast traffic?

Not really, you're only looking at one aspect of this, but there is
more. For example, when multicast snooping is disabled, the MDB is not
considered.

> > Once you act on the user-facing ports, you might be able to leave the
> > CPU port flooding unconditionally, since it would only "flood" the CPU
> > port either because an user-facing port has BR_MCAST_FLOOD set, or
> > because this is known MC traffic that got programmed via the bridge's
> > MDB. Would that work?
> 
> You may want the machine or network connected behind a bridge port
> to be flooded with unknown multicast traffic, without having the
> CPU conduit clogged up with this traffic. So these are two distinct
> settings for me.
> 
> The only scenario I can think of needing the CPU to be flooded is if
> there's a non-DSA port in the bridge maybe. But IMHO this should be
> handled by the bridge, offloading or not the appropriate attribute.
> 
> > On a higher level, I really wish we did not have to re-implement a lot
> > of identical or similar logic in each switch drivers and had a more
> > central model of what is behaviorally expected.
> 
> I couldn't agree more, ethernet switch drivers should only offload
> the notified bridge configuration, not noodling their own logic...

Please see my comment to Florian. The driver is doing what needs to be
done in order to comply with the behavior of the bridge driver. As I
wrote to Florian, we can probably move some logic up the stack and
simplify drivers.

> Russell, Ido, Florian, so far I understand that a multicast-unaware
> bridge must flood unknown traffic everywhere (CPU included);
> and a multicast-aware bridge must only flood its ports if their
> mcast_flood is on, and known traffic targeting the bridge must be
> offloaded accordingly. Do you guys agree with this?

When multicast snooping is enabled unregistered multicast traffic should
only be flooded to mrouter ports.

[PATCH V1 net-next] net: ena: Fix bug where ring allocation backoff stopped too late

2019-06-23 Thread sameehj

From: Sameeh Jubran 

The current code of create_queues_with_size_backoff() allows the ring size
to become as small as ENA_MIN_RING_SIZE/2. This is a bug since we don't
want the queue ring to be smaller than ENA_MIN_RING_SIZE

In this commit we change the loop's termination condition to look at the
queue size of the next iteration instead of that of the current one,
so that the minimal queue size again becomes ENA_MIN_RING_SIZE.

Fixes: eece4d2ab9d2 ("net: ena: add ethtool function for changing io queue 
sizes")

Signed-off-by: Arthur Kiyanovski 
Signed-off-by: Sameeh Jubran 
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c 
b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index b7865ee1d..20ec8ff03 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1839,8 +1839,8 @@ err_setup_tx:
if (cur_rx_ring_size >= cur_tx_ring_size)
new_rx_ring_size = cur_rx_ring_size / 2;
 
-   if (cur_tx_ring_size < ENA_MIN_RING_SIZE ||
-   cur_rx_ring_size < ENA_MIN_RING_SIZE) {
+   if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
+   new_rx_ring_size < ENA_MIN_RING_SIZE) {
netif_err(adapter, ifup, adapter->netdev,
  "Queue creation failed with the smallest 
possible queue size of %d for both queues. Not retrying with smaller queues\n",
  ENA_MIN_RING_SIZE);
-- 
2.17.1

Re: [RFC net-next] net: dsa: add support for MC_DISABLED attribute

2019-06-23 Thread Russell King - ARM Linux admin

On Sun, Jun 23, 2019 at 07:09:52AM +, Ido Schimmel wrote:
> When multicast snooping is enabled unregistered multicast traffic should
> only be flooded to mrouter ports.

Given that IPv6 relies upon multicast working, and multicast snooping
is a kernel configuration option, and MLD messages will only be sent
when whenever the configuration on the target changes, and there may
not be a multicast querier in the system, who does that ensure that
IPv6 can work on a bridge where the kernel configured and built with
multicast snooping enabled?

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

Re: [RFC net-next] net: dsa: add support for MC_DISABLED attribute

2019-06-23 Thread Ido Schimmel

On Sun, Jun 23, 2019 at 08:26:05AM +0100, Russell King - ARM Linux admin wrote:
> On Sun, Jun 23, 2019 at 07:09:52AM +, Ido Schimmel wrote:
> > When multicast snooping is enabled unregistered multicast traffic should
> > only be flooded to mrouter ports.
> 
> Given that IPv6 relies upon multicast working, and multicast snooping
> is a kernel configuration option, and MLD messages will only be sent
> when whenever the configuration on the target changes, and there may
> not be a multicast querier in the system, who does that ensure that
> IPv6 can work on a bridge where the kernel configured and built with
> multicast snooping enabled?

See commit b00589af3b04 ("bridge: disable snooping if there is no
querier"). I think that's unfortunate behavior that we need because
multicast snooping is enabled by default. If it weren't enabled by
default, then anyone enabling it would also make sure there's a querier
in the network.

[PATCH] sis900: increment revision number

2019-06-23 Thread Sergej Benilov

Increment revision number to 1.08.11 (TX completion fix).

Signed-off-by: Sergej Benilov 
---
 drivers/net/ethernet/sis/sis900.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/sis/sis900.c 
b/drivers/net/ethernet/sis/sis900.c
index abb9b42e..09b4e1c5 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1,6 +1,6 @@
 /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux.
Copyright 1999 Silicon Integrated System Corporation
-   Revision:   1.08.10 Apr. 2 2006
+   Revision:   1.08.11 Jun. 23 2019
 
Modified from the driver which is originally written by Donald Becker.
 
@@ -16,7 +16,8 @@
preliminary Rev. 1.0 Nov. 10, 1998
SiS 7014 Single Chip 100BASE-TX/10BASE-T Physical Layer Solution,
preliminary Rev. 1.0 Jan. 18, 1998
-
+   
+   Rev 1.08.11 Jun. 23 2019 Sergej Benilov TX completion fix
Rev 1.08.10 Apr.  2 2006 Daniele Venzano add vlan (jumbo packets) support
Rev 1.08.09 Sep. 19 2005 Daniele Venzano add Wake on LAN support
Rev 1.08.08 Jan. 22 2005 Daniele Venzano use netif_msg for debugging 
messages
@@ -79,7 +80,7 @@
 #include "sis900.h"
 
 #define SIS900_MODULE_NAME "sis900"
-#define SIS900_DRV_VERSION "v1.08.10 Apr. 2 2006"
+#define SIS900_DRV_VERSION "v1.08.11 Jun. 23 2019"
 
 static const char version[] =
KERN_INFO "sis900.c: " SIS900_DRV_VERSION "\n";
-- 
2.17.1

Re: [PATCH] sis900: increment revision number

2019-06-23 Thread Daniele Venzano


Sorry to be late in replying, I am ok also with the previous changes.

Signed-off-by: Daniele Venzano 

On 23/06/2019 09:47, Sergej Benilov wrote:

Increment revision number to 1.08.11 (TX completion fix).

Signed-off-by: Sergej Benilov 
---
  drivers/net/ethernet/sis/sis900.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/sis/sis900.c 
b/drivers/net/ethernet/sis/sis900.c
index abb9b42e..09b4e1c5 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1,6 +1,6 @@
  /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux.
 Copyright 1999 Silicon Integrated System Corporation
-   Revision:   1.08.10 Apr. 2 2006
+   Revision:   1.08.11 Jun. 23 2019
  
 Modified from the driver which is originally written by Donald Becker.
  
@@ -16,7 +16,8 @@

 preliminary Rev. 1.0 Nov. 10, 1998
 SiS 7014 Single Chip 100BASE-TX/10BASE-T Physical Layer Solution,
 preliminary Rev. 1.0 Jan. 18, 1998
-
+
+   Rev 1.08.11 Jun. 23 2019 Sergej Benilov TX completion fix
 Rev 1.08.10 Apr.  2 2006 Daniele Venzano add vlan (jumbo packets) support
 Rev 1.08.09 Sep. 19 2005 Daniele Venzano add Wake on LAN support
 Rev 1.08.08 Jan. 22 2005 Daniele Venzano use netif_msg for debugging 
messages
@@ -79,7 +80,7 @@
  #include "sis900.h"
  
  #define SIS900_MODULE_NAME "sis900"

-#define SIS900_DRV_VERSION "v1.08.10 Apr. 2 2006"
+#define SIS900_DRV_VERSION "v1.08.11 Jun. 23 2019"
  
  static const char version[] =

KERN_INFO "sis900.c: " SIS900_DRV_VERSION "\n";

Re: [PATCH] sis900: increment revision number

2019-06-23 Thread Joe Perches

On Sun, 2019-06-23 at 09:47 +0200, Sergej Benilov wrote:
> Increment revision number to 1.08.11 (TX completion fix).

Better not to bother as the last increment was in 2006.
The driver version gets the kernel version in any case.

> diff --git a/drivers/net/ethernet/sis/sis900.c 
> b/drivers/net/ethernet/sis/sis900.c
[]
> @@ -1,6 +1,6 @@
>  /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux.
> Copyright 1999 Silicon Integrated System Corporation
> -   Revision: 1.08.10 Apr. 2 2006
> +   Revision: 1.08.11 Jun. 23 2019

etc...

Re: [PATCH] sis900: increment revision number

2019-06-23 Thread Daniele Venzano


Hello,

I think it is good to know just by looking at the sources that the 
driver is still kept up-to-date, so I am in favor of this patch.


Daniele

On 23/06/2019 11:10, Joe Perches wrote:

On Sun, 2019-06-23 at 09:47 +0200, Sergej Benilov wrote:

Increment revision number to 1.08.11 (TX completion fix).

Better not to bother as the last increment was in 2006.
The driver version gets the kernel version in any case.


diff --git a/drivers/net/ethernet/sis/sis900.c 
b/drivers/net/ethernet/sis/sis900.c

[]

@@ -1,6 +1,6 @@
  /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux.
 Copyright 1999 Silicon Integrated System Corporation
-   Revision:   1.08.10 Apr. 2 2006
+   Revision:   1.08.11 Jun. 23 2019

etc...

[no subject]

2019-06-23 Thread Marko in1t3r Pavlovic

unsubscribe netdev

Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET

2019-06-23 Thread Neil Horman

On Sat, Jun 22, 2019 at 10:12:46PM -0400, Willem de Bruijn wrote:
> On Sat, Jun 22, 2019 at 1:42 PM Neil Horman  wrote:
> >
> > When an application is run that:
> > a) Sets its scheduler to be SCHED_FIFO
> > and
> > b) Opens a memory mapped AF_PACKET socket, and sends frames with the
> > MSG_DONTWAIT flag cleared, its possible for the application to hang
> > forever in the kernel.  This occurs because when waiting, the code in
> > tpacket_snd calls schedule, which under normal circumstances allows
> > other tasks to run, including ksoftirqd, which in some cases is
> > responsible for freeing the transmitted skb  (which in AF_PACKET calls a
> > destructor that flips the status bit of the transmitted frame back to
> > available, allowing the transmitting task to complete).
> >
> > However, when the calling application is SCHED_FIFO, its priority is
> > such that the schedule call immediately places the task back on the cpu,
> > preventing ksoftirqd from freeing the skb, which in turn prevents the
> > transmitting task from detecting that the transmission is complete.
> >
> > We can fix this by converting the schedule call to a completion
> > mechanism.  By using a completion queue, we force the calling task, when
> > it detects there are no more frames to send, to schedule itself off the
> > cpu until such time as the last transmitted skb is freed, allowing
> > forward progress to be made.
> >
> > Tested by myself and the reporter, with good results
> >
> > Appies to the net tree
> >
> > Signed-off-by: Neil Horman 
> > Reported-by: Matteo Croce 
> > CC: "David S. Miller" 
> > CC: Willem de Bruijn 
> >
> > Change Notes:
> >
> > V1->V2:
> > Enhance the sleep logic to support being interruptible and
> > allowing for honoring to SK_SNDTIMEO (Willem de Bruijn)
> > ---
> >  net/packet/af_packet.c | 60 --
> >  net/packet/internal.h  |  2 ++
> >  2 files changed, 48 insertions(+), 14 deletions(-)
> >
> > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> > index a29d66da7394..8ddb2f7aebb4 100644
> > --- a/net/packet/af_packet.c
> > +++ b/net/packet/af_packet.c
> > @@ -358,7 +358,8 @@ static inline struct page * __pure pgv_to_page(void 
> > *addr)
> > return virt_to_page(addr);
> >  }
> >
> > -static void __packet_set_status(struct packet_sock *po, void *frame, int 
> > status)
> > +static void __packet_set_status(struct packet_sock *po, void *frame, int 
> > status,
> > +   bool call_complete)
> >  {
> > union tpacket_uhdr h;
> >
> > @@ -381,6 +382,8 @@ static void __packet_set_status(struct packet_sock *po, 
> > void *frame, int status)
> > BUG();
> > }
> >
> > +   if (po->wait_on_complete && call_complete)
> > +   complete(&po->skb_completion);
> 
> This wake need not happen before the barrier. Only one caller of
> __packet_set_status passes call_complete (tpacket_destruct_skb).
> Moving this branch to the caller avoids a lot of code churn.
> 
Yeah, thats a fair point, I'll adjust that.

> Also, multiple packets may be released before the process is awoken.
> The process will block until packet_read_pending drops to zero. Can
> defer the wait_on_complete to that one instance.
> 
Also true, we can gate the complete call on wait_on_complete and
pack_read_pending(...) == 0
> > smp_wmb();
> >  }
> >
> > @@ -1148,6 +1151,14 @@ static void *packet_previous_frame(struct 
> > packet_sock *po,
> > return packet_lookup_frame(po, rb, previous, status);
> >  }
> >
> > +static void *packet_next_frame(struct packet_sock *po,
> > +   struct packet_ring_buffer *rb,
> > +   int status)
> > +{
> > +   unsigned int next = rb->head != rb->frame_max ? rb->head+1 : 0;
> > +   return packet_lookup_frame(po, rb, next, status);
> > +}
> > +
> >  static void packet_increment_head(struct packet_ring_buffer *buff)
> >  {
> > buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
> > @@ -2360,7 +2371,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct 
> > net_device *dev,
> >  #endif
> >
> > if (po->tp_version <= TPACKET_V2) {
> > -   __packet_set_status(po, h.raw, status);
> > +   __packet_set_status(po, h.raw, status, false);
> > sk->sk_data_ready(sk);
> > } else {
> > prb_clear_blk_fill_status(&po->rx_ring);
> > @@ -2400,7 +2411,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
> > packet_dec_pending(&po->tx_ring);
> >
> > ts = __packet_set_timestamp(po, ph, skb);
> > -   __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
> > +   __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts, true);
> > }
> >
> > sock_wfree(skb);
> > @@ -2585,13 +2596,13 @@ static int tpacket_parse_header(struct packet_sock 
> > *po, void *frame,
> >
> >  static int tpacket_snd(struct packet_sock *po, s

Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET

2019-06-23 Thread Neil Horman

On Sat, Jun 22, 2019 at 10:21:31PM -0400, Willem de Bruijn wrote:
> > > -static void __packet_set_status(struct packet_sock *po, void *frame, int 
> > > status)
> > > +static void __packet_set_status(struct packet_sock *po, void *frame, int 
> > > status,
> > > +   bool call_complete)
> > >  {
> > > union tpacket_uhdr h;
> > >
> > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct packet_sock 
> > > *po, void *frame, int status)
> > > BUG();
> > > }
> > >
> > > +   if (po->wait_on_complete && call_complete)
> > > +   complete(&po->skb_completion);
> >
> > This wake need not happen before the barrier. Only one caller of
> > __packet_set_status passes call_complete (tpacket_destruct_skb).
> > Moving this branch to the caller avoids a lot of code churn.
> >
> > Also, multiple packets may be released before the process is awoken.
> > The process will block until packet_read_pending drops to zero. Can
> > defer the wait_on_complete to that one instance.
> 
> Eh no. The point of having this sleep in the send loop is that
> additional slots may be released for transmission (flipped to
> TP_STATUS_SEND_REQUEST) from another thread while this thread is
> waiting.
> 
Thats incorrect.  The entirety of tpacket_snd is protected by a mutex. No other
thread can alter the state of the frames in the vector from the kernel send path
while this thread is waiting.

> Else, it would have been much simpler to move the wait below the send
> loop: send as many packets as possible, then wait for all of them
> having been released. Much clearer control flow.
> 
Thats (almost) what happens now.  The only difference is that with this
implementation, the waiting thread has the opportunity to see if userspace has
queued more frames for transmission during the wait period.  We could
potentially change that, but thats outside the scope of this fix.

> Where to set and clear the wait_on_complete boolean remains. Integer
> assignment is fragile, as the compiler and processor may optimize or
> move simple seemingly independent operations. As complete() takes a
> spinlock, avoiding that in the DONTWAIT case is worthwhile. But probably
> still preferable to set when beginning waiting and clear when calling
> complete.
We avoid any call to wait_for_complete or complete already, based on the gating
of the need_wait variable in tpacket_snd.  If the transmitting thread doesn't
set MSG_DONTWAIT in the flags of the msg structure, we will never set
wait_for_complete, and so we will never manipulate the completion queue.

Neil

>

Re: [PATCH bpf-next v5 00/16] AF_XDP infrastructure improvements and mlx5e support

2019-06-23 Thread Tariq Toukan



On 6/21/2019 10:52 PM, Saeed Mahameed wrote:
> On Thu, Jun 20, 2019 at 2:13 AM Björn Töpel  wrote:
>>
>> On Tue, 18 Jun 2019 at 14:00, Maxim Mikityanskiy  
>> wrote:
>>>
>>> This series contains improvements to the AF_XDP kernel infrastructure
>>> and AF_XDP support in mlx5e. The infrastructure improvements are
>>> required for mlx5e, but also some of them benefit to all drivers, and
>>> some can be useful for other drivers that want to implement AF_XDP.
>>>
>>> The performance testing was performed on a machine with the following
>>> configuration:
>>>
>>> - 24 cores of Intel Xeon E5-2620 v3 @ 2.40 GHz
>>> - Mellanox ConnectX-5 Ex with 100 Gbit/s link
>>>
>>> The results with retpoline disabled, single stream:
>>>
>>> txonly: 33.3 Mpps (21.5 Mpps with queue and app pinned to the same CPU)
>>> rxdrop: 12.2 Mpps
>>> l2fwd: 9.4 Mpps
>>>
>>> The results with retpoline enabled, single stream:
>>>
>>> txonly: 21.3 Mpps (14.1 Mpps with queue and app pinned to the same CPU)
>>> rxdrop: 9.9 Mpps
>>> l2fwd: 6.8 Mpps
>>>
>>> v2 changes:
>>>
>>> Added patches for mlx5e and addressed the comments for v1. Rebased for
>>> bpf-next.
>>>
>>> v3 changes:
>>>
>>> Rebased for the newer bpf-next, resolved conflicts in libbpf. Addressed
>>> Björn's comments for coding style. Fixed a bug in error handling flow in
>>> mlx5e_open_xsk.
>>>
>>> v4 changes:
>>>
>>> UAPI is not changed, XSK RX queues are exposed to the kernel. The lower
>>> half of the available amount of RX queues are regular queues, and the
>>> upper half are XSK RX queues. The patch "xsk: Extend channels to support
>>> combined XSK/non-XSK traffic" was dropped. The final patch was reworked
>>> accordingly.
>>>
>>> Added "net/mlx5e: Attach/detach XDP program safely", as the changes
>>> introduced in the XSK patch base on the stuff from this one.
>>>
>>> Added "libbpf: Support drivers with non-combined channels", which aligns
>>> the condition in libbpf with the condition in the kernel.
>>>
>>> Rebased over the newer bpf-next.
>>>
>>> v5 changes:
>>>
>>> In v4, ethtool reports the number of channels as 'combined' and the
>>> number of XSK RX queues as 'rx' for mlx5e. It was changed, so that 'rx'
>>> is 0, and 'combined' reports the double amount of channels if there is
>>> an active UMEM - to make libbpf happy.
>>>
>>> The patch for libbpf was dropped. Although it's still useful and fixes
>>> things, it raises some disagreement, so I'm dropping it - it's no longer
>>> useful for mlx5e anymore after the change above.
>>>
>>
>> Just a heads-up: There are some checkpatch warnings (>80 chars/line)
> 
> Thanks Bjorn for your comment, in mlx5 we allow up to 95 chars per line,
> otherwise it is going to be an ugly zigzags.
> 
>> for the mlnx5 driver parts, and the series didn't apply cleanly on
>> bpf-next for me.
>>
>> I haven't been able to test the mlnx5 parts.
>>
>> Parts of the series are unrelated/orthogonal, and could be submitted
>> as separate series, e.g. patches {1,7} and patches {3,4}. No blockers
>> for me, though.
>>
>> Thanks for the hard work!
>>
>> For the series:
>> Acked-by: Björn Töpel 

Just wanted to make sure we're on the same page, so we don't miss this 
kernel.

AIU, currently no action is needed from Maxim's side, as Saeed is fine 
with the mlx5 part, and series is still marked as 'New' in patchworks 
with no requested changes.

Regards,
Tariq

[PATCH] net: dsa: microchip: Use gpiod_set_value_cansleep()

2019-06-23 Thread Marek Vasut

Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch
reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly
fine for the kernel to sleep for a bit in ksz_switch_register().

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Linus Walleij 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
 drivers/net/dsa/microchip/ksz_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index 4f6648d5ac8b..bc81806dd75e 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev,
return PTR_ERR(dev->reset_gpio);
 
if (dev->reset_gpio) {
-   gpiod_set_value(dev->reset_gpio, 1);
+   gpiod_set_value_cansleep(dev->reset_gpio, 1);
mdelay(10);
-   gpiod_set_value(dev->reset_gpio, 0);
+   gpiod_set_value_cansleep(dev->reset_gpio, 0);
}
 
mutex_init(&dev->dev_mutex);
-- 
2.20.1

[PATCH] net: ethernet: ti: cpsw: Assign OF node to slave devices

2019-06-23 Thread Marek Vasut

Assign OF node to CPSW slave devices, otherwise it is not possible to
bind e.g. DSA switch to them. Without this patch, the DSA code tries
to find the ethernet device by OF match, but fails to do so because
the slave device has NULL OF node.

Signed-off-by: Marek Vasut 
Cc: David S. Miller 
Cc: Ivan Khoronzhuk 
---
 drivers/net/ethernet/ti/cpsw.c  | 3 +++
 drivers/net/ethernet/ti/cpsw_priv.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 7bdd287074fc..c39790e21276 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2179,6 +2179,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
return ret;
}
 
+   slave_data->slave_node = slave_node;
slave_data->phy_node = of_parse_phandle(slave_node,
"phy-handle", 0);
parp = of_get_property(slave_node, "phy_id", &lenp);
@@ -2329,6 +2330,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
 
/* register the network device */
SET_NETDEV_DEV(ndev, cpsw->dev);
+   ndev->dev.of_node = cpsw->slaves[1].data->slave_node;
ret = register_netdev(ndev);
if (ret)
dev_err(cpsw->dev, "cpsw: error registering net device\n");
@@ -2506,6 +2508,7 @@ static int cpsw_probe(struct platform_device *pdev)
 
/* register the network device */
SET_NETDEV_DEV(ndev, dev);
+   ndev->dev.of_node = cpsw->slaves[0].data->slave_node;
ret = register_netdev(ndev);
if (ret) {
dev_err(dev, "error registering net device\n");
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h 
b/drivers/net/ethernet/ti/cpsw_priv.h
index 04795b97ee71..e32f11da2dce 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -272,6 +272,7 @@ struct cpsw_host_regs {
 };
 
 struct cpsw_slave_data {
+   struct device_node *slave_node;
struct device_node *phy_node;
charphy_id[MII_BUS_ID_SIZE];
int phy_if;
-- 
2.20.1

[PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout

2019-06-23 Thread Ido Schimmel

From: Vadim Pasternak 

Extend macros MLXSW_REG_MTMP_TEMP_TO_MC() to allow support of negative
temperature readout, since chip and others thermal components are
capable of operating within the negative temperature.
With no such support negative temperature will be consider as very high
temperature and it will cause wrong readout and thermal shutdown.
For negative values 2`s complement is used.
Tested in chamber.
Example of chip ambient temperature readout with chamber temperature:
-10 Celsius:
temp1: -6.0C  (highest =  -5.0C)
-5 Celsius:
temp1: -1.0C  (highest =  -1.0C)

Signed-off-by: Vadim Pasternak 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c   | 12 +---
 drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 14 +++---
 drivers/net/ethernet/mellanox/mlxsw/reg.h  | 12 +++-
 3 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c 
b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 056e3f55ae6c..fb1fd334a8d4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
-   unsigned int temp;
-   int index;
+   int temp, index;
int err;
 
index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
@@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
return err;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
-   return sprintf(buf, "%u\n", temp);
+   return sprintf(buf, "%d\n", temp);
 }
 
 static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
@@ -76,8 +75,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
-   unsigned int temp_max;
-   int index;
+   int temp_max, index;
int err;
 
index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
@@ -89,7 +87,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
return err;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL);
-   return sprintf(buf, "%u\n", temp_max);
+   return sprintf(buf, "%d\n", temp_max);
 }
 
 static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
@@ -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device 
*dev,
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
-   unsigned int temp;
u8 module;
+   int temp;
int err;
 
module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c 
b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 504a34d240f7..35a1dc89c28a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -312,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct 
thermal_zone_device *tzdev,
struct mlxsw_thermal *thermal = tzdev->devdata;
struct device *dev = thermal->bus_info->dev;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
-   unsigned int temp;
+   int temp;
int err;
 
mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
@@ -327,7 +327,7 @@ static int mlxsw_thermal_get_temp(struct 
thermal_zone_device *tzdev,
mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
  temp);
 
-   *p_temp = (int) temp;
+   *p_temp = temp;
return 0;
 }
 
@@ -503,7 +503,7 @@ static int mlxsw_thermal_module_temp_get(struct 
thermal_zone_device *tzdev,
struct mlxsw_thermal *thermal = tz->parent;
struct device *dev = thermal->bus_info->dev;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
-   unsigned int temp;
+   int temp;
int err;
 
/* Read module temperature. */
@@ -519,14 +519,14 @@ static int mlxsw_thermal_module_temp_get(struct 
thermal_zone_device *tzdev,
return 0;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
-   *p_temp = (int) temp;
+   *p_temp = temp;
 
if (!temp)
return 0;
 
/* Update trip points. */
err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
-   if (!err)
+   if (!err && temp > 0)
mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
 
return 0;
@@ -612,8

[PATCH net-next 2/3] mlxsw: core: Add the hottest thermal zone detection

2019-06-23 Thread Ido Schimmel

From: Vadim Pasternak 

When multiple sensors are mapped to the same cooling device, the
cooling device should be set according the worst sensor from the
sensors associated with this cooling device.

Provide the hottest thermal zone detection and enforce cooling device
to follow the temperature trends of the hottest zone only.
Prevent competition for the cooling device control from others zones,
by "stable trend" indication. A cooling device will not perform any
actions associated with a zone with a "stable trend".

When other thermal zone is detected as a hottest, a cooling device is
to be switched to following temperature trends of new hottest zone.

Thermal zone score is represented by 32 bits unsigned integer and
calculated according to the next formula:
For T < TZ, where t from {normal trip = 0, high trip = 1, hot
trip = 2, critical = 3}:
TZ score = (T + (TZ - T) / 2) / (TZ - T) * 256 ** j;
Highest thermal zone score s is set as MAX(TZscore);
Following this formula, if TZ is in trip point higher than TZ,
the higher score is to be always assigned to TZ.

For two thermal zones located at the same kind of trip point, the higher
score will be assigned to the zone which is closer to the next trip
point. Thus, the highest score will always be assigned objectively to
the hottest thermal zone.

All the thermal zones initially are to be configured with mode
"enabled" with the "step_wise" governor.

Signed-off-by: Vadim Pasternak 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../ethernet/mellanox/mlxsw/core_thermal.c| 75 +++
 1 file changed, 62 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c 
b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 88f43ad2cc4f..504a34d240f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -23,6 +23,7 @@
 #define MLXSW_THERMAL_HYSTERESIS_TEMP  5000/* 5C */
 #define MLXSW_THERMAL_MODULE_TEMP_SHIFT(MLXSW_THERMAL_HYSTERESIS_TEMP 
* 2)
 #define MLXSW_THERMAL_ZONE_MAX_NAME16
+#define MLXSW_THERMAL_TEMP_SCORE_MAX   GENMASK(31, 0)
 #define MLXSW_THERMAL_MAX_STATE10
 #define MLXSW_THERMAL_MAX_DUTY 255
 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values
@@ -113,6 +114,8 @@ struct mlxsw_thermal {
struct mlxsw_thermal_module *tz_module_arr;
struct mlxsw_thermal_module *tz_gearbox_arr;
u8 tz_gearbox_num;
+   unsigned int tz_highest_score;
+   struct thermal_zone_device *tz_highest_dev;
 };
 
 static inline u8 mlxsw_state_to_duty(int state)
@@ -197,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, 
struct mlxsw_core *core,
return 0;
 }
 
+static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
+ struct thermal_zone_device *tzdev,
+ struct mlxsw_thermal_trip *trips,
+ int temp)
+{
+   struct mlxsw_thermal_trip *trip = trips;
+   unsigned int score, delta, i, shift = 1;
+
+   /* Calculate thermal zone score, if temperature is above the critical
+* threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
+*/
+   score = MLXSW_THERMAL_TEMP_SCORE_MAX;
+   for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
+i++, trip++) {
+   if (temp < trip->temp) {
+   delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
+   score = delta * shift;
+   break;
+   }
+   shift *= 256;
+   }
+
+   if (score > thermal->tz_highest_score) {
+   thermal->tz_highest_score = score;
+   thermal->tz_highest_dev = tzdev;
+   }
+}
+
 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
  struct thermal_cooling_device *cdev)
 {
@@ -292,6 +323,9 @@ static int mlxsw_thermal_get_temp(struct 
thermal_zone_device *tzdev,
return err;
}
mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+   if (temp > 0)
+   mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
+ temp);
 
*p_temp = (int) temp;
return 0;
@@ -353,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct 
thermal_zone_device *tzdev,
return 0;
 }
 
+static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
+  int trip, enum thermal_trend *trend)
+{
+   struct mlxsw_thermal_module *tz = tzdev->devdata;
+   struct mlxsw_thermal *thermal = tz->parent;
+
+   if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+   return -EINVAL;
+
+   if (tzdev == thermal->tz_highest_dev)
+   return 1;
+
+   *trend = THERMAL_TREND_STABLE;
+   return 0;
+}
+

[PATCH net-next 1/3] mlxsw: core: Extend thermal core with per inter-connect device thermal zones

2019-06-23 Thread Ido Schimmel

From: Vadim Pasternak 

Add a dedicated thermal zone for each inter-connect device. The
current temperature is obtained from inter-connect temperature sensor
and the default trip points are set to the same values as default ASIC
trip points. These settings could be changed from the user space.
A cooling device (fan) is bound to all inter-connect devices.

Signed-off-by: Vadim Pasternak 
Signed-off-by: Ido Schimmel 
---
 .../ethernet/mellanox/mlxsw/core_thermal.c| 137 +-
 1 file changed, 136 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c 
b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index cfab0e330a47..88f43ad2cc4f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -98,7 +98,7 @@ struct mlxsw_thermal_module {
struct thermal_zone_device *tzdev;
struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
enum thermal_device_mode mode;
-   int module;
+   int module; /* Module or gearbox number */
 };
 
 struct mlxsw_thermal {
@@ -111,6 +111,8 @@ struct mlxsw_thermal {
struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
enum thermal_device_mode mode;
struct mlxsw_thermal_module *tz_module_arr;
+   struct mlxsw_thermal_module *tz_gearbox_arr;
+   u8 tz_gearbox_num;
 };
 
 static inline u8 mlxsw_state_to_duty(int state)
@@ -554,6 +556,46 @@ static struct thermal_zone_device_ops 
mlxsw_thermal_module_ops = {
.set_trip_hyst  = mlxsw_thermal_module_trip_hyst_set,
 };
 
+static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
+ int *p_temp)
+{
+   struct mlxsw_thermal_module *tz = tzdev->devdata;
+   struct mlxsw_thermal *thermal = tz->parent;
+   char mtmp_pl[MLXSW_REG_MTMP_LEN];
+   unsigned int temp;
+   u16 index;
+   int err;
+
+   index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
+   mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+
+   err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+   if (err)
+   return err;
+
+   mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+
+   *p_temp = (int) temp;
+   return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
+   .bind   = mlxsw_thermal_module_bind,
+   .unbind = mlxsw_thermal_module_unbind,
+   .get_mode   = mlxsw_thermal_module_mode_get,
+   .set_mode   = mlxsw_thermal_module_mode_set,
+   .get_temp   = mlxsw_thermal_gearbox_temp_get,
+   .get_trip_type  = mlxsw_thermal_module_trip_type_get,
+   .get_trip_temp  = mlxsw_thermal_module_trip_temp_get,
+   .set_trip_temp  = mlxsw_thermal_module_trip_temp_set,
+   .get_trip_hyst  = mlxsw_thermal_module_trip_hyst_get,
+   .set_trip_hyst  = mlxsw_thermal_module_trip_hyst_set,
+};
+
+static struct thermal_zone_params mlxsw_thermal_gearbox_params = {
+   .governor_name = "user_space",
+};
+
 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
   unsigned long *p_state)
 {
@@ -779,6 +821,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
kfree(thermal->tz_module_arr);
 }
 
+static int
+mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
+{
+   char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
+
+   snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
+gearbox_tz->module + 1);
+   gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
+   MLXSW_THERMAL_NUM_TRIPS,
+   MLXSW_THERMAL_TRIP_MASK,
+   gearbox_tz,
+   &mlxsw_thermal_gearbox_ops,
+   &mlxsw_thermal_gearbox_params,
+   0, 0);
+   if (IS_ERR(gearbox_tz->tzdev))
+   return PTR_ERR(gearbox_tz->tzdev);
+
+   return 0;
+}
+
+static void
+mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
+{
+   thermal_zone_device_unregister(gearbox_tz->tzdev);
+}
+
+static int
+mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
+struct mlxsw_thermal *thermal)
+{
+   struct mlxsw_thermal_module *gearbox_tz;
+   char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+   int i;
+   int err;
+
+   if (!mlxsw_core_res_query_enabled(core))
+   return 0;
+
+   mlxsw_reg_mgpir_pack(mgpir_pl);
+   err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+   if (err)
+   return err;
+
+   mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL);
+   if (!thermal->tz_gearbox_num)
+   return 0

[PATCH net-next 0/3] mlxsw: Thermal and hwmon extensions

2019-06-23 Thread Ido Schimmel

From: Ido Schimmel 

This patchset from Vadim includes various enhancements to thermal and
hwmon code in mlxsw.

Patch #1 adds a thermal zone for each inter-connect device (gearbox).
These devices are present in SN3800 systems and code to expose their
temperature via hwmon was added in commit 2e265a8b6c09 ("mlxsw: core:
Extend hwmon interface with inter-connect temperature attributes").

Currently, there are multiple thermal zones in mlxsw and only a few
cooling devices. Patch #2 detects the hottest thermal zone and the
cooling devices are switched to follow its trends. RFC was sent last
month [1].

Patch #3 allows to read and report negative temperature of the sensors
mlxsw exposes via hwmon and thermal subsystems.

[1] https://patchwork.ozlabs.org/patch/1107161/

Vadim Pasternak (3):
  mlxsw: core: Extend thermal core with per inter-connect device thermal
zones
  mlxsw: core: Add the hottest thermal zone detection
  mlxsw: core: Add support for negative temperature readout

 .../net/ethernet/mellanox/mlxsw/core_hwmon.c  |  12 +-
 .../ethernet/mellanox/mlxsw/core_thermal.c| 208 +-
 drivers/net/ethernet/mellanox/mlxsw/reg.h |  12 +-
 3 files changed, 208 insertions(+), 24 deletions(-)

-- 
2.20.1

Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support

2019-06-23 Thread Jesper Dangaard Brouer



I'm very happy to see progress with XDP for the ENA driver.

On Sun, 23 Jun 2019 10:06:49 +0300  wrote:

> @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring 
> *rx_ring,
>   va = page_address(rx_info->page) + rx_info->page_offset;
>   prefetch(va + NET_IP_ALIGN);
>  
> + if (ena_xdp_present_ring(rx_ring)) {
> + rx_ring->xdp.data = va;
> + rx_ring->xdp.data_meta = rx_ring->xdp.data;
> + rx_ring->xdp.data_hard_start = rx_ring->xdp.data -
> + rx_info->page_offset;
> + rx_ring->xdp.data_end = rx_ring->xdp.data + len;
> + if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS)
> + return NULL;
> + }

Looks like you forgot to init xdp.rxq pointer.

You can use the samples/bpf/xdp_rxq_info* to access this... and this
driver will likely crash if you use it...

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support

2019-06-23 Thread Jesper Dangaard Brouer

On Sun, 23 Jun 2019 10:06:49 +0300  wrote:

> This commit implements the basic functionality of drop/pass logic in the
> ena driver.

Usually we require a driver to implement all the XDP return codes,
before we accept it.  But as Daniel and I discussed with Zorik during
NetConf[1], we are going to make an exception and accept the driver
if you also implement XDP_TX.

As we trust that Zorik/Amazon will follow and implement XDP_REDIRECT
later, given he/you wants AF_XDP support which requires XDP_REDIRECT.

[1] http://vger.kernel.org/netconf2019.html
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support

2019-06-23 Thread David Ahern

On 6/23/19 1:06 AM, same...@amazon.com wrote:
> +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
> +{
> + struct ena_adapter *adapter = netdev_priv(netdev);
> + struct bpf_prog *old_bpf_prog;
> + int i;
> +
> + if (ena_xdp_allowed(adapter)) {
> + old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
> +
> + for (i = 0; i < adapter->num_queues; i++)
> + xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog);
> +
> + if (old_bpf_prog)
> + bpf_prog_put(old_bpf_prog);
> +
> + } else {
> + netif_err(adapter, drv, adapter->netdev, "Failed to set xdp 
> program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) 
> while xdp is on",
> +   netdev->mtu, ENA_XDP_MAX_MTU);
> + return -EFAULT;

unfortunate that extack has not been plumbed to ndo_bpf handler so that
message is passed to the user.

And EFAULT seems like the wrong return code.


> + }
> +
> + return 0;
> +}
> +
> +/* This is the main xdp callback, it's used by the kernel to set/unset the 
> xdp
> + * program as well as to query the current xdp program id.
> + */
> +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> + struct ena_adapter *adapter = netdev_priv(netdev);
> +
> + switch (bpf->command) {
> + case XDP_SETUP_PROG:
> + return ena_xdp_set(netdev, bpf->prog);
> + case XDP_QUERY_PROG:
> + bpf->prog_id = adapter->xdp_bpf_prog ?
> + adapter->xdp_bpf_prog->aux->id : 0;
> + break;
> + default:
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
>  static int ena_change_mtu(struct net_device *dev, int new_mtu)
>  {
>   struct ena_adapter *adapter = netdev_priv(dev);
>   int ret;
>  
> + if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) {
> + netif_err(adapter, drv, dev,
> +   "Requested MTU value is not valid while xdp is 
> enabled new_mtu: %d max mtu: %lu min mtu: %d\n",
> +   new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU);
> + return -EINVAL;
> + }

If you manage dev->max_mtu as an XDP program is installed / removed this
check is not needed. Instead it is handled by the core change_mtu logic
and has better user semanitcs: link list shows the new MTU and trying to
change it above the new max a message is sent to the user as opposed to
kernel log.

>   ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
>   if (!ret) {
>   netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);

Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET

2019-06-23 Thread Willem de Bruijn

On Sun, Jun 23, 2019 at 7:40 AM Neil Horman  wrote:
>
> On Sat, Jun 22, 2019 at 10:21:31PM -0400, Willem de Bruijn wrote:
> > > > -static void __packet_set_status(struct packet_sock *po, void *frame, 
> > > > int status)
> > > > +static void __packet_set_status(struct packet_sock *po, void *frame, 
> > > > int status,
> > > > +   bool call_complete)
> > > >  {
> > > > union tpacket_uhdr h;
> > > >
> > > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct packet_sock 
> > > > *po, void *frame, int status)
> > > > BUG();
> > > > }
> > > >
> > > > +   if (po->wait_on_complete && call_complete)
> > > > +   complete(&po->skb_completion);
> > >
> > > This wake need not happen before the barrier. Only one caller of
> > > __packet_set_status passes call_complete (tpacket_destruct_skb).
> > > Moving this branch to the caller avoids a lot of code churn.
> > >
> > > Also, multiple packets may be released before the process is awoken.
> > > The process will block until packet_read_pending drops to zero. Can
> > > defer the wait_on_complete to that one instance.
> >
> > Eh no. The point of having this sleep in the send loop is that
> > additional slots may be released for transmission (flipped to
> > TP_STATUS_SEND_REQUEST) from another thread while this thread is
> > waiting.
> >
> Thats incorrect.  The entirety of tpacket_snd is protected by a mutex. No 
> other
> thread can alter the state of the frames in the vector from the kernel send 
> path
> while this thread is waiting.

I meant another user thread updating the memory mapped ring contents.

> > Else, it would have been much simpler to move the wait below the send
> > loop: send as many packets as possible, then wait for all of them
> > having been released. Much clearer control flow.
> >
> Thats (almost) what happens now.  The only difference is that with this
> implementation, the waiting thread has the opportunity to see if userspace has
> queued more frames for transmission during the wait period.  We could
> potentially change that, but thats outside the scope of this fix.

Agreed. I think the current, more complex, behavior was intentional.
We could still restructure to move it out of the loop and jump back.
But, yes, definitely out of scope for a fix.

> > Where to set and clear the wait_on_complete boolean remains. Integer
> > assignment is fragile, as the compiler and processor may optimize or
> > move simple seemingly independent operations. As complete() takes a
> > spinlock, avoiding that in the DONTWAIT case is worthwhile. But probably
> > still preferable to set when beginning waiting and clear when calling
> > complete.
> We avoid any call to wait_for_complete or complete already, based on the 
> gating
> of the need_wait variable in tpacket_snd.  If the transmitting thread doesn't
> set MSG_DONTWAIT in the flags of the msg structure, we will never set
> wait_for_complete, and so we will never manipulate the completion queue.

But we don't know the state of this at tpacket_destruct_skb time without
wait_for_completion?

Re: [RFC V1 net-next 1/1] net: ena: implement XDP drop support

2019-06-23 Thread Maciej Fijalkowski

On Sun, 23 Jun 2019 10:06:49 +0300
 wrote:

> From: Sameeh Jubran 
> 
> This commit implements the basic functionality of drop/pass logic in the
> ena driver.
> 
> Signed-off-by: Sameeh Jubran 
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++-
>  drivers/net/ethernet/amazon/ena/ena_netdev.h | 29 +++
>  2 files changed, 111 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c 
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index 20ec8ff03..3d65c0771 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -33,10 +33,10 @@
>  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>  
>  #ifdef CONFIG_RFS_ACCEL
> +#include 
>  #include 
>  #endif /* CONFIG_RFS_ACCEL */
>  #include 
> -#include 
>  #include 
>  #include 
>  #include 
> @@ -105,11 +105,82 @@ static void update_rx_ring_mtu(struct ena_adapter 
> *adapter, int mtu)
>   adapter->rx_ring[i].mtu = mtu;
>  }
>  
> +static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
> +{
> + struct bpf_prog *xdp_prog = rx_ring->xdp_bpf_prog;
> + u32 verdict = XDP_PASS;
> +
> + rcu_read_lock();
> +
> + if (!xdp_prog)
> + goto out;
> +
> + verdict = bpf_prog_run_xdp(xdp_prog, xdp);
> +
> + if (unlikely(verdict == XDP_ABORTED))
> + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
> + else if (unlikely(verdict > XDP_REDIRECT))

Shouldn't this be verdict >= XDP_TX at this point? You're not providing XDP TX
resources in this patch and you're saying that only drop/pass actions are
supported.

> + bpf_warn_invalid_xdp_action(verdict);
> +out:
> + rcu_read_unlock();
> + return verdict;
> +}
> +
> +static int ena_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
> +{
> + struct ena_adapter *adapter = netdev_priv(netdev);
> + struct bpf_prog *old_bpf_prog;
> + int i;
> +
> + if (ena_xdp_allowed(adapter)) {
> + old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
> +
> + for (i = 0; i < adapter->num_queues; i++)
> + xchg(&adapter->rx_ring[i].xdp_bpf_prog, prog);
> +
> + if (old_bpf_prog)
> + bpf_prog_put(old_bpf_prog);
> +
> + } else {
> + netif_err(adapter, drv, adapter->netdev, "Failed to set xdp 
> program, the current MTU (%d) is larger than the maximal allowed MTU (%lu) 
> while xdp is on",
> +   netdev->mtu, ENA_XDP_MAX_MTU);

Consider using netlink's extack mechanism. Downside of this approach is that you
can't use format specifiers, so you won't tell user about the max allowed mtu
size for XDP case, but OTOH the message is printed right in front of user's
face in console, no need to look into dmesg.

> + return -EFAULT;
> + }
> +
> + return 0;
> +}
> +
> +/* This is the main xdp callback, it's used by the kernel to set/unset the 
> xdp
> + * program as well as to query the current xdp program id.
> + */
> +static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> + struct ena_adapter *adapter = netdev_priv(netdev);
> +
> + switch (bpf->command) {
> + case XDP_SETUP_PROG:
> + return ena_xdp_set(netdev, bpf->prog);
> + case XDP_QUERY_PROG:
> + bpf->prog_id = adapter->xdp_bpf_prog ?
> + adapter->xdp_bpf_prog->aux->id : 0;
> + break;
> + default:

Again, consider the following:
NL_SET_ERR_MSG_MOD(bpf->extack, "Unsupported XDP command");

> + return -EINVAL;
> + }
> + return 0;
> +}
> +
>  static int ena_change_mtu(struct net_device *dev, int new_mtu)
>  {
>   struct ena_adapter *adapter = netdev_priv(dev);
>   int ret;
>  
> + if (new_mtu > ENA_XDP_MAX_MTU && ena_xdp_present(adapter)) {
> + netif_err(adapter, drv, dev,
> +   "Requested MTU value is not valid while xdp is 
> enabled new_mtu: %d max mtu: %lu min mtu: %d\n",
> +   new_mtu, ENA_XDP_MAX_MTU, ENA_MIN_MTU);
> + return -EINVAL;
> + }
>   ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
>   if (!ret) {
>   netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
> @@ -888,6 +959,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring 
> *rx_ring,
>   va = page_address(rx_info->page) + rx_info->page_offset;
>   prefetch(va + NET_IP_ALIGN);
>  
> + if (ena_xdp_present_ring(rx_ring)) {
> + rx_ring->xdp.data = va;
> + rx_ring->xdp.data_meta = rx_ring->xdp.data;
> + rx_ring->xdp.data_hard_start = rx_ring->xdp.data -
> + rx_info->page_offset;
> + rx_ring->xdp.data_end = rx_ring->xdp.data + len;
> + if (ena_xdp_execute(rx_ring, &rx_ring->xdp) != XDP_PASS)
> + return NULL;
> + }

Hmm running XDP program wit

Re: [PATCH] net: dsa: microchip: Use gpiod_set_value_cansleep()

2019-06-23 Thread Andrew Lunn

On Sun, Jun 23, 2019 at 02:10:36PM +0200, Marek Vasut wrote:
> Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch
> reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly
> fine for the kernel to sleep for a bit in ksz_switch_register().
> 
> Signed-off-by: Marek Vasut 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH] net: dsa: microchip: Use gpiod_set_value_cansleep()

2019-06-23 Thread Marek Vasut

On 6/23/19 5:09 PM, Andrew Lunn wrote:
> On Sun, Jun 23, 2019 at 02:10:36PM +0200, Marek Vasut wrote:
>> Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch
>> reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly
>> fine for the kernel to sleep for a bit in ksz_switch_register().
>>
>> Signed-off-by: Marek Vasut 
> 
> Reviewed-by: Andrew Lunn 

Actually, no, I missed a change in .remove , so I'll send a V2 with this
RB if you don't mind.

-- 
Best regards,
Marek Vasut

[PATCH V2] net: dsa: microchip: Use gpiod_set_value_cansleep()

2019-06-23 Thread Marek Vasut

Replace gpiod_set_value() with gpiod_set_value_cansleep(), as the switch
reset GPIO can be connected to e.g. I2C GPIO expander and it is perfectly
fine for the kernel to sleep for a bit in ksz_switch_register().

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Linus Walleij 
Cc: Tristram Ha 
Cc: Woojung Huh 
Reviewed-by: Andrew Lunn 
---
V2: use _cansleep in .remove as well
---
 drivers/net/dsa/microchip/ksz_common.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index 4f6648d5ac8b..978c59aa8efb 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -436,9 +436,9 @@ int ksz_switch_register(struct ksz_device *dev,
return PTR_ERR(dev->reset_gpio);
 
if (dev->reset_gpio) {
-   gpiod_set_value(dev->reset_gpio, 1);
+   gpiod_set_value_cansleep(dev->reset_gpio, 1);
mdelay(10);
-   gpiod_set_value(dev->reset_gpio, 0);
+   gpiod_set_value_cansleep(dev->reset_gpio, 0);
}
 
mutex_init(&dev->dev_mutex);
@@ -489,7 +489,7 @@ void ksz_switch_remove(struct ksz_device *dev)
dsa_unregister_switch(dev->ds);
 
if (dev->reset_gpio)
-   gpiod_set_value(dev->reset_gpio, 1);
+   gpiod_set_value_cansleep(dev->reset_gpio, 1);
 
 }
 EXPORT_SYMBOL(ksz_switch_remove);
-- 
2.20.1

Re: [PATCH] sis900: increment revision number

2019-06-23 Thread David Miller

From: Sergej Benilov 
Date: Sun, 23 Jun 2019 09:47:07 +0200

> Increment revision number to 1.08.11 (TX completion fix).
> 
> Signed-off-by: Sergej Benilov 

These are useless, really...

People are going to backport the TX completion fix all by itself
and not this change if I were to merge it.

I really want to heavily discourage these kinds of things, sorry.
Code is code, the fix is there or it isn't.

Re: [PATCH] sis900: increment revision number

2019-06-23 Thread David Miller

From: Daniele Venzano 
Date: Sun, 23 Jun 2019 11:13:28 +0200

> Hello,
> 
> I think it is good to know just by looking at the sources that the
> driver is still kept up-to-date, so I am in favor of this patch.

I absolutely, strongly, disagree.

These are pointless.

Re: [PATCH V1 net-next] net: ena: Fix bug where ring allocation backoff stopped too late

2019-06-23 Thread David Miller

From: 
Date: Sun, 23 Jun 2019 10:11:10 +0300

> From: Sameeh Jubran 
> 
> The current code of create_queues_with_size_backoff() allows the ring size
> to become as small as ENA_MIN_RING_SIZE/2. This is a bug since we don't
> want the queue ring to be smaller than ENA_MIN_RING_SIZE
> 
> In this commit we change the loop's termination condition to look at the
> queue size of the next iteration instead of that of the current one,
> so that the minimal queue size again becomes ENA_MIN_RING_SIZE.
> 
> Fixes: eece4d2ab9d2 ("net: ena: add ethtool function for changing io queue 
> sizes")
> 
> Signed-off-by: Arthur Kiyanovski 
> Signed-off-by: Sameeh Jubran 

Applied, thank you.

Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout

2019-06-23 Thread Andrew Lunn

> --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
> @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
>   container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
>   struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
>   char mtmp_pl[MLXSW_REG_MTMP_LEN];
> - unsigned int temp;
> - int index;
> + int temp, index;
>   int err;
>  
>   index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
> @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
>   return err;
>   }
>   mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
> - return sprintf(buf, "%u\n", temp);
> + return sprintf(buf, "%d\n", temp);
>  }

If you had used the hwmon core, rather than implementing it yourself,
you could of avoided this part of the bug.

>  static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
> @@ -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device 
> *dev,
>   container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
>   struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
>   char mtmp_pl[MLXSW_REG_MTMP_LEN];
> - unsigned int temp;
>   u8 module;
> + int temp;
>   int err;
>  
>   module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;

I think you missed changing the %u to %d in this function.

> @@ -519,14 +519,14 @@ static int mlxsw_thermal_module_temp_get(struct 
> thermal_zone_device *tzdev,
>   return 0;
>   }
>   mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
> - *p_temp = (int) temp;
> + *p_temp = temp;
>  
>   if (!temp)
>   return 0;
>  
>   /* Update trip points. */
>   err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
> - if (!err)
> + if (!err && temp > 0)
>   mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);

Why the > 0?

Andrew

RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout

2019-06-23 Thread Vadim Pasternak




> -Original Message-
> From: Andrew Lunn 
> Sent: Sunday, June 23, 2019 6:44 PM
> To: Ido Schimmel 
> Cc: netdev@vger.kernel.org; da...@davemloft.net; Jiri Pirko
> ; mlxsw ; Vadim Pasternak
> ; Ido Schimmel 
> Subject: Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative
> temperature readout
> 
> > --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
> > +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
> > @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device
> *dev,
> > container_of(attr, struct mlxsw_hwmon_attr,
> dev_attr);
> > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
> > char mtmp_pl[MLXSW_REG_MTMP_LEN];
> > -   unsigned int temp;
> > -   int index;
> > +   int temp, index;
> > int err;
> >
> > index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr-
> >type_index,
> > @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device
> *dev,
> > return err;
> > }
> > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
> > -   return sprintf(buf, "%u\n", temp);
> > +   return sprintf(buf, "%d\n", temp);
> >  }
> 
> If you had used the hwmon core, rather than implementing it yourself, you 
> could
> of avoided this part of the bug.
> 

Hi Andrew.

Yes.
But before we handle only positive temperature.
And currently support for the negative readouts has been added.

> >  static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@
> > -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct
> device *dev,
> > container_of(attr, struct mlxsw_hwmon_attr,
> dev_attr);
> > struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
> > char mtmp_pl[MLXSW_REG_MTMP_LEN];
> > -   unsigned int temp;
> > u8 module;
> > +   int temp;
> > int err;
> >
> > module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon-
> >sensor_count;
> 
> I think you missed changing the %u to %d in this function.

If I am not wrong, I think you refer to mlxsw_hwmon_fan_rpm_show(),
where it should be %u.

> 
> > @@ -519,14 +519,14 @@ static int mlxsw_thermal_module_temp_get(struct
> thermal_zone_device *tzdev,
> > return 0;
> > }
> > mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
> > -   *p_temp = (int) temp;
> > +   *p_temp = temp;
> >
> > if (!temp)
> > return 0;
> >
> > /* Update trip points. */
> > err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
> > -   if (!err)
> > +   if (!err && temp > 0)
> > mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips,
> temp);
> 
> Why the > 0?

We don't consider negative temperature for thermal control.

> 
> Andrew

RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout

2019-06-23 Thread Vadim Pasternak




> -Original Message-
> From: Vadim Pasternak
> Sent: Sunday, June 23, 2019 7:01 PM
> To: Andrew Lunn ; Ido Schimmel 
> Cc: netdev@vger.kernel.org; da...@davemloft.net; Jiri Pirko
> ; mlxsw ; Ido Schimmel
> 
> Subject: RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative
> temperature readout
> 
> 
> 
> > -Original Message-
> > From: Andrew Lunn 
> > Sent: Sunday, June 23, 2019 6:44 PM
> > To: Ido Schimmel 
> > Cc: netdev@vger.kernel.org; da...@davemloft.net; Jiri Pirko
> > ; mlxsw ; Vadim Pasternak
> > ; Ido Schimmel 
> > Subject: Re: [PATCH net-next 3/3] mlxsw: core: Add support for
> > negative temperature readout
> >
> > > --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
> > > +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
> > > @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device
> > *dev,
> > >   container_of(attr, struct mlxsw_hwmon_attr,
> > dev_attr);
> > >   struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
> > >   char mtmp_pl[MLXSW_REG_MTMP_LEN];
> > > - unsigned int temp;
> > > - int index;
> > > + int temp, index;
> > >   int err;
> > >
> > >   index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr-
> > >type_index,
> > > @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device
> > *dev,
> > >   return err;
> > >   }
> > >   mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
> > > - return sprintf(buf, "%u\n", temp);
> > > + return sprintf(buf, "%d\n", temp);
> > >  }
> >
> > If you had used the hwmon core, rather than implementing it yourself,
> > you could of avoided this part of the bug.
> >
> 
> Hi Andrew.
> 
> Yes.
> But before we handle only positive temperature.
> And currently support for the negative readouts has been added.
> 
> > >  static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@
> > > -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct
> > device *dev,
> > >   container_of(attr, struct mlxsw_hwmon_attr,
> > dev_attr);
> > >   struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
> > >   char mtmp_pl[MLXSW_REG_MTMP_LEN];
> > > - unsigned int temp;
> > >   u8 module;
> > > + int temp;
> > >   int err;
> > >
> > >   module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon-
> sensor_count;
> >
> > I think you missed changing the %u to %d in this function.
> 
> If I am not wrong, I think you refer to mlxsw_hwmon_fan_rpm_show(), where it
> should be %u.
> 
 O, I see what you mentioned.
This is mlxsw_hwmon_module_temp_show().
Yes, right it should be %d.
Thank you.

> >
> > > @@ -519,14 +519,14 @@ static int
> > > mlxsw_thermal_module_temp_get(struct
> > thermal_zone_device *tzdev,
> > >   return 0;
> > >   }
> > >   mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
> > > - *p_temp = (int) temp;
> > > + *p_temp = temp;
> > >
> > >   if (!temp)
> > >   return 0;
> > >
> > >   /* Update trip points. */
> > >   err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
> > > - if (!err)
> > > + if (!err && temp > 0)
> > >   mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips,
> > temp);
> >
> > Why the > 0?
> 
> We don't consider negative temperature for thermal control.
> 
> >
> > Andrew

Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout

2019-06-23 Thread Andrew Lunn

> > Why the > 0?
> 
> We don't consider negative temperature for thermal control.

Is this because the thermal control is also broken and does not
support negative values? This is just a workaround papering over the
cracks?

I've worked on some systems where the thermal subsystem has controller
a heater. Mostly industrial systems, extended temperature range, and
you have to make sure the hardware is kept above -25C, otherwise the
DRAM timing goes to pot and the system crashed and froze.

Andrew

Re: [PATCH] sis900: increment revision number

2019-06-23 Thread Joe Perches

On Sun, 2019-06-23 at 08:37 -0700, David Miller wrote:
> From: Daniele Venzano 
> Date: Sun, 23 Jun 2019 11:13:28 +0200
> 
> > Hello,
> > 
> > I think it is good to know just by looking at the sources that the
> > driver is still kept up-to-date, so I am in favor of this patch.
> 
> I absolutely, strongly, disagree.
> 
> These are pointless.

Perhaps (most)? all the .get_drvinfo function calls where the
driver version is returned should use the default release.

This is similar to net/wireless/ethtool.c

Maybe:
---
 net/core/ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 4d1011b2e24f..644a2043714c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -806,6 +807,8 @@ static noinline_for_stack int ethtool_get_drvinfo(struct 
net_device *dev,
devlink_compat_running_version(dev, info.fw_version,
   sizeof(info.fw_version));
 
+   strlcpy(info.version, init_utsname()->release, sizeof(info.version));
+
if (copy_to_user(useraddr, &info, sizeof(info)))
return -EFAULT;
return 0;

RE: [PATCH net-next 3/3] mlxsw: core: Add support for negative temperature readout

2019-06-23 Thread Vadim Pasternak




> -Original Message-
> From: Andrew Lunn 
> Sent: Sunday, June 23, 2019 7:26 PM
> To: Vadim Pasternak 
> Cc: Ido Schimmel ; netdev@vger.kernel.org;
> da...@davemloft.net; Jiri Pirko ; mlxsw
> ; Ido Schimmel 
> Subject: Re: [PATCH net-next 3/3] mlxsw: core: Add support for negative
> temperature readout
> 
> > > Why the > 0?
> >
> > We don't consider negative temperature for thermal control.
> 
> Is this because the thermal control is also broken and does not support 
> negative
> values? This is just a workaround papering over the cracks?

We just have system hardware requirements for minimal speed for system
PWM. It could not be less than 20%.
So for temperature ~40C or below it PWM will set to this speed.

> 
> I've worked on some systems where the thermal subsystem has controller a
> heater. Mostly industrial systems, extended temperature range, and you have to
> make sure the hardware is kept above -25C, otherwise the DRAM timing goes to
> pot and the system crashed and froze.

Interesting input. I didn't know about such feature.
We don't have heaters within our systems.
Maybe we should think about it for the next generatin systems.

> 
>   Andrew

Re: [PATCH net-next 10/16] qlge: Factor out duplicated expression

2019-06-23 Thread David Miller

From: Benjamin Poirier 
Date: Mon, 17 Jun 2019 16:48:52 +0900

> Signed-off-by: Benjamin Poirier 
> ---
>  drivers/net/ethernet/qlogic/qlge/qlge.h  |  6 ++
>  drivers/net/ethernet/qlogic/qlge/qlge_main.c | 18 ++
>  2 files changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h 
> b/drivers/net/ethernet/qlogic/qlge/qlge.h
> index 5a4b2520cd2a..0bb7ccdca6a7 100644
> --- a/drivers/net/ethernet/qlogic/qlge/qlge.h
> +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
> @@ -77,6 +77,12 @@
>  #define LSD(x)  ((u32)((u64)(x)))
>  #define MSD(x)  ((u32)u64)(x)) >> 32)))
>  
> +#define QLGE_FIT16(value) \
> +({ \
> + typeof(value) _value = value; \
> + (_value) == 65536 ? 0 : (u16)(_value); \
> +})
> +

"(u16) 65536" is zero and the range of these values is 0 -- 65536.

This whole expression is way overdone.

Re: [PATCH net-next 10/16] qlge: Factor out duplicated expression

2019-06-23 Thread David Miller

From: David Miller 
Date: Sun, 23 Jun 2019 10:59:35 -0700 (PDT)

> "(u16) 65536" is zero and the range of these values is 0 -- 65536.
> 
> This whole expression is way overdone.

Also, when you post the next revision of this patch series, please
provide a proper "[PATCH net-next 00/16]" header posting explaining
what this patch series does logically at the high level, how it is
doing it, and why it is doing it that way.

Thank you.

Re: [PATCH net] vxlan: do not destroy fdb if register_netdevice() is failed

2019-06-23 Thread David Miller

From: Taehee Yoo 
Date: Thu, 20 Jun 2019 20:51:08 +0900

> __vxlan_dev_create() destroys FDB using specific pointer which indicates
> a fdb when error occurs.
> But that pointer should not be used when register_netdevice() fails because
> register_netdevice() internally destroys fdb when error occurs.
> 
> In order to avoid un-registered dev's notification, fdb destroying routine
> checks dev's register status before notification.

Simply pass do_notify as false in this failure code path of 
__vxlan_dev_create(),
thank you.

Re: network unstable on odroid-c1/meson8b.

2019-06-23 Thread Aymeric



Le 20/06/2019 à 22:54, Aymeric a écrit :
> Le 20/06/2019 à 17:53, Heiner Kallweit a écrit :
>> On 20.06.2019 09:55, Aymeric wrote:
>>> Hi,
>>> On 2019-06-20 00:14, Heiner Kallweit wrote:
 On 19.06.2019 22:18, Aymeric wrote:
> Hello all,
>
 Kernel 3.10 didn't have a dedicated RTL8211F PHY driver yet, therefore
 I assume the genphy driver was used. Do you have a line with
 "attached PHY driver" in dmesg output of the vendor kernel?
>>> No.
>>> Here is the full output of the dmesg from vendor kernel [¹].
>>>
>>> I've also noticed something strange, it might be linked, but mac address of 
>>> the board is set to a random value when using mainline kernel and I've to 
>>> set it manually but not when using vendor kernel.
>>>
 The dedicated PHY driver takes care of the tx delay, if the genphy
 driver is used we have to rely on what uboot configured.
 But if we indeed had an issue with a misconfigured delay, I think
 the connection shouldn't be fine with just another link partner.
 Just to have it tested you could make rtl8211f_config_init() in
 drivers/net/phy/realtek.c a no-op (in current kernels).

>>> I'm not an expert here, just adding a "return 0;" here[²] would be enough?
>>>
 And you could compare at least the basic PHY registers 0x00 - 0x30
 with both kernel versions, e.g. with phytool.

>>> They are not the same but I don't know what I'm looking for, so for kernel 
>>> 3.10 [³] and for kernel 5.1.12 [⁴].
>>>
>>> Aymeric
>>>
>>> [¹]: 
>>> https://paste.aplu.fr/?38ef95b44ebdbfc3#G666/YbhgU+O+tdC/2HaimUCigm8ZTB44qvQip/HJ5A=
>>> [²]: 
>>> https://github.com/torvalds/linux/blob/241e39004581475b2802cd63c111fec43bb0123e/drivers/net/phy/realtek.c#L164
>>> [³]: 
>>> https://paste.aplu.fr/?2dde1c32d5c68f4c#6xIa8MjTm6jpI6citEJAqFTLMMHDjFZRet/M00/EwjU=
>>> [⁴]: 
>>> https://paste.aplu.fr/?32130e9bcb05dde7#N/xdnvb5GklcJtiOxMpTCm+9gsUliRwH8X3dcwSV+ng=
>>>
>> The vendor kernel has some, but not really much magic:
>> https://github.com/hardkernel/linux/blob/odroidc-3.10.y/drivers/amlogic/ethernet/phy/am_rtl8211f.c
>> The write to RTL8211F_PHYCR2 is overwritten later, therefore we don't have 
>> to consider it.
>>
>> The following should make the current Realtek PHY driver behave like in the 
>> vendor driver.
>> Could you test it?
> (sending again for mailing list, sorry, I forgot to force it in plaintext…)
>
> I've applied your patch and tried but it doesn't change anything.
>
> Here is dmesg output and phytool results.
>
> https://paste.aplu.fr/?9735c99907528929#SeCgwR45cgnbDA1tXIVBHCBT8RNct2r41jU6vsguLVc=
>
Hello all,

I had some news from a friend who have the same issue than me, his board
is connected to an "intelligent" switch a Ubiquiti EdgeSwitch.

Also, when he force the link to 100 it is stable.

Aymeric.

-- 
Aymeric

Re: [PATCH v3 net-next 0/5] ipv6: avoid taking refcnt on dst during route lookup

2019-06-23 Thread David Miller

From: Wei Wang 
Date: Thu, 20 Jun 2019 17:36:36 -0700

> v2->v3:
> - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not
>   configured in patch 03 (suggested by David Ahern)
> - Removed the renaming of l3mdev_link_scope_lookup() in patch 05
>   (suggested by David Ahern)
> - Moved definition of ip6_route_output_flags() from an inline function
>   in /net/ipv6/route.c to net/ipv6/route.c in order to address kbuild
>   error in patch 05

I'll give David A. a chance to review this before applying.

Re: [PATCH net-next 1/4] cxgb4: Re-work the logic for mps refcounting

2019-06-23 Thread David Miller

From: Raju Rangoju 
Date: Fri, 21 Jun 2019 20:06:33 +0530

> +struct mps_entries_ref {
> + struct list_head list;
> + u8 addr[ETH_ALEN];
> + u8 mask[ETH_ALEN];
> + u16 idx;
> + atomic_t refcnt;
> +};

Since you're making this change, please use refcnt_t.

Re: [PATCH v2 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET

2019-06-23 Thread Neil Horman

On Sun, Jun 23, 2019 at 10:39:12AM -0400, Willem de Bruijn wrote:
> On Sun, Jun 23, 2019 at 7:40 AM Neil Horman  wrote:
> >
> > On Sat, Jun 22, 2019 at 10:21:31PM -0400, Willem de Bruijn wrote:
> > > > > -static void __packet_set_status(struct packet_sock *po, void *frame, 
> > > > > int status)
> > > > > +static void __packet_set_status(struct packet_sock *po, void *frame, 
> > > > > int status,
> > > > > +   bool call_complete)
> > > > >  {
> > > > > union tpacket_uhdr h;
> > > > >
> > > > > @@ -381,6 +382,8 @@ static void __packet_set_status(struct 
> > > > > packet_sock *po, void *frame, int status)
> > > > > BUG();
> > > > > }
> > > > >
> > > > > +   if (po->wait_on_complete && call_complete)
> > > > > +   complete(&po->skb_completion);
> > > >
> > > > This wake need not happen before the barrier. Only one caller of
> > > > __packet_set_status passes call_complete (tpacket_destruct_skb).
> > > > Moving this branch to the caller avoids a lot of code churn.
> > > >
> > > > Also, multiple packets may be released before the process is awoken.
> > > > The process will block until packet_read_pending drops to zero. Can
> > > > defer the wait_on_complete to that one instance.
> > >
> > > Eh no. The point of having this sleep in the send loop is that
> > > additional slots may be released for transmission (flipped to
> > > TP_STATUS_SEND_REQUEST) from another thread while this thread is
> > > waiting.
> > >
> > Thats incorrect.  The entirety of tpacket_snd is protected by a mutex. No 
> > other
> > thread can alter the state of the frames in the vector from the kernel send 
> > path
> > while this thread is waiting.
> 
> I meant another user thread updating the memory mapped ring contents.
> 
Yes, thats true, and if that happens, we will loop through this path again (the
do..while section, picking up the next frame for transmit)

> > > Else, it would have been much simpler to move the wait below the send
> > > loop: send as many packets as possible, then wait for all of them
> > > having been released. Much clearer control flow.
> > >
> > Thats (almost) what happens now.  The only difference is that with this
> > implementation, the waiting thread has the opportunity to see if userspace 
> > has
> > queued more frames for transmission during the wait period.  We could
> > potentially change that, but thats outside the scope of this fix.
> 
> Agreed. I think the current, more complex, behavior was intentional.
> We could still restructure to move it out of the loop and jump back.
> But, yes, definitely out of scope for a fix.
> 
Yes, it was, though based on your comments I've moved the wait_for_completion
call to the bottom of the loop, so its only checked after we are guaranteed to
have sent at least one frame.  I think that makes the code a bit more legible.

> > > Where to set and clear the wait_on_complete boolean remains. Integer
> > > assignment is fragile, as the compiler and processor may optimize or
> > > move simple seemingly independent operations. As complete() takes a
> > > spinlock, avoiding that in the DONTWAIT case is worthwhile. But probably
> > > still preferable to set when beginning waiting and clear when calling
> > > complete.
> > We avoid any call to wait_for_complete or complete already, based on the 
> > gating
> > of the need_wait variable in tpacket_snd.  If the transmitting thread 
> > doesn't
> > set MSG_DONTWAIT in the flags of the msg structure, we will never set
> > wait_for_complete, and so we will never manipulate the completion queue.
> 
> But we don't know the state of this at tpacket_destruct_skb time without
> wait_for_completion?
> 
Sure we do, wait_for_complete is stored in the packet_sock structure, which is
available and stable at the time tpacket_destruct_skb is called.
po->wait_for_complete is set in tpacket_snd iff:
1) The MSG_DONTWAIT flag is clear
and
2) We have detected that the next frame in the memory mapped buffer does not
have its status set to TP_STATUS_SEND_REQUEST.

If those two conditions are true, we set po->wait_for_complete to 1, which
indicates that tpacket_destruct_skb should call complete, when all the frames
we've sent to the physical layer have been freed (i.e. when packet_read_pending
is zero).

If wait_for_complete is non-zero, we also can be confident that the
calling task is either:
a) Already blocking on wait_for_completion_interruptible_timeout
or
b) Will be waiting on it shortly

In case (a) the blocking/transmitting task will be woken up, and continue on its
way

In case (b) the transmitting task will call
wait_for_completion_interruptible_timeout, see that the completion has already
been called (based on the completion structs done variable being positive), and
return immediately.

I've made a slight update to the logic/comments in my next version to make that 
a little
more clear

Neil

Re: [PATCH v3 net-next 0/5] ipv6: avoid taking refcnt on dst during route lookup

2019-06-23 Thread David Ahern

On 6/23/19 12:27 PM, David Miller wrote:
> From: Wei Wang 
> Date: Thu, 20 Jun 2019 17:36:36 -0700
> 
>> v2->v3:
>> - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not
>>   configured in patch 03 (suggested by David Ahern)
>> - Removed the renaming of l3mdev_link_scope_lookup() in patch 05
>>   (suggested by David Ahern)
>> - Moved definition of ip6_route_output_flags() from an inline function
>>   in /net/ipv6/route.c to net/ipv6/route.c in order to address kbuild
>>   error in patch 05
> 
> I'll give David A. a chance to review this before applying.
> 

Hey Dave: I responded to the cover-letter on Friday.

Re: [PATCH v3 net-next 0/5] ipv6: avoid taking refcnt on dst during route lookup

2019-06-23 Thread David Miller

From: David Ahern 
Date: Sun, 23 Jun 2019 13:29:27 -0600

> On 6/23/19 12:27 PM, David Miller wrote:
>> From: Wei Wang 
>> Date: Thu, 20 Jun 2019 17:36:36 -0700
>> 
>>> v2->v3:
>>> - Handled fib6_rule_lookup() when CONFIG_IPV6_MULTIPLE_TABLES is not
>>>   configured in patch 03 (suggested by David Ahern)
>>> - Removed the renaming of l3mdev_link_scope_lookup() in patch 05
>>>   (suggested by David Ahern)
>>> - Moved definition of ip6_route_output_flags() from an inline function
>>>   in /net/ipv6/route.c to net/ipv6/route.c in order to address kbuild
>>>   error in patch 05
>> 
>> I'll give David A. a chance to review this before applying.
>> 
> 
> Hey Dave: I responded to the cover-letter on Friday.

Indeed, and so did tractor man.

Series applied, thanks everyone.

Re: [EXT] [PATCH] bnx2x: Prevent ptp_task to be rescheduled indefinitely

2019-06-23 Thread Guilherme Piccoli

On Sun, Jun 23, 2019 at 2:13 AM Sudarsana Reddy Kalluru
 wrote:
>
> Thanks for uncovering this issue, and the fix.
> With the proposed fix, if HW latches the timestamp after 3 iterations then it 
> would lead to erroneous PTP functionality. When driver receives the next PTP 
> packet, driver sees that timestamp is available in the register and would 
> assign this value (which corresponds to last/skipped ptp packet) to the PTP 
> packet. In general, FW takes around 1ms to 100ms to perform the timestamp and 
> may take more. Hence the promising solution for this issue would be to wait 
> for timestamp for particular period of time. If Tx timestamp is not available 
> for a pre-determined max period, then declare it as an error scenario and 
> terminate the thread.
>
> Just for the reference, similar fix was added for Marvell qede driver 
> recently. Patch details are,
> 9adebac37e7d: (qede: Handle infinite driver spinning for Tx timestamp.)
> https://www.spinics.net/lists/netdev/msg572838.html
>

Thanks a lot for the quick review and good suggestion Sudarsana!
I'll rework the fix based on your reference, and resubmit.

Cheers,


Guilherme

Re: [PATCH v2 0/3] fix bugs when enable route_localnet

2019-06-23 Thread Zhiqiang Liu

> From: Zhiqiang Liu 
> Date: Sat, 22 Jun 2019 16:41:49 +0800
> 
>> Friendly ping ...
> 
> I'm not applying this patch series without someone reviewing it.
> 
Of course, all patches should be reviewd before deciding whether to apply.
In v2, we add a couple of test for enabling route_localnet in selftests 
suggested
by David Ahern.
In additon, another similar bugfix is added for arp_ignore = 3.

We would appreciate David Ahern or someone could help review the patch series.

Re: 6c409a3aee: kernel_selftests.bpf.test_verifier.fail

2019-06-23 Thread Rong Chen


On 6/21/19 11:52 PM, Alexei Starovoitov wrote:

On Fri, Jun 21, 2019 at 1:36 AM kernel test robot  wrote:

# #340/p direct packet access: test22 (x += pkt_ptr, 3) OK
# #341/p direct packet access: test23 (x += pkt_ptr, 4) FAIL
# Unexpected success to load!
# verification time 17 usec
# stack depth 8
# processed 18 insns (limit 100) max_states_per_insn 0 total_states 1 
peak_states 1 mark_read 0
# #342/p direct packet access: test24 (x += pkt_ptr, 5) OK
# #343/p direct packet access: test25 (marking on <, good access) OK

..

# #673/p meta access, test9 OK
# #674/p meta access, test10 FAIL
# Unexpected success to load!
# verification time 29 usec
# stack depth 8
# processed 19 insns (limit 100) max_states_per_insn 0 total_states 1 
peak_states 1 mark_read 0
# #675/p meta access, test11 OK

Hi Rong,

the patch quoted is not in bpf-next/net-next.
This patch is work-in-progress that I posted to mailing list
and pushed into my own git branch on kernel.org.
It's awesome that build bot does this early testing.
I really like it.
Would be great if the bot can add a tag to email subject that it's testing
this not yet merged patch.

Right now since the email says
commit: 6c409a3aee945e50c6dd4109689f52
it felt that this is real commit and my initial reaction
was that 'ohh something is broken in the merge code'
which wasn't the case :)



Hi Alexei,

Thanks for the advice, we'll improve the email subject.

Best Regards,
Rong Chen

[PATCH v3 net] af_packet: Block execution of tasks waiting for transmit to complete in AF_PACKET

2019-06-23 Thread Neil Horman

When an application is run that:
a) Sets its scheduler to be SCHED_FIFO
and
b) Opens a memory mapped AF_PACKET socket, and sends frames with the
MSG_DONTWAIT flag cleared, its possible for the application to hang
forever in the kernel.  This occurs because when waiting, the code in
tpacket_snd calls schedule, which under normal circumstances allows
other tasks to run, including ksoftirqd, which in some cases is
responsible for freeing the transmitted skb (which in AF_PACKET calls a
destructor that flips the status bit of the transmitted frame back to
available, allowing the transmitting task to complete).

However, when the calling application is SCHED_FIFO, its priority is
such that the schedule call immediately places the task back on the cpu,
preventing ksoftirqd from freeing the skb, which in turn prevents the
transmitting task from detecting that the transmission is complete.

We can fix this by converting the schedule call to a completion
mechanism.  By using a completion queue, we force the calling task, when
it detects there are no more frames to send, to schedule itself off the
cpu until such time as the last transmitted skb is freed, allowing
forward progress to be made.

Tested by myself and the reporter, with good results

Appies to the net tree

Signed-off-by: Neil Horman 
Reported-by: Matteo Croce 
CC: "David S. Miller" 
CC: Willem de Bruijn 

Change Notes:

V1->V2:
Enhance the sleep logic to support being interruptible and
allowing for honoring to SK_SNDTIMEO (Willem de Bruijn)

V2->V3:
Rearrage the point at which we wait for the completion queue, to
avoid needing to check for ph/skb being null at the end of the loop.
Also move the complete call to the skb destructor to avoid needing to
modify __packet_set_status.  Also gate calling complete on
packet_read_pending returning zero to avoid multiple calls to complete.
(Willem de Bruijn)

Move timeo computation within loop, to re-fetch the socket
timeout since we also use the timeo variable to record the return code
from the wait_for_complete call (Neil Horman)
---
 net/packet/af_packet.c | 59 +-
 net/packet/internal.h  |  2 ++
 2 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a29d66da7394..5c48bb7a4fa5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -380,7 +380,6 @@ static void __packet_set_status(struct packet_sock *po, 
void *frame, int status)
WARN(1, "TPACKET version not supported.\n");
BUG();
}
-
smp_wmb();
 }
 
@@ -1148,6 +1147,14 @@ static void *packet_previous_frame(struct packet_sock 
*po,
return packet_lookup_frame(po, rb, previous, status);
 }
 
+static void *packet_next_frame(struct packet_sock *po,
+   struct packet_ring_buffer *rb,
+   int status)
+{
+   unsigned int next = rb->head != rb->frame_max ? rb->head+1 : 0;
+   return packet_lookup_frame(po, rb, next, status);
+}
+
 static void packet_increment_head(struct packet_ring_buffer *buff)
 {
buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
@@ -2401,6 +2408,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+   if (po->wait_on_complete && !packet_read_pending(&po->tx_ring))
+   complete(&po->skb_completion);
}
 
sock_wfree(skb);
@@ -2600,9 +2610,11 @@ static int tpacket_snd(struct packet_sock *po, struct 
msghdr *msg)
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
+   long timeo = 0;
 
mutex_lock(&po->pg_vec_lock);
 
+
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto   = po->num;
@@ -2647,16 +2659,16 @@ static int tpacket_snd(struct packet_sock *po, struct 
msghdr *msg)
size_max = dev->mtu + reserve + VLAN_HLEN;
 
do {
+
ph = packet_current_frame(po, &po->tx_ring,
  TP_STATUS_SEND_REQUEST);
-   if (unlikely(ph == NULL)) {
-   if (need_wait && need_resched())
-   schedule();
-   continue;
-   }
+
+   if (unlikely(ph == NULL))
+   break;
 
skb = NULL;
tp_len = tpacket_parse_header(po, ph, size_max, &data);
+
if (tp_len < 0)
goto tpacket_error;
 
@@ -2720,6 +2732,21 @@ static int tpacket_snd(struct packet_sock *po, struct 
msghdr *msg)
 
skb->destructor = tpacket_destruct_skb;
__packet_set_status(po, ph, TP_STATUS_SENDING);
+
+   /*
+* If we need to wait and we've sent the last f

RE: [iproute2-next v5] tipc: support interface name when activating UDP bearer

2019-06-23 Thread Hoang Le

Thanks David. I will update code change as your comments.

For the item:
> + /* remove from cache */
> + ll_drop_by_index(ifr.ifr_ifindex);

why the call to ll_drop_by_index? doing so means that ifindex is looked
up again.

[Hoang]
> + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
This function stored an entry ll_cache in hash map table. We have to call this 
function to prevent memory leaked. 

Regards,
Hoang
-Original Message-
From: David Ahern  
Sent: Saturday, June 22, 2019 5:50 AM
To: Hoang Le ; dsah...@gmail.com; 
jon.ma...@ericsson.com; ma...@donjonn.com; ying@windriver.com; 
netdev@vger.kernel.org; tipc-discuss...@lists.sourceforge.net
Subject: Re: [iproute2-next v5] tipc: support interface name when activating 
UDP bearer

On 6/13/19 2:07 AM, Hoang Le wrote:
> @@ -119,6 +121,74 @@ static int generate_multicast(short af, char *buf, int 
> bufsize)
>   return 0;
>  }
>  
> +static struct ifreq ifr = {};

you don't need to initialize globals, but you could pass a a struct as
the arg to the filter here which is both the addr buffer and the ifindex
of interest.

> +static int nl_dump_addr_filter(struct nlmsghdr *nlh, void *arg)
> +{
> + struct ifaddrmsg *ifa = NLMSG_DATA(nlh);
> + char *r_addr = (char *)arg;
> + int len = nlh->nlmsg_len;
> + struct rtattr *addr_attr;
> +
> + if (ifr.ifr_ifindex != ifa->ifa_index)
> + return 0;
> +
> + if (strlen(r_addr) > 0)
> + return 1;
> +
> + addr_attr = parse_rtattr_one(IFA_ADDRESS, IFA_RTA(ifa),
> +  len - NLMSG_LENGTH(sizeof(*ifa)));
> + if (!addr_attr)
> + return 0;
> +
> + if (ifa->ifa_family == AF_INET) {
> + struct sockaddr_in ip4addr;
> + memcpy(&ip4addr.sin_addr, RTA_DATA(addr_attr),
> +sizeof(struct in_addr));
> + if (inet_ntop(AF_INET, &ip4addr.sin_addr, r_addr,
> +   INET_ADDRSTRLEN) == NULL)
> + return 0;
> + } else if (ifa->ifa_family == AF_INET6) {
> + struct sockaddr_in6 ip6addr;
> + memcpy(&ip6addr.sin6_addr, RTA_DATA(addr_attr),
> +sizeof(struct in6_addr));
> + if (inet_ntop(AF_INET6, &ip6addr.sin6_addr, r_addr,
> +   INET6_ADDRSTRLEN) == NULL)
> + return 0;
> + }
> + return 1;
> +}
> +
> +static int cmd_bearer_validate_and_get_addr(const char *name, char *r_addr)
> +{
> + struct rtnl_handle rth ={ .fd = -1 };

space between '={'

> +
> + memset(&ifr, 0, sizeof(ifr));
> + if (!name || !r_addr || get_ifname(ifr.ifr_name, name))
> + return 0;
> +
> + ifr.ifr_ifindex = ll_name_to_index(ifr.ifr_name);
> + if (!ifr.ifr_ifindex)
> + return 0;
> +
> + /* remove from cache */
> + ll_drop_by_index(ifr.ifr_ifindex);

why the call to ll_drop_by_index? doing so means that ifindex is looked
up again.

> +
> + if (rtnl_open(&rth, 0) < 0)
> + return 0;
> +
> + if (rtnl_addrdump_req(&rth, AF_UNSPEC, 0) < 0) {

If you pass a filter here to set ifa_index, this command on newer
kernels will be much more efficient. See ipaddr_dump_filter.


> + rtnl_close(&rth);
> + return 0;
> + }
> +
> + if (rtnl_dump_filter(&rth, nl_dump_addr_filter, r_addr) < 0) {
> + rtnl_close(&rth);
> + return 0;
> + }
> + rtnl_close(&rth);
> + return 1;
> +}

it would better to have 1 exit with the rtnl_close and return rc based
on above.

Re: [PATCH net] vxlan: do not destroy fdb if register_netdevice() is failed

2019-06-23 Thread Taehee Yoo

On Mon, 24 Jun 2019 at 03:07, David Miller  wrote:
>

Hi David,

Thank you for the review!

> From: Taehee Yoo 
> Date: Thu, 20 Jun 2019 20:51:08 +0900
>
> > __vxlan_dev_create() destroys FDB using specific pointer which indicates
> > a fdb when error occurs.
> > But that pointer should not be used when register_netdevice() fails because
> > register_netdevice() internally destroys fdb when error occurs.
> >
> > In order to avoid un-registered dev's notification, fdb destroying routine
> > checks dev's register status before notification.
>
> Simply pass do_notify as false in this failure code path of 
> __vxlan_dev_create(),
> thank you.

Failure path of __vxlan_dev_create() can't handle do_notify in that case
because if register_netdevice() fails it internally calls
->ndo_uninit() which is
vxlan_uninit().
vxlan_uninit() internally calls vxlan_fdb_delete_default() and it callls
vxlan_fdb_destroy().
do_notify of vxlan_fdb_destroy() in vxlan_fdb_delete_default() is always true.
So, failure path of __vxlan_dev_create() doesn't have any opportunity to
handle do_notify.

Thank you!

Re: [PATCH next 0/3] blackhole device to invalidate dst

2019-06-23 Thread महेश बंडेवार

On Sat, Jun 22, 2019 at 8:35 AM David Ahern  wrote:
>
> On 6/21/19 6:45 PM, Mahesh Bandewar wrote:
> > When we invalidate dst or mark it "dead", we assign 'lo' to
> > dst->dev. First of all this assignment is racy and more over,
> > it has MTU implications.
> >
> > The standard dev MTU is 1500 while the Loopback MTU is 64k. TCP
> > code when dereferencing the dst don't check if the dst is valid
> > or not. TCP when dereferencing a dead-dst while negotiating a
> > new connection, may use dst device which is 'lo' instead of
> > using the correct device. Consider the following scenario:
> >
>
> Why doesn't the TCP code (or any code) check if a cached dst is valid?
> That's the whole point of marking it dead - to tell users not to rely on
> it.

Well, I'm not an expert in TCP, but I could guess. Eric, please help
me being honest with my guess.
The code that marks it dead (control path) and the code that need to
check the validity (data-path) need to have some sort of
synchronization and that could be costly in the data-path. Also what
is the worst case scenario? ... that packet is going to get dropped
since the under-lying device or route has issues (minus this corner
case). May be that was the reason.

As I mentioned in the log, we could  remove the racy-ness  with
barriers or locks but then that would be an additional cost in the
data-path. having a dummy/backhole dev is the cheapest solution with
no changes to the data-path.

Re: [PATCH V3 01/10] net: dsa: microchip: Remove ksz_{read,write}24()

2019-06-23 Thread Andrew Lunn

On Mon, Jun 24, 2019 at 12:34:59AM +0200, Marek Vasut wrote:
> These functions and callbacks are never used, remove them.
> 
> Signed-off-by: Marek Vasut 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH V3 02/10] net: dsa: microchip: Remove ksz_{get,set}()

2019-06-23 Thread Andrew Lunn

On Mon, Jun 24, 2019 at 12:35:00AM +0200, Marek Vasut wrote:
> These functions and callbacks are never used, remove them.
> 
> Signed-off-by: Marek Vasut 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH V3 03/10] net: dsa: microchip: Inline ksz_spi.h

2019-06-23 Thread Andrew Lunn

On Mon, Jun 24, 2019 at 12:35:01AM +0200, Marek Vasut wrote:
> The functions in the header file are static, and the header file is
> included from single C file, just inline the code into the C file.
> The bonus is that it's easier to spot further content to clean up.
> 
> Signed-off-by: Marek Vasut 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH V3 04/10] net: dsa: microchip: Move ksz_cfg and ksz_port_cfg to ksz9477.c

2019-06-23 Thread Andrew Lunn

On Mon, Jun 24, 2019 at 12:35:02AM +0200, Marek Vasut wrote:
> These functions are only used by the KSZ9477 code, move them from
> the header into that code. Note that these functions will be soon
> replaced by regmap equivalents.
> 
> Signed-off-by: Marek Vasut 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH V3 05/10] net: dsa: microchip: Use PORT_CTRL_ADDR() instead of indirect function call

2019-06-23 Thread Andrew Lunn

On Mon, Jun 24, 2019 at 12:35:03AM +0200, Marek Vasut wrote:
> The indirect function call to dev->dev_ops->get_port_addr() is expensive
> especially if called for every single register access, and only returns
> the value of PORT_CTRL_ADDR() macro. Use PORT_CTRL_ADDR() macro directly
> instead.

Hi Marek

Rather than change just one instance, it would be better to change
them all. And then remove dev_ops->get_port_addr().

 Andrew

Re: [PATCH V3 06/10] net: dsa: microchip: Factor out register access opcode generation

2019-06-23 Thread Andrew Lunn

On Mon, Jun 24, 2019 at 12:35:04AM +0200, Marek Vasut wrote:
> Factor out the code which sends out the register read/write opcodes
> to the switch, since the code differs in single bit between read and
> write.
> 
> Signed-off-by: Marek Vasut 

Reviewed-by: Andrew Lunn 

Andrew

[PATCH V3 06/10] net: dsa: microchip: Factor out register access opcode generation

2019-06-23 Thread Marek Vasut

Factor out the code which sends out the register read/write opcodes
to the switch, since the code differs in single bit between read and
write.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: New patch
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477_spi.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index a34e66eccbcd..49aeb92d36fc 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -25,19 +25,24 @@
 /* Enough to read all switch port registers. */
 #define SPI_TX_BUF_LEN 0x100
 
-static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
-   unsigned int len)
+static u32 ksz9477_spi_cmd(u32 reg, bool read)
 {
u32 txbuf;
-   int ret;
 
txbuf = reg & SPI_ADDR_MASK;
-   txbuf |= KS_SPIOP_RD << SPI_ADDR_SHIFT;
+   txbuf |= (read ? KS_SPIOP_RD : KS_SPIOP_WR) << SPI_ADDR_SHIFT;
txbuf <<= SPI_TURNAROUND_SHIFT;
txbuf = cpu_to_be32(txbuf);
 
-   ret = spi_write_then_read(spi, &txbuf, 4, val, len);
-   return ret;
+   return txbuf;
+}
+
+static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
+   unsigned int len)
+{
+   u32 txbuf = ksz9477_spi_cmd(reg, true);
+
+   return spi_write_then_read(spi, &txbuf, 4, val, len);
 }
 
 static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val,
@@ -45,10 +50,7 @@ static int ksz9477_spi_write_reg(struct spi_device *spi, u32 
reg, u8 *val,
 {
u32 *txbuf = (u32 *)val;
 
-   *txbuf = reg & SPI_ADDR_MASK;
-   *txbuf |= (KS_SPIOP_WR << SPI_ADDR_SHIFT);
-   *txbuf <<= SPI_TURNAROUND_SHIFT;
-   *txbuf = cpu_to_be32(*txbuf);
+   *txbuf = ksz9477_spi_cmd(reg, false);
 
return spi_write(spi, txbuf, 4 + len);
 }
-- 
2.20.1

[PATCH V3 09/10] net: dsa: microchip: Factor out regmap config generation into common header

2019-06-23 Thread Marek Vasut

The regmap config tables are rather similar for various generations of
the KSZ8xxx/KSZ9xxx switches. Introduce a macro which allows generating
those tables without duplication. Note that $regalign parameter is not
used right now, but will be used in KSZ87xx series switches.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: New patch
V3: - Rebase on next/master
- Test on KSZ9477EVB
- Increase regmap max register, to cover all switch registers
- Make register swabbing configurable, to allow handling switches
  with only 16bit registers as well as switches with some 32bit ones
---
 drivers/net/dsa/microchip/ksz9477_spi.c | 29 +++---
 drivers/net/dsa/microchip/ksz_common.h  | 32 +
 2 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index 8c8bf3237013..5a9e27b337a8 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -14,37 +14,14 @@
 #include 
 
 #include "ksz_priv.h"
+#include "ksz_common.h"
 
 #define SPI_ADDR_SHIFT 24
 #define SPI_ADDR_ALIGN 3
 #define SPI_TURNAROUND_SHIFT   5
 
-/* SPI frame opcodes */
-#define KS_SPIOP_RD3
-#define KS_SPIOP_WR2
-
-#define KS_SPIOP_FLAG_MASK(opcode) \
-   cpu_to_be32((opcode) << (SPI_ADDR_SHIFT + SPI_TURNAROUND_SHIFT))
-
-#define KSZ_REGMAP_COMMON(width)   \
-   {   \
-   .val_bits = (width),\
-   .reg_stride = (width) / 8,  \
-   .reg_bits = SPI_ADDR_SHIFT + SPI_ADDR_ALIGN,\
-   .pad_bits = SPI_TURNAROUND_SHIFT,   \
-   .max_register = BIT(SPI_ADDR_SHIFT) - 1,\
-   .cache_type = REGCACHE_NONE,\
-   .read_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_RD),  \
-   .write_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_WR), \
-   .reg_format_endian = REGMAP_ENDIAN_BIG, \
-   .val_format_endian = REGMAP_ENDIAN_BIG  \
-   }
-
-static const struct regmap_config ksz9477_regmap_config[] = {
-   KSZ_REGMAP_COMMON(8),
-   KSZ_REGMAP_COMMON(16),
-   KSZ_REGMAP_COMMON(32),
-};
+KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT,
+SPI_TURNAROUND_SHIFT, SPI_ADDR_ALIGN);
 
 static int ksz9477_spi_probe(struct spi_device *spi)
 {
diff --git a/drivers/net/dsa/microchip/ksz_common.h 
b/drivers/net/dsa/microchip/ksz_common.h
index c3871ed9b097..78b5ab7db403 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -133,4 +133,36 @@ static inline u32 ksz_pread32_poll(struct ksz_poll_ctx 
*ctx)
return data;
 }
 
+/* Regmap tables generation */
+#define KSZ_SPI_OP_RD  3
+#define KSZ_SPI_OP_WR  2
+
+#define KSZ_SPI_OP_FLAG_MASK(opcode, swp, regbits, regpad) \
+   cpu_to_be##swp((opcode) << ((regbits) + (regpad)))
+
+#define KSZ_REGMAP_ENTRY(width, swp, regbits, regpad, regalign)
\
+   {   \
+   .val_bits = (width),\
+   .reg_stride = (width) / 8,  \
+   .reg_bits = (regbits) + (regalign), \
+   .pad_bits = (regpad),   \
+   .max_register = BIT(regbits) - 1,   \
+   .cache_type = REGCACHE_NONE,\
+   .read_flag_mask =   \
+   KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_RD, swp,\
+regbits, regpad),  \
+   .write_flag_mask =  \
+   KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp,\
+regbits, regpad),  \
+   .reg_format_endian = REGMAP_ENDIAN_BIG, \
+   .val_format_endian = REGMAP_ENDIAN_BIG  \
+   }
+
+#define KSZ_REGMAP_TABLE(ksz, swp, regbits, regpad, regalign)  \
+   static const struct regmap_config ksz##_regmap_config[] = { \
+   KSZ_REGMAP_ENTRY(8, swp, (regbits), (regpad), (regalign)), \
+   KSZ_REGMAP_ENTRY(16, swp, (regbits), (regpad), (regalign)), \
+   KSZ_REGMAP_ENTRY(32, swp, (regbits), (regpad), (regalign)), \
+   }
+
 #endif
-- 
2.20.1

[PATCH V3 01/10] net: dsa: microchip: Remove ksz_{read,write}24()

2019-06-23 Thread Marek Vasut

These functions and callbacks are never used, remove them.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: No change
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477_spi.c | 25 -
 drivers/net/dsa/microchip/ksz_common.h  | 22 --
 drivers/net/dsa/microchip/ksz_priv.h|  2 --
 3 files changed, 49 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index 75178624d3f5..e7118319c192 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -73,37 +73,12 @@ static int ksz_spi_write(struct ksz_device *dev, u32 reg, 
void *data,
return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len);
 }
 
-static int ksz_spi_read24(struct ksz_device *dev, u32 reg, u32 *val)
-{
-   int ret;
-
-   *val = 0;
-   ret = ksz_spi_read(dev, reg, (u8 *)val, 3);
-   if (!ret) {
-   *val = be32_to_cpu(*val);
-   /* convert to 24bit */
-   *val >>= 8;
-   }
-
-   return ret;
-}
-
-static int ksz_spi_write24(struct ksz_device *dev, u32 reg, u32 value)
-{
-   /* make it to big endian 24bit from MSB */
-   value <<= 8;
-   value = cpu_to_be32(value);
-   return ksz_spi_write(dev, reg, &value, 3);
-}
-
 static const struct ksz_io_ops ksz9477_spi_ops = {
.read8 = ksz_spi_read8,
.read16 = ksz_spi_read16,
-   .read24 = ksz_spi_read24,
.read32 = ksz_spi_read32,
.write8 = ksz_spi_write8,
.write16 = ksz_spi_write16,
-   .write24 = ksz_spi_write24,
.write32 = ksz_spi_write32,
.get = ksz_spi_get,
.set = ksz_spi_set,
diff --git a/drivers/net/dsa/microchip/ksz_common.h 
b/drivers/net/dsa/microchip/ksz_common.h
index 21cd794e18f1..1781539c3a81 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -61,17 +61,6 @@ static inline int ksz_read16(struct ksz_device *dev, u32 
reg, u16 *val)
return ret;
 }
 
-static inline int ksz_read24(struct ksz_device *dev, u32 reg, u32 *val)
-{
-   int ret;
-
-   mutex_lock(&dev->reg_mutex);
-   ret = dev->ops->read24(dev, reg, val);
-   mutex_unlock(&dev->reg_mutex);
-
-   return ret;
-}
-
 static inline int ksz_read32(struct ksz_device *dev, u32 reg, u32 *val)
 {
int ret;
@@ -105,17 +94,6 @@ static inline int ksz_write16(struct ksz_device *dev, u32 
reg, u16 value)
return ret;
 }
 
-static inline int ksz_write24(struct ksz_device *dev, u32 reg, u32 value)
-{
-   int ret;
-
-   mutex_lock(&dev->reg_mutex);
-   ret = dev->ops->write24(dev, reg, value);
-   mutex_unlock(&dev->reg_mutex);
-
-   return ret;
-}
-
 static inline int ksz_write32(struct ksz_device *dev, u32 reg, u32 value)
 {
int ret;
diff --git a/drivers/net/dsa/microchip/ksz_priv.h 
b/drivers/net/dsa/microchip/ksz_priv.h
index c615d2a81dd5..5ef6153bd2cc 100644
--- a/drivers/net/dsa/microchip/ksz_priv.h
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -105,11 +105,9 @@ struct ksz_device {
 struct ksz_io_ops {
int (*read8)(struct ksz_device *dev, u32 reg, u8 *value);
int (*read16)(struct ksz_device *dev, u32 reg, u16 *value);
-   int (*read24)(struct ksz_device *dev, u32 reg, u32 *value);
int (*read32)(struct ksz_device *dev, u32 reg, u32 *value);
int (*write8)(struct ksz_device *dev, u32 reg, u8 value);
int (*write16)(struct ksz_device *dev, u32 reg, u16 value);
-   int (*write24)(struct ksz_device *dev, u32 reg, u32 value);
int (*write32)(struct ksz_device *dev, u32 reg, u32 value);
int (*get)(struct ksz_device *dev, u32 reg, void *data, size_t len);
int (*set)(struct ksz_device *dev, u32 reg, void *data, size_t len);
-- 
2.20.1

[PATCH V3 08/10] net: dsa: microchip: Dispose of ksz_io_ops

2019-06-23 Thread Marek Vasut

Since the driver now uses regmap , get rid of ad-hoc ksz_io_ops
abstraction, which no longer has any meaning. Moreover, since regmap
has it's own locking, get rid of the register access mutex.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: Use separate regmaps for 8/16/32bit registers
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477_spi.c | 57 +
 drivers/net/dsa/microchip/ksz_common.c  |  6 +--
 drivers/net/dsa/microchip/ksz_common.h  | 50 ++
 drivers/net/dsa/microchip/ksz_priv.h| 16 +--
 4 files changed, 17 insertions(+), 112 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index 77e3cb100eae..8c8bf3237013 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -46,67 +46,12 @@ static const struct regmap_config ksz9477_regmap_config[] = 
{
KSZ_REGMAP_COMMON(32),
 };
 
-static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
-{
-   unsigned int value;
-   int ret = regmap_read(dev->regmap, reg, &value);
-
-   *val = value;
-   return ret;
-}
-
-static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
-{
-   int ret = regmap_bulk_read(dev->regmap, reg, val, 2);
-
-   if (!ret)
-   *val = be16_to_cpu(*val);
-
-   return ret;
-}
-
-static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
-{
-   int ret = regmap_bulk_read(dev->regmap, reg, val, 4);
-
-   if (!ret)
-   *val = be32_to_cpu(*val);
-
-   return ret;
-}
-
-static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
-{
-   return regmap_write(dev->regmap, reg, value);
-}
-
-static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
-{
-   value = cpu_to_be16(value);
-   return regmap_bulk_write(dev->regmap, reg, &value, 2);
-}
-
-static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
-{
-   value = cpu_to_be32(value);
-   return regmap_bulk_write(dev->regmap, reg, &value, 4);
-}
-
-static const struct ksz_io_ops ksz9477_spi_ops = {
-   .read8 = ksz_spi_read8,
-   .read16 = ksz_spi_read16,
-   .read32 = ksz_spi_read32,
-   .write8 = ksz_spi_write8,
-   .write16 = ksz_spi_write16,
-   .write32 = ksz_spi_write32,
-};
-
 static int ksz9477_spi_probe(struct spi_device *spi)
 {
struct ksz_device *dev;
int i, ret;
 
-   dev = ksz_switch_alloc(&spi->dev, &ksz9477_spi_ops, spi);
+   dev = ksz_switch_alloc(&spi->dev, spi);
if (!dev)
return -ENOMEM;
 
diff --git a/drivers/net/dsa/microchip/ksz_common.c 
b/drivers/net/dsa/microchip/ksz_common.c
index 978c59aa8efb..a3d2d67894bd 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -396,9 +396,7 @@ void ksz_disable_port(struct dsa_switch *ds, int port)
 }
 EXPORT_SYMBOL_GPL(ksz_disable_port);
 
-struct ksz_device *ksz_switch_alloc(struct device *base,
-   const struct ksz_io_ops *ops,
-   void *priv)
+struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
 {
struct dsa_switch *ds;
struct ksz_device *swdev;
@@ -416,7 +414,6 @@ struct ksz_device *ksz_switch_alloc(struct device *base,
 
swdev->ds = ds;
swdev->priv = priv;
-   swdev->ops = ops;
 
return swdev;
 }
@@ -442,7 +439,6 @@ int ksz_switch_register(struct ksz_device *dev,
}
 
mutex_init(&dev->dev_mutex);
-   mutex_init(&dev->reg_mutex);
mutex_init(&dev->stats_mutex);
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
diff --git a/drivers/net/dsa/microchip/ksz_common.h 
b/drivers/net/dsa/microchip/ksz_common.h
index fe576a00facf..c3871ed9b097 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -7,6 +7,8 @@
 #ifndef __KSZ_COMMON_H
 #define __KSZ_COMMON_H
 
+#include 
+
 void ksz_port_cleanup(struct ksz_device *dev, int port);
 void ksz_update_port_member(struct ksz_device *dev, int port);
 void ksz_init_mib_timer(struct ksz_device *dev);
@@ -41,68 +43,44 @@ void ksz_disable_port(struct dsa_switch *ds, int port);
 
 static inline int ksz_read8(struct ksz_device *dev, u32 reg, u8 *val)
 {
-   int ret;
-
-   mutex_lock(&dev->reg_mutex);
-   ret = dev->ops->read8(dev, reg, val);
-   mutex_unlock(&dev->reg_mutex);
+   unsigned int value;
+   int ret = regmap_read(dev->regmap[0], reg, &value);
 
+   *val = value;
return ret;
 }
 
 static inline int ksz_read16(struct ksz_device *dev, u32 reg, u16 *val)
 {
-   int ret;
-
-   mutex_lock(&dev->reg_mutex);
-   ret = dev->ops->read16(dev, reg, val);
-   mutex_unlock(&dev->reg_mutex);
+   unsigned int value;
+

[PATCH V3 10/10] net: dsa: microchip: Replace ad-hoc bit manipulation with regmap

2019-06-23 Thread Marek Vasut

Regmap provides bit manipulation functions to set/clear bits, use those
insted of reimplementing them.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: New patch
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477.c | 46 -
 1 file changed, 6 insertions(+), 40 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477.c 
b/drivers/net/dsa/microchip/ksz9477.c
index 7d209fd9f26f..8f13dcc05a10 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -67,60 +67,26 @@ static const struct {
 
 static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
 {
-   u8 data;
-
-   ksz_read8(dev, addr, &data);
-   if (set)
-   data |= bits;
-   else
-   data &= ~bits;
-   ksz_write8(dev, addr, data);
+   regmap_update_bits(dev->regmap[0], addr, bits, set ? bits : 0);
 }
 
 static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
 bool set)
 {
-   u32 addr;
-   u8 data;
-
-   addr = PORT_CTRL_ADDR(port, offset);
-   ksz_read8(dev, addr, &data);
-
-   if (set)
-   data |= bits;
-   else
-   data &= ~bits;
-
-   ksz_write8(dev, addr, data);
+   regmap_update_bits(dev->regmap[0], PORT_CTRL_ADDR(port, offset),
+  bits, set ? bits : 0);
 }
 
 static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
 {
-   u32 data;
-
-   ksz_read32(dev, addr, &data);
-   if (set)
-   data |= bits;
-   else
-   data &= ~bits;
-   ksz_write32(dev, addr, data);
+   regmap_update_bits(dev->regmap[2], addr, bits, set ? bits : 0);
 }
 
 static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset,
   u32 bits, bool set)
 {
-   u32 addr;
-   u32 data;
-
-   addr = PORT_CTRL_ADDR(port, offset);
-   ksz_read32(dev, addr, &data);
-
-   if (set)
-   data |= bits;
-   else
-   data &= ~bits;
-
-   ksz_write32(dev, addr, data);
+   regmap_update_bits(dev->regmap[2], PORT_CTRL_ADDR(port, offset),
+  bits, set ? bits : 0);
 }
 
 static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev, u32 waiton,
-- 
2.20.1

[PATCH V3 07/10] net: dsa: microchip: Initial SPI regmap support

2019-06-23 Thread Marek Vasut

Add basic SPI regmap support into the driver.

Previous patches unconver that ksz_spi_write() is always ever called
with len = 1, 2 or 4. We can thus drop the if (len > SPI_TX_BUF_LEN)
check and we can also drop the allocation of the txbuf which is part
of the driver data and wastes 256 bytes for no reason. Regmap covers
the whole thing now.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: - Squash with "net: dsa: microchip: Remove dev->txbuf"
- Use separate regmaps for 8/16/32bit registers
- Increase the regmap size to 0xd00 to cover the entire register area
V3: - Rebase on next/master
- Test on KSZ9477EVB
- Increase regmap max register, to cover all switch registers
- Correct regmap reg_bits value to match the hardware
- Use cpu_to_be32() instead of cpu_to_le16() in register masks, since
  the KSZ9477 has some 32bit registers.
---
 drivers/net/dsa/microchip/Kconfig   |   1 +
 drivers/net/dsa/microchip/ksz9477_spi.c | 114 +++-
 drivers/net/dsa/microchip/ksz_priv.h|   3 +-
 3 files changed, 52 insertions(+), 66 deletions(-)

diff --git a/drivers/net/dsa/microchip/Kconfig 
b/drivers/net/dsa/microchip/Kconfig
index 2c3a6751bdaf..56790d544eb2 100644
--- a/drivers/net/dsa/microchip/Kconfig
+++ b/drivers/net/dsa/microchip/Kconfig
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config NET_DSA_MICROCHIP_KSZ_COMMON
+   select REGMAP_SPI
tristate
 
 menuconfig NET_DSA_MICROCHIP_KSZ9477
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index 49aeb92d36fc..77e3cb100eae 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -10,78 +10,54 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "ksz_priv.h"
 
-/* SPI frame opcodes */
-#define KS_SPIOP_RD3
-#define KS_SPIOP_WR2
-
 #define SPI_ADDR_SHIFT 24
-#define SPI_ADDR_MASK  (BIT(SPI_ADDR_SHIFT) - 1)
+#define SPI_ADDR_ALIGN 3
 #define SPI_TURNAROUND_SHIFT   5
 
-/* Enough to read all switch port registers. */
-#define SPI_TX_BUF_LEN 0x100
-
-static u32 ksz9477_spi_cmd(u32 reg, bool read)
-{
-   u32 txbuf;
-
-   txbuf = reg & SPI_ADDR_MASK;
-   txbuf |= (read ? KS_SPIOP_RD : KS_SPIOP_WR) << SPI_ADDR_SHIFT;
-   txbuf <<= SPI_TURNAROUND_SHIFT;
-   txbuf = cpu_to_be32(txbuf);
-
-   return txbuf;
-}
-
-static int ksz9477_spi_read_reg(struct spi_device *spi, u32 reg, u8 *val,
-   unsigned int len)
-{
-   u32 txbuf = ksz9477_spi_cmd(reg, true);
-
-   return spi_write_then_read(spi, &txbuf, 4, val, len);
-}
-
-static int ksz9477_spi_write_reg(struct spi_device *spi, u32 reg, u8 *val,
-unsigned int len)
-{
-   u32 *txbuf = (u32 *)val;
-
-   *txbuf = ksz9477_spi_cmd(reg, false);
-
-   return spi_write(spi, txbuf, 4 + len);
-}
-
-static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
-   unsigned int len)
-{
-   struct spi_device *spi = dev->priv;
-
-   return ksz9477_spi_read_reg(spi, reg, data, len);
-}
-
-static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data,
-unsigned int len)
-{
-   struct spi_device *spi = dev->priv;
+/* SPI frame opcodes */
+#define KS_SPIOP_RD3
+#define KS_SPIOP_WR2
 
-   if (len > SPI_TX_BUF_LEN)
-   len = SPI_TX_BUF_LEN;
-   memcpy(&dev->txbuf[4], data, len);
-   return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len);
-}
+#define KS_SPIOP_FLAG_MASK(opcode) \
+   cpu_to_be32((opcode) << (SPI_ADDR_SHIFT + SPI_TURNAROUND_SHIFT))
+
+#define KSZ_REGMAP_COMMON(width)   \
+   {   \
+   .val_bits = (width),\
+   .reg_stride = (width) / 8,  \
+   .reg_bits = SPI_ADDR_SHIFT + SPI_ADDR_ALIGN,\
+   .pad_bits = SPI_TURNAROUND_SHIFT,   \
+   .max_register = BIT(SPI_ADDR_SHIFT) - 1,\
+   .cache_type = REGCACHE_NONE,\
+   .read_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_RD),  \
+   .write_flag_mask = KS_SPIOP_FLAG_MASK(KS_SPIOP_WR), \
+   .reg_format_endian = REGMAP_ENDIAN_BIG, \
+   .val_format_endian = REGMAP_ENDIAN_BIG  \
+   }
+
+static const struct regmap_config ksz9477_regmap_config[] = {
+   KSZ_REGMAP_COMMON(8),
+   KSZ_REGMAP_COMMON(16),
+   KSZ_REGMAP_COMMON(32),
+};
 
 static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)

[PATCH V3 04/10] net: dsa: microchip: Move ksz_cfg and ksz_port_cfg to ksz9477.c

2019-06-23 Thread Marek Vasut

These functions are only used by the KSZ9477 code, move them from
the header into that code. Note that these functions will be soon
replaced by regmap equivalents.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: New patch
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477.c| 29 ++
 drivers/net/dsa/microchip/ksz_common.h | 29 --
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477.c 
b/drivers/net/dsa/microchip/ksz9477.c
index 508380f80875..e8b96566abd9 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -65,6 +65,35 @@ static const struct {
{ 0x83, "tx_discards" },
 };
 
+static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
+{
+   u8 data;
+
+   ksz_read8(dev, addr, &data);
+   if (set)
+   data |= bits;
+   else
+   data &= ~bits;
+   ksz_write8(dev, addr, data);
+}
+
+static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
+bool set)
+{
+   u32 addr;
+   u8 data;
+
+   addr = dev->dev_ops->get_port_addr(port, offset);
+   ksz_read8(dev, addr, &data);
+
+   if (set)
+   data |= bits;
+   else
+   data &= ~bits;
+
+   ksz_write8(dev, addr, data);
+}
+
 static void ksz9477_cfg32(struct ksz_device *dev, u32 addr, u32 bits, bool set)
 {
u32 data;
diff --git a/drivers/net/dsa/microchip/ksz_common.h 
b/drivers/net/dsa/microchip/ksz_common.h
index c15b49528bad..fe576a00facf 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -141,35 +141,6 @@ static inline void ksz_pwrite32(struct ksz_device *dev, 
int port, int offset,
ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data);
 }
 
-static void ksz_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set)
-{
-   u8 data;
-
-   ksz_read8(dev, addr, &data);
-   if (set)
-   data |= bits;
-   else
-   data &= ~bits;
-   ksz_write8(dev, addr, data);
-}
-
-static void ksz_port_cfg(struct ksz_device *dev, int port, int offset, u8 bits,
-bool set)
-{
-   u32 addr;
-   u8 data;
-
-   addr = dev->dev_ops->get_port_addr(port, offset);
-   ksz_read8(dev, addr, &data);
-
-   if (set)
-   data |= bits;
-   else
-   data &= ~bits;
-
-   ksz_write8(dev, addr, data);
-}
-
 struct ksz_poll_ctx {
struct ksz_device *dev;
int port;
-- 
2.20.1

[PATCH V3 00/10] net: dsa: microchip: Convert to regmap

2019-06-23 Thread Marek Vasut

This patchset converts KSZ9477 switch driver to regmap.

This was tested with extra patches on KSZ8795. This was also tested
on KSZ9477 on Microchip KSZ9477EVB board, which I now have.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 

Marek Vasut (10):
  net: dsa: microchip: Remove ksz_{read,write}24()
  net: dsa: microchip: Remove ksz_{get,set}()
  net: dsa: microchip: Inline ksz_spi.h
  net: dsa: microchip: Move ksz_cfg and ksz_port_cfg to ksz9477.c
  net: dsa: microchip: Use PORT_CTRL_ADDR() instead of indirect function
call
  net: dsa: microchip: Factor out register access opcode generation
  net: dsa: microchip: Initial SPI regmap support
  net: dsa: microchip: Dispose of ksz_io_ops
  net: dsa: microchip: Factor out regmap config generation into common
header
  net: dsa: microchip: Replace ad-hoc bit manipulation with regmap

 drivers/net/dsa/microchip/Kconfig   |   1 +
 drivers/net/dsa/microchip/ksz9477.c |  35 +++---
 drivers/net/dsa/microchip/ksz9477_spi.c | 114 +++--
 drivers/net/dsa/microchip/ksz_common.c  |   6 +-
 drivers/net/dsa/microchip/ksz_common.h  | 157 +++-
 drivers/net/dsa/microchip/ksz_priv.h|  23 +---
 drivers/net/dsa/microchip/ksz_spi.h |  69 ---
 7 files changed, 84 insertions(+), 321 deletions(-)
 delete mode 100644 drivers/net/dsa/microchip/ksz_spi.h

-- 
2.20.1

[PATCH V3 03/10] net: dsa: microchip: Inline ksz_spi.h

2019-06-23 Thread Marek Vasut

The functions in the header file are static, and the header file is
included from single C file, just inline the code into the C file.
The bonus is that it's easier to spot further content to clean up.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: No change
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477_spi.c | 43 +-
 drivers/net/dsa/microchip/ksz_spi.h | 59 -
 2 files changed, 42 insertions(+), 60 deletions(-)
 delete mode 100644 drivers/net/dsa/microchip/ksz_spi.h

diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index 86d12d48a2a9..a34e66eccbcd 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -13,7 +13,6 @@
 #include 
 
 #include "ksz_priv.h"
-#include "ksz_spi.h"
 
 /* SPI frame opcodes */
 #define KS_SPIOP_RD3
@@ -73,6 +72,48 @@ static int ksz_spi_write(struct ksz_device *dev, u32 reg, 
void *data,
return ksz9477_spi_write_reg(spi, reg, dev->txbuf, len);
 }
 
+static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
+{
+   return ksz_spi_read(dev, reg, val, 1);
+}
+
+static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
+{
+   int ret = ksz_spi_read(dev, reg, (u8 *)val, 2);
+
+   if (!ret)
+   *val = be16_to_cpu(*val);
+
+   return ret;
+}
+
+static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
+{
+   int ret = ksz_spi_read(dev, reg, (u8 *)val, 4);
+
+   if (!ret)
+   *val = be32_to_cpu(*val);
+
+   return ret;
+}
+
+static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
+{
+   return ksz_spi_write(dev, reg, &value, 1);
+}
+
+static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
+{
+   value = cpu_to_be16(value);
+   return ksz_spi_write(dev, reg, &value, 2);
+}
+
+static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
+{
+   value = cpu_to_be32(value);
+   return ksz_spi_write(dev, reg, &value, 4);
+}
+
 static const struct ksz_io_ops ksz9477_spi_ops = {
.read8 = ksz_spi_read8,
.read16 = ksz_spi_read16,
diff --git a/drivers/net/dsa/microchip/ksz_spi.h 
b/drivers/net/dsa/microchip/ksz_spi.h
deleted file mode 100644
index 976bace31f37..
--- a/drivers/net/dsa/microchip/ksz_spi.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Microchip KSZ series SPI access common header
- *
- * Copyright (C) 2017-2018 Microchip Technology Inc.
- * Tristram Ha 
- */
-
-#ifndef __KSZ_SPI_H
-#define __KSZ_SPI_H
-
-/* Chip dependent SPI access */
-static int ksz_spi_read(struct ksz_device *dev, u32 reg, u8 *data,
-   unsigned int len);
-static int ksz_spi_write(struct ksz_device *dev, u32 reg, void *data,
-unsigned int len);
-
-static int ksz_spi_read8(struct ksz_device *dev, u32 reg, u8 *val)
-{
-   return ksz_spi_read(dev, reg, val, 1);
-}
-
-static int ksz_spi_read16(struct ksz_device *dev, u32 reg, u16 *val)
-{
-   int ret = ksz_spi_read(dev, reg, (u8 *)val, 2);
-
-   if (!ret)
-   *val = be16_to_cpu(*val);
-
-   return ret;
-}
-
-static int ksz_spi_read32(struct ksz_device *dev, u32 reg, u32 *val)
-{
-   int ret = ksz_spi_read(dev, reg, (u8 *)val, 4);
-
-   if (!ret)
-   *val = be32_to_cpu(*val);
-
-   return ret;
-}
-
-static int ksz_spi_write8(struct ksz_device *dev, u32 reg, u8 value)
-{
-   return ksz_spi_write(dev, reg, &value, 1);
-}
-
-static int ksz_spi_write16(struct ksz_device *dev, u32 reg, u16 value)
-{
-   value = cpu_to_be16(value);
-   return ksz_spi_write(dev, reg, &value, 2);
-}
-
-static int ksz_spi_write32(struct ksz_device *dev, u32 reg, u32 value)
-{
-   value = cpu_to_be32(value);
-   return ksz_spi_write(dev, reg, &value, 4);
-}
-
-#endif
-- 
2.20.1

[PATCH V3 02/10] net: dsa: microchip: Remove ksz_{get,set}()

2019-06-23 Thread Marek Vasut

These functions and callbacks are never used, remove them.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: No change
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477_spi.c |  2 --
 drivers/net/dsa/microchip/ksz_common.h  | 24 
 drivers/net/dsa/microchip/ksz_priv.h|  2 --
 drivers/net/dsa/microchip/ksz_spi.h | 10 --
 4 files changed, 38 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c 
b/drivers/net/dsa/microchip/ksz9477_spi.c
index e7118319c192..86d12d48a2a9 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -80,8 +80,6 @@ static const struct ksz_io_ops ksz9477_spi_ops = {
.write8 = ksz_spi_write8,
.write16 = ksz_spi_write16,
.write32 = ksz_spi_write32,
-   .get = ksz_spi_get,
-   .set = ksz_spi_set,
 };
 
 static int ksz9477_spi_probe(struct spi_device *spi)
diff --git a/drivers/net/dsa/microchip/ksz_common.h 
b/drivers/net/dsa/microchip/ksz_common.h
index 1781539c3a81..c15b49528bad 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -105,30 +105,6 @@ static inline int ksz_write32(struct ksz_device *dev, u32 
reg, u32 value)
return ret;
 }
 
-static inline int ksz_get(struct ksz_device *dev, u32 reg, void *data,
- size_t len)
-{
-   int ret;
-
-   mutex_lock(&dev->reg_mutex);
-   ret = dev->ops->get(dev, reg, data, len);
-   mutex_unlock(&dev->reg_mutex);
-
-   return ret;
-}
-
-static inline int ksz_set(struct ksz_device *dev, u32 reg, void *data,
- size_t len)
-{
-   int ret;
-
-   mutex_lock(&dev->reg_mutex);
-   ret = dev->ops->set(dev, reg, data, len);
-   mutex_unlock(&dev->reg_mutex);
-
-   return ret;
-}
-
 static inline void ksz_pread8(struct ksz_device *dev, int port, int offset,
  u8 *data)
 {
diff --git a/drivers/net/dsa/microchip/ksz_priv.h 
b/drivers/net/dsa/microchip/ksz_priv.h
index 5ef6153bd2cc..d3ddf98156bb 100644
--- a/drivers/net/dsa/microchip/ksz_priv.h
+++ b/drivers/net/dsa/microchip/ksz_priv.h
@@ -109,8 +109,6 @@ struct ksz_io_ops {
int (*write8)(struct ksz_device *dev, u32 reg, u8 value);
int (*write16)(struct ksz_device *dev, u32 reg, u16 value);
int (*write32)(struct ksz_device *dev, u32 reg, u32 value);
-   int (*get)(struct ksz_device *dev, u32 reg, void *data, size_t len);
-   int (*set)(struct ksz_device *dev, u32 reg, void *data, size_t len);
 };
 
 struct alu_struct {
diff --git a/drivers/net/dsa/microchip/ksz_spi.h 
b/drivers/net/dsa/microchip/ksz_spi.h
index 427811bd60b3..976bace31f37 100644
--- a/drivers/net/dsa/microchip/ksz_spi.h
+++ b/drivers/net/dsa/microchip/ksz_spi.h
@@ -56,14 +56,4 @@ static int ksz_spi_write32(struct ksz_device *dev, u32 reg, 
u32 value)
return ksz_spi_write(dev, reg, &value, 4);
 }
 
-static int ksz_spi_get(struct ksz_device *dev, u32 reg, void *data, size_t len)
-{
-   return ksz_spi_read(dev, reg, data, len);
-}
-
-static int ksz_spi_set(struct ksz_device *dev, u32 reg, void *data, size_t len)
-{
-   return ksz_spi_write(dev, reg, data, len);
-}
-
 #endif
-- 
2.20.1

[PATCH V3 05/10] net: dsa: microchip: Use PORT_CTRL_ADDR() instead of indirect function call

2019-06-23 Thread Marek Vasut

The indirect function call to dev->dev_ops->get_port_addr() is expensive
especially if called for every single register access, and only returns
the value of PORT_CTRL_ADDR() macro. Use PORT_CTRL_ADDR() macro directly
instead.

Signed-off-by: Marek Vasut 
Cc: Andrew Lunn 
Cc: Florian Fainelli 
Cc: Tristram Ha 
Cc: Woojung Huh 
---
V2: New patch
V3: - Rebase on next/master
- Test on KSZ9477EVB
---
 drivers/net/dsa/microchip/ksz9477.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/microchip/ksz9477.c 
b/drivers/net/dsa/microchip/ksz9477.c
index e8b96566abd9..7d209fd9f26f 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -83,7 +83,7 @@ static void ksz_port_cfg(struct ksz_device *dev, int port, 
int offset, u8 bits,
u32 addr;
u8 data;
 
-   addr = dev->dev_ops->get_port_addr(port, offset);
+   addr = PORT_CTRL_ADDR(port, offset);
ksz_read8(dev, addr, &data);
 
if (set)
-- 
2.20.1

Re: [PATCH v2 0/3] fix bugs when enable route_localnet

2019-06-23 Thread David Ahern

On 6/22/19 6:46 AM, David Miller wrote:
> From: Zhiqiang Liu 
> Date: Sat, 22 Jun 2019 16:41:49 +0800
> 
>> Friendly ping ...
> 
> I'm not applying this patch series without someone reviewing it.
> 

I have stared at it a few times since the patches were sent and can not
find anything obviously wrong about it. The fallout seems limited to
users of route_localnet which I have to believe is small (I only know of
2 other users of 127/8 for non-loopback and those were almost 10 years ago).

Putting in net-next is the safest.

Re: [PATCH net-next 1/7] net: aquantia: replace internal driver version code with uts

2019-06-23 Thread Jakub Kicinski

On Sat, 22 Jun 2019 17:05:14 +0200, Andrew Lunn wrote:
> On Sat, Jun 22, 2019 at 01:45:12PM +, Igor Russkikh wrote:
> > As it was discussed some time previously, driver is better to
> > report kernel version string, as it in a best way identifies
> > the codebase.
> > 
> > Signed-off-by: Igor Russkikh   
> 
> Nice.

Indeed!

> Devlink has just gained something similar to ethtool -i. Maybe we
> should get the devlink core to also report the kernel version?

I don't think we have the driver version at all there, my usual
inclination being to not duplicate information across APIs.  Do we 
have non-hypothetical instances of users reporting ethtool -i without
uname output?  Admittedly I may work with above-average Linux-trained
engineers :S  Would it be okay to just get devlink user space to use
uname() to get the info?

[PATCH net] net/tls: fix page double free on TX cleanup

2019-06-23 Thread Jakub Kicinski

From: Dirk van der Merwe 

With commit 94850257cf0f ("tls: Fix tls_device handling of partial records")
a new path was introduced to cleanup partial records during sk_proto_close.
This path does not handle the SW KTLS tx_list cleanup.

This is unnecessary though since the free_resources calls for both
SW and offload paths will cleanup a partial record.

The visible effect is the following warning, but this bug also causes
a page double free.

WARNING: CPU: 7 PID: 4000 at net/core/stream.c:206 
sk_stream_kill_queues+0x103/0x110
RIP: 0010:sk_stream_kill_queues+0x103/0x110
RSP: 0018:b6df87e07bd0 EFLAGS: 00010206
RAX:  RBX: 8c21db4971c0 RCX: 0007
RDX: ffa0 RSI: 001d RDI: 8c21db497270
RBP: 8c21db497270 R08: 8c29f4748600 R09: 00010020001a
R10: b6df87e07aa0 R11: 9a445600 R12: 0007
R13:  R14: 8c21f03f2900 R15: 8c21f03b8df0
Call Trace:
 inet_csk_destroy_sock+0x55/0x100
 tcp_close+0x25d/0x400
 ? tcp_check_oom+0x120/0x120
 tls_sk_proto_close+0x127/0x1c0
 inet_release+0x3c/0x60
 __sock_release+0x3d/0xb0
 sock_close+0x11/0x20
 __fput+0xd8/0x210
 task_work_run+0x84/0xa0
 do_exit+0x2dc/0xb90
 ? release_sock+0x43/0x90
 do_group_exit+0x3a/0xa0
 get_signal+0x295/0x720
 do_signal+0x36/0x610
 ? SYSC_recvfrom+0x11d/0x130
 exit_to_usermode_loop+0x69/0xb0
 do_syscall_64+0x173/0x180
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2
RIP: 0033:0x7fe9b9abc10d
RSP: 002b:7fe9b19a1d48 EFLAGS: 0246 ORIG_RAX: 00ca
RAX: fe00 RBX: 0006 RCX: 7fe9b9abc10d
RDX: 0002 RSI: 0080 RDI: 7fe948003430
RBP: 7fe948003410 R08: 7fe948003430 R09: 
R10:  R11: 0246 R12: 5603739d9080
R13: 7fe9b9ab9f90 R14: 7fe948003430 R15: 

Fixes: 94850257cf0f ("tls: Fix tls_device handling of partial records")
Signed-off-by: Dirk van der Merwe 
Signed-off-by: Jakub Kicinski 
---
 include/net/tls.h  | 15 ---
 net/tls/tls_main.c |  3 ++-
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 4a55ce6a303f..53d96bca220d 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -373,21 +373,6 @@ static inline bool tls_is_partially_sent_record(struct 
tls_context *ctx)
return !!ctx->partially_sent_record;
 }
 
-static inline int tls_complete_pending_work(struct sock *sk,
-   struct tls_context *ctx,
-   int flags, long *timeo)
-{
-   int rc = 0;
-
-   if (unlikely(sk->sk_write_pending))
-   rc = wait_on_pending_writer(sk, timeo);
-
-   if (!rc && tls_is_partially_sent_record(ctx))
-   rc = tls_push_partial_record(sk, ctx, flags);
-
-   return rc;
-}
-
 static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 {
return tls_ctx->pending_open_record_frags;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index fc81ae18cc44..e2b69e805d46 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -279,7 +279,8 @@ static void tls_sk_proto_close(struct sock *sk, long 
timeout)
goto skip_tx_cleanup;
}
 
-   if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+   if (unlikely(sk->sk_write_pending) &&
+   !wait_on_pending_writer(sk, &timeo))
tls_handle_open_record(sk, 0);
 
/* We need these for tls_sw_fallback handling of other packets */
-- 
2.21.0

[net-next v2] tipc: add loopback device tracking

2019-06-23 Thread john . rutherford

Since node internal messages are passed directly to socket it is not
possible to observe this message exchange via tcpdump or wireshark.

We now remedy this by making it possible to clone such messages and send
the clones to the loopback interface.  The clones are dropped at reception
and have no functional role except making the traffic visible.

The feature is turned on/off by enabling/disabling the loopback "bearer"
"eth:lo".

Acked-by: Jon Maloy 
Signed-off-by: John Rutherford 
---
 net/tipc/bcast.c  |  4 +++-
 net/tipc/bearer.c | 67 +++
 net/tipc/bearer.h |  3 +++
 net/tipc/core.c   |  5 -
 net/tipc/core.h   | 12 ++
 net/tipc/node.c   |  1 +
 net/tipc/topsrv.c |  2 ++
 7 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 6c997d4..235331d 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -406,8 +406,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head 
*pkts,
rc = tipc_bcast_xmit(net, pkts, cong_link_cnt);
}
 
-   if (dests->local)
+   if (dests->local) {
+   tipc_loopback_trace(net, &localq);
tipc_sk_mcast_rcv(net, &localq, &inputq);
+   }
 exit:
/* This queue should normally be empty by now */
__skb_queue_purge(pkts);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2bed658..27b4fd7 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -836,6 +836,12 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct 
genl_info *info)
 
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
+   if (!strcmp(name, "eth:lo")) {
+   tipc_net(net)->loopback_trace = false;
+   pr_info("Disabled packet tracing on loopback interface\n");
+   return 0;
+   }
+
bearer = tipc_bearer_find(net, name);
if (!bearer)
return -EINVAL;
@@ -881,6 +887,12 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct 
genl_info *info)
 
bearer = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
 
+   if (!strcmp(bearer, "eth:lo")) {
+   tipc_net(net)->loopback_trace = true;
+   pr_info("Enabled packet tracing on loopback interface\n");
+   return 0;
+   }
+
if (attrs[TIPC_NLA_BEARER_DOMAIN])
domain = nla_get_u32(attrs[TIPC_NLA_BEARER_DOMAIN]);
 
@@ -1021,6 +1033,61 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct 
genl_info *info)
return err;
 }
 
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *xmitq)
+{
+   struct net_device *dev = net->loopback_dev;
+   struct sk_buff *skb, *_skb;
+   int exp;
+
+   skb_queue_walk(xmitq, _skb) {
+   skb = pskb_copy(_skb, GFP_ATOMIC);
+   if (!skb)
+   continue;
+   exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+   if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) {
+   kfree_skb(skb);
+   continue;
+   }
+   skb_reset_network_header(skb);
+   skb->dev = dev;
+   skb->protocol = htons(ETH_P_TIPC);
+   dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr,
+   dev->dev_addr, skb->len);
+   dev_queue_xmit(skb);
+   }
+}
+
+static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev,
+struct packet_type *pt, struct net_device *od)
+{
+   consume_skb(skb);
+   return NET_RX_SUCCESS;
+}
+
+int tipc_attach_loopback(struct net *net)
+{
+   struct net_device *dev = net->loopback_dev;
+   struct tipc_net *tn = tipc_net(net);
+
+   if (!dev)
+   return -ENODEV;
+   dev_hold(dev);
+   tn->loopback_pt.dev = dev;
+   tn->loopback_pt.type = htons(ETH_P_TIPC);
+   tn->loopback_pt.func = tipc_loopback_rcv_pkt;
+   tn->loopback_trace = false;
+   dev_add_pack(&tn->loopback_pt);
+   return 0;
+}
+
+void tipc_detach_loopback(struct net *net)
+{
+   struct tipc_net *tn = tipc_net(net);
+
+   dev_remove_pack(&tn->loopback_pt);
+   dev_put(net->loopback_dev);
+}
+
 static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
   struct tipc_media *media, int nlflags)
 {
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 7f4c569..ef7fad9 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -232,6 +232,9 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id,
  struct tipc_media_addr *dst);
 void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id,
 struct sk_buff_head *xmitq);
+void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *xmitq);
+int tipc_attach_loopback(struct net *net);
+void tipc_detach_loopback(struct net *net);
 
 /* check if device MTU is too low for ti

[net-next] tipc: fix missing indentation in source code

2019-06-23 Thread john . rutherford

Fix misalignment of policy statement in netlink.c due to automatic
spatch code transformation.

Fixes: 3b0f31f2b8c9 ("genetlink: make policy common to family")
Acked-by: Jon Maloy 
Signed-off-by: John Rutherford 
---
 net/tipc/netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 99bd166..d6165ad 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -261,7 +261,7 @@ struct genl_family tipc_genl_family __ro_after_init = {
.version= TIPC_GENL_V2_VERSION,
.hdrsize= 0,
.maxattr= TIPC_NLA_MAX,
-   .policy = tipc_nl_policy,
+   .policy = tipc_nl_policy,
.netnsok= true,
.module = THIS_MODULE,
.ops= tipc_genl_v2_ops,
-- 
2.11.0

75 matches

Mail list logo