date:20190627

Re: [PATCH bpf-next] virtio_net: add XDP meta data support in receive_small()

2019-06-27 Thread Yuya Kusakabe

>> This adds XDP meta data support to the code path receive_small().
>>
>> mrg_rxbuf=off is required on qemu, because receive_mergeable() still
>> doesn't support XDP meta data.
>
>
>What's the reason for this?

I didn't have enough time to add XDP meta data support to
receive_mergeable(). But I'll try it a little more. Please wait for the
next patch.

>>
>> Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
>> Signed-off-by: Yuya Kusakabe 
>
>
>Could you please cc virtio maintainer through get_maintainer.pl?
>
>Thanks

Sorry. I added them.

Thanks.

> ---
>   drivers/net/virtio_net.c | 10 --
>   1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4f3de0ac8b0b..14165c5edb7d 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -644,6 +644,7 @@ static struct sk_buff *receive_small(struct net_device 
> *dev,
>   unsigned int delta = 0;
>   struct page *xdp_page;
>   int err;
> + unsigned int metasize = 0;
>   
>   len -= vi->hdr_len;
>   stats->bytes += len;
> @@ -683,8 +684,8 @@ static struct sk_buff *receive_small(struct net_device 
> *dev,
>   
>   xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
>   xdp.data = xdp.data_hard_start + xdp_headroom;
> - xdp_set_data_meta_invalid(&xdp);
>   xdp.data_end = xdp.data + len;
> + xdp.data_meta = xdp.data;
>   xdp.rxq = &rq->xdp_rxq;
>   orig_data = xdp.data;
>   act = bpf_prog_run_xdp(xdp_prog, &xdp);
> @@ -695,9 +696,11 @@ static struct sk_buff *receive_small(struct net_device 
> *dev,
>   /* Recalculate length in case bpf program changed it */
>   delta = orig_data - xdp.data;
>   len = xdp.data_end - xdp.data;
> + metasize = xdp.data - xdp.data_meta;
>   break;
>   case XDP_TX:
>   stats->xdp_tx++;
> + xdp.data_meta = xdp.data;
>   xdpf = convert_to_xdp_frame(&xdp);
>   if (unlikely(!xdpf))
>   goto err_xdp;
> @@ -735,11 +738,14 @@ static struct sk_buff *receive_small(struct net_device 
> *dev,
>   }
>   skb_reserve(skb, headroom - delta);
>   skb_put(skb, len);
> - if (!delta) {
> + if (!delta && !metasize) {
>   buf += header_offset;
>   memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
>   } /* keep zeroed vnet hdr since packet was changed by bpf */
>   
> + if (metasize)
> + skb_metadata_set(skb, metasize);
> +
>   err:
>   return skb;
>

[PATCH bpf-next] virtio_net: add XDP meta data support

2019-06-27 Thread Yuya Kusakabe

This adds XDP meta data support to both receive_small() and
receive_mergeable().

Fixes: de8f3a83b0a0 ("bpf: add meta pointer for direct access")
Signed-off-by: Yuya Kusakabe 
---
 drivers/net/virtio_net.c | 40 +---
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4f3de0ac8b0b..e787657fc568 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -371,7 +371,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
   struct receive_queue *rq,
   struct page *page, unsigned int offset,
   unsigned int len, unsigned int truesize,
-  bool hdr_valid)
+  bool hdr_valid, unsigned int metasize)
 {
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -393,17 +393,25 @@ static struct sk_buff *page_to_skb(struct virtnet_info 
*vi,
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-   if (hdr_valid)
+   if (hdr_valid && !metasize)
memcpy(hdr, p, hdr_len);
 
len -= hdr_len;
offset += hdr_padded_len;
p += hdr_padded_len;
 
-   copy = len;
+   copy = len + metasize;
if (copy > skb_tailroom(skb))
copy = skb_tailroom(skb);
-   skb_put_data(skb, p, copy);
+
+   if (metasize) {
+   skb_put_data(skb, p - metasize, copy);
+   __skb_pull(skb, metasize);
+   skb_metadata_set(skb, metasize);
+   copy -= metasize;
+   } else {
+   skb_put_data(skb, p, copy);
+   }
 
len -= copy;
offset += copy;
@@ -644,6 +652,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
unsigned int delta = 0;
struct page *xdp_page;
int err;
+   unsigned int metasize = 0;
 
len -= vi->hdr_len;
stats->bytes += len;
@@ -683,8 +692,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
 
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
xdp.data = xdp.data_hard_start + xdp_headroom;
-   xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+   xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -695,9 +704,11 @@ static struct sk_buff *receive_small(struct net_device 
*dev,
/* Recalculate length in case bpf program changed it */
delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
+   metasize = xdp.data - xdp.data_meta;
break;
case XDP_TX:
stats->xdp_tx++;
+   xdp.data_meta = xdp.data;
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
@@ -735,11 +746,14 @@ static struct sk_buff *receive_small(struct net_device 
*dev,
}
skb_reserve(skb, headroom - delta);
skb_put(skb, len);
-   if (!delta) {
+   if (!delta && !metasize) {
buf += header_offset;
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
} /* keep zeroed vnet hdr since packet was changed by bpf */
 
+   if (metasize)
+   skb_metadata_set(skb, metasize);
+
 err:
return skb;
 
@@ -761,7 +775,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 {
struct page *page = buf;
struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
- PAGE_SIZE, true);
+ PAGE_SIZE, true, 0);
 
stats->bytes += len - vi->hdr_len;
if (unlikely(!skb))
@@ -793,6 +807,7 @@ static struct sk_buff *receive_mergeable(struct net_device 
*dev,
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
int err;
+   unsigned int metasize = 0;
 
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
@@ -839,8 +854,8 @@ static struct sk_buff *receive_mergeable(struct net_device 
*dev,
data = page_address(xdp_page) + offset;
xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
xdp.data = data + vi->hdr_len;
-   xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + (len - vi->hdr_len);
+   xdp.data_meta = xdp.data;
xdp.rxq = &rq->xdp_rxq;
 
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -859,18 +874,20 @@ static struct sk_buff *receive_mergeable(struct 
net_device *dev,
 *

[PATCH net-next v3 0/4] em_ipt: add support for addrtype

2019-06-27 Thread Nikolay Aleksandrov

Hi,
We would like to be able to use the addrtype from tc for ACL rules and
em_ipt seems the best place to add support for the already existing xt
match. The biggest issue is that addrtype revision 1 (with ipv6 support)
is NFPROTO_UNSPEC and currently em_ipt can't differentiate between v4/v6
if such xt match is used because it passes the match's family instead of
the packet one. The first 3 patches make em_ipt match only on IP
traffic (currently both policy and addrtype recognize such traffic
only) and make it pass the actual packet's protocol instead of the xt
match family when it's unspecified. They also add support for NFPROTO_UNSPEC
xt matches. The last patch allows to add addrtype rules via em_ipt.
We need to keep the user-specified nfproto for dumping in order to be
compatible with libxtables, we cannot dump NFPROTO_UNSPEC as the nfproto
or we'll get an error from libxtables, thus the nfproto is limited to
ipv4/ipv6 in patch 03 and is recorded.

v3: don't use the user nfproto for matching, only for dumping, more
information is available in the commit message in patch 03
v2: change patch 02 to set the nfproto only when unspecified and drop
patch 04 from v1 (Eyal Birger)

Thank you,
  Nikolay Aleksandrov


Nikolay Aleksandrov (4):
  net: sched: em_ipt: match only on ip/ipv6 traffic
  net: sched: em_ipt: set the family based on the packet if it's
unspecified
  net: sched: em_ipt: keep the user-specified nfproto and dump it
  net: sched: em_ipt: add support for addrtype matching

 net/sched/em_ipt.c | 48 --
 1 file changed, 46 insertions(+), 2 deletions(-)

-- 
2.21.0

[PATCH net-next v3 1/4] net: sched: em_ipt: match only on ip/ipv6 traffic

2019-06-27 Thread Nikolay Aleksandrov

Restrict matching only to ip/ipv6 traffic and make sure we can use the
headers, otherwise matches will be attempted on any protocol which can
be unexpected by the xt matches. Currently policy supports only ipv4/6.

Signed-off-by: Nikolay Aleksandrov 
---
v3: no change
v2: no change

 net/sched/em_ipt.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index 243fd22f2248..64dbafe4e94c 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -185,6 +185,19 @@ static int em_ipt_match(struct sk_buff *skb, struct 
tcf_ematch *em,
struct nf_hook_state state;
int ret;
 
+   switch (tc_skb_protocol(skb)) {
+   case htons(ETH_P_IP):
+   if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+   return 0;
+   break;
+   case htons(ETH_P_IPV6):
+   if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+   return 0;
+   break;
+   default:
+   return 0;
+   }
+
rcu_read_lock();
 
if (skb->skb_iif)
-- 
2.21.0

[PATCH net-next v3 2/4] net: sched: em_ipt: set the family based on the packet if it's unspecified

2019-06-27 Thread Nikolay Aleksandrov

Set the family based on the packet if it's unspecified otherwise
protocol-neutral matches will have wrong information (e.g. NFPROTO_UNSPEC).
In preparation for using NFPROTO_UNSPEC xt matches.

v2: set the nfproto only when unspecified

Suggested-by: Eyal Birger 
Signed-off-by: Nikolay Aleksandrov 
---
v3: no change

 net/sched/em_ipt.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index 64dbafe4e94c..fd7f5b288c31 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -182,6 +182,7 @@ static int em_ipt_match(struct sk_buff *skb, struct 
tcf_ematch *em,
const struct em_ipt_match *im = (const void *)em->data;
struct xt_action_param acpar = {};
struct net_device *indev = NULL;
+   u8 nfproto = im->match->family;
struct nf_hook_state state;
int ret;
 
@@ -189,10 +190,14 @@ static int em_ipt_match(struct sk_buff *skb, struct 
tcf_ematch *em,
case htons(ETH_P_IP):
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
return 0;
+   if (nfproto == NFPROTO_UNSPEC)
+   nfproto = NFPROTO_IPV4;
break;
case htons(ETH_P_IPV6):
if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
return 0;
+   if (nfproto == NFPROTO_UNSPEC)
+   nfproto = NFPROTO_IPV6;
break;
default:
return 0;
@@ -203,7 +208,7 @@ static int em_ipt_match(struct sk_buff *skb, struct 
tcf_ematch *em,
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
 
-   nf_hook_state_init(&state, im->hook, im->match->family,
+   nf_hook_state_init(&state, im->hook, nfproto,
   indev ?: skb->dev, skb->dev, NULL, em->net, NULL);
 
acpar.match = im->match;
-- 
2.21.0

[PATCH net-next v3 4/4] net: sched: em_ipt: add support for addrtype matching

2019-06-27 Thread Nikolay Aleksandrov

Allow em_ipt to use addrtype for matching. Restrict the use only to
revision 1 which has IPv6 support. Since it's a NFPROTO_UNSPEC xt match
we use the user-specified nfproto for matching, in case it's unspecified
both v4/v6 will be matched by the rule.

v2: no changes, was patch 5 in v1

Signed-off-by: Nikolay Aleksandrov 
---
v3: no changes

 net/sched/em_ipt.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index 3c356d6f719a..9fff6480acc6 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -72,11 +72,25 @@ static int policy_validate_match_data(struct nlattr **tb, 
u8 mrev)
return 0;
 }
 
+static int addrtype_validate_match_data(struct nlattr **tb, u8 mrev)
+{
+   if (mrev != 1) {
+   pr_err("only addrtype match revision 1 supported");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
 static const struct em_ipt_xt_match em_ipt_xt_matches[] = {
{
.match_name = "policy",
.validate_match_data = policy_validate_match_data
},
+   {
+   .match_name = "addrtype",
+   .validate_match_data = addrtype_validate_match_data
+   },
{}
 };
 
-- 
2.21.0

[PATCH net-next v3 3/4] net: sched: em_ipt: keep the user-specified nfproto and dump it

2019-06-27 Thread Nikolay Aleksandrov

If we dump NFPROTO_UNSPEC as nfproto user-space libxtables can't handle
it and would exit with an error like:
"libxtables: unhandled NFPROTO in xtables_set_nfproto"
In order to avoid the error return the user-specified nfproto. If we
don't record it then the match family is used which can be
NFPROTO_UNSPEC. Even if we add support to mask NFPROTO_UNSPEC in
iproute2 we have to be compatible with older versions which would be
also allowed to add NFPROTO_UNSPEC matches (e.g. addrtype after the
last patch).

v3: don't use the user nfproto for matching, only for dumping the rule,
also don't allow the nfproto to be unspecified (explained above)
v2: adjust changes to missing patch, was patch 04 in v1

Signed-off-by: Nikolay Aleksandrov 
---
Unfortunately we still have to save the user-nfproto for dumping
otherwise we'll break user-space because it can add a rule which it
won't be able to dump later and in fact will terminate the whole dump.
I also thought about masking it but that seems more hacky, I'd prefer
to return an expected value which was passed when the rule was created.

 net/sched/em_ipt.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index fd7f5b288c31..3c356d6f719a 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -21,6 +21,7 @@
 struct em_ipt_match {
const struct xt_match *match;
u32 hook;
+   u8 nfproto;
u8 match_data[0] __aligned(8);
 };
 
@@ -115,6 +116,7 @@ static int em_ipt_change(struct net *net, void *data, int 
data_len,
struct em_ipt_match *im = NULL;
struct xt_match *match;
int mdata_len, ret;
+   u8 nfproto;
 
ret = nla_parse_deprecated(tb, TCA_EM_IPT_MAX, data, data_len,
   em_ipt_policy, NULL);
@@ -125,6 +127,15 @@ static int em_ipt_change(struct net *net, void *data, int 
data_len,
!tb[TCA_EM_IPT_MATCH_DATA] || !tb[TCA_EM_IPT_NFPROTO])
return -EINVAL;
 
+   nfproto = nla_get_u8(tb[TCA_EM_IPT_NFPROTO]);
+   switch (nfproto) {
+   case NFPROTO_IPV4:
+   case NFPROTO_IPV6:
+   break;
+   default:
+   return -EINVAL;
+   }
+
match = get_xt_match(tb);
if (IS_ERR(match)) {
pr_err("unable to load match\n");
@@ -140,6 +151,7 @@ static int em_ipt_change(struct net *net, void *data, int 
data_len,
 
im->match = match;
im->hook = nla_get_u32(tb[TCA_EM_IPT_HOOK]);
+   im->nfproto = nfproto;
nla_memcpy(im->match_data, tb[TCA_EM_IPT_MATCH_DATA], mdata_len);
 
ret = check_match(net, im, mdata_len);
@@ -231,7 +243,7 @@ static int em_ipt_dump(struct sk_buff *skb, struct 
tcf_ematch *em)
return -EMSGSIZE;
if (nla_put_u8(skb, TCA_EM_IPT_MATCH_REVISION, im->match->revision) < 0)
return -EMSGSIZE;
-   if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->match->family) < 0)
+   if (nla_put_u8(skb, TCA_EM_IPT_NFPROTO, im->nfproto) < 0)
return -EMSGSIZE;
if (nla_put(skb, TCA_EM_IPT_MATCH_DATA,
im->match->usersize ?: im->match->matchsize,
-- 
2.21.0

[PATCH net] igmp: fix memory leak in igmpv3_del_delrec()

2019-06-27 Thread Eric Dumazet

im->tomb and/or im->sources might not be NULL, but we
currently overwrite their values blindly.

Using swap() will make sure the following call to kfree_pmc(pmc)
will properly free the psf structures.

Tested with the C repro provided by syzbot, which basically does :

 socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 3
 setsockopt(3, SOL_IP, IP_ADD_MEMBERSHIP, "\340\0\0\2\177\0\0\1\0\0\0\0", 12) = 0
 ioctl(3, SIOCSIFFLAGS, {ifr_name="lo", ifr_flags=0}) = 0
 setsockopt(3, SOL_IP, IP_MSFILTER, 
"\340\0\0\2\177\0\0\1\1\0\0\0\1\0\0\0\377\377\377\377", 20) = 0
 ioctl(3, SIOCSIFFLAGS, {ifr_name="lo", ifr_flags=IFF_UP}) = 0
 exit_group(0)= ?

BUG: memory leak
unreferenced object 0x88811450f140 (size 64):
  comm "softirq", pid 0, jiffies 4294942448 (age 32.070s)
  hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 ff ff ff ff 00 00 00 00  
00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00  
  backtrace:
[] kmemleak_alloc_recursive include/linux/kmemleak.h:43 
[inline]
[] slab_post_alloc_hook mm/slab.h:439 [inline]
[] slab_alloc mm/slab.c:3326 [inline]
[] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553
[<9acc4151>] kmalloc include/linux/slab.h:547 [inline]
[<9acc4151>] kzalloc include/linux/slab.h:742 [inline]
[<9acc4151>] ip_mc_add1_src net/ipv4/igmp.c:1976 [inline]
[<9acc4151>] ip_mc_add_src+0x36b/0x400 net/ipv4/igmp.c:2100
[<4ac14566>] ip_mc_msfilter+0x22d/0x310 net/ipv4/igmp.c:2484
[<52d8f995>] do_ip_setsockopt.isra.0+0x1795/0x1930 
net/ipv4/ip_sockglue.c:959
[<4ee1e21f>] ip_setsockopt+0x3b/0xb0 net/ipv4/ip_sockglue.c:1248
[<66cdfe74>] udp_setsockopt+0x4e/0x90 net/ipv4/udp.c:2618
[<9383a786>] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3126
[] __sys_setsockopt+0x98/0x120 net/socket.c:2072
[<1b1e9666>] __do_sys_setsockopt net/socket.c:2083 [inline]
[<1b1e9666>] __se_sys_setsockopt net/socket.c:2080 [inline]
[<1b1e9666>] __x64_sys_setsockopt+0x26/0x30 net/socket.c:2080
[<420d395e>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:301
[<7fd83a4b>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info when set link 
down")
Signed-off-by: Eric Dumazet 
Cc: Hangbin Liu 
Reported-by: syzbot+6ca1abd0db68b5173...@syzkaller.appspotmail.com
---
 net/ipv4/igmp.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 
a57f0d69eadb9bdcc4b2c4a82819d2dce44bf428..85107bf812f228ae34e767b2e440aec4776fbe6c
 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1228,12 +1228,8 @@ static void igmpv3_del_delrec(struct in_device *in_dev, 
struct ip_mc_list *im)
if (pmc) {
im->interface = pmc->interface;
if (im->sfmode == MCAST_INCLUDE) {
-   im->tomb = pmc->tomb;
-   pmc->tomb = NULL;
-
-   im->sources = pmc->sources;
-   pmc->sources = NULL;
-
+   swap(im->tomb, pmc->tomb);
+   swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
psf->sf_crcount = in_dev->mr_qrv ?: 
net->ipv4.sysctl_igmp_qrv;
} else {
-- 
2.22.0.410.gd8fdbe21b5-goog

Re: [RFC, PATCH 1/2, net-next] net: netsec: Use page_pool API

2019-06-27 Thread Jesper Dangaard Brouer

On Tue, 25 Jun 2019 18:06:18 +0300
Ilias Apalodimas  wrote:

> @@ -1059,7 +1059,23 @@ static void netsec_setup_tx_dring(struct netsec_priv 
> *priv)
>  static int netsec_setup_rx_dring(struct netsec_priv *priv)
>  {
>   struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> - int i;
> + struct page_pool_params pp_params = { 0 };
> + int i, err;
> +
> + pp_params.order = 0;
> + /* internal DMA mapping in page_pool */
> + pp_params.flags = PP_FLAG_DMA_MAP;
> + pp_params.pool_size = DESC_NUM;
> + pp_params.nid = cpu_to_node(0);
> + pp_params.dev = priv->dev;
> + pp_params.dma_dir = DMA_FROM_DEVICE;

I was going to complain about this DMA_FROM_DEVICE, until I noticed
that in next patch you have:

 pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

Making a note here to help other reviewers.

> + dring->page_pool = page_pool_create(&pp_params);
> + if (IS_ERR(dring->page_pool)) {
> + err = PTR_ERR(dring->page_pool);
> + dring->page_pool = NULL;
> + goto err_out;
> + }
>  


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Re: [RFC, PATCH 1/2, net-next] net: netsec: Use page_pool API

2019-06-27 Thread Ilias Apalodimas

Hi Jesper,

> On Tue, 25 Jun 2019 18:06:18 +0300
> Ilias Apalodimas  wrote:
> 
> > @@ -1059,7 +1059,23 @@ static void netsec_setup_tx_dring(struct netsec_priv 
> > *priv)
> >  static int netsec_setup_rx_dring(struct netsec_priv *priv)
> >  {
> > struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > -   int i;
> > +   struct page_pool_params pp_params = { 0 };
> > +   int i, err;
> > +
> > +   pp_params.order = 0;
> > +   /* internal DMA mapping in page_pool */
> > +   pp_params.flags = PP_FLAG_DMA_MAP;
> > +   pp_params.pool_size = DESC_NUM;
> > +   pp_params.nid = cpu_to_node(0);
> > +   pp_params.dev = priv->dev;
> > +   pp_params.dma_dir = DMA_FROM_DEVICE;
> 
> I was going to complain about this DMA_FROM_DEVICE, until I noticed
> that in next patch you have:
> 
>  pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
True. Since the first patch only adds page_pool support, I wanted to be clear
that DMA_BIDIRECTIONAL is only needed for XDP use cases (and especially XDP_TX)

> 
> Making a note here to help other reviewers.
Thanks

> 
> > +   dring->page_pool = page_pool_create(&pp_params);
> > +   if (IS_ERR(dring->page_pool)) {
> > +   err = PTR_ERR(dring->page_pool);
> > +   dring->page_pool = NULL;
> > +   goto err_out;
> > +   }
> >  

Cheers
/Ilias

[RFC] longer netdev names proposal

2019-06-27 Thread Jiri Pirko

Hi all.

In the past, the IFNAMSIZ (16) limit for netdevice name length has been
discussed repeatedly. Now that we have PF and VF representors
with port names like "pfXvfY", it has become quite common to hit this limit:
0123456789012345
enp131s0f1npf0vf6
enp131s0f1npf0vf22

Since IFLA_NAME is just a string, I thought it might be possible to use
it to carry longer names as it is. However, the userspace tools, like
iproute2, do checks before printing. So, for example, in the output of
"ip addr", when IFLA_NAME is longer than IFNAMSIZ, the netdevice is
skipped completely.

So here is a proposal that might work:
1) Add a new attribute IFLA_NAME_EXT that could carry names longer than
   IFNAMSIZ, say 64 bytes. The max size should be defined only in the kernel;
   user should be prepared for any string size.
2) Add a file in sysfs that would indicate that NAME_EXT is supported by
   the kernel.
3) Udev is going to look for the sysfs indication file. In case when
   kernel supports long names, it will do rename to longer name, setting
   IFLA_NAME_EXT. If not, it does what it does now - fail.
4) There are two cases that can happen during rename:
   A) The name is shorter than IFNAMSIZ
  -> both IFLA_NAME and IFLA_NAME_EXT would contain the same string:
 original IFLA_NAME = eth0
 original IFLA_NAME_EXT = eth0
 renamed  IFLA_NAME = enp5s0f1npf0vf1
 renamed  IFLA_NAME_EXT = enp5s0f1npf0vf1
   B) The name is longer than IFNAMSIZ
  -> IFLA_NAME would contain the original one, IFLA_NAME_EXT would 
 contain the new one:
 original IFLA_NAME = eth0
 original IFLA_NAME_EXT = eth0
 renamed  IFLA_NAME = eth0
 renamed  IFLA_NAME_EXT = enp131s0f1npf0vf22

This would allow the old tools to work with "eth0" and the new
tools would work with "enp131s0f1npf0vf22". In sysfs, there would
be symlink from one name to another.
  
Also, there might be a warning added to kernel if someone works
with IFLA_NAME that the userspace tool should be upgraded.

Eventually, only IFLA_NAME_EXT is going to be used by everyone.

I'm aware there are other places where similar new attribute
would have to be introduced too (ip rule for example).
I'm not saying this is a simple work.

Question is what to do with the ioctl api (get ifindex etc). I would
probably leave it as is and push tools to use rtnetlink instead.

Any ideas why this would not work? Any ideas how to solve this
differently?

Thanks!

Jiri

Re: [PATCH net] igmp: fix memory leak in igmpv3_del_delrec()

2019-06-27 Thread Hangbin Liu

On Thu, Jun 27, 2019 at 01:27:01AM -0700, Eric Dumazet wrote:
> im->tomb and/or im->sources might not be NULL, but we
> currently overwrite their values blindly.
> 
> Using swap() will make sure the following call to kfree_pmc(pmc)
> will properly free the psf structures.
> 
> Tested with the C repro provided by syzbot, which basically does :
> 
>  socket(PF_INET, SOCK_DGRAM, IPPROTO_IP) = 3
>  setsockopt(3, SOL_IP, IP_ADD_MEMBERSHIP, "\340\0\0\2\177\0\0\1\0\0\0\0", 12) 
> = 0
>  ioctl(3, SIOCSIFFLAGS, {ifr_name="lo", ifr_flags=0}) = 0
>  setsockopt(3, SOL_IP, IP_MSFILTER, 
> "\340\0\0\2\177\0\0\1\1\0\0\0\1\0\0\0\377\377\377\377", 20) = 0
>  ioctl(3, SIOCSIFFLAGS, {ifr_name="lo", ifr_flags=IFF_UP}) = 0
>  exit_group(0)= ?
> 
> BUG: memory leak
> unreferenced object 0x88811450f140 (size 64):
>   comm "softirq", pid 0, jiffies 4294942448 (age 32.070s)
>   hex dump (first 32 bytes):
> 00 00 00 00 00 00 00 00 ff ff ff ff 00 00 00 00  
> 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00  
>   backtrace:
> [] kmemleak_alloc_recursive include/linux/kmemleak.h:43 
> [inline]
> [] slab_post_alloc_hook mm/slab.h:439 [inline]
> [] slab_alloc mm/slab.c:3326 [inline]
> [] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553
> [<9acc4151>] kmalloc include/linux/slab.h:547 [inline]
> [<9acc4151>] kzalloc include/linux/slab.h:742 [inline]
> [<9acc4151>] ip_mc_add1_src net/ipv4/igmp.c:1976 [inline]
> [<9acc4151>] ip_mc_add_src+0x36b/0x400 net/ipv4/igmp.c:2100
> [<4ac14566>] ip_mc_msfilter+0x22d/0x310 net/ipv4/igmp.c:2484
> [<52d8f995>] do_ip_setsockopt.isra.0+0x1795/0x1930 
> net/ipv4/ip_sockglue.c:959
> [<4ee1e21f>] ip_setsockopt+0x3b/0xb0 net/ipv4/ip_sockglue.c:1248
> [<66cdfe74>] udp_setsockopt+0x4e/0x90 net/ipv4/udp.c:2618
> [<9383a786>] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3126
> [] __sys_setsockopt+0x98/0x120 net/socket.c:2072
> [<1b1e9666>] __do_sys_setsockopt net/socket.c:2083 [inline]
> [<1b1e9666>] __se_sys_setsockopt net/socket.c:2080 [inline]
> [<1b1e9666>] __x64_sys_setsockopt+0x26/0x30 net/socket.c:2080
> [<420d395e>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:301
> [<7fd83a4b>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info when set link 
> down")
> Signed-off-by: Eric Dumazet 
> Cc: Hangbin Liu 
> Reported-by: syzbot+6ca1abd0db68b5173...@syzkaller.appspotmail.com
> ---

Hi Eric,

Thanks for the fixup.

Cheers
Hangbin

Re: [PATCH net-next v3 0/4] em_ipt: add support for addrtype

2019-06-27 Thread Eyal Birger

On Thu, 27 Jun 2019 11:10:43 +0300
Nikolay Aleksandrov  wrote:

> Hi,
> We would like to be able to use the addrtype from tc for ACL rules and
> em_ipt seems the best place to add support for the already existing xt
> match. The biggest issue is that addrtype revision 1 (with ipv6
> support) is NFPROTO_UNSPEC and currently em_ipt can't differentiate
> between v4/v6 if such xt match is used because it passes the match's
> family instead of the packet one. The first 3 patches make em_ipt
> match only on IP traffic (currently both policy and addrtype
> recognize such traffic only) and make it pass the actual packet's
> protocol instead of the xt match family when it's unspecified. They
> also add support for NFPROTO_UNSPEC xt matches. The last patch allows
> to add addrtype rules via em_ipt. We need to keep the user-specified
> nfproto for dumping in order to be compatible with libxtables, we
> cannot dump NFPROTO_UNSPEC as the nfproto or we'll get an error from
> libxtables, thus the nfproto is limited to ipv4/ipv6 in patch 03 and
> is recorded.
> 
> v3: don't use the user nfproto for matching, only for dumping, more
> information is available in the commit message in patch 03
> v2: change patch 02 to set the nfproto only when unspecified and drop
> patch 04 from v1 (Eyal Birger)
> 
> Thank you,
>   Nikolay Aleksandrov
> 
> 
> Nikolay Aleksandrov (4):
>   net: sched: em_ipt: match only on ip/ipv6 traffic
>   net: sched: em_ipt: set the family based on the packet if it's
> unspecified
>   net: sched: em_ipt: keep the user-specified nfproto and dump it
>   net: sched: em_ipt: add support for addrtype matching
> 
>  net/sched/em_ipt.c | 48
> -- 1 file changed, 46
> insertions(+), 2 deletions(-)
> 

Looks great! thanks for adding this!

For the series:

Acked-by: Eyal Birger

RE: [EXT] [PATCH net-next 07/16] qlge: Deduplicate rx buffer queue management

2019-06-27 Thread Manish Chopra

>   while (curr_idx != clean_idx) {
> - lbq_desc = &rx_ring->lbq[curr_idx];
> + struct qlge_bq_desc *lbq_desc = &rx_ring-
> >lbq.queue[curr_idx];
> 
>   if (lbq_desc->p.pg_chunk.offset == last_offset)
> - pci_unmap_page(qdev->pdev, lbq_desc-
> >p.pg_chunk.map,
> + pci_unmap_page(qdev->pdev, lbq_desc->dma_addr,
>  ql_lbq_block_size(qdev),
>  PCI_DMA_FROMDEVICE);

In this patch, lbq_desc->dma_addr points to an offset within the page, so
unmapping is broken within this patch.
It would have been nicer to fix this in the same patch, although it may have
been taken care of in the next patches.

RE: [EXT] [PATCH V3] bnx2x: Prevent ptp_task to be rescheduled indefinitely

2019-06-27 Thread Sudarsana Reddy Kalluru



> -Original Message-
> From: Guilherme G. Piccoli 
> Sent: Thursday, June 27, 2019 1:49 AM
> To: GR-everest-linux-l2 ;
> netdev@vger.kernel.org; Sudarsana Reddy Kalluru 
> Cc: Ariel Elior ; gpicc...@canonical.com;
> jay.vosbu...@canonical.com
> Subject: [EXT] [PATCH V3] bnx2x: Prevent ptp_task to be rescheduled
> indefinitely
> 
> External Email
> 
> --
> Currently bnx2x ptp worker tries to read a register with timestamp
> information in case of TX packet timestamping and in case it fails, the 
> routine
> reschedules itself indefinitely. This was reported as a kworker always at 100%
> of CPU usage, which was narrowed down to be bnx2x ptp_task.
> 
> By following the ioctl handler, we could narrow down the problem to an NTP
> tool (chrony) requesting HW timestamping from bnx2x NIC with RX filter
> zeroed; this isn't reproducible for example with ptp4l (from linuxptp) since
> this tool requests a supported RX filter.
> It seems NIC FW timestamp mechanism cannot work well with
> RX_FILTER_NONE - driver's PTP filter init routine skips a register write to 
> the
> adapter if there's not a supported filter request.
> 
> This patch addresses the problem of bnx2x ptp thread's everlasting
> reschedule by retrying the register read 10 times; between the read
> attempts the thread sleeps for an increasing amount of time starting in 1ms
> to give FW some time to perform the timestamping. If it still fails after all
> retries, we bail out in order to prevent an unbound resource consumption
> from bnx2x.
> 
> The patch also adds an ethtool statistic for accounting the skipped TX
> timestamp packets and it reduces the priority of timestamping error
> messages to prevent log flooding. The code was tested using both linuxptp
> and chrony.
> 
> Reported-and-tested-by: Przemyslaw Hausman
> 
> Suggested-by: Sudarsana Reddy Kalluru 
> Signed-off-by: Guilherme G. Piccoli 
> ---
> 
> Sudarsana, thanks again for your feedback. I've reduced the sleep times to
> start in 1ms and goes up to 512ms - the sum of sleep times is 1023ms, but
> due to the inherent overhead in sleeping/waking-up procedure, I've
> measured the total delay in the register read loop (on bnx2x_ptp_task) to be
> ~1.6 seconds.  It is almost the 2s value you first suggested as PTP_TIMEOUT.
> 
>  .../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   | 12 +--
>  .../ethernet/broadcom/bnx2x/bnx2x_ethtool.c   |  4 ++-
>  .../net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 36 ++
> -  .../net/ethernet/broadcom/bnx2x/bnx2x_stats.h |  3 ++
>  4 files changed, 43 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> index 008ad0ca89ba..6751cd04e8d8 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> @@ -3857,9 +3857,17 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff
> *skb, struct net_device *dev)
> 
>   if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
>   if (!(bp->flags & TX_TIMESTAMPING_EN)) {
> - BNX2X_ERR("Tx timestamping was not enabled, this
> packet will not be timestamped\n");
> + bp->eth_stats.ptp_skip_tx_ts++;
> + netdev_err_once(bp->dev,
> + "Tx timestamping isn't enabled, this
> packet won't be timestamped\n");
> + DP(BNX2X_MSG_PTP,
> +"Tx timestamping isn't enabled, this packet won't
> be
> +timestamped\n");

Hitting this path is very unlikely and also PTP packets arrive once in a second 
in general. Either retain BNX2X_ERR() statement or remove the extra call 
netdev_err_once().

>   } else if (bp->ptp_tx_skb) {
> - BNX2X_ERR("The device supports only a single
> outstanding packet to timestamp, this packet will not be timestamped\n");
> + bp->eth_stats.ptp_skip_tx_ts++;
> + netdev_err_once(bp->dev,
> + "Device supports only a single
> outstanding packet to timestamp, this packet won't be timestamped\n");
> + DP(BNX2X_MSG_PTP,
> +"Device supports only a single outstanding packet to
> timestamp,
> +this packet won't be timestamped\n");
Same as above.

>   } else {
>   skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
>   /* schedule check for Tx timestamp */ diff --git
> a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
> b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
> index 51fc845de31a..4a0ba6801c9e 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
> @@ -182,7 +182,9 @@ static const struct {
>   { STATS_OFFSET32(driver_filtered_tx_pkt),
>

[RFC PATCH 0/1] Document the configuration of b53

2019-06-27 Thread Benedikt Spranger

Hi,

my comment about the configuration got misunderstood.
I apologize for that.

I try to update the Debian ifupdown util to handle DSA capable
configurations. To avoid bafflement and frayed nerves I would like to get
some conclusion about the configuration of the b53. I would like to know
if this configuration can be expected to remain unchanged,
or if some parts need to be handled with more or special care.

Please consider this patch as starting/discussion point to get a quotable
reference in the kernel documentation.

This reference would ease the development and justification to upstream
changes to tools like ifupdown.

Regards
Bene Spranger

Benedikt Spranger (1):
  Documentation: net: dsa: b53: Describe b53 configuration

 Documentation/networking/dsa/b53.rst | 300 +++
 1 file changed, 300 insertions(+)
 create mode 100644 Documentation/networking/dsa/b53.rst

-- 
2.20.1

[RFC PATCH 1/1] Documentation: net: dsa: b53: Describe b53 configuration

2019-06-27 Thread Benedikt Spranger

Document the different needs of documentation for the b53 driver.

Signed-off-by: Benedikt Spranger 
---
 Documentation/networking/dsa/b53.rst | 300 +++
 1 file changed, 300 insertions(+)
 create mode 100644 Documentation/networking/dsa/b53.rst

diff --git a/Documentation/networking/dsa/b53.rst 
b/Documentation/networking/dsa/b53.rst
new file mode 100644
index ..5838cf6230da
--- /dev/null
+++ b/Documentation/networking/dsa/b53.rst
@@ -0,0 +1,300 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+Broadcom RoboSwitch Ethernet switch driver
+==========================================
+
+The Broadcom RoboSwitch Ethernet switch family is used in quite a range of
+xDSL routers, cable modems and other multimedia devices.
+
+The actual implementation supports the devices BCM5325E, BCM5365, BCM539x,
+BCM53115 and BCM53125 as well as BCM63XX.
+
+Implementation details
+======================
+
+The driver is located in ``drivers/net/dsa/bcm_sf2.c`` and is implemented as a
+DSA driver; see ``Documentation/networking/dsa/dsa.rst`` for details on the
+subsystem and what it provides.
+
+The switch is, if possible, configured to enable a Broadcom specific 4-bytes
+switch tag which gets inserted by the switch for every packet forwarded to the
+CPU interface; conversely, the CPU network interface should insert a similar
+tag for packets entering the CPU port. The tag format is described in
+``net/dsa/tag_brcm.c``.
+
+The configuration of the device depends on whether or not tagging is
+supported.
+
+Configuration with tagging support
+----------------------------------
+
+The tagging based configuration is desired.
+
+To use the b53 DSA driver some configuration needs to be performed. As
+example configurations, the following scenarios are used:
+
+*single port*
+  Every switch port acts as a different configurable ethernet port
+
+*bridge*
+  Every switch port is part of one configurable ethernet bridge
+
+*gateway*
+  Every switch port except one upstream port is part of a configurable
+  ethernet bridge.
+  The upstream port acts as a different configurable ethernet port.
+
+All configurations are performed with tools from iproute2, which is available at
+https://www.kernel.org/pub/linux/utils/net/iproute2/
+
+In this documentation the following ethernet ports are used:
+
+*eth0*
+  CPU port
+
+*LAN1*
+  a switch port
+
+*LAN2*
+  another switch port
+
+*WAN*
+  A switch port dedicated as upstream port
+
+Further ethernet ports can be configured similarly.
+The configured IPs and networks are:
+
+*single port*
+  *  wan: 192.0.2.1/30 (192.0.2.0 - 192.0.2.3)
+  * lan1: 192.0.2.5/30 (192.0.2.4 - 192.0.2.7)
+  * lan2: 192.0.2.9/30 (192.0.2.8 - 192.0.2.11)
+
+*bridge*
+  * br0: 192.0.2.129/25 (192.0.2.128 - 192.0.2.255)
+
+*gateway*
+  * br0: 192.0.2.129/25 (192.0.2.128 - 192.0.2.255)
+  * wan: 192.0.2.1/30 (192.0.2.0 - 192.0.2.3)
+
+single port
+~~~~~~~~~~~
+
+.. code-block:: sh
+
+  # configure each interface
+  ip addr add 192.0.2.1/30 dev wan
+  ip addr add 192.0.2.5/30 dev lan1
+  ip addr add 192.0.2.9/30 dev lan2
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+bridge
+~~~~~~
+
+.. code-block:: sh
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # add ports to bridge
+  ip link set dev wan master br0
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+
+  # configure the bridge
+  ip addr add 192.0.2.129/25 dev br0
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # bring up the bridge
+  ip link set dev br0 up
+
+gateway
+~~~~~~~
+
+.. code-block:: sh
+
+  # create bridge
+  ip link add name br0 type bridge
+
+  # add ports to bridge
+  ip link set dev lan1 master br0
+  ip link set dev lan2 master br0
+
+  # configure the bridge
+  ip addr add 192.0.2.129/25 dev br0
+
+  # configure the upstream port
+  ip addr add 192.0.2.1/30 dev wan
+
+  # The master interface needs to be brought up before the slave ports.
+  ip link set eth0 up
+
+  # bring up the slave interfaces
+  ip link set wan up
+  ip link set lan1 up
+  ip link set lan2 up
+
+  # bring up the bridge
+  ip link set dev br0 up
+
+Configuration without tagging support
+-------------------------------------
+
+Older models (5325, 5365) support a different tag format that is not supported
+yet. 539x and 531x5 require managed mode and some special handling, which is
+also not yet supported. The tagging support is disabled in these cases and the
+switch needs a different configuration.
+
+single port
+~~~~~~~~~~~
+The configuration can only be set up via VLAN tagging and bridge setup.
+By default packets are tagged with vid 1:
+
+.. code-block:: sh
+
+  # tag

RE: [PATCH net-next 11/16] qlge: Remove qlge_bq.len & size

2019-06-27 Thread Manish Chopra

> 
> - for (i = 0; i < qdev->rx_ring_count; i++) {
> + for (i = 0; i < qdev->rss_ring_count; i++) {
>   struct rx_ring *rx_ring = &qdev->rx_ring[i];
> 
> - if (rx_ring->lbq.queue)
> - ql_free_lbq_buffers(qdev, rx_ring);
> - if (rx_ring->sbq.queue)
> - ql_free_sbq_buffers(qdev, rx_ring);
> + ql_free_lbq_buffers(qdev, rx_ring);
> + ql_free_sbq_buffers(qdev, rx_ring);
>   }
>  }
> 

This seems to be an irrelevant change, given what this patch is supposed to do.

Re: [PATCH 00/11] XDP unaligned chunk placement support

2019-06-27 Thread Laatz, Kevin




On 25/06/2019 19:44, Jonathan Lemon wrote:

On 20 Jun 2019, at 1:39, Kevin Laatz wrote:


This patchset adds the ability to use unaligned chunks in the XDP umem.

Currently, all chunk addresses passed to the umem are masked to be chunk
size aligned (default is 2k, max is PAGE_SIZE). This limits where we can
place chunks within the umem as well as limiting the packet sizes 
that are

supported.

The changes in this patchset removes these restrictions, allowing XDP 
to be
more flexible in where it can place a chunk within a umem. By 
relaxing where
the chunks can be placed, it allows us to use an arbitrary buffer 
size and
place that wherever we have a free address in the umem. These changes 
add the

ability to support jumboframes and make it easy to integrate with other
existing frameworks that have their own memory management systems, 
such as

DPDK.


I'm a little unclear on how this should work, and have a few issues here:

 1) There isn't any support for the user defined umem->headroom



For the unaligned chunks case, it does not make sense to support a 
user defined headroom since the user can point directly to where they 
want the data to start via the buffer address. Therefore, for unaligned 
chunks, the user defined headroom should always be 0 (aka the user did 
not define a headroom and the default value of 0 is used). Any other 
value will be caught and we return an invalid argument error.




 2) When queuing RX buffers, the handle (aka umem offset) is used, which
    points to the start of the buffer area.  When the buffer appears in
    the completion queue, handle points to the start of the received 
data,

    which might be different from the buffer start address.

    Normally, this RX address is just put back in the fill queue, and the
    mask is used to find the buffer start address again.  This no longer
    works, so my question is, how is the buffer start address recomputed
    from the actual data payload address?

    Same with TX - if the TX payload isn't aligned with the start of
    the buffer, what happens?


On the application side (xdpsock), we don't have to worry about the user 
defined headroom, since it is 0, so we only need to account for the 
XDP_PACKET_HEADROOM when computing the original address (in the default 
scenario). This was missing from the v1, will add this in the v2, to 
have xdpsock use the default value from libbpf! If the user is using 
another BPF program that uses a different offset, then the computation 
will need to be adjusted for that accordingly. In v2 we'll add support 
for this via command-line parameter.


However, we are also working on an "in-order" patchset, hopefully to be 
published soon, to guarantee the buffers returned to the application are 
in the same order as those provided to the kernel. Longer term, this is 
the best solution here as it allows the application to track itself, via 
a "shadow ring" or otherwise, the buffers sent to the kernel and any 
metadata associated with them, such as the start of buffer address.




 3) This appears limited to crossing a single page boundary, but there
    is no constraint check on chunk_size.


There is an existing check for chunk_size during xdp_umem_reg (in 
xdp_umem.c). The check makes sure that chunk size is at least 
XDP_UMEM_MIN_CHUNK_SIZE and at most PAGE_SIZE. Since the max is page 
size, we only need to check the immediate next page for contiguity.
While this patchset allows a max of 4k sized buffers, it is still an 
improvement from the current state. Future enhancements could look into 
extending the 4k limit but for now it is a good first step towards 
supporting hugepages efficiently.


Best regards,
Kevin

Re: [PATCH 1/2 nf-next] netfilter: nft_meta: add NFT_META_BRI_VLAN_PROTO support

2019-06-27 Thread Pablo Neira Ayuso

On Thu, Jun 27, 2019 at 10:09:16AM +0800, we...@ucloud.cn wrote:
> From: wenxu 
> 
> This patch provides a meta to get the bridge vlan proto
> 
> nft add rule bridge firewall zones counter meta br_vlan_proto 0x8100
> 
> Signed-off-by: wenxu 
> ---
>  include/uapi/linux/netfilter/nf_tables.h | 2 ++
>  net/netfilter/nft_meta.c | 9 +
>  2 files changed, 11 insertions(+)
> 
> diff --git a/include/uapi/linux/netfilter/nf_tables.h 
> b/include/uapi/linux/netfilter/nf_tables.h
> index 8859535..0b18646 100644
> --- a/include/uapi/linux/netfilter/nf_tables.h
> +++ b/include/uapi/linux/netfilter/nf_tables.h
> @@ -796,6 +796,7 @@ enum nft_exthdr_attributes {
>   * @NFT_META_IIFKIND: packet input interface kind name 
> (dev->rtnl_link_ops->kind)
>   * @NFT_META_OIFKIND: packet output interface kind name 
> (dev->rtnl_link_ops->kind)
>   * @NFT_META_BRI_PVID: packet input bridge port pvid
> + * @NFT_META_BRI_VLAN_PROTO: packet input bridge vlan proto

Looks good.

Since this only works for the input path, should we rename these to the following?

NFT_META_BRI_IIFVID
NFT_META_BRI_IIFVPROTO

so we leave room for _OIF (output interface) in the future?

Apart from that, this looks good to me.

Re: samples/bpf compilation failures - 5.2.0

2019-06-27 Thread Joel Fernandes

> > On 5/28/2019 2:27 PM, Srinivas Ramana wrote:
> > > Hello,
> > >
> > > I am trying to build samples/bpf in kernel(5.2.0-rc1) but unsuccessful
> > > with below errors. Can you help to point what i am missing or if there
> > > is some known issue?

By the way have you just tried building it on an ARM debian chroot? It
is not worth IMO spending time on cross compiler issues if you can
just native compile it within a chroot (as I do). Cross compilation
does not get a lot of testing, so even if we fix it it's likely to come
up again as I've experienced. If you want a debian chroot that is
Android friendly, you can find one here:
https://github.com/joelagnel/adeb (comes with llvm, gcc etc).  I have
done lots of native compilation on a Pixel phone.

J.



> > >
> > > ==8<===
> > > $ make samples/bpf/
> > > LLC=/local/mnt/workspace/tools/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/llc
> > > CLANG=/local/mnt/workspace/tools/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang
> > > V=1
> > > make -C /local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel -f
> > > /local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel/Makefile
> > > samples/bpf/
> > > 
> > > 
> > > 
> > > make KBUILD_MODULES=1 -f ./scripts/Makefile.build obj=samples/bpf
> > > (cat /dev/null; ) > samples/bpf/modules.order
> > > make -C
> > > /local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel/samples/bpf/../../tools/lib/bpf/
> > > RM='rm -rf' LDFLAGS=
> > > srctree=/local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel/samples/bpf/../../
> > > O=
> > >
> > > Auto-detecting system features:
> > > ...libelf: [ on  ]
> > > ...   bpf: [ on  ]
> > >
> > > make -C
> > > /local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel/samples/bpf/../..//tools/build
> > > CFLAGS= LDFLAGS= fixdep
> > > make -f
> > > /local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel/samples/bpf/../..//tools/build/Makefile.build
> > > dir=. obj=fixdep
> > > ld -r -o fixdep-in.o  fixdep.o
> > > ld: fixdep.o: Relocations in generic ELF (EM: 183)
> > > ld: fixdep.o: Relocations in generic ELF (EM: 183)
> > > fixdep.o: error adding symbols: File in wrong format
> > > make[5]: *** [fixdep-in.o] Error 1
> > > make[4]: *** [fixdep-in.o] Error 2
> > > make[3]: *** [fixdep] Error 2
> > > make[2]: ***
> > > [/local/mnt/workspace/sramana/kdev_torvalds/kdev/kernel/samples/bpf/../../tools/lib/bpf/libbpf.a]
> > > Error 2
> > > make[1]: *** [samples/bpf/] Error 2
> > > make: *** [sub-make] Error 2
> > > ==>8===
> > >
> > >
> > > I am using the below commands to build:
> > > 
> > > export ARCH=arm64
> > > export CROSS_COMPILE=linaro-toolchain/5.1/bin/aarch64-linux-gnu-
> > > export CLANG_TRIPLE=arm64-linux-gnu-
> > >
> > > make
> > > CC=/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang
> > > defconfig
> > >
> > > make
> > > CC=/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang
> > > -j8
> > >
> > > make
> > > CC=/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang
> > > headers_install INSTALL_HDR_PATH=./usr
> > >
> > > make samples/bpf/
> > > LLC=/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/llc
> > > CLANG=/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang
> > > V=1
> > > CC=/clang_ubuntu/clang/clang+llvm-8.0.0-x86_64-linux-gnu-ubuntu-14.04/bin/clang
> > >
> > > 
> > >
> > > Thanks,
> > > -- Srinivas R
> > >
> >
> >
> > --
> > Qualcomm India Private Limited, on behalf of Qualcomm Innovation
> > Center, Inc., is a member of Code Aurora Forum, a Linux Foundation
> > Collaborative Project

[PATCH net-next 0/2] net: ipv4: fix circular-list infinite loop

2019-06-27 Thread Florian Westphal

Tariq and Ran reported a regression caused by net-next commit
2638eb8b50cf ("net: ipv4: provide __rcu annotation for ifa_list").

This happens when net.ipv4.conf.$dev.promote_secondaries sysctl is
enabled -- we can arrange for ifa->next to point at ifa, so next
process that tries to walk the list loops forever.

Fix this and extend rtnetlink.sh with a small test case for this.

Florian Westphal (2):
  net: ipv4: fix infinite loop on secondary addr promotion
  selftests: rtnetlink: add small test case with 'promote_secondaries' 
enabled

 net/ipv4/devinet.c   |3 ++-
 tools/testing/selftests/net/rtnetlink.sh |   20 
 2 files changed, 22 insertions(+), 1 deletion(-)

[PATCH net-next 2/2] selftests: rtnetlink: add small test case with 'promote_secondaries' enabled

2019-06-27 Thread Florian Westphal

This exercises the 'promote_secondaries' code path.

Without previous fix, this triggers infinite loop/soft lockup:
ifconfig process spinning at 100%, never to return.

Signed-off-by: Florian Westphal 
---
 tools/testing/selftests/net/rtnetlink.sh | 20 
 1 file changed, 20 insertions(+)

diff --git a/tools/testing/selftests/net/rtnetlink.sh 
b/tools/testing/selftests/net/rtnetlink.sh
index ed606a2e3865..505628884783 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -269,6 +269,25 @@ kci_test_addrlft()
echo "PASS: preferred_lft addresses have expired"
 }
 
+kci_test_promote_secondaries()
+{
+   promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
+
+   sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
+
+   for i in $(seq 2 254);do
+   IP="10.23.11.$i"
+   ip -f inet addr add $IP/16 brd + dev "$devdummy"
+   ifconfig "$devdummy" $IP netmask 255.255.0.0
+   done
+
+   ip addr flush dev "$devdummy"
+
+   [ $promote -eq 0 ] && sysctl -q 
net.ipv4.conf.$devdummy.promote_secondaries=0
+
+   echo "PASS: promote_secondaries complete"
+}
+
 kci_test_addrlabel()
 {
ret=0
@@ -1161,6 +1180,7 @@ kci_test_rtnl()
kci_test_polrouting
kci_test_route_get
kci_test_addrlft
+   kci_test_promote_secondaries
kci_test_tc
kci_test_gre
kci_test_gretap
-- 
2.21.0

[PATCH net-next 1/2] net: ipv4: fix infinite loop on secondary addr promotion

2019-06-27 Thread Florian Westphal

secondary address promotion causes infinite loop -- it arranges
for ifa->ifa_next to point back to itself.

Problem is that 'prev_prom' and 'last_prim' might point at the same entry,
so 'last_sec' pointer must be obtained after prev_prom->next update.

Fixes: 2638eb8b50cf ("net: ipv4: provide __rcu annotation for ifa_list")
Reported-by: Ran Rozenstein 
Reported-by: Tariq Toukan 
Signed-off-by: Florian Westphal 
---
 net/ipv4/devinet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7874303220c5..137d1892395d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -428,8 +428,9 @@ static void __inet_del_ifa(struct in_device *in_dev,
if (prev_prom) {
struct in_ifaddr *last_sec;
 
-   last_sec = rtnl_dereference(last_prim->ifa_next);
rcu_assign_pointer(prev_prom->ifa_next, next_sec);
+
+   last_sec = rtnl_dereference(last_prim->ifa_next);
rcu_assign_pointer(promote->ifa_next, last_sec);
rcu_assign_pointer(last_prim->ifa_next, promote);
}
-- 
2.21.0

Re: [RFC iproute2 1/1] ip: netns: add mounted state file for each netns

2019-06-27 Thread Nicolas Dichtel

Le 26/06/2019 à 21:03, Alexander Aring a écrit :
> This patch adds a state file for each generated namespace to ensure the
> namespace is mounted. There exists no way to tell another program that
> the namespace is mounted when iproute is creating one. An example
> application would be an inotify watcher to use the generated namespace
> when it discovers one. In this case we cannot use the generated
> namespace file in /var/run/netns in the time when it's not mounted yet.
> A primitive approach is to generate another file after the mount
> systemcall was done. In my case inotify waits until the mount statefile
> is generated to be sure that iproute2 did a mount bind.
We (at 6WIND) already hit this problem. The solution was: if setns() fails, wait
a bit and retry the setns() and continue this loop with a predefined timeout.
netns may be created by other app than iproute2, it would be nice to find a
generic solution.

David Howells was working on a mount notification mechanism:
https://lwn.net/Articles/760714/
https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git/log/?h=notifications

I don't know what is the status of this series.


Regards,
Nicolas

Re: [RFC, PATCH 2/2, net-next] net: netsec: add XDP support

2019-06-27 Thread Jesper Dangaard Brouer

On Tue, 25 Jun 2019 18:06:19 +0300
Ilias Apalodimas  wrote:

> @@ -609,6 +639,9 @@ static bool netsec_clean_tx_dring(struct netsec_priv 
> *priv)
>   int tail = dring->tail;
>   int cnt = 0;
>  
> + if (dring->is_xdp)
> + spin_lock(&dring->lock);
> +
>   pkts = 0;
>   bytes = 0;
>   entry = dring->vaddr + DESC_SZ * tail;
> @@ -622,16 +655,24 @@ static bool netsec_clean_tx_dring(struct netsec_priv 
> *priv)
>   eop = (entry->attr >> NETSEC_TX_LAST) & 1;
>   dma_rmb();
>  
> - dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
> -  DMA_TO_DEVICE);
> - if (eop) {
> - pkts++;
> + if (!eop)
> + goto next;
> +
> + if (desc->buf_type == TYPE_NETSEC_SKB) {
> + dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
> +  DMA_TO_DEVICE);

I don't think this is correct.  If I read the code correctly, you will
miss the DMA unmap for !eop packets.

>   bytes += desc->skb->len;
>   dev_kfree_skb(desc->skb);
> + } else {
> + if (desc->buf_type == TYPE_NETSEC_XDP_NDO)
> + dma_unmap_single(priv->dev, desc->dma_addr,
> +  desc->len, DMA_TO_DEVICE);
> + xdp_return_frame(desc->xdpf);
>   }
>   /* clean up so netsec_uninit_pkt_dring() won't free the skb
>* again
>*/
> +next:
>   *desc = (struct netsec_desc){};
>  
>   /* entry->attr is not going to be accessed by the NIC until
> @@ -645,6 +686,8 @@ static bool netsec_clean_tx_dring(struct netsec_priv 
> *priv)
>   entry = dring->vaddr + DESC_SZ * tail;
>   cnt++;
>   }
> + if (dring->is_xdp)
> + spin_unlock(&dring->lock);
>  
>   if (!cnt)
>   return false;



-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

Re: [RFC, PATCH 2/2, net-next] net: netsec: add XDP support

2019-06-27 Thread Ilias Apalodimas

On Thu, Jun 27, 2019 at 02:23:05PM +0200, Jesper Dangaard Brouer wrote:
> On Tue, 25 Jun 2019 18:06:19 +0300
> Ilias Apalodimas  wrote:
> 
> > @@ -609,6 +639,9 @@ static bool netsec_clean_tx_dring(struct netsec_priv 
> > *priv)
> > int tail = dring->tail;
> > int cnt = 0;
> >  
> > +   if (dring->is_xdp)
> > +   spin_lock(&dring->lock);
> > +
> > pkts = 0;
> > bytes = 0;
> > entry = dring->vaddr + DESC_SZ * tail;
> > @@ -622,16 +655,24 @@ static bool netsec_clean_tx_dring(struct netsec_priv 
> > *priv)
> > eop = (entry->attr >> NETSEC_TX_LAST) & 1;
> > dma_rmb();
> >  
> > -   dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
> > -DMA_TO_DEVICE);
> > -   if (eop) {
> > -   pkts++;
> > +   if (!eop)
> > +   goto next;
> > +
> > +   if (desc->buf_type == TYPE_NETSEC_SKB) {
> > +   dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
> > +DMA_TO_DEVICE);
> 
> I don't think this is correct.  If I read the code correctly, you will
> miss the DMA unmap for !eop packets.
> 

You are reading it correctly, thanks for catching this.
I'll fix it in the proper patch.

Thanks
/Ilias

Re: [PATCH 2/2 nf-next] netfilter:nft_meta: add NFT_META_VLAN support

2019-06-27 Thread Pablo Neira Ayuso

On Thu, Jun 27, 2019 at 10:09:17AM +0800, we...@ucloud.cn wrote:
> From: wenxu 
> 
> This patch provides a meta vlan to set the vlan tag of the packet.
> 
> for q-in-q vlan id 20:
> meta vlan set 0x88a8:20

Actually, I think this is not very useful for stacked vlan since this
just sets/mangles the existing meta vlan data.

We'll need infrastructure that uses skb_vlan_push() and _pop().

Patch looks good anyway, such infrastructure to push/pop can be added
later on.

Thanks.

> set the default 0x8100 vlan type with vlan id 20
> meta vlan set 20
> 
> Signed-off-by: wenxu 
> ---
>  include/uapi/linux/netfilter/nf_tables.h |  4 
>  net/netfilter/nft_meta.c | 27 ++-
>  2 files changed, 30 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/netfilter/nf_tables.h 
> b/include/uapi/linux/netfilter/nf_tables.h
> index 0b18646..cf037f2 100644
> --- a/include/uapi/linux/netfilter/nf_tables.h
> +++ b/include/uapi/linux/netfilter/nf_tables.h
> @@ -797,6 +797,7 @@ enum nft_exthdr_attributes {
>   * @NFT_META_OIFKIND: packet output interface kind name 
> (dev->rtnl_link_ops->kind)
>   * @NFT_META_BRI_PVID: packet input bridge port pvid
>   * @NFT_META_BRI_VLAN_PROTO: packet input bridge vlan proto
> + * @NFT_META_VLAN: packet vlan metadata
>   */
>  enum nft_meta_keys {
>   NFT_META_LEN,
> @@ -829,6 +830,7 @@ enum nft_meta_keys {
>   NFT_META_OIFKIND,
>   NFT_META_BRI_PVID,
>   NFT_META_BRI_VLAN_PROTO,
> + NFT_META_VLAN,
>  };
>  
>  /**
> @@ -895,12 +897,14 @@ enum nft_hash_attributes {
>   * @NFTA_META_DREG: destination register (NLA_U32)
>   * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
>   * @NFTA_META_SREG: source register (NLA_U32)
> + * @NFTA_META_SREG2: source register (NLA_U32)
>   */
>  enum nft_meta_attributes {
>   NFTA_META_UNSPEC,
>   NFTA_META_DREG,
>   NFTA_META_KEY,
>   NFTA_META_SREG,
> + NFTA_META_SREG2,
>   __NFTA_META_MAX
>  };
>  #define NFTA_META_MAX(__NFTA_META_MAX - 1)
> diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
> index e3adf6a..29a6679 100644
> --- a/net/netfilter/nft_meta.c
> +++ b/net/netfilter/nft_meta.c
> @@ -28,7 +28,10 @@ struct nft_meta {
>   enum nft_meta_keys  key:8;
>   union {
>   enum nft_registers  dreg:8;
> - enum nft_registers  sreg:8;
> + struct {
> + enum nft_registers  sreg:8;
> + enum nft_registers  sreg2:8;
> + };
>   };
>  };
>  
> @@ -312,6 +315,17 @@ static void nft_meta_set_eval(const struct nft_expr 
> *expr,
>   skb->secmark = value;
>   break;
>  #endif
> + case NFT_META_VLAN: {
> + u32 *sreg2 = &regs->data[meta->sreg2];
> + __be16 vlan_proto;
> + u16 vlan_tci;
> +
> + vlan_tci = nft_reg_load16(sreg);
> + vlan_proto = nft_reg_load16(sreg2);
> +
> + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
> + break;
> + }
>   default:
>   WARN_ON(1);
>   }
> @@ -321,6 +335,7 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
>   [NFTA_META_DREG]= { .type = NLA_U32 },
>   [NFTA_META_KEY] = { .type = NLA_U32 },
>   [NFTA_META_SREG]= { .type = NLA_U32 },
> + [NFTA_META_SREG2]   = { .type = NLA_U32 },
>  };
>  
>  static int nft_meta_get_init(const struct nft_ctx *ctx,
> @@ -483,6 +498,13 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
>   case NFT_META_PKTTYPE:
>   len = sizeof(u8);
>   break;
> + case NFT_META_VLAN:
> + len = sizeof(u16);
> + priv->sreg2 = nft_parse_register(tb[NFTA_META_SREG2]);
> + err = nft_validate_register_load(priv->sreg2, len);
> + if (err < 0)
> + return err;
> + break;
>   default:
>   return -EOPNOTSUPP;
>   }
> @@ -521,6 +543,9 @@ static int nft_meta_set_dump(struct sk_buff *skb, const 
> struct nft_expr *expr)
>   goto nla_put_failure;
>   if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
>   goto nla_put_failure;
> + if (priv->key == NFT_META_VLAN &&
> + nft_dump_register(skb, NFTA_META_SREG2, priv->sreg2))
> + goto nla_put_failure;
>  
>   return 0;
>  
> -- 
> 1.8.3.1
>

Re: [PATCH 2/3 nf-next] netfilter:nf_flow_table: Support bridge type flow offload

2019-06-27 Thread Pablo Neira Ayuso

On Thu, Jun 27, 2019 at 02:22:36PM +0800, wenxu wrote:
> On 6/27/2019 3:19 AM, Florian Westphal wrote:
> > Florian Westphal  wrote:
[...]
> >> Whats the idea with this patch?
> >>
> >> Do you see a performance improvement when bypassing bridge layer? If so,
> >> how much?
> >>
> >> I just wonder if its really cheaper than not using bridge conntrack in
> >> the first place :-)
> 
> This patch is based on the conntrack function in bridge.  It will
> bypass the fdb lookup and conntrack lookup to get the performance 
> improvement. More importantly, it prepares for hardware offload in the
> future, once nf_tables adds hardware offload support.

Florian would like to see numbers / benchmark.

[PATCH 2/2 nf-next v2] netfilter:nft_meta: add NFT_META_VLAN support

2019-06-27 Thread wenxu

From: wenxu 

This patch provides a meta vlan to set the vlan tag of the packet.

for q-in-q vlan id 20:
meta vlan set 0x88a8:20

set the default 0x8100 vlan type with vlan id 20
meta vlan set 20

Signed-off-by: wenxu 
---
 include/uapi/linux/netfilter/nf_tables.h |  4 
 net/netfilter/nft_meta.c | 27 ++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 0f75a6d..acb8b75 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -798,6 +798,7 @@ enum nft_exthdr_attributes {
  * @NFT_META_BRI_PVID: packet input bridge port pvid
  * @NFT_META_BRI_IIFVPROTO: packet input bridge port vlan proto
  * @NFT_META_BRI_OIFVPROTO: packet output bridge port vlan proto
+ * @NFT_META_VLAN: packet vlan metadata
  */
 enum nft_meta_keys {
NFT_META_LEN,
@@ -831,6 +832,7 @@ enum nft_meta_keys {
NFT_META_BRI_PVID,
NFT_META_BRI_IIFVPROTO,
NFT_META_BRI_OIFVPROTO,
+   NFT_META_VLAN,
 };
 
 /**
@@ -897,12 +899,14 @@ enum nft_hash_attributes {
  * @NFTA_META_DREG: destination register (NLA_U32)
  * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
  * @NFTA_META_SREG: source register (NLA_U32)
+ * @NFTA_META_SREG2: source register (NLA_U32)
  */
 enum nft_meta_attributes {
NFTA_META_UNSPEC,
NFTA_META_DREG,
NFTA_META_KEY,
NFTA_META_SREG,
+   NFTA_META_SREG2,
__NFTA_META_MAX
 };
 #define NFTA_META_MAX  (__NFTA_META_MAX - 1)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e7e10fb..53f4547 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -28,7 +28,10 @@ struct nft_meta {
enum nft_meta_keys  key:8;
union {
enum nft_registers  dreg:8;
-   enum nft_registers  sreg:8;
+   struct {
+   enum nft_registers  sreg:8;
+   enum nft_registers  sreg2:8;
+   };
};
 };
 
@@ -320,6 +323,17 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
skb->secmark = value;
break;
 #endif
+   case NFT_META_VLAN: {
+   u32 *sreg2 = &regs->data[meta->sreg2];
+   __be16 vlan_proto;
+   u16 vlan_tci;
+
+   vlan_tci = nft_reg_load16(sreg);
+   vlan_proto = nft_reg_load16(sreg2);
+
+   __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
+   break;
+   }
default:
WARN_ON(1);
}
@@ -329,6 +343,7 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
[NFTA_META_DREG]= { .type = NLA_U32 },
[NFTA_META_KEY] = { .type = NLA_U32 },
[NFTA_META_SREG]= { .type = NLA_U32 },
+   [NFTA_META_SREG2]   = { .type = NLA_U32 },
 };
 
 static int nft_meta_get_init(const struct nft_ctx *ctx,
@@ -492,6 +507,13 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
case NFT_META_PKTTYPE:
len = sizeof(u8);
break;
+   case NFT_META_VLAN:
+   len = sizeof(u16);
+   priv->sreg2 = nft_parse_register(tb[NFTA_META_SREG2]);
+   err = nft_validate_register_load(priv->sreg2, len);
+   if (err < 0)
+   return err;
+   break;
default:
return -EOPNOTSUPP;
}
@@ -530,6 +552,9 @@ static int nft_meta_set_dump(struct sk_buff *skb, const 
struct nft_expr *expr)
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
goto nla_put_failure;
+   if (priv->key == NFT_META_VLAN &&
+   nft_dump_register(skb, NFTA_META_SREG2, priv->sreg2))
+   goto nla_put_failure;
 
return 0;
 
-- 
1.8.3.1

[PATCH] net/smc: common release code for non-accepted sockets

2019-06-27 Thread Karsten Graul

From: Ursula Braun 

There are common steps when releasing an accepted or unaccepted socket.
Move this code into a common routine.

Signed-off-by: Ursula Braun 
Signed-off-by: Karsten Graul 
---
 net/smc/af_smc.c | 73 +---
 1 file changed, 32 insertions(+), 41 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 7621ec2f539c..302e355f2ebc 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -123,30 +123,11 @@ struct proto smc_proto6 = {
 };
 EXPORT_SYMBOL_GPL(smc_proto6);
 
-static int smc_release(struct socket *sock)
+static int __smc_release(struct smc_sock *smc)
 {
-   struct sock *sk = sock->sk;
-   struct smc_sock *smc;
+   struct sock *sk = &smc->sk;
int rc = 0;
 
-   if (!sk)
-   goto out;
-
-   smc = smc_sk(sk);
-
-   /* cleanup for a dangling non-blocking connect */
-   if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
-   tcp_abort(smc->clcsock->sk, ECONNABORTED);
-   flush_work(&smc->connect_work);
-
-   if (sk->sk_state == SMC_LISTEN)
-   /* smc_close_non_accepted() is called and acquires
-* sock lock for child sockets again
-*/
-   lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-   else
-   lock_sock(sk);
-
if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
@@ -174,6 +155,35 @@ static int smc_release(struct socket *sock)
smc_conn_free(&smc->conn);
}
 
+   return rc;
+}
+
+static int smc_release(struct socket *sock)
+{
+   struct sock *sk = sock->sk;
+   struct smc_sock *smc;
+   int rc = 0;
+
+   if (!sk)
+   goto out;
+
+   smc = smc_sk(sk);
+
+   /* cleanup for a dangling non-blocking connect */
+   if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
+   tcp_abort(smc->clcsock->sk, ECONNABORTED);
+   flush_work(&smc->connect_work);
+
+   if (sk->sk_state == SMC_LISTEN)
+   /* smc_close_non_accepted() is called and acquires
+* sock lock for child sockets again
+*/
+   lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+   else
+   lock_sock(sk);
+
+   rc = __smc_release(smc);
+
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
@@ -964,26 +974,7 @@ void smc_close_non_accepted(struct sock *sk)
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
-   if (!smc->use_fallback) {
-   smc_close_active(smc);
-   sock_set_flag(sk, SOCK_DEAD);
-   sk->sk_shutdown |= SHUTDOWN_MASK;
-   }
-   sk->sk_prot->unhash(sk);
-   if (smc->clcsock) {
-   struct socket *tcp;
-
-   tcp = smc->clcsock;
-   smc->clcsock = NULL;
-   sock_release(tcp);
-   }
-   if (smc->use_fallback) {
-   sock_put(sk); /* passive closing */
-   sk->sk_state = SMC_CLOSED;
-   } else {
-   if (sk->sk_state == SMC_CLOSED)
-   smc_conn_free(&smc->conn);
-   }
+   __smc_release(smc);
release_sock(sk);
sock_put(sk); /* final sock_put */
 }
-- 
2.21.0

[PATCH 1/2 nf-next v2] netfilter: nft_meta: add NFT_META_BRI_O/IIFVPROTO support

2019-06-27 Thread wenxu

From: wenxu 

This patch provides a meta key to get the bridge vlan proto

nft add rule bridge firewall zones counter meta br_iifvproto 0x8100

Signed-off-by: wenxu 
---
 include/uapi/linux/netfilter/nf_tables.h |  4 
 net/netfilter/nft_meta.c | 18 ++
 2 files changed, 22 insertions(+)

diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index 8859535..0f75a6d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -796,6 +796,8 @@ enum nft_exthdr_attributes {
  * @NFT_META_IIFKIND: packet input interface kind name 
(dev->rtnl_link_ops->kind)
  * @NFT_META_OIFKIND: packet output interface kind name 
(dev->rtnl_link_ops->kind)
  * @NFT_META_BRI_PVID: packet input bridge port pvid
+ * @NFT_META_BRI_IIFVPROTO: packet input bridge port vlan proto
+ * @NFT_META_BRI_OIFVPROTO: packet output bridge port vlan proto
  */
 enum nft_meta_keys {
NFT_META_LEN,
@@ -827,6 +829,8 @@ enum nft_meta_keys {
NFT_META_IIFKIND,
NFT_META_OIFKIND,
NFT_META_BRI_PVID,
+   NFT_META_BRI_IIFVPROTO,
+   NFT_META_BRI_OIFVPROTO,
 };
 
 /**
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 4f8116d..e7e10fb 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -248,6 +248,22 @@ void nft_meta_get_eval(const struct nft_expr *expr,
return;
}
goto err;
+   case NFT_META_BRI_IIFVPROTO:
+   if (in == NULL || (p = br_port_get_rtnl_rcu(in)) == NULL)
+   goto err;
+   if (br_opt_get(p->br, BROPT_VLAN_ENABLED)) {
+   nft_reg_store16(dest, p->br->vlan_proto);
+   return;
+   }
+   goto err;
+   case NFT_META_BRI_OIFVPROTO:
+   if (out == NULL || (p = br_port_get_rtnl_rcu(out)) == NULL)
+   goto err;
+   if (br_opt_get(p->br, BROPT_VLAN_ENABLED)) {
+   nft_reg_store16(dest, p->br->vlan_proto);
+   return;
+   }
+   goto err;
 #endif
case NFT_META_IIFKIND:
if (in == NULL || in->rtnl_link_ops == NULL)
@@ -376,6 +392,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
len = IFNAMSIZ;
break;
case NFT_META_BRI_PVID:
+   case NFT_META_BRI_IIFVPROTO:
+   case NFT_META_BRI_OIFVPROTO:
if (ctx->family != NFPROTO_BRIDGE)
return -EOPNOTSUPP;
len = sizeof(u16);
-- 
1.8.3.1

Re: [PATCH] net: dsa: mv88e6xxx: wait after reset deactivation

2019-06-27 Thread Andrew Lunn

On Thu, Jun 27, 2019 at 07:29:46AM +0300, Baruch Siach wrote:
> Add a 1ms delay after reset deactivation. Otherwise the chip returns
> bogus ID value. This is observed with 88E6390 (Peridot) chip.
> 
> Signed-off-by: Baruch Siach 
> ---
>  drivers/net/dsa/mv88e6xxx/chip.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/dsa/mv88e6xxx/chip.c 
> b/drivers/net/dsa/mv88e6xxx/chip.c
> index f4e2db44ad91..549f528f216c 100644
> --- a/drivers/net/dsa/mv88e6xxx/chip.c
> +++ b/drivers/net/dsa/mv88e6xxx/chip.c
> @@ -4910,6 +4910,7 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
>   err = PTR_ERR(chip->reset);
>   goto out;
>   }
> + mdelay(1);
>  
>   err = mv88e6xxx_detect(chip);
>   if (err)

Hi Baruch

So your switch is held in reset by default, by the bootloader? So you
need to take it out of reset in order to detect it. Yes, this makes
sense.

However, please use usleep_range(1000, 2000), and only do this if
the GPIO is valid.

Thanks

Andrew

Re: [PATCH] net/smc: common release code for non-accepted sockets

2019-06-27 Thread Karsten Graul

Hi Dave,

I forgot to add that this patch is intended for the net-next tree.


On 27/06/2019 15:04, Karsten Graul wrote:
> From: Ursula Braun 
> 
> There are common steps when releasing an accepted or unaccepted socket.
> Move this code into a common routine.
> 
> Signed-off-by: Ursula Braun 
> Signed-off-by: Karsten Graul 
> ---
>  net/smc/af_smc.c | 73 +---
>  1 file changed, 32 insertions(+), 41 deletions(-)
> 
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 7621ec2f539c..302e355f2ebc 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -123,30 +123,11 @@ struct proto smc_proto6 = {
>  };
>  EXPORT_SYMBOL_GPL(smc_proto6);
>  
> -static int smc_release(struct socket *sock)
> +static int __smc_release(struct smc_sock *smc)
>  {
> - struct sock *sk = sock->sk;
> - struct smc_sock *smc;
> + struct sock *sk = &smc->sk;
>   int rc = 0;
>  
> - if (!sk)
> - goto out;
> -
> - smc = smc_sk(sk);
> -
> - /* cleanup for a dangling non-blocking connect */
> - if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
> - tcp_abort(smc->clcsock->sk, ECONNABORTED);
> - flush_work(&smc->connect_work);
> -
> - if (sk->sk_state == SMC_LISTEN)
> - /* smc_close_non_accepted() is called and acquires
> -  * sock lock for child sockets again
> -  */
> - lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
> - else
> - lock_sock(sk);
> -
>   if (!smc->use_fallback) {
>   rc = smc_close_active(smc);
>   sock_set_flag(sk, SOCK_DEAD);
> @@ -174,6 +155,35 @@ static int smc_release(struct socket *sock)
>   smc_conn_free(&smc->conn);
>   }
>  
> + return rc;
> +}
> +
> +static int smc_release(struct socket *sock)
> +{
> + struct sock *sk = sock->sk;
> + struct smc_sock *smc;
> + int rc = 0;
> +
> + if (!sk)
> + goto out;
> +
> + smc = smc_sk(sk);
> +
> + /* cleanup for a dangling non-blocking connect */
> + if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
> + tcp_abort(smc->clcsock->sk, ECONNABORTED);
> + flush_work(&smc->connect_work);
> +
> + if (sk->sk_state == SMC_LISTEN)
> + /* smc_close_non_accepted() is called and acquires
> +  * sock lock for child sockets again
> +  */
> + lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
> + else
> + lock_sock(sk);
> +
> + rc = __smc_release(smc);
> +
>   /* detach socket */
>   sock_orphan(sk);
>   sock->sk = NULL;
> @@ -964,26 +974,7 @@ void smc_close_non_accepted(struct sock *sk)
>   if (!sk->sk_lingertime)
>   /* wait for peer closing */
>   sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
> - if (!smc->use_fallback) {
> - smc_close_active(smc);
> - sock_set_flag(sk, SOCK_DEAD);
> - sk->sk_shutdown |= SHUTDOWN_MASK;
> - }
> - sk->sk_prot->unhash(sk);
> - if (smc->clcsock) {
> - struct socket *tcp;
> -
> - tcp = smc->clcsock;
> - smc->clcsock = NULL;
> - sock_release(tcp);
> - }
> - if (smc->use_fallback) {
> - sock_put(sk); /* passive closing */
> - sk->sk_state = SMC_CLOSED;
> - } else {
> - if (sk->sk_state == SMC_CLOSED)
> - smc_conn_free(&smc->conn);
> - }
> + __smc_release(smc);
>   release_sock(sk);
>   sock_put(sk); /* final sock_put */
>  }
> 

-- 
Karsten

(I'm a dude!)

[PATCH net 1/1] net: openvswitch: fix csum updates for MPLS actions

2019-06-27 Thread John Hurley

Skbs may have their checksum value populated by HW. If this is a checksum
calculated over the entire packet then the CHECKSUM_COMPLETE field is
marked. Changes to the data pointer on the skb throughout the network
stack still try to maintain this complete csum value if it is required
through functions such as skb_postpush_rcsum.

The MPLS actions in Open vSwitch modify a CHECKSUM_COMPLETE value when
changes are made to packet data without a push or a pull. This occurs when
the ethertype of the MAC header is changed or when MPLS lse fields are
modified.

The modification is carried out using the csum_partial function to get the
csum of a buffer and add it into the larger checksum. The buffer is an
inversion of the data to be removed followed by the new data. Because the
csum is calculated over 16 bits and these values align with 16 bits, the
effect is the removal of the old value from the CHECKSUM_COMPLETE and
addition of the new value.

However, the csum fed into the function and the outcome of the
calculation are also inverted. This would only make sense if it was the
new value rather than the old that was inverted in the input buffer.

Fix the issue by removing the bit inverts in the csum_partial calculation.

The bug was verified and the fix tested by comparing the folded value of
the updated CHECKSUM_COMPLETE value with the folded value of a full
software checksum calculation (reset skb->csum to 0 and run
skb_checksum_complete(skb)). Prior to the fix the outcomes differed but
after they produce the same result.

Fixes: 25cd9ba0abc0 ("openvswitch: Add basic MPLS support to kernel")
Fixes: bc7cc5999fd3 ("openvswitch: update checksum in {push,pop}_mpls")
Signed-off-by: John Hurley 
Reviewed-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
---
 net/openvswitch/actions.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 151518d..bd13146 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -166,8 +166,7 @@ static void update_ethertype(struct sk_buff *skb, struct 
ethhdr *hdr,
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be16 diff[] = { ~(hdr->h_proto), ethertype };
 
-   skb->csum = ~csum_partial((char *)diff, sizeof(diff),
-   ~skb->csum);
+   skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
}
 
hdr->h_proto = ethertype;
@@ -259,8 +258,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key 
*flow_key,
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be32 diff[] = { ~(stack->label_stack_entry), lse };
 
-   skb->csum = ~csum_partial((char *)diff, sizeof(diff),
- ~skb->csum);
+   skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum);
}
 
stack->label_stack_entry = lse;
-- 
2.7.4

Re: [PATCH 2/2 nf-next] netfilter:nft_meta: add NFT_META_VLAN support

2019-06-27 Thread wenxu



在 2019/6/27 20:35, Pablo Neira Ayuso 写道:
> On Thu, Jun 27, 2019 at 10:09:17AM +0800, we...@ucloud.cn wrote:
>> From: wenxu 
>>
>> This patch provide a meta vlan to set the vlan tag of the packet.
>>
>> for q-in-q vlan id 20:
>> meta vlan set 0x88a8:20
> Actually, I think this is not very useful for stacked vlan since this
> just sets/mangles the existing meta vlan data.
>
> We'll need infrastructure that uses skb_vlan_push() and _pop().
>
> Patch looks good anyway, such infrastructure to push/pop can be added
> later on.
>
> Thanks.

Yes, it just sets/mangles the meta vlan data. I just wonder what should happen if we set it for a
stacked vlan.

Take vlan meta 0x88a8:20. If the packet already contains a 0x8100 vlan tag, do we just push
the

inner vlan and set the vlan meta to the outer 0x88a8:20? Or, if the packet doesn't
contain

any vlan tag, do we add an inner 0x8100:20 tag and an outer 0x88a8:20 tag?

So we should check for this

>
>> set the default 0x8100 vlan type with vlan id 20
>> meta vlan set 20
>>
>> Signed-off-by: wenxu 
>> ---
>>  include/uapi/linux/netfilter/nf_tables.h |  4 
>>  net/netfilter/nft_meta.c | 27 ++-
>>  2 files changed, 30 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/uapi/linux/netfilter/nf_tables.h 
>> b/include/uapi/linux/netfilter/nf_tables.h
>> index 0b18646..cf037f2 100644
>> --- a/include/uapi/linux/netfilter/nf_tables.h
>> +++ b/include/uapi/linux/netfilter/nf_tables.h
>> @@ -797,6 +797,7 @@ enum nft_exthdr_attributes {
>>   * @NFT_META_OIFKIND: packet output interface kind name 
>> (dev->rtnl_link_ops->kind)
>>   * @NFT_META_BRI_PVID: packet input bridge port pvid
>>   * @NFT_META_BRI_VLAN_PROTO: packet input bridge vlan proto
>> + * @NFT_META_VLAN: packet vlan metadata
>>   */
>>  enum nft_meta_keys {
>>  NFT_META_LEN,
>> @@ -829,6 +830,7 @@ enum nft_meta_keys {
>>  NFT_META_OIFKIND,
>>  NFT_META_BRI_PVID,
>>  NFT_META_BRI_VLAN_PROTO,
>> +NFT_META_VLAN,
>>  };
>>  
>>  /**
>> @@ -895,12 +897,14 @@ enum nft_hash_attributes {
>>   * @NFTA_META_DREG: destination register (NLA_U32)
>>   * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
>>   * @NFTA_META_SREG: source register (NLA_U32)
>> + * @NFTA_META_SREG2: source register (NLA_U32)
>>   */
>>  enum nft_meta_attributes {
>>  NFTA_META_UNSPEC,
>>  NFTA_META_DREG,
>>  NFTA_META_KEY,
>>  NFTA_META_SREG,
>> +NFTA_META_SREG2,
>>  __NFTA_META_MAX
>>  };
>>  #define NFTA_META_MAX   (__NFTA_META_MAX - 1)
>> diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
>> index e3adf6a..29a6679 100644
>> --- a/net/netfilter/nft_meta.c
>> +++ b/net/netfilter/nft_meta.c
>> @@ -28,7 +28,10 @@ struct nft_meta {
>>  enum nft_meta_keys  key:8;
>>  union {
>>  enum nft_registers  dreg:8;
>> -enum nft_registers  sreg:8;
>> +struct {
>> +enum nft_registers  sreg:8;
>> +enum nft_registers  sreg2:8;
>> +};
>>  };
>>  };
>>  
>> @@ -312,6 +315,17 @@ static void nft_meta_set_eval(const struct nft_expr 
>> *expr,
>>  skb->secmark = value;
>>  break;
>>  #endif
>> +case NFT_META_VLAN: {
>> +u32 *sreg2 = &regs->data[meta->sreg2];
>> +__be16 vlan_proto;
>> +u16 vlan_tci;
>> +
>> +vlan_tci = nft_reg_load16(sreg);
>> +vlan_proto = nft_reg_load16(sreg2);
>> +
>> +__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
>> +break;
>> +}
>>  default:
>>  WARN_ON(1);
>>  }
>> @@ -321,6 +335,7 @@ static void nft_meta_set_eval(const struct nft_expr 
>> *expr,
>>  [NFTA_META_DREG]= { .type = NLA_U32 },
>>  [NFTA_META_KEY] = { .type = NLA_U32 },
>>  [NFTA_META_SREG]= { .type = NLA_U32 },
>> +[NFTA_META_SREG2]   = { .type = NLA_U32 },
>>  };
>>  
>>  static int nft_meta_get_init(const struct nft_ctx *ctx,
>> @@ -483,6 +498,13 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
>>  case NFT_META_PKTTYPE:
>>  len = sizeof(u8);
>>  break;
>> +case NFT_META_VLAN:
>> +len = sizeof(u16);
>> +priv->sreg2 = nft_parse_register(tb[NFTA_META_SREG2]);
>> +err = nft_validate_register_load(priv->sreg2, len);
>> +if (err < 0)
>> +return err;
>> +break;
>>  default:
>>  return -EOPNOTSUPP;
>>  }
>> @@ -521,6 +543,9 @@ static int nft_meta_set_dump(struct sk_buff *skb, const 
>> struct nft_expr *expr)
>>  goto nla_put_failure;
>>  if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
>>  goto nla_put_failure;
>> +if (priv->key == NFT_META_VLAN &&
>> +nft_dump_register(skb, NFTA_META_SREG2, priv->sreg2))
>> +goto nla_put_failure;
>>  
>>  return 0;
>>  
>> -- 
>> 1.8.3.1
>>

Re: [RFC PATCH 1/1] Documentation: net: dsa: b53: Describe b53 configuration

2019-06-27 Thread Andrew Lunn

On Thu, Jun 27, 2019 at 12:15:06PM +0200, Benedikt Spranger wrote:

Hi Benedikt

> +Configuration with tagging support
> +--
> +
> +The tagging based configuration is desired.
> +
> +To use the b53 DSA driver some configuration need to be performed. As
> +example configuration the following scenarios are used:
> +
> +*single port*
> +  Every switch port acts as a different configurable ethernet port
> +
> +*bridge*
> +  Every switch port is part of one configurable ethernet bridge
> +
> +*gateway*
> +  Every switch port except one upstream port is part of a configurable
> +  ethernet bridge.
> +  The upstream port acts as different configurable ethernet port.
> +
> +All configurations are performed with tools from iproute2, which is available 
> at
> +https://www.kernel.org/pub/linux/utils/net/iproute2/
> +
> +In this documentation the following ethernet ports are used:
> +
> +*eth0*
> +  CPU port

In DSA terminology, this is the master interface. The switch port
which the master is connected to is called the CPU port. So you are
causing confusion with DSA terms here.

> +
> +*LAN1*
> +  a switch port
> +
> +*LAN2*
> +  another switch port
> +
> +*WAN*
> +  A switch port dedicated as upstream port

These are all slave interfaces, when using DSA terms.

> +Further ethernet ports can be configured similar.
> +The configured IPs and networks are:
> +
> +*single port*
> +  *  wan: 192.0.2.1/30 (192.0.2.0 - 192.0.2.3)
> +  * lan1: 192.0.2.5/30 (192.0.2.4 - 192.0.2.7)
> +  * lan2: 192.0.2.9/30 (192.0.2.8 - 192.0.2.11)
> +
> +*bridge*
> +  * br0: 192.0.2.129/25 (192.0.2.128 - 192.0.2.255)
> +
> +*gateway*
> +  * br0: 192.0.2.129/25 (192.0.2.128 - 192.0.2.255)
> +  * wan: 192.0.2.1/30 (192.0.2.0 - 192.0.2.3)
> +
> +single port
> +~~~
> +
> +.. code-block:: sh
> +
> +  # configure each interface
> +  ip addr add 192.0.2.1/30 dev wan
> +  ip addr add 192.0.2.5/30 dev lan1
> +  ip addr add 192.0.2.9/30 dev lan2
> +
> +  # The master interface needs to be brought up before the slave ports.
> +  ip link set eth0 up
> +
> +  # bring up the slave interfaces
> +  ip link set wan up
> +  ip link set lan1 up
> +  ip link set lan2 up
> +
> +bridge
> +~~
> +
> +.. code-block:: sh
> +
> +  # create bridge
> +  ip link add name br0 type bridge
> +
> +  # add ports to bridge
> +  ip link set dev wan master br0
> +  ip link set dev lan1 master br0
> +  ip link set dev lan2 master br0
> +
> +  # configure the bridge
> +  ip addr add 192.0.2.129/25 dev br0
> +
> +  # The master interface needs to be brought up before the slave ports.
> +  ip link set eth0 up
> +
> +  # bring up the slave interfaces
> +  ip link set wan up
> +  ip link set lan1 up
> +  ip link set lan2 up

I would probably do this in a different order. Bring the master up
first, then the slaves. Then enslave the slaves to bridge, and lastly
configure the bridge.

> +
> +  # bring up the bridge
> +  ip link set dev br0 up
> +
> +gateway
> +~~~
> +
> +.. code-block:: sh
> +
> +  # create bridge
> +  ip link add name br0 type bridge
> +
> +  # add ports to bridge
> +  ip link set dev lan1 master br0
> +  ip link set dev lan2 master br0
> +
> +  # configure the bridge
> +  ip addr add 192.0.2.129/25 dev br0
> +
> +  # configure the upstream port
> +  ip addr add 192.0.2.1/30 dev wan
> +
> +  # The master interface needs to be brought up before the slave ports.
> +  ip link set eth0 up
> +
> +  # bring up the slave interfaces
> +  ip link set wan up
> +  ip link set lan1 up
> +  ip link set lan2 up
> +
> +  # bring up the bridge
> +  ip link set dev br0 up

It would be good to add a note that there is nothing specific to the
B53 here. This same process will work for all DSA drivers which
support tagging, which is actually the majority.

I also tell people that once you configure the master interface up,
they should just use the slave interfaces as normal Linux
interfaces. The fact they are on a switch does not matter, and should
not matter. Just use them as normal.

Andrew

[PATCH net-next 10/16] mlxsw: spectrum: PTP: Add PTP initialization / finalization

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

Add two ptp_ops: init and fini, to initialize and finalize the PTP
subsystem. Call as appropriate from mlxsw_sp_init() and _fini().

Lay the groundwork for Spectrum-1 support. On Spectrum-1, the received
timestamped packets and their corresponding timestamps arrive
independently, and need to be matched up. Introduce the related data types
and add to struct mlxsw_sp_ptp_state the hash table that will keep the
unmatched entries.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c| 24 ++-
 .../net/ethernet/mellanox/mlxsw/spectrum.h|  2 +
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 70 +++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h| 29 +++-
 4 files changed, 122 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6a907e491868..6cb7aeac0657 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -152,6 +152,9 @@ struct mlxsw_sp_ptp_ops {
(*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev);
void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock);
 
+   struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp);
+   void (*fini)(struct mlxsw_sp_ptp_state *ptp_state);
+
/* Notify a driver that a packet that might be PTP was received. Driver
 * is responsible for freeing the passed-in SKB.
 */
@@ -4429,6 +4432,8 @@ static int mlxsw_sp_basic_trap_groups_set(struct 
mlxsw_core *mlxsw_core)
 static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
.clock_init = mlxsw_sp1_ptp_clock_init,
.clock_fini = mlxsw_sp1_ptp_clock_fini,
+   .init   = mlxsw_sp1_ptp_init,
+   .fini   = mlxsw_sp1_ptp_fini,
.receive= mlxsw_sp1_ptp_receive,
.transmitted= mlxsw_sp1_ptp_transmitted,
 };
@@ -4436,6 +4441,8 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
 static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
.clock_init = mlxsw_sp2_ptp_clock_init,
.clock_fini = mlxsw_sp2_ptp_clock_fini,
+   .init   = mlxsw_sp2_ptp_init,
+   .fini   = mlxsw_sp2_ptp_fini,
.receive= mlxsw_sp2_ptp_receive,
.transmitted= mlxsw_sp2_ptp_transmitted,
 };
@@ -4549,6 +4556,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
}
}
 
+   if (mlxsw_sp->clock) {
+   /* NULL is a valid return value from ptp_ops->init */
+   mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp);
+   if (IS_ERR(mlxsw_sp->ptp_state)) {
+   err = PTR_ERR(mlxsw_sp->ptp_state);
+   dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize 
PTP\n");
+   goto err_ptp_init;
+   }
+   }
+
/* Initialize netdevice notifier after router and SPAN is initialized,
 * so that the event handler can use router structures and call SPAN
 * respin.
@@ -4579,6 +4596,9 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 err_dpipe_init:
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
 err_netdev_notifier:
+   if (mlxsw_sp->clock)
+   mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
+err_ptp_init:
if (mlxsw_sp->clock)
mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
 err_ptp_clock_init:
@@ -4659,8 +4679,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
-   if (mlxsw_sp->clock)
+   if (mlxsw_sp->clock) {
+   mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+   }
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
mlxsw_sp_nve_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 139fb1c53f96..7e1808179a2a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -136,6 +136,7 @@ struct mlxsw_sp_acl_tcam_ops;
 struct mlxsw_sp_nve_ops;
 struct mlxsw_sp_sb_vals;
 struct mlxsw_sp_port_type_speed_ops;
+struct mlxsw_sp_ptp_state;
 struct mlxsw_sp_ptp_ops;
 
 struct mlxsw_sp {
@@ -157,6 +158,7 @@ struct mlxsw_sp {
struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
struct mlxsw_sp_ptp_clock *clock;
+   struct mlxsw_sp_ptp_state *ptp_state;
 
struct mlxsw_sp_counter_pool *counter_pool;
struct {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index

[PATCH net-next 03/16] mlxsw: reg: Add Time Precision Packet Timestamping Reading

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

The MTPPTR is used for reading the per port PTP timestamp FIFO.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 110 ++
 1 file changed, 110 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5c5f63289468..197599890bdf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9185,6 +9185,115 @@ static inline void mlxsw_reg_mtpppc_pack(char *payload, 
u16 ing, u16 egr)
mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
 }
 
+/* MTPPTR - Time Precision Packet Timestamping Reading
+ * ---
+ * The MTPPTR is used for reading the per port PTP timestamp FIFO.
+ * There is a trap for packets which are latched to the timestamp FIFO, thus 
the
+ * SW knows which FIFO to read. Note that packets enter the FIFO before being
+ * trapped. The sequence number is used to synchronize the timestamp FIFO
+ * entries and the trapped packets.
+ * Reserved when Spectrum-2.
+ */
+
+#define MLXSW_REG_MTPPTR_ID 0x9091
+#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */
+#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */
+#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4
+#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN +  \
+   MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT)
+
+MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN);
+
+/* reg_mtpptr_local_port
+ * Not supported for CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_mtpptr_dir {
+   MLXSW_REG_MTPPTR_DIR_INGRESS,
+   MLXSW_REG_MTPPTR_DIR_EGRESS,
+};
+
+/* reg_mtpptr_dir
+ * Direction.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1);
+
+/* reg_mtpptr_clr
+ * Clear the records.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1);
+
+/* reg_mtpptr_num_rec
+ * Number of valid records in the response
+ * Range 0.. cap_ptp_timestamp_fifo
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4);
+
+/* reg_mtpptr_rec_message_type
+ * MessageType field as defined by IEEE 1588 Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type,
+MLXSW_REG_MTPPTR_BASE_LEN, 8, 4,
+MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_domain_number
+ * DomainNumber field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number,
+MLXSW_REG_MTPPTR_BASE_LEN, 0, 8,
+MLXSW_REG_MTPPTR_REC_LEN, 0, false);
+
+/* reg_mtpptr_rec_sequence_id
+ * SequenceId field as defined by IEEE 1588
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id,
+MLXSW_REG_MTPPTR_BASE_LEN, 0, 16,
+MLXSW_REG_MTPPTR_REC_LEN, 0x4, false);
+
+/* reg_mtpptr_rec_timestamp_high
+ * Timestamp of when the PTP packet has passed through the port Units of PLL
+ * clock time.
+ * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high,
+MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+MLXSW_REG_MTPPTR_REC_LEN, 0x8, false);
+
+/* reg_mtpptr_rec_timestamp_low
+ * See rec_timestamp_high.
+ * Access: RO
+ */
+MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low,
+MLXSW_REG_MTPPTR_BASE_LEN, 0, 32,
+MLXSW_REG_MTPPTR_REC_LEN, 0xC, false);
+
+static inline void mlxsw_reg_mtpptr_unpack(const char *payload,
+  unsigned int rec,
+  u8 *p_message_type,
+  u8 *p_domain_number,
+  u16 *p_sequence_id,
+  u64 *p_timestamp)
+{
+   u32 timestamp_high, timestamp_low;
+
+   *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec);
+   *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec);
+   *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec);
+   timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec);
+   timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec);
+   *p_timestamp = (u64)timestamp_high << 32 | timestamp_low;
+}
+
 /* MTPTPT - Monitoring Precision Time Protocol Trap Register
  * -
  * This register is used for configuring under which trap to deliver PTP
@@ -10292,6 +10401,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = 
{
MLXSW_REG(mgpc),
MLXSW_REG(mprs),

[PATCH net-next 02/16] mlxsw: reg: Add Monitoring Precision Time Protocol Trap Register

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

This register is used for configuring under which trap to deliver PTP
packets depending on type of the packet.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 971e336aa9ac..5c5f63289468 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9185,6 +9185,44 @@ static inline void mlxsw_reg_mtpppc_pack(char *payload, 
u16 ing, u16 egr)
mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
 }
 
+/* MTPTPT - Monitoring Precision Time Protocol Trap Register
+ * -
+ * This register is used for configuring under which trap to deliver PTP
+ * packets depending on type of the packet.
+ */
+#define MLXSW_REG_MTPTPT_ID 0x9092
+#define MLXSW_REG_MTPTPT_LEN 0x08
+
+MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN);
+
+enum mlxsw_reg_mtptpt_trap_id {
+   MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+   MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+};
+
+/* reg_mtptpt_trap_id
+ * Trap id.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4);
+
+/* reg_mtptpt_message_type
+ * Bitwise vector of PTP message types to trap. This is a necessary but
+ * non-sufficient condition since need to enable also per port. See MTPPPC.
+ * Message types are defined by IEEE 1588 Each bit corresponds to a value (e.g.
+ * Bit0: Sync, Bit1: Delay_Req)
+ */
+MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mtptptp_pack(char *payload,
+ enum mlxsw_reg_mtptpt_trap_id trap_id,
+ u16 message_type)
+{
+   MLXSW_REG_ZERO(mtptpt, payload);
+   mlxsw_reg_mtptpt_trap_id_set(payload, trap_id);
+   mlxsw_reg_mtptpt_message_type_set(payload, message_type);
+}
+
 /* MGPIR - Management General Peripheral Information Register
  * --
  * MGPIR register allows software to query the hardware and
@@ -10254,6 +10292,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = 
{
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
MLXSW_REG(mtpppc),
+   MLXSW_REG(mtptpt),
MLXSW_REG(mgpir),
MLXSW_REG(tngcr),
MLXSW_REG(tnumt),
-- 
2.20.1

[PATCH net-next 00/16] mlxsw: PTP timestamping support

2019-06-27 Thread Ido Schimmel

From: Ido Schimmel 

This is the second patchset adding PTP support in mlxsw. Next patchset
will add PTP shapers which are required to maintain accuracy under rates
lower than 40Gb/s, while subsequent patchsets will add tracepoints and
selftests.

Petr says:

This patch set introduces support for retrieving and processing hardware
timestamps for PTP packets.

The way PTP timestamping works on Spectrum-1 is that there are two queues
associated with each front panel port. When a packet is timestamped, the
timestamp is put to one of the queues: timestamps for transmitted packets
to one and for received packets to the other. Activity on these queues is
signaled through the events PTP_ING_FIFO and PTP_EGR_FIFO.

Packets themselves arrive through two traps: PTP0 and PTP1. It is possible
to configure which PTP messages should be trapped under which PTP trap. On
Spectrum systems, mlxsw will use PTP0 for event messages (which need
timestamping), and PTP1 for general messages (which do not).

There are therefore four relevant traps: receipt of a PTP event or general
message, and receipt of a timestamp for a transmitted or received PTP
packet. The obvious place to put the new logic is a custom listener
for the mentioned traps.

Besides handling ingress traffic (be it packets or timestamps), the driver
also needs to handle timestamping of transmitted packets. One option would
be to invoke the relevant logic from mlxsw_core_ptp_transmitted(). However
on Spectrum-2, the timestamps are actually delivered through the completion
queue, and for that reason this patchset opts to invoke the logic from the
PCI code, via core and the driver, to a chip-specific operation. That way
the invocation will be done in a place where a Spectrum-2 implementation
will have an opportunity to extract the timestamp.

As indicated above, the PTP FIFO signaling happens independently from
packet delivery. A packet corresponding to any given timestamp could be
delivered sooner or later than the timestamp itself. Additionally, the
queues are only four elements deep, and it is therefore possible that the
timestamp for a delivered packet never arrives at all. Similarly a PTP
packet might be dropped due to CPU traffic pressure, and never be delivered
even if the corresponding timestamp was.

The driver thus needs to hold a cache of as-yet-unmatched SKBs and
timestamps. The first piece to arrive (be it timestamp or SKB) is put to
this cache. When the other piece arrives, the timestamp is attached to the
SKB and that is passed on. A delayed work is run at regular intervals to
prune the old unmatched entries.

As mentioned above, the mechanism for timestamp delivery changes on
Spectrum-2, where timestamps are part of completion queue elements, and all
packets are timestamped. All this bookkeeping is therefore unnecessary on
Spectrum-2. For this reason, this patchset spends some time introducing
Spectrum-1 specific artifacts such as a possibility to register a given
trap only on Spectrum-1.

Patches #1-#4 describe new registers.

Patches #5 and #6 introduce the possibility to register certain traps
only on some systems. The list of Spectrum-1 specific traps is left empty
at this point.

Patch #7 hooks into packet receive path by registering PTP traps
and appropriate handlers (that however do nothing of substance yet).

Patch #8 adds a helper to allow storing custom data to SKB->cb.

Patch #9 adds a call into the PCI completion queue handler that invokes,
via core and spectrum code, a PTP transmit handler. (Which also does not do
anything interesting yet.)

Patch #10 introduces code to invoke PTP initialization and adds data types
for the cache of unmatched entries.

Patches #11 and #12 implement the timestamping itself. In #11, the PHC
spin_locks are converted to _bh variants, because unlike normal PHC path,
which runs in process context, timestamp processing runs as soft interrupt.
Then #12 introduces the code for saving and retrieval of unmatched entries,
invokes PTP classifier to identify packets of interest, registers timestamp
FIFO events, and handles decoding and attaching timestamps to packets.

Patch #13 introduces a garbage collector for left-behind entries that have
not been matched for about a second.

In patch #14, PTP message types are configured to arrive as PTP0
(events) or PTP1 (everything else) as appropriate. At this point, the PTP
packets start arriving through the traps, but because PTP is disabled and
there is no way to enable it yet, they are always just passed to the usual
receive path right away.

Finally patches #15 and #16 add the plumbing to actually make it possible
to enable this code through SIOCSHWTSTAMP ioctl, and to advertise the
hardware timestamping capabilities through ethtool.

Petr Machata (16):
  mlxsw: reg: Add Monitoring Time Precision Packet Port Configuration
Register
  mlxsw: reg: Add Monitoring Precision Time Protocol Trap Register
  mlxsw: reg: Add Time Precision Packet Timestamping Reading
  mlxsw

[PATCH net-next 09/16] mlxsw: pci: PTP: Hook into packet transmit path

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

On Spectrum-1, timestamps are delivered separately from the packets, and
need to be paired up. Therefore, at some point after mlxsw_sp_port_xmit()
is invoked, it is necessary to involve the chip-specific driver code to
allow it to do the necessary bookkeeping and matching.

On Spectrum-2, timestamps are delivered in CQE. For that reason,
position the point of driver involvement into mlxsw_pci_cqe_sdq_handle()
to make it hopefully easier to extend for Spectrum-2 in the future.

To tell the driver what port the packet was sent on, keep tx_info
in SKB control buffer.

Introduce a new driver core interface mlxsw_core_ptp_transmitted(), a
driver callback ptp_transmitted, and a PTP op transmitted. The callee is
responsible for taking care of releasing the SKB passed to the new
interfaces, and correspondingly have the new stub callbacks just call
dev_kfree_skb_any().

Follow-up patches will introduce the actual content into
mlxsw_sp1_ptp_transmitted() in particular.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/core.c|  9 +
 drivers/net/ethernet/mellanox/mlxsw/core.h| 10 ++
 drivers/net/ethernet/mellanox/mlxsw/pci.c | 17 -
 .../net/ethernet/mellanox/mlxsw/spectrum.c| 19 +++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c|  6 ++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h| 15 +++
 6 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c 
b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 30e0526a9cf6..17ceac7505e5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1245,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core 
*mlxsw_core, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(mlxsw_core_skb_transmit);
 
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+   struct sk_buff *skb, u8 local_port)
+{
+   if (mlxsw_core->driver->ptp_transmitted)
+   mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb,
+   local_port);
+}
+EXPORT_SYMBOL(mlxsw_core_ptp_transmitted);
+
 static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a,
   const struct mlxsw_rx_listener *rxl_b)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h 
b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 06babcc58c7a..8efcff4b59cb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core 
*mlxsw_core,
  const struct mlxsw_tx_info *tx_info);
 int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info);
+void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core,
+   struct sk_buff *skb, u8 local_port);
 
 struct mlxsw_rx_listener {
void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
@@ -296,6 +298,13 @@ struct mlxsw_driver {
 u64 *p_linear_size);
int (*params_register)(struct mlxsw_core *mlxsw_core);
void (*params_unregister)(struct mlxsw_core *mlxsw_core);
+
+   /* Notify a driver that a timestamped packet was transmitted. Driver
+* is responsible for freeing the passed-in SKB.
+*/
+   void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core,
+   struct sk_buff *skb, u8 local_port);
+
u8 txhdr_len;
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
@@ -419,6 +428,7 @@ enum mlxsw_devlink_param_id {
 };
 
 struct mlxsw_skb_cb {
+   struct mlxsw_tx_info tx_info;
 };
 
 static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c 
b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 6acb9bbfdf89..051b19388a81 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -508,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci 
*mlxsw_pci,
 {
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
+   struct mlxsw_tx_info tx_info;
char *wqe;
struct sk_buff *skb;
int i;
 
spin_lock(&q->lock);
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+   tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info;
skb = elem_info->u.sdq.skb;
wqe = elem_info->elem;
for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
-   dev_kfree_skb_any(skb);
+
+   if (unlikely(!tx_info.is_emad &&
+skb_shin

[PATCH net-next 05/16] mlxsw: spectrum: Extract a helper for trap registration

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

On Spectrum-1, timestamps for PTP packets are delivered through queues
of ingress and egress timestamps. There are two event traps
corresponding to activity on each of those queues. This mechanism is
absent on Spectrum-2, and therefore the traps should only be registered
on Spectrum-1.

Extract out of mlxsw_sp_traps_init() a generic helper,
mlxsw_sp_traps_register(), and likewise with _unregister(). The new helpers
will later be called with Spectrum-1-specific traps.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c| 48 +--
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3e8593824b33..0119efe0ea7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4251,22 +4251,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core 
*mlxsw_core)
return 0;
 }
 
-static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp,
+  const struct mlxsw_listener listeners[],
+  size_t listeners_count)
 {
int i;
int err;
 
-   err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
-   if (err)
-   return err;
-
-   err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
-   if (err)
-   return err;
-
-   for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+   for (i = 0; i < listeners_count; i++) {
err = mlxsw_core_trap_register(mlxsw_sp->core,
-  &mlxsw_sp_listener[i],
+  &listeners[i],
   mlxsw_sp);
if (err)
goto err_listener_register;
@@ -4277,23 +4271,47 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp 
*mlxsw_sp)
 err_listener_register:
for (i--; i >= 0; i--) {
mlxsw_core_trap_unregister(mlxsw_sp->core,
-  &mlxsw_sp_listener[i],
+  &listeners[i],
   mlxsw_sp);
}
return err;
 }
 
-static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_listener listeners[],
+ size_t listeners_count)
 {
int i;
 
-   for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+   for (i = 0; i < listeners_count; i++) {
mlxsw_core_trap_unregister(mlxsw_sp->core,
-  &mlxsw_sp_listener[i],
+  &listeners[i],
   mlxsw_sp);
}
 }
 
+static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
+   int err;
+
+   err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
+   if (err)
+   return err;
+
+   err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
+   if (err)
+   return err;
+
+   return mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
+  ARRAY_SIZE(mlxsw_sp_listener));
+}
+
+static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+   mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+}
+
 #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe
 
 static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
-- 
2.20.1

[PATCH net-next 04/16] mlxsw: reg: Add Monitoring Global Configuration Register

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

This register serves to configure global parameters of certain
monitoring operations. The following patches will use it to configure
that when PTP timestamps are delivered through the PTP FIFO traps, the
FIFO in question is cleared as well.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 197599890bdf..8de9333e6eb1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9148,6 +9148,32 @@ static inline void mlxsw_reg_mprs_pack(char *payload, 
u16 parsing_depth,
mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
 }
 
+/* MOGCR - Monitoring Global Configuration Register
+ * 
+ */
+#define MLXSW_REG_MOGCR_ID 0x9086
+#define MLXSW_REG_MOGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN);
+
+/* reg_mogcr_ptp_iftc
+ * PTP Ingress FIFO Trap Clear
+ * The PTP_ING_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
+
+/* reg_mogcr_ptp_eftc
+ * PTP Egress FIFO Trap Clear
+ * The PTP_EGR_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);
+
 /* MTPPPC - Time Precision Packet Port Configuration
  * -
  * This register serves for configuration of which PTP messages should be
@@ -10400,6 +10426,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = 
{
MLXSW_REG(mcda),
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
+   MLXSW_REG(mogcr),
MLXSW_REG(mtpppc),
MLXSW_REG(mtpptr),
MLXSW_REG(mtptpt),
-- 
2.20.1

[PATCH net-next 11/16] mlxsw: spectrum: PTP: Disable BH when working with PHC

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

Up until now, the PTP hardware clock code was only invoked in the process
context (SYS_clock_adjtime -> do_clock_adjtime -> k_clock::clock_adj ->
pc_clock_adjtime -> posix_clock_operations::clock_adjtime ->
ptp_clock_info::adjtime -> mlxsw_spectrum).

In order to enable HW timestamping, which is tied into trap handling, it
will be necessary to take the clock lock from the PCI queue handler
tasklets as well.

Therefore use the _bh variants when handling the clock lock. Incidentally,
Documentation/ptp/ptp.txt recommends _irqsave variants, but that's
unnecessarily strong for our needs.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 24 +--
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index 6725a4d53f87..1eb6eefa1afc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -117,9 +117,9 @@ mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, 
u64 nsec)
next_sec = div_u64(nsec, NSEC_PER_SEC) + 1;
next_sec_in_nsec = next_sec * NSEC_PER_SEC;
 
-   spin_lock(&clock->lock);
+   spin_lock_bh(&clock->lock);
cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec);
-   spin_unlock(&clock->lock);
+   spin_unlock_bh(&clock->lock);
 
mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles);
err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl);
@@ -152,11 +152,11 @@ static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info 
*ptp, long scaled_ppm)
adj *= ppb;
diff = div_u64(adj, NSEC_PER_SEC);
 
-   spin_lock(&clock->lock);
+   spin_lock_bh(&clock->lock);
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
   clock->nominal_c_mult + diff;
-   spin_unlock(&clock->lock);
+   spin_unlock_bh(&clock->lock);
 
return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? -ppb : ppb);
 }
@@ -167,10 +167,10 @@ static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info 
*ptp, s64 delta)
container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
u64 nsec;
 
-   spin_lock(&clock->lock);
+   spin_lock_bh(&clock->lock);
timecounter_adjtime(&clock->tc, delta);
nsec = timecounter_read(&clock->tc);
-   spin_unlock(&clock->lock);
+   spin_unlock_bh(&clock->lock);
 
return mlxsw_sp1_ptp_phc_settime(clock, nsec);
 }
@@ -183,10 +183,10 @@ static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info 
*ptp,
container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
u64 cycles, nsec;
 
-   spin_lock(&clock->lock);
+   spin_lock_bh(&clock->lock);
cycles = __mlxsw_sp1_ptp_read_frc(clock, sts);
nsec = timecounter_cyc2time(&clock->tc, cycles);
-   spin_unlock(&clock->lock);
+   spin_unlock_bh(&clock->lock);
 
*ts = ns_to_timespec64(nsec);
 
@@ -200,10 +200,10 @@ static int mlxsw_sp1_ptp_settime(struct ptp_clock_info 
*ptp,
container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info);
u64 nsec = timespec64_to_ns(ts);
 
-   spin_lock(&clock->lock);
+   spin_lock_bh(&clock->lock);
timecounter_init(&clock->tc, &clock->cycles, nsec);
nsec = timecounter_read(&clock->tc);
-   spin_unlock(&clock->lock);
+   spin_unlock_bh(&clock->lock);
 
return mlxsw_sp1_ptp_phc_settime(clock, nsec);
 }
@@ -225,9 +225,9 @@ static void mlxsw_sp1_ptp_clock_overflow(struct work_struct 
*work)
 
clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work);
 
-   spin_lock(&clock->lock);
+   spin_lock_bh(&clock->lock);
timecounter_read(&clock->tc);
-   spin_unlock(&clock->lock);
+   spin_unlock_bh(&clock->lock);
mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period);
 }
 
-- 
2.20.1

[PATCH net-next 01/16] mlxsw: reg: Add Monitoring Time Precision Packet Port Configuration Register

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

This register serves for configuration of which PTP messages should be
timestamped. This is a global configuration, despite the register name.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 38 +++
 1 file changed, 38 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index e5f6bfd8a35a..971e336aa9ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -9148,6 +9148,43 @@ static inline void mlxsw_reg_mprs_pack(char *payload, 
u16 parsing_depth,
mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
 }
 
+/* MTPPPC - Time Precision Packet Port Configuration
+ * -
+ * This register serves for configuration of which PTP messages should be
+ * timestamped. This is a global configuration, despite the register name.
+ *
+ * Reserved when Spectrum-2.
+ */
+#define MLXSW_REG_MTPPPC_ID 0x9090
+#define MLXSW_REG_MTPPPC_LEN 0x28
+
+MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN);
+
+/* reg_mtpppc_ing_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at ingress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16);
+
+/* reg_mtpppc_egr_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at egress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr)
+{
+   MLXSW_REG_ZERO(mtpppc, payload);
+   mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing);
+   mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
+}
+
 /* MGPIR - Management General Peripheral Information Register
  * --
  * MGPIR register allows software to query the hardware and
@@ -10216,6 +10253,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = 
{
MLXSW_REG(mcda),
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
+   MLXSW_REG(mtpppc),
MLXSW_REG(mgpir),
MLXSW_REG(tngcr),
MLXSW_REG(tnumt),
-- 
2.20.1

[PATCH net-next 13/16] mlxsw: spectrum: PTP: Garbage-collect unmatched entries

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

On Spectrum-1, timestamped PTP packets and the corresponding timestamps
need to be kept in caches until both are available, at which point they are
matched up and packets forwarded as appropriate. However, not all packets
will ever see their timestamp, and not all timestamps will ever see their
packet. It is therefore necessary to dispose of such abandoned entries.

To that end, introduce a garbage collector to collect entries that have
not had their counterpart turn up within about a second. The GC
maintains a monotonically increasing value of GC cycle. Every entry that
is put to the hash table is annotated with the GC cycle at which it
should be collected. When the GC runs, it walks the hash table, and
collects the objects according to their GC cycle annotation.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 86 +++
 1 file changed, 86 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index e87066f65860..f0f0c20ecc2e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -19,9 +19,19 @@
 #define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ   156257 /* 6.4nSec */
 #define MLXSW_SP1_PTP_CLOCK_MASK   64
 
+#define MLXSW_SP1_PTP_HT_GC_INTERVAL   500 /* ms */
+
+/* How long, approximately, should the unmatched entries stay in the hash table
+ * before they are collected. Should be evenly divisible by the GC interval.
+ */
+#define MLXSW_SP1_PTP_HT_GC_TIMEOUT1000 /* ms */
+
 struct mlxsw_sp_ptp_state {
+   struct mlxsw_sp *mlxsw_sp;
struct rhashtable unmatched_ht;
spinlock_t unmatched_lock; /* protects the HT */
+   struct delayed_work ht_gc_dw;
+   u32 gc_cycle;
 };
 
 struct mlxsw_sp1_ptp_key {
@@ -38,6 +48,7 @@ struct mlxsw_sp1_ptp_unmatched {
struct rcu_head rcu;
struct sk_buff *skb;
u64 timestamp;
+   u32 gc_cycle;
 };
 
 static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = {
@@ -353,6 +364,7 @@ mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
 struct sk_buff *skb,
 u64 timestamp)
 {
+   int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL;
struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state;
struct mlxsw_sp1_ptp_unmatched *unmatched;
struct mlxsw_sp1_ptp_unmatched *conflict;
@@ -364,6 +376,7 @@ mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
unmatched->key = key;
unmatched->skb = skb;
unmatched->timestamp = timestamp;
+   unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles;
 
conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht,
&unmatched->ht_node,
@@ -396,6 +409,8 @@ mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp,
  * 1) When a packet is matched with its timestamp.
  * 2) In several situation when it is necessary to immediately pass on
  *an SKB without a timestamp.
+ * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish().
+ *This case is similar to 2) above.
  */
 static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp,
struct sk_buff *skb, u8 local_port,
@@ -637,6 +652,72 @@ void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false);
 }
 
+static void
+mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state,
+   struct mlxsw_sp1_ptp_unmatched *unmatched)
+{
+   int err;
+
+   /* If an unmatched entry has an SKB, it has to be handed over to the
+* networking stack. This is usually done from a trap handler, which is
+* invoked in a softirq context. Here we are going to do it in process
+* context. If that were to be interrupted by a softirq, it could cause
+* a deadlock when an attempt is made to take an already-taken lock
+* somewhere along the sending path. Disable softirqs to prevent this.
+*/
+   local_bh_disable();
+
+   spin_lock(&ptp_state->unmatched_lock);
+   err = rhashtable_remove_fast(&ptp_state->unmatched_ht,
+&unmatched->ht_node,
+mlxsw_sp1_ptp_unmatched_ht_params);
+   spin_unlock(&ptp_state->unmatched_lock);
+
+   if (err)
+   /* The packet was matched with timestamp during the walk. */
+   goto out;
+
+   /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While
+* the comment at that function states that it can only be called in
+* soft IRQ context, this pattern of local_bh_disable() +
+* netif_receive_skb()

[PATCH net-next 06/16] mlxsw: spectrum: Add support for traps specific to Spectrum-1

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

On Spectrum-1, timestamps for PTP packets are delivered through queues
of ingress and egress timestamps. There are two event traps
corresponding to activity on each of those queues. This mechanism is
absent on Spectrum-2, and therefore the traps should only be registered
on Spectrum-1.

Carry a chip-specific listener array in mlxsw_sp->listeners and
listeners_count. Register listeners from that array in
mlxsw_sp_traps_init(). Add a new listener array for Spectrum-1 traps and
configure the newly-added mlxsw_sp->listeners with this array.

The listener array is empty for now, the events will be added in a later
patch.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c| 25 +--
 .../net/ethernet/mellanox/mlxsw/spectrum.h|  2 ++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 0119efe0ea7a..91486193454a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4114,6 +4114,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false),
 };
 
+static const struct mlxsw_listener mlxsw_sp1_listener[] = {
+};
+
 static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 {
char qpcr_pl[MLXSW_REG_QPCR_LEN];
@@ -4302,12 +4305,28 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp 
*mlxsw_sp)
if (err)
return err;
 
-   return mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
-  ARRAY_SIZE(mlxsw_sp_listener));
+   err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+   if (err)
+   return err;
+
+   err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count);
+   if (err)
+   goto err_extra_traps_init;
+
+   return 0;
+
+err_extra_traps_init:
+   mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
+ ARRAY_SIZE(mlxsw_sp_listener));
+   return err;
 }
 
 static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
 {
+   mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners,
+ mlxsw_sp->listeners_count);
mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener,
  ARRAY_SIZE(mlxsw_sp_listener));
 }
@@ -4566,6 +4585,8 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals;
mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops;
+   mlxsw_sp->listeners = mlxsw_sp1_listener;
+   mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
 
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 84f4276193b3..9136a86dc55f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -175,6 +175,8 @@ struct mlxsw_sp {
const struct mlxsw_sp_sb_vals *sb_vals;
const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
const struct mlxsw_sp_ptp_ops *ptp_ops;
+   const struct mlxsw_listener *listeners;
+   size_t listeners_count;
 };
 
 static inline struct mlxsw_sp_upper *
-- 
2.20.1

[PATCH net-next 08/16] mlxsw: core: Add support for using SKB control buffer

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

The SKB control buffer is useful (and used) for bookkeeping of information
related to that SKB. Add helpers so that the mlxsw driver(s) can safely use
the buffer as well. The structure is currently empty, individual users will
add members to it as necessary.

Note that SKB allocation functions already clear the buffer, so the cleanup
is only necessary when ndo_start_xmit is called.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/core.h | 9 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 ++
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 2 ++
 3 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h 
b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 6dbb0ede502e..06babcc58c7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -418,4 +418,13 @@ enum mlxsw_devlink_param_id {
MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL,
 };
 
+struct mlxsw_skb_cb {
+};
+
+static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb)
+{
+   BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb));
+   return (struct mlxsw_skb_cb *) skb->cb;
+}
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 84f4077b4b37..a0376d4f94a8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -790,6 +790,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
u64 len;
int err;
 
+   memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info))
return NETDEV_TX_BUSY;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c 
b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index fc4f19167262..bdab96f5bc70 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
u64 len;
int err;
 
+   memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb));
+
if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info))
return NETDEV_TX_BUSY;
 
-- 
2.20.1

[PATCH net-next 14/16] mlxsw: spectrum: PTP: Configure PTP traps and FIFO events

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

Configure MTPTPT to set which message types should arrive under which
PTP trap, and MOGCR to clear the timestamp queue after its contents are
reported through PTP_ING_FIFO or PTP_EGR_FIFO.

With this configuration, PTP packets start arriving through the PTP
traps. However since timestamping is disabled by default and there is
currently no way to enable it, they will not be timestamped.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 58 +++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h|  7 +++
 2 files changed, 65 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index f0f0c20ecc2e..4d6dbac18049 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -718,9 +718,35 @@ static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work)
   MLXSW_SP1_PTP_HT_GC_INTERVAL);
 }
 
+static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp,
+  enum mlxsw_reg_mtptpt_trap_id trap_id,
+  u16 message_type)
+{
+   char mtptpt_pl[MLXSW_REG_MTPTPT_LEN];
+
+   mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl);
+}
+
+static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp,
+ bool clr)
+{
+   char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0};
+   int err;
+
+   err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+   if (err)
+   return err;
+
+   mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr);
+   mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr);
+   return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl);
+}
+
 struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
 {
struct mlxsw_sp_ptp_state *ptp_state;
+   u16 message_type;
int err;
 
ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL);
@@ -735,11 +761,38 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct 
mlxsw_sp *mlxsw_sp)
if (err)
goto err_hashtable_init;
 
+   /* Deliver these message types as PTP0. */
+   message_type = BIT(MLXSW_PTP_MESSAGE_TYPE_SYNC) |
+  BIT(MLXSW_PTP_MESSAGE_TYPE_DELAY_REQ) |
+  BIT(MLXSW_PTP_MESSAGE_TYPE_PDELAY_REQ) |
+  BIT(MLXSW_PTP_MESSAGE_TYPE_PDELAY_RESP);
+   err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0,
+ message_type);
+   if (err)
+   goto err_mtptpt_set;
+
+   /* Everything else is PTP1. */
+   message_type = ~message_type;
+   err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1,
+ message_type);
+   if (err)
+   goto err_mtptpt1_set;
+
+   err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true);
+   if (err)
+   goto err_fifo_clr;
+
INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc);
mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw,
   MLXSW_SP1_PTP_HT_GC_INTERVAL);
return ptp_state;
 
+err_fifo_clr:
+   mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+err_mtptpt1_set:
+   mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
+err_mtptpt_set:
+   rhashtable_destroy(&ptp_state->unmatched_ht);
 err_hashtable_init:
kfree(ptp_state);
return ERR_PTR(err);
@@ -747,7 +800,12 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct 
mlxsw_sp *mlxsw_sp)
 
 void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
 {
+   struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp;
+
cancel_delayed_work_sync(&ptp_state->ht_gc_dw);
+   mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false);
+   mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
+   mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
rhashtable_free_and_destroy(&ptp_state->unmatched_ht,
&mlxsw_sp1_ptp_unmatched_free_fn, NULL);
kfree(ptp_state);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
index 40c9e82e2920..a135c6a0a051 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
@@ -11,6 +11,13 @@ struct mlxsw_sp;
 struct mlxsw_sp_port;
 struct mlxsw_sp_ptp_clock;
 
+enum {
+   MLXSW_PTP_MESSAGE_TYPE_SYNC,
+   MLXSW_PTP_MESSAGE_TYPE_DELAY_REQ,
+   MLXSW_PTP_MESSAGE_TYPE_PDELAY_REQ,
+   MLXSW_PTP_MESSAGE_TYPE_PDELAY_RESP,
+};
+
 #if IS_REACHABLE

[PATCH net-next 07/16] mlxsw: spectrum: PTP: Hook into packet receive path

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

When configured, the Spectrum hardware can recognize PTP packets and
trap them to the CPU using dedicated traps, PTP0 and PTP1.

One reason to get PTP packets under dedicated traps is to have a
separate policer suitable for the amount of PTP traffic expected when
switch is operated as a boundary clock. For this, add two new trap
groups, MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0 and _PTP1, and associate the
two PTP traps with these two groups.

In the driver, specifically for Spectrum-1, event PTP packets will need
to be paired up with their timestamps. Those arrive through a different
set of traps, added later in the patch set. To support this future use,
introduce a new PTP op, ptp_receive.

It is possible to configure which PTP messages should be trapped under
which PTP trap. On Spectrum systems, we will use PTP0 for event
packets (which need timestamping), and PTP1 for control packets (which
do not). Thus configure PTP0 trap with a custom callback that defers to
the ptp_receive op.

Additionally, L2 PTP packets are actually trapped through the LLDP trap,
not through any of the PTP traps. So treat the LLDP trap the same way as
the PTP0 trap. Unlike PTP traps, which are currently still disabled,
LLDP trap is active. Correspondingly, have all the implementations of
the ptp_receive op return true, which the handler treats as a signal to
forward the packet immediately.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h |  2 +
 .../net/ethernet/mellanox/mlxsw/spectrum.c| 49 +++
 .../net/ethernet/mellanox/mlxsw/spectrum.h|  2 +
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c|  6 +++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h| 15 ++
 drivers/net/ethernet/mellanox/mlxsw/trap.h|  2 +
 6 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h 
b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8de9333e6eb1..76ff5b217c04 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5292,6 +5292,8 @@ enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+   MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+   MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
 };
 
 /* reg_htgt_trap_group
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 91486193454a..84f4077b4b37 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -147,6 +147,18 @@ struct mlxsw_sp_mlxfw_dev {
struct mlxsw_sp *mlxsw_sp;
 };
 
+struct mlxsw_sp_ptp_ops {
+   struct mlxsw_sp_ptp_clock *
+   (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev);
+   void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock);
+
+   /* Notify a driver that a packet that might be PTP was received. Driver
+* is responsible for freeing the passed-in SKB.
+*/
+   void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+   u8 local_port);
+};
+
 static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
u16 component_index, u32 *p_max_size,
u8 *p_align_bits, u16 *p_max_write_size)
@@ -3947,8 +3959,8 @@ static void mlxsw_sp_pude_event_func(const struct 
mlxsw_reg_info *reg,
}
 }
 
-static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
- u8 local_port, void *priv)
+void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+  u8 local_port, void *priv)
 {
struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -4022,6 +4034,14 @@ static void mlxsw_sp_rx_listener_sample_func(struct 
sk_buff *skb, u8 local_port,
consume_skb(skb);
 }
 
+static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port,
+void *priv)
+{
+   struct mlxsw_sp *mlxsw_sp = priv;
+
+   mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
+}
+
 #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \
  _is_ctrl, SP_##_trap_group, DISCARD)
@@ -4043,7 +4063,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
/* L2 traps */
MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
-   MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+   MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
+ false, SP_LLDP, DISCARD),
MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),

[PATCH net-next 15/16] mlxsw: spectrum: PTP: Support SIOCGHWTSTAMP, SIOCSHWTSTAMP ioctls

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

The SIOCSHWTSTAMP ioctl configures HW timestamping on a given port.
Dispatch the ioctls to per-chip handlers (which are added to ptp_ops). Find
which PTP messages need to be timestamped and configure MTPPPC
accordingly.

The SIOCGHWTSTAMP ioctl is a getter for the current configuration.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c|  70 ++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 123 ++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h|  34 +
 3 files changed, 227 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 87e7964ba496..f1df3c63af3e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -166,6 +166,11 @@ struct mlxsw_sp_ptp_ops {
 */
void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
u8 local_port);
+
+   int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port,
+   struct hwtstamp_config *config);
+   int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port,
+   struct hwtstamp_config *config);
 };
 
 static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
@@ -1808,6 +1813,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev)
mlxsw_sp_port->local_port);
 }
 
+static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ifreq *ifr)
+{
+   struct hwtstamp_config config;
+   int err;
+
+   if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+   return -EFAULT;
+
+   err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port,
+&config);
+   if (err)
+   return err;
+
+   if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ifreq *ifr)
+{
+   struct hwtstamp_config config;
+   int err;
+
+   err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port,
+&config);
+   if (err)
+   return err;
+
+   if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+   return -EFAULT;
+
+   return 0;
+}
+
+static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+   struct hwtstamp_config config = {0};
+
+   mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config);
+}
+
+static int
+mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+   struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+   switch (cmd) {
+   case SIOCSHWTSTAMP:
+   return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr);
+   case SIOCGHWTSTAMP:
+   return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr);
+   default:
+   return -EOPNOTSUPP;
+   }
+}
+
 static const struct net_device_ops mlxsw_sp_port_netdev_ops = {
.ndo_open   = mlxsw_sp_port_open,
.ndo_stop   = mlxsw_sp_port_stop,
@@ -1823,6 +1887,7 @@ static const struct net_device_ops 
mlxsw_sp_port_netdev_ops = {
.ndo_vlan_rx_kill_vid   = mlxsw_sp_port_kill_vid,
.ndo_set_features   = mlxsw_sp_set_features,
.ndo_get_devlink_port   = mlxsw_sp_port_get_devlink_port,
+   .ndo_do_ioctl   = mlxsw_sp_port_ioctl,
 };
 
 static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
@@ -3680,6 +3745,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp 
*mlxsw_sp, u8 local_port)
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 
cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw);
+   mlxsw_sp_port_ptp_clear(mlxsw_sp_port);
mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
mlxsw_sp->ports[local_port] = NULL;
@@ -4479,6 +4545,8 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
.fini   = mlxsw_sp1_ptp_fini,
.receive= mlxsw_sp1_ptp_receive,
.transmitted= mlxsw_sp1_ptp_transmitted,
+   .hwtstamp_get   = mlxsw_sp1_ptp_hwtstamp_get,
+   .hwtstamp_set   = mlxsw_sp1_ptp_hwtstamp_set,
 };
 
 static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
@@ -4488,6 +4556,8 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
.fini   = mlxsw_sp2_ptp_fini,
.receive= mlxsw_sp2_ptp_receive,
.transmitted= mlxsw_sp2_ptp_transmi

[PATCH net-next 12/16] mlxsw: spectrum: PTP: Support timestamping on Spectrum-1

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

On Spectrum-1, timestamps arrive through a pair of dedicated events:
MLXSW_TRAP_ID_PTP_ING_FIFO and _EGR_FIFO. The payload delivered with
those traps is contents of the timestamp FIFO at a given port in a given
direction. Add a Spectrum-1-specific handler for these two events which
decodes the timestamps and forwards them to the PTP module.

Add a function that parses a packet, dispatching to ptp_classify_raw(),
and decodes PTP message type, domain number, and sequence ID. Add a new
mlxsw dependency on the PTP classifier.

Add helpers that can store and retrieve unmatched timestamps and SKBs to
the hash table added in a preceding patch.

Add the matching code itself: upon arrival of a timestamp or a packet,
look up the corresponding unmatched entry, and match it up. If there is
none, add a new unmatched entry. This logic is the same on ingress as on
egress.

Packets and timestamps that never matched need to be eventually disposed
of. A garbage collector added in a follow-up patch will take care of
that. Since currently all this code is turned off, no crud will
accumulate in the hash table.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig   |   1 +
 .../net/ethernet/mellanox/mlxsw/spectrum.c|  43 +++
 .../net/ethernet/mellanox/mlxsw/spectrum.h|   5 +
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 324 +-
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h|  13 +
 drivers/net/ethernet/mellanox/mlxsw/trap.h|   4 +
 6 files changed, 388 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig 
b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index b5d64aed259e..06c80343d9ed 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -84,6 +84,7 @@ config MLXSW_SPECTRUM
select OBJAGG
select MLXFW
imply PTP_1588_CLOCK
+   select NET_PTP_CLASSIFY if PTP_1588_CLOCK
default m
---help---
  This driver supports Mellanox Technologies Spectrum Ethernet
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6cb7aeac0657..87e7964ba496 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3970,6 +3970,46 @@ static void mlxsw_sp_pude_event_func(const struct 
mlxsw_reg_info *reg,
}
 }
 
+static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp,
+ char *mtpptr_pl, bool ingress)
+{
+   u8 local_port;
+   u8 num_rec;
+   int i;
+
+   local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl);
+   num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl);
+   for (i = 0; i < num_rec; ++i) {
+   u8 domain_number;
+   u8 message_type;
+   u16 sequence_id;
+   u64 timestamp;
+
+   mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type,
+   &domain_number, &sequence_id,
+   &timestamp);
+   mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port,
+   message_type, domain_number,
+   sequence_id, timestamp);
+   }
+}
+
+static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg,
+ char *mtpptr_pl, void *priv)
+{
+   struct mlxsw_sp *mlxsw_sp = priv;
+
+   mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true);
+}
+
+static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg,
+ char *mtpptr_pl, void *priv)
+{
+   struct mlxsw_sp *mlxsw_sp = priv;
+
+   mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false);
+}
+
 void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
   u8 local_port, void *priv)
 {
@@ -4151,6 +4191,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 };
 
 static const struct mlxsw_listener mlxsw_sp1_listener[] = {
+   /* Events */
+   MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0),
+   MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0),
 };
 
 static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 7e1808179a2a..7f8427c1a997 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -266,6 +266,11 @@ struct mlxsw_sp_port {
unsigned acl_rule_count;
struct mlxsw_sp_acl_block *ing_acl_block;
struct mlxsw_sp_acl_block *eg_acl_block;
+   struct {
+   struct hwtstamp_config hwtstamp_co

[PATCH net-next 16/16] mlxsw: spectrum: PTP: Support ethtool get_ts_info

2019-06-27 Thread Ido Schimmel

From: Petr Machata 

The get_ts_info callback is used for obtaining information about
timestamping capabilities of a network device. On Spectrum-1, implement
it to advertise the PHC and the capability to do HW timestamping, and
the supported RX and TX filters.

Signed-off-by: Petr Machata 
Acked-by: Jiri Pirko 
Signed-off-by: Ido Schimmel 
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c| 14 +++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c| 18 +++
 .../ethernet/mellanox/mlxsw/spectrum_ptp.h| 23 +++
 3 files changed, 55 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f1df3c63af3e..9a76a0faaa95 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -171,6 +171,8 @@ struct mlxsw_sp_ptp_ops {
struct hwtstamp_config *config);
int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port,
struct hwtstamp_config *config);
+   int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp,
+  struct ethtool_ts_info *info);
 };
 
 static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev,
@@ -3316,6 +3318,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device 
*netdev,
return err;
 }
 
+static int
+mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info)
+{
+   struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev);
+   struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+   return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info);
+}
+
 static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.get_drvinfo= mlxsw_sp_port_get_drvinfo,
.get_link   = ethtool_op_get_link,
@@ -3329,6 +3340,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops 
= {
.set_link_ksettings = mlxsw_sp_port_set_link_ksettings,
.get_module_info= mlxsw_sp_get_module_info,
.get_module_eeprom  = mlxsw_sp_get_module_eeprom,
+   .get_ts_info= mlxsw_sp_get_ts_info,
 };
 
 static int
@@ -4547,6 +4559,7 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = {
.transmitted= mlxsw_sp1_ptp_transmitted,
.hwtstamp_get   = mlxsw_sp1_ptp_hwtstamp_get,
.hwtstamp_set   = mlxsw_sp1_ptp_hwtstamp_set,
+   .get_ts_info= mlxsw_sp1_ptp_get_ts_info,
 };
 
 static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
@@ -4558,6 +4571,7 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = {
.transmitted= mlxsw_sp2_ptp_transmitted,
.hwtstamp_get   = mlxsw_sp2_ptp_hwtstamp_get,
.hwtstamp_set   = mlxsw_sp2_ptp_hwtstamp_set,
+   .get_ts_info= mlxsw_sp2_ptp_get_ts_info,
 };
 
 static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index e8df674cd514..c83cc4df5ea8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -933,3 +933,21 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port 
*mlxsw_sp_port,
 
return 0;
 }
+
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info)
+{
+   info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp);
+
+   info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+   SOF_TIMESTAMPING_RX_HARDWARE |
+   SOF_TIMESTAMPING_RAW_HARDWARE;
+
+   info->tx_types = BIT(HWTSTAMP_TX_OFF) |
+BIT(HWTSTAMP_TX_ON);
+
+   info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+  BIT(HWTSTAMP_FILTER_ALL);
+
+   return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
index 14505bcceeb7..c9e6d9c9a058 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h
@@ -18,6 +18,14 @@ enum {
MLXSW_PTP_MESSAGE_TYPE_PDELAY_RESP,
 };
 
+static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info)
+{
+   info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+   SOF_TIMESTAMPING_SOFTWARE;
+   info->phc_index = -1;
+   return 0;
+}
+
 #if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
 
 struct mlxsw_sp_ptp_clock *
@@ -46,6 +54,9 @@ int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port 
*mlxsw_sp_port,
 int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port,
   struct hwtstamp_config *config);
 
+int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp,
+ struct ethtool_ts_info *info);
+
 #else
 
 static inline struct mlxsw_sp_ptp_clock *
@@ -102,6

Re: [RFC, PATCH 2/2, net-next] net: netsec: add XDP support

2019-06-27 Thread Maciej Fijalkowski

On Tue, 25 Jun 2019 18:06:19 +0300
Ilias Apalodimas  wrote:

Hi Ilias,

> +/* The current driver only supports 1 Txq, this should run under spin_lock() 
> */
> +static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
> + struct xdp_frame *xdpf, bool is_ndo)
> +
> +{
> + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
> + struct page *page = virt_to_page(xdpf->data);
> + struct netsec_tx_pkt_ctrl tx_ctrl = {};
> + struct netsec_desc tx_desc;
> + dma_addr_t dma_handle;
> + u16 filled;
> +
> + if (tx_ring->head >= tx_ring->tail)
> + filled = tx_ring->head - tx_ring->tail;
> + else
> + filled = tx_ring->head + DESC_NUM - tx_ring->tail;
> +
> + if (DESC_NUM - filled <= 1)
> + return NETSEC_XDP_CONSUMED;
> +
> + if (is_ndo) {
> + /* this is for ndo_xdp_xmit, the buffer needs mapping before
> +  * sending
> +  */
> + dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len,
> + DMA_TO_DEVICE);
> + if (dma_mapping_error(priv->dev, dma_handle))
> + return NETSEC_XDP_CONSUMED;
> + tx_desc.buf_type = TYPE_NETSEC_XDP_NDO;
> + } else {
> + /* This is the device Rx buffer from page_pool. No need to remap
> +  * just sync and send it
> +  */
> + dma_handle = page_pool_get_dma_addr(page) +
> + NETSEC_RXBUF_HEADROOM;
> + dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len,
> +DMA_BIDIRECTIONAL);
> + tx_desc.buf_type = TYPE_NETSEC_XDP_TX;
> + }
> + tx_ctrl.cksum_offload_flag = false;
> + tx_ctrl.tcp_seg_offload_flag = false;
> + tx_ctrl.tcp_seg_len = 0;

Aren't these three lines redundant? tx_ctrl is zero initialized.

> +
> + tx_desc.dma_addr = dma_handle;
> + tx_desc.addr = xdpf->data;
> + tx_desc.len = xdpf->len;
> +
> + netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf);
> +
> + return NETSEC_XDP_TX;
> +}
> +
> +static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff 
> *xdp)
> +{
> + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
> + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
> + u32 ret;
> +
> + if (unlikely(!xdpf))
> + return NETSEC_XDP_CONSUMED;
> +
> + spin_lock(&tx_ring->lock);
> + ret = netsec_xdp_queue_one(priv, xdpf, false);
> + spin_unlock(&tx_ring->lock);
> +
> + return ret;
> +}
> +
> +static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> +   struct xdp_buff *xdp)
> +{
> + u32 ret = NETSEC_XDP_PASS;
> + int err;
> + u32 act;
> +
> + rcu_read_lock();
> + act = bpf_prog_run_xdp(prog, xdp);
> +
> + switch (act) {
> + case XDP_PASS:
> + ret = NETSEC_XDP_PASS;
> + break;
> + case XDP_TX:
> + ret = netsec_xdp_xmit_back(priv, xdp);
> + if (ret != NETSEC_XDP_TX)
> + xdp_return_buff(xdp);
> + break;
> + case XDP_REDIRECT:
> + err = xdp_do_redirect(priv->ndev, xdp, prog);
> + if (!err) {
> + ret = NETSEC_XDP_REDIR;
> + } else {
> + ret = NETSEC_XDP_CONSUMED;
> + xdp_return_buff(xdp);
> + }
> + break;
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + /* fall through */
> + case XDP_ABORTED:
> + trace_xdp_exception(priv->ndev, prog, act);
> + /* fall through -- handle aborts by dropping packet */
> + case XDP_DROP:
> + ret = NETSEC_XDP_CONSUMED;
> + xdp_return_buff(xdp);
> + break;
> + }
> +
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
>  static int netsec_process_rx(struct netsec_priv *priv, int budget)
>  {
>   struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> + struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);

Reading BPF prog should be RCU protected. There might be a case where RCU
callback that destroys BPF prog is executed during the bottom half handling and
you have the PREEMPT_RCU=y in your kernel config. I've just rephrased Brenden's
words here, so for further info, see:

https://lore.kernel.org/netdev/20160904042958.8594-1-bbla...@plumgrid.com/

So either expand the RCU section or read prog pointer per each frame, under the
lock, as it seems that currently we have these two schemes in drivers that
support XDP.

>   struct net_device *ndev = priv->ndev;
>   struct netsec_rx_pkt_info rx_info;
> - struct sk_buff *skb;
> + struct sk_buff *skb = NULL;
> + u16 xdp_xmit = 0;
> + u32 xdp_act = 0;
>   int done = 0;
>  
>   while (d

RE: [EXT] [PATCH net-next 16/16] qlge: Refill empty buffer queues from wq

2019-06-27 Thread Manish Chopra

> -Original Message-
> From: Benjamin Poirier 
> Sent: Monday, June 17, 2019 1:19 PM
> To: Manish Chopra ; GR-Linux-NIC-Dev  nic-...@marvell.com>; netdev@vger.kernel.org
> Subject: [EXT] [PATCH net-next 16/16] qlge: Refill empty buffer queues from
> wq
> 
> External Email
> 
> --
> When operating at mtu 9000, qlge does order-1 allocations for rx buffers in
> atomic context. This is especially unreliable when free memory is low or
> fragmented. Add an approach similar to commit 3161e453e496 ("virtio: net
> refill on out-of-memory") to qlge so that the device doesn't lock up if there
> are allocation failures.
> 
> Signed-off-by: Benjamin Poirier 
> ---
>  drivers/net/ethernet/qlogic/qlge/qlge.h  |  8 ++
>  drivers/net/ethernet/qlogic/qlge/qlge_main.c | 80 
>  2 files changed, 72 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h
> b/drivers/net/ethernet/qlogic/qlge/qlge.h
> index 1d90b32f6285..9c4d933c1ff7 100644
> --- a/drivers/net/ethernet/qlogic/qlge/qlge.h
> +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
> @@ -1453,6 +1453,13 @@ struct qlge_bq {
> 
>  #define QLGE_BQ_WRAP(index) ((index) & (QLGE_BQ_LEN - 1))
> 
> +#define QLGE_BQ_HW_OWNED(bq) \
> +({ \
> + typeof(bq) _bq = bq; \
> + QLGE_BQ_WRAP(QLGE_BQ_ALIGN((_bq)->next_to_use) - \
> +  (_bq)->next_to_clean); \
> +})
> +
>  struct rx_ring {
>   struct cqicb cqicb; /* The chip's completion queue init control
> block. */
> 
> @@ -1480,6 +1487,7 @@ struct rx_ring {
>   /* Misc. handler elements. */
>   u32 irq;/* Which vector this ring is assigned. */
>   u32 cpu;/* Which CPU this should run on. */
> + struct delayed_work refill_work;
>   char name[IFNAMSIZ + 5];
>   struct napi_struct napi;
>   u8 reserved;
> diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> index 7db4c31c9cc4..a13bda566187 100644
> --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> @@ -1029,7 +1029,7 @@ static const char * const bq_type_name[] = {
> 
>  /* return 0 or negative error */
>  static int qlge_refill_sb(struct rx_ring *rx_ring,
> -   struct qlge_bq_desc *sbq_desc)
> +   struct qlge_bq_desc *sbq_desc, gfp_t gfp)
>  {
>   struct ql_adapter *qdev = rx_ring->qdev;
>   struct sk_buff *skb;
> @@ -1041,7 +1041,7 @@ static int qlge_refill_sb(struct rx_ring *rx_ring,
>"ring %u sbq: getting new skb for index %d.\n",
>rx_ring->cq_id, sbq_desc->index);
> 
> - skb = netdev_alloc_skb(qdev->ndev, SMALL_BUFFER_SIZE);
> + skb = __netdev_alloc_skb(qdev->ndev, SMALL_BUFFER_SIZE, gfp);
>   if (!skb)
>   return -ENOMEM;
>   skb_reserve(skb, QLGE_SB_PAD);
> @@ -1062,7 +1062,7 @@ static int qlge_refill_sb(struct rx_ring *rx_ring,
> 
>  /* return 0 or negative error */
>  static int qlge_refill_lb(struct rx_ring *rx_ring,
> -   struct qlge_bq_desc *lbq_desc)
> +   struct qlge_bq_desc *lbq_desc, gfp_t gfp)
>  {
>   struct ql_adapter *qdev = rx_ring->qdev;
>   struct qlge_page_chunk *master_chunk = &rx_ring->master_chunk;
> @@ -1071,8 +1071,7 @@ static int qlge_refill_lb(struct rx_ring *rx_ring,
>   struct page *page;
>   dma_addr_t dma_addr;
> 
> - page = alloc_pages(__GFP_COMP | GFP_ATOMIC,
> -qdev->lbq_buf_order);
> + page = alloc_pages(gfp | __GFP_COMP, qdev-
> >lbq_buf_order);
>   if (unlikely(!page))
>   return -ENOMEM;
>   dma_addr = pci_map_page(qdev->pdev, page, 0, @@ -
> 1109,33 +1108,33 @@ static int qlge_refill_lb(struct rx_ring *rx_ring,
>   return 0;
>  }
> 
> -static void qlge_refill_bq(struct qlge_bq *bq)
> +/* return 0 or negative error */
> +static int qlge_refill_bq(struct qlge_bq *bq, gfp_t gfp)
>  {
>   struct rx_ring *rx_ring = QLGE_BQ_CONTAINER(bq);
>   struct ql_adapter *qdev = rx_ring->qdev;
>   struct qlge_bq_desc *bq_desc;
>   int refill_count;
> + int retval;
>   int i;
> 
>   refill_count = QLGE_BQ_WRAP(QLGE_BQ_ALIGN(bq->next_to_clean -
> 1) -
>   bq->next_to_use);
>   if (!refill_count)
> - return;
> + return 0;
> 
>   i = bq->next_to_use;
>   bq_desc = &bq->queue[i];
>   i -= QLGE_BQ_LEN;
>   do {
> - int retval;
> -
>   netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
>"ring %u %s: try cleaning idx %d\n",
>rx_ring->cq_id, bq_type_name[bq->type], i);
> 
>   if (bq->type == QLGE_SB)
> - retval = qlge_refill_sb(rx_ring, bq_desc);
> +

[PATCH iproute2] tc: netem: fix r parameter in Bernoulli loss model

2019-06-27 Thread Andrea Claudi

As the man page for tc netem states:

To use the Bernoulli model, the only needed parameter is p while the
others will be set to the default values r=1-p, 1-h=1 and 1-k=0.

However, the r parameter is erroneously set to 1, and not to 1-p.
Fix this using the same approach of the 4-state loss model.

Fixes: 3c7950af598be ("netem: add support for 4 state and GE loss model")
Signed-off-by: Andrea Claudi 
---
 tc/q_netem.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tc/q_netem.c b/tc/q_netem.c
index 6e0e8a8cbfde5..d1cd17f8a8a7e 100644
--- a/tc/q_netem.c
+++ b/tc/q_netem.c
@@ -284,14 +284,17 @@ static int netem_parse_opt(struct qdisc_util *qu, int 
argc, char **argv,
}
 
} else if (!strcmp(*argv, "gemodel")) {
+   double p;
+
NEXT_ARG();
-   if (get_percent(&gemodel.p, *argv)) {
+   if (parse_percent(&p, *argv)) {
explain1("loss gemodel p");
return -1;
}
+   set_percent(&gemodel.p, p);
 
/* set defaults */
-   set_percent(&gemodel.r, 1.);
+   set_percent(&gemodel.r, 1. - p);
set_percent(&gemodel.h, 0);
set_percent(&gemodel.k1, 0);
loss_type = NETEM_LOSS_GE;
-- 
2.20.1

[PATCH net v2] vxlan: do not destroy fdb if register_netdevice() is failed

2019-06-27 Thread Taehee Yoo

__vxlan_dev_create() destroys the fdb using a specific pointer to the fdb
entry when an error occurs.
But that pointer should not be used when register_netdevice() fails, because
register_netdevice() internally destroys the fdb when an error occurs.

This patch makes vxlan_fdb_create() no longer link the fdb entry to the
vxlan dev internally.
Instead, a new function vxlan_fdb_link() is added to link the fdb to the vxlan dev.

vxlan_fdb_link() is called after register_netdevice().
This avoids the situation where ->ndo_uninit() destroys the fdb entry
in the error path of register_netdevice().
Hence, the error path of __vxlan_dev_create() gets the opportunity
to destroy the default fdb entry by hand.

Test command
ip link add bonding_masters type vxlan id 0 group 239.1.1.1 \
dev enp0s9 dstport 4789

Splat looks like:
[  213.392816] kasan: GPF could be caused by NULL-ptr deref or user memory 
access
[  213.401257] general protection fault:  [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  213.402178] CPU: 0 PID: 1414 Comm: ip Not tainted 5.2.0-rc5+ #256
[  213.402178] RIP: 0010:vxlan_fdb_destroy+0x120/0x220 [vxlan]
[  213.402178] Code: df 48 8b 2b 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 06 01 
00 00 4c 8b 63 08 48 b8 00 00 00 00 00 fc d
[  213.402178] RSP: 0018:88810cb9f0a0 EFLAGS: 00010202
[  213.402178] RAX: dc00 RBX: 888101d4a8c8 RCX: 
[  213.402178] RDX: 1bd5a040 RSI: 888101d4a8c8 RDI: 888101d4a8d0
[  213.402178] RBP:  R08: fbfff22b72d9 R09: 
[  213.402178] R10: ffef R11:  R12: dead0200
[  213.402178] R13: 88810cb9f1f8 R14: 88810efccda0 R15: 88810efccda0
[  213.402178] FS:  7f7f6621a0c0() GS:88811b00() 
knlGS:
[  213.402178] CS:  0010 DS:  ES:  CR0: 80050033
[  213.402178] CR2: 55746f0807d0 CR3: 0001123e CR4: 001006f0
[  213.402178] Call Trace:
[  213.402178]  __vxlan_dev_create+0x3a9/0x7d0 [vxlan]
[  213.402178]  ? vxlan_changelink+0x740/0x740 [vxlan]
[  213.402178]  ? rcu_read_unlock+0x60/0x60 [vxlan]
[  213.402178]  ? __kasan_kmalloc.constprop.3+0xa0/0xd0
[  213.402178]  vxlan_newlink+0x8d/0xc0 [vxlan]
[  213.402178]  ? __vxlan_dev_create+0x7d0/0x7d0 [vxlan]
[  213.554119]  ? __netlink_ns_capable+0xc3/0xf0
[  213.554119]  __rtnl_newlink+0xb75/0x1180
[  213.554119]  ? rtnl_link_unregister+0x230/0x230
[ ... ]

Fixes: 0241b836732f ("vxlan: fix default fdb entry netlink notify ordering 
during netdev create")
Suggested-by: Roopa Prabhu 
Signed-off-by: Taehee Yoo 
---

v1 -> v2 :
 - Add a new function vxlan_fdb_link().
 - Fix fdb entry leak.
 - Update description.

 drivers/net/vxlan.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 083f3f0bf37f..4066346d6f41 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -804,6 +804,14 @@ static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev 
*vxlan,
return f;
 }
 
+static void vxlan_fdb_link(struct vxlan_dev *vxlan, const u8 *mac,
+  __be32 src_vni, struct vxlan_fdb *f)
+{
+   ++vxlan->addrcnt;
+   hlist_add_head_rcu(&f->hlist,
+  vxlan_fdb_head(vxlan, mac, src_vni));
+}
+
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __be16 port, __be32 src_vni,
@@ -829,10 +837,6 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
return rc;
}
 
-   ++vxlan->addrcnt;
-   hlist_add_head_rcu(&f->hlist,
-  vxlan_fdb_head(vxlan, mac, src_vni));
-
*fdb = f;
 
return 0;
@@ -977,6 +981,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
if (rc < 0)
return rc;
 
+   vxlan_fdb_link(vxlan, mac, src_vni, f);
rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
  swdev_notify, extack);
if (rc)
@@ -3571,12 +3576,17 @@ static int __vxlan_dev_create(struct net *net, struct 
net_device *dev,
if (err)
goto errout;
 
-   /* notify default fdb entry */
if (f) {
+   vxlan_fdb_link(vxlan, all_zeros_mac,
+  vxlan->default_dst.remote_vni, f);
+
+   /* notify default fdb entry */
err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
   RTM_NEWNEIGH, true, extack);
-   if (err)
-   goto errout;
+   if (err) {
+   vxlan_fdb_destroy(vxlan, f, false, false);
+   goto unregister;
+   }
}
 
list_add(&vxlan->next, &vn->vxlan_list);
@@ -3588,7 +3598,8 @@ static int __vxlan_dev_create(struct net *net, struct 
net_devic

Re: [RFC PATCH 1/1] Documentation: net: dsa: b53: Describe b53 configuration

2019-06-27 Thread Benedikt Spranger

Am Thu, 27 Jun 2019 15:49:29 +0200
schrieb Andrew Lunn :

> On Thu, Jun 27, 2019 at 12:15:06PM +0200, Benedikt Spranger wrote:
> 
> Hi Benedikt
> 
> > +Configuration with tagging support
> > +--
> > +
> > +The tagging based configuration is desired.
> > +
> > +To use the b53 DSA driver some configuration needs to be performed. As
> > +example configurations the following scenarios are used:
> > +
> > +*single port*
> > +  Every switch port acts as a different configurable ethernet port
> > +
> > +*bridge*
> > +  Every switch port is part of one configurable ethernet bridge
> > +
> > +*gateway*
> > +  Every switch port except one upstream port is part of a configurable
> > +  ethernet bridge.
> > +  The upstream port acts as different configurable ethernet port.
> > +
> > +All configurations are performed with tools from iproute2, which is 
> > available at
> > +https://www.kernel.org/pub/linux/utils/net/iproute2/
> > +
> > +In this documentation the following ethernet ports are used:
> > +
> > +*eth0*
> > +  CPU port  
> 
> In DSA terminology, this is the master interface. The switch port
> which the master is connected to is called the CPU port. So you are
> causing confusion with DSA terms here.

Changed the whole section to:

Through DSA every port of a switch is handled like a normal linux ethernet
interface. The CPU port is the switch port connected to an ethernet MAC chip.
The corresponding linux ethernet interface is called the master interface.
All other corresponding linux interfaces are called slave interfaces.

The slave interfaces depend on the master interface. They can only be brought
up when the master interface is up.

In this documentation the following ethernet interfaces are used:

*eth0*
  the master interface

*LAN1*
  a slave interface

*LAN2*
  another slave interface

*WAN*
  A slave interface dedicated for upstream traffic

> > +bridge
> > +~~
> > +
> > +.. code-block:: sh
> > +
> > +  # create bridge
> > +  ip link add name br0 type bridge
> > +
> > +  # add ports to bridge
> > +  ip link set dev wan master br0
> > +  ip link set dev lan1 master br0
> > +  ip link set dev lan2 master br0
> > +
> > +  # configure the bridge
> > +  ip addr add 192.0.2.129/25 dev br0
> > +
> > +  # The master interface needs to be brought up before the slave ports.
> > +  ip link set eth0 up
> > +
> > +  # bring up the slave interfaces
> > +  ip link set wan up
> > +  ip link set lan1 up
> > +  ip link set lan2 up  
> 
> I would probably do this in a different order. Bring the master up
> first, then the slaves. Then enslave the slaves to bridge, and lastly
> configure the bridge.

No objection. Will change the order.

> > +
> > +  # bring up the bridge
> > +  ip link set dev br0 up
> > +
> > +gateway
> > +~~~
> > +
> > +.. code-block:: sh
> > +
> > +  # create bridge
> > +  ip link add name br0 type bridge
> > +
> > +  # add ports to bridge
> > +  ip link set dev lan1 master br0
> > +  ip link set dev lan2 master br0
> > +
> > +  # configure the bridge
> > +  ip addr add 192.0.2.129/25 dev br0
> > +
> > +  # configure the upstream port
> > +  ip addr add 192.0.2.1/30 dev wan
> > +
> > +  # The master interface needs to be brought up before the slave ports.
> > +  ip link set eth0 up
> > +
> > +  # bring up the slave interfaces
> > +  ip link set wan up
> > +  ip link set lan1 up
> > +  ip link set lan2 up
> > +
> > +  # bring up the bridge
> > +  ip link set dev br0 up  
> 
> It would be good to add a note that there is nothing specific to the
> B53 here. This same process will work for all DSA drivers which
> support tagging, which is actually the majority.
Will state that.

> I also tell people that once you configure the master interface up,
> they should just use the slave interfaces as normal linux
> interfaces. The fact they are on a switch does not matter, and should
> not matter. Just use them as normal.
OK.

Regards
Bene Spranger

[PATCH net-next 09/12] s390/qeth: consolidate pm code

2019-06-27 Thread Julian Wiedmann

De-duplicate the pm callback implementations from the two sub-drivers,
replacing them with core helpers that delegate to the .set_online and
.set_offline callbacks.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  3 ---
 drivers/s390/net/qeth_core_main.c | 36 ---
 drivers/s390/net/qeth_l2_main.c   | 30 --
 drivers/s390/net/qeth_l3_main.c   | 30 --
 4 files changed, 19 insertions(+), 80 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 715bff28d48e..c81d5ec26803 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -713,9 +713,6 @@ struct qeth_discipline {
void (*remove) (struct ccwgroup_device *);
int (*set_online) (struct ccwgroup_device *);
int (*set_offline) (struct ccwgroup_device *);
-   int (*freeze)(struct ccwgroup_device *);
-   int (*thaw) (struct ccwgroup_device *);
-   int (*restore)(struct ccwgroup_device *);
int (*do_ioctl)(struct net_device *dev, struct ifreq *rq, int cmd);
int (*control_event_handler)(struct qeth_card *card,
struct qeth_ipa_cmd *cmd);
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index efb9a27b916e..3011cae00391 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5589,28 +5589,30 @@ static void qeth_core_shutdown(struct ccwgroup_device 
*gdev)
qdio_free(CARD_DDEV(card));
 }
 
-static int qeth_core_freeze(struct ccwgroup_device *gdev)
+static int qeth_suspend(struct ccwgroup_device *gdev)
 {
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-   if (card->discipline && card->discipline->freeze)
-   return card->discipline->freeze(gdev);
-   return 0;
-}
 
-static int qeth_core_thaw(struct ccwgroup_device *gdev)
-{
-   struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-   if (card->discipline && card->discipline->thaw)
-   return card->discipline->thaw(gdev);
+   qeth_set_allowed_threads(card, 0, 1);
+   wait_event(card->wait_q, qeth_threads_running(card, 0x) == 0);
+   if (gdev->state == CCWGROUP_OFFLINE)
+   return 0;
+
+   card->discipline->set_offline(gdev);
return 0;
 }
 
-static int qeth_core_restore(struct ccwgroup_device *gdev)
+static int qeth_resume(struct ccwgroup_device *gdev)
 {
struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-   if (card->discipline && card->discipline->restore)
-   return card->discipline->restore(gdev);
-   return 0;
+   int rc;
+
+   rc = card->discipline->set_online(gdev);
+
+   qeth_set_allowed_threads(card, 0x, 0);
+   if (rc)
+   dev_warn(&card->gdev->dev, "The qeth device driver failed to 
recover an error on the device\n");
+   return rc;
 }
 
 static ssize_t group_store(struct device_driver *ddrv, const char *buf,
@@ -5651,9 +5653,9 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = 
{
.shutdown = qeth_core_shutdown,
.prepare = NULL,
.complete = NULL,
-   .freeze = qeth_core_freeze,
-   .thaw = qeth_core_thaw,
-   .restore = qeth_core_restore,
+   .freeze = qeth_suspend,
+   .thaw = qeth_resume,
+   .restore = qeth_resume,
 };
 
 struct qeth_card *qeth_get_card_by_busid(char *bus_id)
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 03e1cd4a282a..4a2ff9d8aa5f 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -965,33 +965,6 @@ static void __exit qeth_l2_exit(void)
pr_info("unregister layer 2 discipline\n");
 }
 
-static int qeth_l2_pm_suspend(struct ccwgroup_device *gdev)
-{
-   struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-
-   qeth_set_allowed_threads(card, 0, 1);
-   wait_event(card->wait_q, qeth_threads_running(card, 0x) == 0);
-   if (gdev->state == CCWGROUP_OFFLINE)
-   return 0;
-
-   qeth_l2_set_offline(gdev);
-   return 0;
-}
-
-static int qeth_l2_pm_resume(struct ccwgroup_device *gdev)
-{
-   struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-   int rc;
-
-   rc = qeth_l2_set_online(gdev);
-
-   qeth_set_allowed_threads(card, 0x, 0);
-   if (rc)
-   dev_warn(&card->gdev->dev, "The qeth device driver "
-   "failed to recover an error on the device\n");
-   return rc;
-}
-
 /* Returns zero if the command is successfully "consumed" */
 static int qeth_l2_control_event(struct qeth_card *card,
struct qeth_ipa_cmd *cmd)
@@ -1021,9 +994,6 @@ struct qeth_discipline qeth_l2_discipline = {
.remove = qeth_l2_remove_device,
.set_online = qeth_l2_set_online,
.set_offline = qeth_l2_set_offline,
-   .freeze = qe

[PATCH net-next 10/12] s390/qeth: consolidate skb RX processing in L3 driver

2019-06-27 Thread Julian Wiedmann

Use napi_gro_receive() to pass up all types of packets that a L3 device
may receive.
1) For proper L2 packets received by the IQD sniffer, this is the
   obvious thing to do.
2) For af_iucv (which doesn't provide a GRO assist), the GRO code will
   transparently fall back to netif_receive_skb(). So there's no need to
   special-case this traffic in our code.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_l3_main.c | 30 --
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 44a602aa12ec..15351922b209 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1312,6 +1312,15 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device 
*dev,
 static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
struct qeth_hdr *hdr)
 {
+   struct af_iucv_trans_hdr *iucv = (struct af_iucv_trans_hdr *) skb->data;
+   struct net_device *dev = skb->dev;
+
+   if (IS_IQD(card) && iucv->magic == ETH_P_AF_IUCV) {
+   dev_hard_header(skb, dev, ETH_P_AF_IUCV, dev->dev_addr,
+   "FAKELL", skb->len);
+   return;
+   }
+
if (!(hdr->hdr.l3.flags & QETH_HDR_PASSTHRU)) {
u16 prot = (hdr->hdr.l3.flags & QETH_HDR_IPV6) ? ETH_P_IPV6 :
 ETH_P_IP;
@@ -1345,8 +1354,6 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, 
struct sk_buff *skb,
tg_addr, "FAKELL", skb->len);
}
 
-   skb->protocol = eth_type_trans(skb, card->dev);
-
/* copy VLAN tag from hdr into skb */
if (!card->options.sniffer &&
(hdr->hdr.l3.ext_flags & (QETH_HDR_EXT_VLAN_FRAME |
@@ -1363,12 +1370,10 @@ static void qeth_l3_rebuild_skb(struct qeth_card *card, 
struct sk_buff *skb,
 static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
int budget, int *done)
 {
-   struct net_device *dev = card->dev;
int work_done = 0;
struct sk_buff *skb;
struct qeth_hdr *hdr;
unsigned int len;
-   __u16 magic;
 
*done = 0;
WARN_ON_ONCE(!budget);
@@ -1382,23 +1387,12 @@ static int qeth_l3_process_inbound_buffer(struct 
qeth_card *card,
}
switch (hdr->hdr.l3.id) {
case QETH_HEADER_TYPE_LAYER3:
-   magic = *(__u16 *)skb->data;
-   if (IS_IQD(card) && magic == ETH_P_AF_IUCV) {
-   len = skb->len;
-   dev_hard_header(skb, dev, ETH_P_AF_IUCV,
-   dev->dev_addr, "FAKELL", len);
-   skb->protocol = eth_type_trans(skb, dev);
-   netif_receive_skb(skb);
-   } else {
-   qeth_l3_rebuild_skb(card, skb, hdr);
-   len = skb->len;
-   napi_gro_receive(&card->napi, skb);
-   }
-   break;
+   qeth_l3_rebuild_skb(card, skb, hdr);
+   /* fall through */
case QETH_HEADER_TYPE_LAYER2: /* for HiperSockets sniffer */
skb->protocol = eth_type_trans(skb, skb->dev);
len = skb->len;
-   netif_receive_skb(skb);
+   napi_gro_receive(&card->napi, skb);
break;
default:
dev_kfree_skb_any(skb);
-- 
2.17.1

[PATCH net-next 01/12] s390/qeth: dynamically allocate simple IPA cmds

2019-06-27 Thread Julian Wiedmann

This patch reduces the usage of the write channel's static cmd buffers,
by dynamically allocating all simple IPA cmds (eg. STARTLAN, SETVMAC).
It also converts the OSN path.

Doing so requires some changes to how we calculate the cmd length.
Currently when building IPA cmds, we're quite generous in how much data
we send down to the device (basically the size of the biggest cmd we
know). This is no real concern at the moment, since the static cmd
buffers are backed with zeroed pages. But for dynamic allocations, the
exact length matters. So this patch also adds the needed length
calculations to each cmd path.

Commands that have multiple subtypes (eg. SETADP) of differing length
will be converted with follow-up patches.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  | 10 +-
 drivers/s390/net/qeth_core_main.c | 33 ---
 drivers/s390/net/qeth_core_mpc.h  |  2 ++
 drivers/s390/net/qeth_l2_main.c   | 12 ---
 drivers/s390/net/qeth_l3_main.c   | 17 +---
 5 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 5bcdede5e955..42aa4a21a4c2 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -551,6 +551,7 @@ enum qeth_card_states {
  * Protocol versions
  */
 enum qeth_prot_versions {
+   QETH_PROT_NONE = 0x,
QETH_PROT_IPV4 = 0x0004,
QETH_PROT_IPV6 = 0x0006,
 };
@@ -995,6 +996,14 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct 
qeth_cmd_buffer *,
  void *);
 struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *,
enum qeth_ipa_cmds, enum qeth_prot_versions);
+struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
+  enum qeth_ipa_cmds cmd_code,
+  enum qeth_prot_versions prot,
+  unsigned int data_length);
+struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
+  unsigned int length, unsigned int ccws,
+  long timeout);
+
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
struct qeth_hdr **);
@@ -1012,7 +1021,6 @@ void qeth_release_buffer(struct qeth_cmd_buffer *iob);
 void qeth_notify_reply(struct qeth_reply *reply, int reason);
 void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
  u16 cmd_length);
-struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel);
 int qeth_query_switch_attributes(struct qeth_card *card,
  struct qeth_switch_info *sw_info);
 int qeth_query_card_info(struct qeth_card *card,
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index fe3dfeaf5ceb..84ed772bbfbd 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -756,7 +756,7 @@ static void qeth_cancel_cmd(struct qeth_cmd_buffer *iob, 
int rc)
qeth_release_buffer(iob);
 }
 
-struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel)
+static struct qeth_cmd_buffer *qeth_get_buffer(struct qeth_channel *channel)
 {
struct qeth_cmd_buffer *buffer = NULL;
unsigned long flags;
@@ -766,11 +766,10 @@ struct qeth_cmd_buffer *qeth_get_buffer(struct 
qeth_channel *channel)
spin_unlock_irqrestore(&channel->iob_lock, flags);
return buffer;
 }
-EXPORT_SYMBOL_GPL(qeth_get_buffer);
 
-static struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
- unsigned int length,
- unsigned int ccws, long timeout)
+struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
+  unsigned int length, unsigned int ccws,
+  long timeout)
 {
struct qeth_cmd_buffer *iob;
 
@@ -795,6 +794,7 @@ static struct qeth_cmd_buffer *qeth_alloc_cmd(struct 
qeth_channel *channel,
iob->length = length;
return iob;
 }
+EXPORT_SYMBOL_GPL(qeth_alloc_cmd);
 
 void qeth_clear_cmd_buffers(struct qeth_channel *channel)
 {
@@ -2804,6 +2804,25 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct 
qeth_card *card,
 }
 EXPORT_SYMBOL_GPL(qeth_get_ipacmd_buffer);
 
+struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
+  enum qeth_ipa_cmds cmd_code,
+  enum qeth_prot_versions prot,
+  unsigned int data_length)
+{
+   struct qeth_cmd_buffer *iob;
+
+   data_length += offsetof(struct qeth_ipa_cmd, data);
+   iob = qeth_alloc_cmd(&card->write, IPA_PDU_HEADER_SIZE + data

[PATCH net-next 03/12] s390/qeth: dynamically allocate various cmds with sub-types

2019-06-27 Thread Julian Wiedmann

This patch converts the adapter, assist and bridgeport cmd paths to
dynamic allocation. Most of the work is about re-organizing the cmd
headers, calculating the correct cmd length, and filling in the right
value in the sub-cmd's length field.

Since we now also set the correct length for cmds that are not reflected
by a fixed struct (ie SNMP), we can remove the work-around from
qeth_snmp_command().

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  9 ++--
 drivers/s390/net/qeth_core_main.c | 86 ---
 drivers/s390/net/qeth_core_mpc.h  | 27 --
 drivers/s390/net/qeth_l2_main.c   | 29 ++-
 drivers/s390/net/qeth_l3_main.c   | 10 ++--
 5 files changed, 78 insertions(+), 83 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 35d7b43f6580..258756dc06c3 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1003,6 +1003,11 @@ struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct 
qeth_card *card,
 struct qeth_cmd_buffer *qeth_alloc_cmd(struct qeth_channel *channel,
   unsigned int length, unsigned int ccws,
   long timeout);
+struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
+enum qeth_ipa_funcs ipa_func,
+u16 cmd_code,
+unsigned int data_length,
+enum qeth_prot_versions prot);
 
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
@@ -1037,10 +1042,6 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 void qeth_trace_features(struct qeth_card *);
 int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned 
long);
-struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
-enum qeth_ipa_funcs,
-__u16, __u16,
-enum qeth_prot_versions);
 int qeth_set_features(struct net_device *, netdev_features_t);
 void qeth_enable_hw_features(struct net_device *dev);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 3ba91b1c1315..696aba566d0b 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -2915,21 +2915,24 @@ static int qeth_query_setadapterparms_cb(struct 
qeth_card *card,
 }
 
 static struct qeth_cmd_buffer *qeth_get_adapter_cmd(struct qeth_card *card,
-   __u32 command, __u32 cmdlen)
+   enum qeth_ipa_setadp_cmd 
adp_cmd,
+   unsigned int data_length)
 {
+   struct qeth_ipacmd_setadpparms_hdr *hdr;
struct qeth_cmd_buffer *iob;
-   struct qeth_ipa_cmd *cmd;
 
-   iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SETADAPTERPARMS,
-QETH_PROT_IPV4);
-   if (iob) {
-   cmd = __ipa_cmd(iob);
-   cmd->data.setadapterparms.hdr.cmdlength = cmdlen;
-   cmd->data.setadapterparms.hdr.command_code = command;
-   cmd->data.setadapterparms.hdr.used_total = 1;
-   cmd->data.setadapterparms.hdr.seq_no = 1;
-   }
+   iob = qeth_ipa_alloc_cmd(card, IPA_CMD_SETADAPTERPARMS, QETH_PROT_IPV4,
+data_length +
+offsetof(struct qeth_ipacmd_setadpparms,
+ data));
+   if (!iob)
+   return NULL;
 
+   hdr = &__ipa_cmd(iob)->data.setadapterparms.hdr;
+   hdr->cmdlength = sizeof(*hdr) + data_length;
+   hdr->command_code = adp_cmd;
+   hdr->used_total = 1;
+   hdr->seq_no = 1;
return iob;
 }
 
@@ -2940,7 +2943,7 @@ static int qeth_query_setadapterparms(struct qeth_card 
*card)
 
QETH_CARD_TEXT(card, 3, "queryadp");
iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_COMMANDS_SUPPORTED,
-  sizeof(struct qeth_ipacmd_setadpparms));
+  SETADP_DATA_SIZEOF(query_cmds_supp));
if (!iob)
return -ENOMEM;
rc = qeth_send_ipa_cmd(card, iob, qeth_query_setadapterparms_cb, NULL);
@@ -3027,8 +3030,7 @@ int qeth_query_switch_attributes(struct qeth_card *card,
return -EOPNOTSUPP;
if (!netif_carrier_ok(card->dev))
return -ENOMEDIUM;
-   iob = qeth_get_adapter_cmd(card, IPA_SETADP_QUERY_SWITCH_ATTRIBUTES,
-   sizeof(struct qeth_ipacmd_

[PATCH net-next 08/12] s390/qeth: streamline SNMP cmd code

2019-06-27 Thread Julian Wiedmann

Apply some cleanups to qeth_snmp_command() and its callback:
1. when accessing the user data, use the proper struct instead of
   hard-coded offsets. Also copy the request data straight into the
   allocated cmd, skipping the extra memdup_user() to a tmp buffer.
2. capping the request length is no longer needed, the same check gets
   applied at a base level in qeth_alloc_cmd().
3. clean up some duplicated (and misindented) trace statements.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core_main.c | 49 ---
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 3875f70118e4..efb9a27b916e 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4320,18 +4320,13 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
return -ENOSPC;
}
QETH_CARD_TEXT_(card, 4, "snore%i",
-  cmd->data.setadapterparms.hdr.used_total);
+   cmd->data.setadapterparms.hdr.used_total);
QETH_CARD_TEXT_(card, 4, "sseqn%i",
-   cmd->data.setadapterparms.hdr.seq_no);
+   cmd->data.setadapterparms.hdr.seq_no);
/*copy entries to user buffer*/
memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
qinfo->udata_offset += data_len;
 
-   /* check if all replies received ... */
-   QETH_CARD_TEXT_(card, 4, "srtot%i",
-  cmd->data.setadapterparms.hdr.used_total);
-   QETH_CARD_TEXT_(card, 4, "srseq%i",
-  cmd->data.setadapterparms.hdr.seq_no);
if (cmd->data.setadapterparms.hdr.seq_no <
cmd->data.setadapterparms.hdr.used_total)
return 1;
@@ -4340,9 +4335,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 
 static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
 {
+   struct qeth_snmp_ureq __user *ureq;
struct qeth_cmd_buffer *iob;
-   struct qeth_ipa_cmd *cmd;
-   struct qeth_snmp_ureq *ureq;
unsigned int req_len;
struct qeth_arp_query_info qinfo = {0, };
int rc = 0;
@@ -4356,34 +4350,28 @@ static int qeth_snmp_command(struct qeth_card *card, 
char __user *udata)
IS_LAYER3(card))
return -EOPNOTSUPP;
 
-   /* skip 4 bytes (data_len struct member) to get req_len */
-   if (copy_from_user(&req_len, udata + sizeof(int), sizeof(int)))
+   ureq = (struct qeth_snmp_ureq __user *) udata;
+   if (get_user(qinfo.udata_len, &ureq->hdr.data_len) ||
+   get_user(req_len, &ureq->hdr.req_len))
+   return -EFAULT;
+
+   iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_SNMP_CONTROL, req_len);
+   if (!iob)
+   return -ENOMEM;
+
+   if (copy_from_user(&__ipa_cmd(iob)->data.setadapterparms.data.snmp,
+  &ureq->cmd, req_len)) {
+   qeth_put_cmd(iob);
return -EFAULT;
-   if (req_len + offsetof(struct qeth_ipacmd_setadpparms, data) +
-   offsetof(struct qeth_ipa_cmd, data) + IPA_PDU_HEADER_SIZE >
-   QETH_BUFSIZE)
-   return -EINVAL;
-   ureq = memdup_user(udata, req_len + sizeof(struct qeth_snmp_ureq_hdr));
-   if (IS_ERR(ureq)) {
-   QETH_CARD_TEXT(card, 2, "snmpnome");
-   return PTR_ERR(ureq);
}
-   qinfo.udata_len = ureq->hdr.data_len;
+
qinfo.udata = kzalloc(qinfo.udata_len, GFP_KERNEL);
if (!qinfo.udata) {
-   kfree(ureq);
+   qeth_put_cmd(iob);
return -ENOMEM;
}
qinfo.udata_offset = sizeof(struct qeth_snmp_ureq_hdr);
 
-   iob = qeth_get_adapter_cmd(card, IPA_SETADP_SET_SNMP_CONTROL, req_len);
-   if (!iob) {
-   rc = -ENOMEM;
-   goto out;
-   }
-
-   cmd = __ipa_cmd(iob);
-   memcpy(&cmd->data.setadapterparms.data.snmp, &ureq->cmd, req_len);
rc = qeth_send_ipa_cmd(card, iob, qeth_snmp_command_cb, &qinfo);
if (rc)
QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n",
@@ -4392,8 +4380,7 @@ static int qeth_snmp_command(struct qeth_card *card, char 
__user *udata)
if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
rc = -EFAULT;
}
-out:
-   kfree(ureq);
+
kfree(qinfo.udata);
return rc;
 }
-- 
2.17.1

[PATCH net-next 04/12] s390/qeth: dynamically allocate diag cmds

2019-06-27 Thread Julian Wiedmann

Add a new wrapper that allocates DIAG cmds of the right size, and fills
in the common fields.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  3 +++
 drivers/s390/net/qeth_core_main.c | 29 +
 drivers/s390/net/qeth_core_mpc.h  |  5 +
 drivers/s390/net/qeth_l3_main.c   |  4 +---
 4 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 258756dc06c3..b99fe6b043aa 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1008,6 +1008,9 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct 
qeth_card *card,
 u16 cmd_code,
 unsigned int data_length,
 enum qeth_prot_versions prot);
+struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
+ enum qeth_diags_cmds sub_cmd,
+ unsigned int data_length);
 
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 696aba566d0b..22074890835e 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3037,6 +3037,25 @@ int qeth_query_switch_attributes(struct qeth_card *card,
qeth_query_switch_attributes_cb, sw_info);
 }
 
+struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
+ enum qeth_diags_cmds sub_cmd,
+ unsigned int data_length)
+{
+   struct qeth_ipacmd_diagass *cmd;
+   struct qeth_cmd_buffer *iob;
+
+   iob = qeth_ipa_alloc_cmd(card, IPA_CMD_SET_DIAG_ASS, QETH_PROT_NONE,
+DIAG_HDR_LEN + data_length);
+   if (!iob)
+   return NULL;
+
+   cmd = &__ipa_cmd(iob)->data.diagass;
+   cmd->subcmd_len = DIAG_SUB_HDR_LEN + data_length;
+   cmd->subcmd = sub_cmd;
+   return iob;
+}
+EXPORT_SYMBOL_GPL(qeth_get_diag_cmd);
+
 static int qeth_query_setdiagass_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
 {
@@ -3055,15 +3074,11 @@ static int qeth_query_setdiagass_cb(struct qeth_card 
*card,
 static int qeth_query_setdiagass(struct qeth_card *card)
 {
struct qeth_cmd_buffer *iob;
-   struct qeth_ipa_cmd*cmd;
 
QETH_CARD_TEXT(card, 2, "qdiagass");
-   iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
+   iob = qeth_get_diag_cmd(card, QETH_DIAGS_CMD_QUERY, 0);
if (!iob)
return -ENOMEM;
-   cmd = __ipa_cmd(iob);
-   cmd->data.diagass.subcmd_len = 16;
-   cmd->data.diagass.subcmd = QETH_DIAGS_CMD_QUERY;
return qeth_send_ipa_cmd(card, iob, qeth_query_setdiagass_cb, NULL);
 }
 
@@ -3111,12 +3126,10 @@ int qeth_hw_trap(struct qeth_card *card, enum 
qeth_diags_trap_action action)
struct qeth_ipa_cmd *cmd;
 
QETH_CARD_TEXT(card, 2, "diagtrap");
-   iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
+   iob = qeth_get_diag_cmd(card, QETH_DIAGS_CMD_TRAP, 64);
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
-   cmd->data.diagass.subcmd_len = 80;
-   cmd->data.diagass.subcmd = QETH_DIAGS_CMD_TRAP;
cmd->data.diagass.type = 1;
cmd->data.diagass.action = action;
switch (action) {
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 46f038580a72..5cec877d972f 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -599,6 +599,11 @@ struct qeth_ipacmd_diagass {
__u8   cdata[64];
 } __attribute__ ((packed));
 
+#define DIAG_HDR_LEN   offsetofend(struct qeth_ipacmd_diagass, ext)
+#define DIAG_SUB_HDR_LEN   (offsetofend(struct qeth_ipacmd_diagass, ext) -\
+offsetof(struct qeth_ipacmd_diagass, \
+ subcmd_len))
+
 /* VNIC Characteristics IPA Command: */
 /* IPA commands/sub commands for VNICC */
 #define IPA_VNICC_QUERY_CHARS  0xL
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ff4d514656f2..2e10f5be8f67 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1107,12 +1107,10 @@ qeth_diags_trace(struct qeth_card *card, enum 
qeth_diags_trace_cmds diags_cmd)
 
QETH_CARD_TEXT(card, 2, "diagtrac");
 
-   iob = qeth_get_ipacmd_buffer(card, IPA_CMD_SET_DIAG_ASS, 0);
+   iob = qeth_get_diag_cmd(card, QETH_DIAGS_CMD_TRACE, 0);
if (!iob)
return -ENOMEM;
cmd = __ipa_cmd(iob);
-

[PATCH net-next 05/12] s390/qeth: dynamically allocate vnicc cmds

2019-06-27 Thread Julian Wiedmann

The VNICC code is somewhat quirky in that it defers the whole cmd setup
to a common helper qeth_l2_vnicc_request(). Some of the cmd specifics
are then passed in via parameter, while others are simply hard-coded.

Split the whole machinery up into the usual format: one helper that
allocates the cmd & fills in the common fields, while all the cmd
originators take care of their sub-cmd type specific work.
This makes it much easier to calculate the cmd's precise length, and
reduces code complexity.

Signed-off-by: Julian Wiedmann 
Reviewed-by: Alexandra Winter 
---
 drivers/s390/net/qeth_core_mpc.h |  13 ++--
 drivers/s390/net/qeth_l2_main.c  | 123 ++-
 2 files changed, 62 insertions(+), 74 deletions(-)

diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 5cec877d972f..75b5834ed28d 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -630,12 +630,6 @@ struct qeth_ipacmd_diagass {
 
 /* VNICC header */
 struct qeth_ipacmd_vnicc_hdr {
-   u32 sup;
-   u32 cur;
-};
-
-/* VNICC sub command header */
-struct qeth_vnicc_sub_hdr {
u16 data_length;
u16 reserved;
u32 sub_command;
@@ -660,15 +654,18 @@ struct qeth_vnicc_getset_timeout {
 
 /* complete VNICC IPA command message */
 struct qeth_ipacmd_vnicc {
+   struct qeth_ipa_caps vnicc_cmds;
struct qeth_ipacmd_vnicc_hdr hdr;
-   struct qeth_vnicc_sub_hdr sub_hdr;
union {
struct qeth_vnicc_query_cmds query_cmds;
struct qeth_vnicc_set_char set_char;
struct qeth_vnicc_getset_timeout getset_timeout;
-   };
+   } data;
 };
 
+#define VNICC_DATA_SIZEOF(field)   FIELD_SIZEOF(struct qeth_ipacmd_vnicc,\
+data.field)
+
 /* SETBRIDGEPORT IPA Command:   */
 enum qeth_ipa_sbp_cmd {
IPA_SBP_QUERY_COMMANDS_SUPPORTED= 0xL,
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index f762d22a3272..68c6f4080745 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1735,10 +1735,6 @@ static int qeth_l2_vnicc_makerc(struct qeth_card *card, 
u16 ipa_rc)
 /* generic VNICC request call back control */
 struct _qeth_l2_vnicc_request_cbctl {
u32 sub_cmd;
-   struct {
-   u32 vnic_char;
-   u32 timeout;
-   } param;
struct {
union{
u32 *sup_cmds;
@@ -1761,80 +1757,52 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card 
*card,
if (cmd->hdr.return_code)
return qeth_l2_vnicc_makerc(card, cmd->hdr.return_code);
/* return results to caller */
-   card->options.vnicc.sup_chars = rep->hdr.sup;
-   card->options.vnicc.cur_chars = rep->hdr.cur;
+   card->options.vnicc.sup_chars = rep->vnicc_cmds.supported;
+   card->options.vnicc.cur_chars = rep->vnicc_cmds.enabled;
 
if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS)
-   *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds;
+   *cbctl->result.sup_cmds = rep->data.query_cmds.sup_cmds;
 
if (cbctl->sub_cmd == IPA_VNICC_GET_TIMEOUT)
-   *cbctl->result.timeout = rep->getset_timeout.timeout;
+   *cbctl->result.timeout = rep->data.getset_timeout.timeout;
 
return 0;
 }
 
-/* generic VNICC request */
-static int qeth_l2_vnicc_request(struct qeth_card *card,
-struct _qeth_l2_vnicc_request_cbctl *cbctl)
+static struct qeth_cmd_buffer *qeth_l2_vnicc_build_cmd(struct qeth_card *card,
+  u32 vnicc_cmd,
+  unsigned int data_length)
 {
-   struct qeth_ipacmd_vnicc *req;
+   struct qeth_ipacmd_vnicc_hdr *hdr;
struct qeth_cmd_buffer *iob;
-   struct qeth_ipa_cmd *cmd;
-
-   QETH_CARD_TEXT(card, 2, "vniccreq");
 
-   /* get new buffer for request */
-   iob = qeth_get_ipacmd_buffer(card, IPA_CMD_VNICC, 0);
+   iob = qeth_ipa_alloc_cmd(card, IPA_CMD_VNICC, QETH_PROT_NONE,
+data_length +
+offsetof(struct qeth_ipacmd_vnicc, data));
if (!iob)
-   return -ENOMEM;
-
-   /* create header for request */
-   cmd = __ipa_cmd(iob);
-   req = &cmd->data.vnicc;
-
-   /* create sub command header for request */
-   req->sub_hdr.data_length = sizeof(req->sub_hdr);
-   req->sub_hdr.sub_command = cbctl->sub_cmd;
-
-   /* create sub command specific request fields */
-   switch (cbctl->sub_cmd) {
-   case IPA_VNICC_QUERY_CHARS:
-   break;
-   case IPA_VNICC_QUERY_CMDS:
-   req->sub_hdr.data_length += sizeof(req->query_cmds);
-   req->query_cmds.vnic_char = cbctl->param.vnic_cha

[PATCH net-next 00/12] s390/qeth: updates 2019-06-27

2019-06-27 Thread Julian Wiedmann

Hi Dave,

please apply another round of qeth updates for net-next.
This completes the conversion of the control path to use dynamically
allocated cmd buffers, along with some fine-tuning for the route
validation fix that recently went into -net.

Thanks,
Julian

Julian Wiedmann (12):
  s390/qeth: dynamically allocate simple IPA cmds
  s390/qeth: clarify parameter for simple assist cmds
  s390/qeth: dynamically allocate various cmds with sub-types
  s390/qeth: dynamically allocate diag cmds
  s390/qeth: dynamically allocate vnicc cmds
  s390/qeth: dynamically allocate MPC cmds
  s390/qeth: remove static cmd buffer infrastructure
  s390/qeth: streamline SNMP cmd code
  s390/qeth: consolidate pm code
  s390/qeth: consolidate skb RX processing in L3 driver
  s390/qeth: extract helper for route validation
  s390/qeth: move cast type selection into fill_header()

 drivers/s390/net/qeth_core.h  |  94 +++---
 drivers/s390/net/qeth_core_main.c | 517 +++---
 drivers/s390/net/qeth_core_mpc.h  |  49 ++-
 drivers/s390/net/qeth_l2_main.c   | 201 +---
 drivers/s390/net/qeth_l3_main.c   | 210 +---
 5 files changed, 425 insertions(+), 646 deletions(-)

-- 
2.17.1

[PATCH net-next 11/12] s390/qeth: extract helper for route validation

2019-06-27 Thread Julian Wiedmann

As follow-up to commit 0cd6783d3c7d ("s390/qeth: check dst entry before use"),
consolidate the dst_check() logic into a single helper and add a wrapper
around the cast type selection.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h| 13 +
 drivers/s390/net/qeth_l3_main.c | 49 ++---
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index c81d5ec26803..d354b39cdf4b 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -25,6 +25,8 @@
 #include 
 #include 
 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -877,6 +879,17 @@ static inline int qeth_get_ether_cast_type(struct sk_buff 
*skb)
return RTN_UNICAST;
 }
 
+static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, int 
ipv)
+{
+   struct dst_entry *dst = skb_dst(skb);
+   struct rt6_info *rt;
+
+   rt = (struct rt6_info *) dst;
+   if (dst)
+   dst = dst_check(dst, (ipv == 6) ? rt6_get_cookie(rt) : 0);
+   return dst;
+}
+
 static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
u8 flags)
 {
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 15351922b209..5bf5129ddcd4 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -32,7 +32,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -1878,26 +1877,17 @@ static int qeth_l3_do_ioctl(struct net_device *dev, 
struct ifreq *rq, int cmd)
return rc;
 }
 
-static int qeth_l3_get_cast_type(struct sk_buff *skb)
+static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry 
*dst,
+int ipv)
 {
-   int ipv = qeth_get_ip_version(skb);
struct neighbour *n = NULL;
-   struct dst_entry *dst;
 
-   rcu_read_lock();
-   dst = skb_dst(skb);
-   if (dst) {
-   struct rt6_info *rt = (struct rt6_info *) dst;
-
-   dst = dst_check(dst, (ipv == 6) ? rt6_get_cookie(rt) : 0);
-   if (dst)
-   n = dst_neigh_lookup_skb(dst, skb);
-   }
+   if (dst)
+   n = dst_neigh_lookup_skb(dst, skb);
 
if (n) {
int cast_type = n->type;
 
-   rcu_read_unlock();
neigh_release(n);
if ((cast_type == RTN_BROADCAST) ||
(cast_type == RTN_MULTICAST) ||
@@ -1905,7 +1895,6 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
return cast_type;
return RTN_UNICAST;
}
-   rcu_read_unlock();
 
/* no neighbour (eg AF_PACKET), fall back to target's IP address ... */
switch (ipv) {
@@ -1923,6 +1912,20 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb)
}
 }
 
+static int qeth_l3_get_cast_type(struct sk_buff *skb)
+{
+   int ipv = qeth_get_ip_version(skb);
+   struct dst_entry *dst;
+   int cast_type;
+
+   rcu_read_lock();
+   dst = qeth_dst_check_rcu(skb, ipv);
+   cast_type = qeth_l3_get_cast_type_rcu(skb, dst, ipv);
+   rcu_read_unlock();
+
+   return cast_type;
+}
+
 static u8 qeth_l3_cast_type_to_flag(int cast_type)
 {
if (cast_type == RTN_MULTICAST)
@@ -1987,27 +1990,17 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q 
*queue,
}
 
rcu_read_lock();
-   dst = skb_dst(skb);
+   dst = qeth_dst_check_rcu(skb, ipv);
 
if (ipv == 4) {
-   struct rtable *rt;
-
-   if (dst)
-   dst = dst_check(dst, 0);
-   rt = (struct rtable *) dst;
+   struct rtable *rt = (struct rtable *) dst;
 
*((__be32 *) &hdr->hdr.l3.next_hop.ipv4.addr) = (rt) ?
rt_nexthop(rt, ip_hdr(skb)->daddr) :
ip_hdr(skb)->daddr;
} else {
/* IPv6 */
-   struct rt6_info *rt;
-
-   if (dst) {
-   rt = (struct rt6_info *) dst;
-   dst = dst_check(dst, rt6_get_cookie(rt));
-   }
-   rt = (struct rt6_info *) dst;
+   struct rt6_info *rt = (struct rt6_info *) dst;
 
if (rt && !ipv6_addr_any(&rt->rt6i_gateway))
l3_hdr->next_hop.ipv6_addr = rt->rt6i_gateway;
-- 
2.17.1

[PATCH net-next 12/12] s390/qeth: move cast type selection into fill_header()

2019-06-27 Thread Julian Wiedmann

The cast type currently gets selected in .ndo_start_xmit, and is then
piped through several layers until it's stored into the HW header.
Push the selection down into qeth_l?_fill_header() to (1) reduce the
number of xmit-wide parameters, and (2) merge the two route validation
checks into just one.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  5 ++--
 drivers/s390/net/qeth_core_main.c |  7 +++--
 drivers/s390/net/qeth_l2_main.c   |  4 +--
 drivers/s390/net/qeth_l3_main.c   | 43 ++-
 4 files changed, 26 insertions(+), 33 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index d354b39cdf4b..c7ee07ce3615 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1040,11 +1040,10 @@ int qeth_stop(struct net_device *dev);
 
 int qeth_vm_request_mac(struct qeth_card *card);
 int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue, int ipv, int cast_type,
+ struct qeth_qdio_out_q *queue, int ipv,
  void (*fill_header)(struct qeth_qdio_out_q *queue,
  struct qeth_hdr *hdr, struct sk_buff *skb,
- int ipv, int cast_type,
- unsigned int data_len));
+ int ipv, unsigned int data_len));
 
 /* exports for OSN */
 int qeth_osn_assist(struct net_device *, void *, int);
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 3011cae00391..4d0caeebc802 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3908,11 +3908,10 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr,
 }
 
 int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_qdio_out_q *queue, int ipv, int cast_type,
+ struct qeth_qdio_out_q *queue, int ipv,
  void (*fill_header)(struct qeth_qdio_out_q *queue,
  struct qeth_hdr *hdr, struct sk_buff *skb,
- int ipv, int cast_type,
- unsigned int data_len))
+ int ipv, unsigned int data_len))
 {
unsigned int proto_len, hw_hdr_len;
unsigned int frame_len = skb->len;
@@ -3946,7 +3945,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb,
data_offset = push_len + proto_len;
}
memset(hdr, 0, hw_hdr_len);
-   fill_header(queue, hdr, skb, ipv, cast_type, frame_len);
+   fill_header(queue, hdr, skb, ipv, frame_len);
if (is_tso)
qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr,
  frame_len - proto_len, skb, proto_len);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 4a2ff9d8aa5f..fd64bc3f4062 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -164,8 +164,9 @@ static void qeth_l2_drain_rx_mode_cache(struct qeth_card 
*card)
 
 static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
-   int ipv, int cast_type, unsigned int data_len)
+   int ipv, unsigned int data_len)
 {
+   int cast_type = qeth_get_ether_cast_type(skb);
struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
 
hdr->hdr.l2.pkt_length = data_len;
@@ -598,7 +599,6 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff 
*skb,
rc = qeth_l2_xmit_osn(card, skb, queue);
else
rc = qeth_xmit(card, skb, queue, qeth_get_ip_version(skb),
-  qeth_get_ether_cast_type(skb),
   qeth_l2_fill_header);
 
if (!rc) {
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 5bf5129ddcd4..2dd99f103671 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1939,12 +1939,13 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type)
 
 static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
struct qeth_hdr *hdr, struct sk_buff *skb,
-   int ipv, int cast_type, unsigned int data_len)
+   int ipv, unsigned int data_len)
 {
struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3;
struct vlan_ethhdr *veth = vlan_eth_hdr(skb);
struct qeth_card *card = queue->card;
struct dst_entry *dst;
+   int cast_type;
 
hdr->hdr.l3.length = data_len;
 
@@ -1981,25 +1982,22 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q 
*queue,
hdr->hdr.l3.vlan_id = ntohs(veth->h_vlan_TCI);
}
 
-   l3_hdr->flags = qeth_l3_cast_type_to_flag(cast_type);
-
-   /* OSA only: */
-   if (!ipv) {
-

[PATCH net-next 07/12] s390/qeth: remove static cmd buffer infrastructure

2019-06-27 Thread Julian Wiedmann

Now that all cmds are dynamically allocated, the code for static cmd
buffers can go away entirely. This results in a nice reduction of
code/data size & complexity, while removing the risk that
qeth_clear_cmd_buffers() releases cmds that are still in-flight.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  45 +-
 drivers/s390/net/qeth_core_main.c | 254 ++
 drivers/s390/net/qeth_l2_main.c   |   3 +-
 drivers/s390/net/qeth_l3_main.c   |   1 -
 4 files changed, 59 insertions(+), 244 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index b99fe6b043aa..715bff28d48e 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -60,7 +60,7 @@ struct qeth_dbf_info {
debug_info_t *id;
 };
 
-#define QETH_DBF_CTRL_LEN 256
+#define QETH_DBF_CTRL_LEN 256U
 
 #define QETH_DBF_TEXT(name, level, text) \
debug_text_event(qeth_dbf[QETH_DBF_##name].id, level, text)
@@ -524,11 +524,6 @@ struct qeth_qdio_info {
int default_out_queue;
 };
 
-/**
- * buffer stuff for read channel
- */
-#define QETH_CMD_BUFFER_NO 8
-
 /**
  *  channel state machine
  */
@@ -556,12 +551,6 @@ enum qeth_prot_versions {
QETH_PROT_IPV6 = 0x0006,
 };
 
-enum qeth_cmd_buffer_state {
-   BUF_STATE_FREE,
-   BUF_STATE_LOCKED,
-   BUF_STATE_MALLOC,
-};
-
 enum qeth_cq {
QETH_CQ_DISABLED = 0,
QETH_CQ_ENABLED = 1,
@@ -575,18 +564,20 @@ struct qeth_ipato {
struct list_head entries;
 };
 
-struct qeth_channel;
+struct qeth_channel {
+   struct ccw_device *ccwdev;
+   enum qeth_channel_states state;
+   atomic_t irq_pending;
+};
 
 struct qeth_cmd_buffer {
-   enum qeth_cmd_buffer_state state;
unsigned int length;
refcount_t ref_count;
struct qeth_channel *channel;
struct qeth_reply *reply;
long timeout;
unsigned char *data;
-   void (*finalize)(struct qeth_card *card, struct qeth_cmd_buffer *iob,
-unsigned int length);
+   void (*finalize)(struct qeth_card *card, struct qeth_cmd_buffer *iob);
void (*callback)(struct qeth_card *card, struct qeth_cmd_buffer *iob);
 };
 
@@ -600,25 +591,8 @@ static inline struct qeth_ipa_cmd *__ipa_cmd(struct 
qeth_cmd_buffer *iob)
return (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
 }
 
-/**
- * definition of a qeth channel, used for read and write
- */
-struct qeth_channel {
-   enum qeth_channel_states state;
-   struct ccw1 *ccw;
-   spinlock_t iob_lock;
-   wait_queue_head_t wait_q;
-   struct ccw_device *ccwdev;
-/*command buffer for control data*/
-   struct qeth_cmd_buffer iob[QETH_CMD_BUFFER_NO];
-   atomic_t irq_pending;
-   int io_buf_no;
-};
-
 static inline struct ccw1 *__ccw_from_cmd(struct qeth_cmd_buffer *iob)
 {
-   if (iob->state != BUF_STATE_MALLOC)
-   return iob->channel->ccw;
return (struct ccw1 *)(iob->data + ALIGN(iob->length, 8));
 }
 
@@ -994,8 +968,6 @@ int qeth_send_ipa_cmd(struct qeth_card *, struct 
qeth_cmd_buffer *,
  int (*reply_cb)
  (struct qeth_card *, struct qeth_reply *, unsigned long),
  void *);
-struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *,
-   enum qeth_ipa_cmds, enum qeth_prot_versions);
 struct qeth_cmd_buffer *qeth_ipa_alloc_cmd(struct qeth_card *card,
   enum qeth_ipa_cmds cmd_code,
   enum qeth_prot_versions prot,
@@ -1011,6 +983,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct 
qeth_card *card,
 struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
  enum qeth_diags_cmds sub_cmd,
  unsigned int data_length);
+void qeth_put_cmd(struct qeth_cmd_buffer *iob);
 
 struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
@@ -1020,12 +993,10 @@ int qeth_poll(struct napi_struct *napi, int budget);
 void qeth_clear_ipacmd_list(struct qeth_card *);
 int qeth_qdio_clear_card(struct qeth_card *, int);
 void qeth_clear_working_pool_list(struct qeth_card *);
-void qeth_clear_cmd_buffers(struct qeth_channel *);
 void qeth_drain_output_queues(struct qeth_card *card);
 void qeth_setadp_promisc_mode(struct qeth_card *);
 int qeth_setadpparms_change_macaddr(struct qeth_card *);
 void qeth_tx_timeout(struct net_device *);
-void qeth_release_buffer(struct qeth_cmd_buffer *iob);
 void qeth_notify_reply(struct qeth_reply *reply, int reason);
 void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
  u16 cmd_length);
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index b4c200eec707..3875f70118e4 100644
--- a/drivers/s

[PATCH net-next 06/12] s390/qeth: dynamically allocate MPC cmds

2019-06-27 Thread Julian Wiedmann

The base MPC cmds are the last remaining user of the static cmd buffers.
Port them over to use dynamic allocation, and stop backing the write
channel's cmd buffers with pages.

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core_main.c | 36 ---
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 22074890835e..b4c200eec707 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1454,7 +1454,7 @@ static struct qeth_card *qeth_alloc_card(struct 
ccwgroup_device *gdev)
goto out_read_cmd;
if (qeth_setup_channel(&card->read, false))
goto out_read;
-   if (qeth_setup_channel(&card->write, true))
+   if (qeth_setup_channel(&card->write, false))
goto out_write;
if (qeth_setup_channel(&card->data, false))
goto out_data;
@@ -1737,8 +1737,6 @@ static void qeth_mpc_finalize_cmd(struct qeth_card *card,
  struct qeth_cmd_buffer *iob,
  unsigned int length)
 {
-   qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, length,
-  iob->data);
qeth_idx_finalize_cmd(card, iob, length);
 
memcpy(QETH_PDU_HEADER_SEQ_NO(iob->data),
@@ -1751,13 +1749,20 @@ static void qeth_mpc_finalize_cmd(struct qeth_card 
*card,
iob->callback = qeth_release_buffer_cb;
 }
 
-static struct qeth_cmd_buffer *qeth_mpc_get_cmd_buffer(struct qeth_card *card)
+static struct qeth_cmd_buffer *qeth_mpc_alloc_cmd(struct qeth_card *card,
+ void *data,
+ unsigned int data_length)
 {
struct qeth_cmd_buffer *iob;
 
-   iob = qeth_get_buffer(&card->write);
-   if (iob)
-   iob->finalize = qeth_mpc_finalize_cmd;
+   iob = qeth_alloc_cmd(&card->write, data_length, 1, QETH_TIMEOUT);
+   if (!iob)
+   return NULL;
+
+   memcpy(iob->data, data, data_length);
+   qeth_setup_ccw(__ccw_from_cmd(iob), CCW_CMD_WRITE, 0, data_length,
+  iob->data);
+   iob->finalize = qeth_mpc_finalize_cmd;
return iob;
 }
 
@@ -2080,11 +2085,10 @@ static int qeth_cm_enable(struct qeth_card *card)
 
QETH_CARD_TEXT(card, 2, "cmenable");
 
-   iob = qeth_mpc_get_cmd_buffer(card);
+   iob = qeth_mpc_alloc_cmd(card, CM_ENABLE, CM_ENABLE_SIZE);
if (!iob)
return -ENOMEM;
 
-   memcpy(iob->data, CM_ENABLE, CM_ENABLE_SIZE);
memcpy(QETH_CM_ENABLE_ISSUER_RM_TOKEN(iob->data),
   &card->token.issuer_rm_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_CM_ENABLE_FILTER_TOKEN(iob->data),
@@ -2116,11 +2120,10 @@ static int qeth_cm_setup(struct qeth_card *card)
 
QETH_CARD_TEXT(card, 2, "cmsetup");
 
-   iob = qeth_mpc_get_cmd_buffer(card);
+   iob = qeth_mpc_alloc_cmd(card, CM_SETUP, CM_SETUP_SIZE);
if (!iob)
return -ENOMEM;
 
-   memcpy(iob->data, CM_SETUP, CM_SETUP_SIZE);
memcpy(QETH_CM_SETUP_DEST_ADDR(iob->data),
   &card->token.issuer_rm_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_CM_SETUP_CONNECTION_TOKEN(iob->data),
@@ -2235,11 +2238,10 @@ static int qeth_ulp_enable(struct qeth_card *card)
 
QETH_CARD_TEXT(card, 2, "ulpenabl");
 
-   iob = qeth_mpc_get_cmd_buffer(card);
+   iob = qeth_mpc_alloc_cmd(card, ULP_ENABLE, ULP_ENABLE_SIZE);
if (!iob)
return -ENOMEM;
 
-   memcpy(iob->data, ULP_ENABLE, ULP_ENABLE_SIZE);
*(QETH_ULP_ENABLE_LINKNUM(iob->data)) = (u8) card->dev->dev_port;
memcpy(QETH_ULP_ENABLE_PROT_TYPE(iob->data), &prot_type, 1);
memcpy(QETH_ULP_ENABLE_DEST_ADDR(iob->data),
@@ -2283,11 +2285,10 @@ static int qeth_ulp_setup(struct qeth_card *card)
 
QETH_CARD_TEXT(card, 2, "ulpsetup");
 
-   iob = qeth_mpc_get_cmd_buffer(card);
+   iob = qeth_mpc_alloc_cmd(card, ULP_SETUP, ULP_SETUP_SIZE);
if (!iob)
return -ENOMEM;
 
-   memcpy(iob->data, ULP_SETUP, ULP_SETUP_SIZE);
memcpy(QETH_ULP_SETUP_DEST_ADDR(iob->data),
   &card->token.cm_connection_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_ULP_SETUP_CONNECTION_TOKEN(iob->data),
@@ -2473,11 +2474,10 @@ static int qeth_dm_act(struct qeth_card *card)
 
QETH_CARD_TEXT(card, 2, "dmact");
 
-   iob = qeth_mpc_get_cmd_buffer(card);
+   iob = qeth_mpc_alloc_cmd(card, DM_ACT, DM_ACT_SIZE);
if (!iob)
return -ENOMEM;
 
-   memcpy(iob->data, DM_ACT, DM_ACT_SIZE);
memcpy(QETH_DM_ACT_DEST_ADDR(iob->data),
   &card->token.cm_connection_r, QETH_MPC_TOKEN_LENGTH);
memcpy(QETH_DM_ACT_CONNECTION_TOKEN(iob->data),
@@ -2770,6 +2770,8 @@ void qeth_prepare_ipa_cmd(struct qeth_card *card, struct 
qeth_cmd_bu

[PATCH net-next 02/12] s390/qeth: clarify parameter for simple assist cmds

2019-06-27 Thread Julian Wiedmann

For code that uses qeth_send_simple_setassparms_prot(), we currently
can't differentiate whether the cmd should contain (1) no parameter, or
(2) a 4-byte parameter with value 0.
At the moment this doesn't cause any trouble. But when using dynamically
allocated cmds, we need to know whether to allocate & transmit an
additional 4 bytes of zeroes.
So instead of the raw parameter value, pass a parameter pointer
(or NULL) to qeth_send_simple_setassparms_prot().

Signed-off-by: Julian Wiedmann 
---
 drivers/s390/net/qeth_core.h  |  6 +++---
 drivers/s390/net/qeth_core_main.c | 15 +++
 drivers/s390/net/qeth_core_mpc.h  |  2 ++
 drivers/s390/net/qeth_l3_main.c   | 26 ++
 4 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 42aa4a21a4c2..35d7b43f6580 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -940,12 +940,12 @@ static inline int qeth_is_diagass_supported(struct 
qeth_card *card,
 
 int qeth_send_simple_setassparms_prot(struct qeth_card *card,
  enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data,
+ u16 cmd_code, u32 *data,
  enum qeth_prot_versions prot);
 /* IPv4 variant */
 static inline int qeth_send_simple_setassparms(struct qeth_card *card,
   enum qeth_ipa_funcs ipa_func,
-  u16 cmd_code, long data)
+  u16 cmd_code, u32 *data)
 {
return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
 data, QETH_PROT_IPV4);
@@ -953,7 +953,7 @@ static inline int qeth_send_simple_setassparms(struct 
qeth_card *card,
 
 static inline int qeth_send_simple_setassparms_v6(struct qeth_card *card,
  enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data)
+ u16 cmd_code, u32 *data)
 {
return qeth_send_simple_setassparms_prot(card, ipa_func, cmd_code,
 data, QETH_PROT_IPV6);
diff --git a/drivers/s390/net/qeth_core_main.c 
b/drivers/s390/net/qeth_core_main.c
index 84ed772bbfbd..3ba91b1c1315 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5355,20 +5355,19 @@ EXPORT_SYMBOL_GPL(qeth_get_setassparms_cmd);
 
 int qeth_send_simple_setassparms_prot(struct qeth_card *card,
  enum qeth_ipa_funcs ipa_func,
- u16 cmd_code, long data,
+ u16 cmd_code, u32 *data,
  enum qeth_prot_versions prot)
 {
-   int length = 0;
+   unsigned int length = data ? SETASS_DATA_SIZEOF(flags_32bit) : 0;
struct qeth_cmd_buffer *iob;
 
QETH_CARD_TEXT_(card, 4, "simassp%i", prot);
-   if (data)
-   length = sizeof(__u32);
iob = qeth_get_setassparms_cmd(card, ipa_func, cmd_code, length, prot);
if (!iob)
return -ENOMEM;
 
-   __ipa_cmd(iob)->data.setassparms.data.flags_32bit = (__u32) data;
+   if (data)
+   __ipa_cmd(iob)->data.setassparms.data.flags_32bit = *data;
return qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL);
 }
 EXPORT_SYMBOL_GPL(qeth_send_simple_setassparms_prot);
@@ -5885,8 +5884,8 @@ static int qeth_start_csum_cb(struct qeth_card *card, 
struct qeth_reply *reply,
 static int qeth_set_csum_off(struct qeth_card *card, enum qeth_ipa_funcs 
cstype,
 enum qeth_prot_versions prot)
 {
-   return qeth_send_simple_setassparms_prot(card, cstype,
-IPA_CMD_ASS_STOP, 0, prot);
+   return qeth_send_simple_setassparms_prot(card, cstype, IPA_CMD_ASS_STOP,
+NULL, prot);
 }
 
 static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
@@ -5974,7 +5973,7 @@ static int qeth_set_tso_off(struct qeth_card *card,
enum qeth_prot_versions prot)
 {
return qeth_send_simple_setassparms_prot(card, IPA_OUTBOUND_TSO,
-IPA_CMD_ASS_STOP, 0, prot);
+IPA_CMD_ASS_STOP, NULL, prot);
 }
 
 static int qeth_set_tso_on(struct qeth_card *card,
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index e84249f8803e..61fc4005dd53 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -437,6 +437,8 @@ struct qeth_ipacmd_setassparms {
} data;
 } __attribute__ ((packed));
 
+#d

Re: [PATCH v2 bpf-next 04/11] libbpf: refactor map initialization

2019-06-27 Thread Matt Hart

On Wed, 26 Jun 2019 at 19:29, Andrii Nakryiko  wrote:
>
> On Wed, Jun 26, 2019 at 7:48 AM Matt Hart  wrote:
> >
> > Hi all,
> >
> > I noticed perf fails to build for armv7 on linux next, due to this
> > compile error:
> > $ make -C tools/perf ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf-
> >
> >   CC   libbpf_probes.o
> > In file included from libbpf.c:27:
> > libbpf.c: In function ‘bpf_object__add_map’:
> > /home/matt/git/linux-next/tools/include/linux/kernel.h:45:17: error:
> > comparison of distinct pointer types lacks a cast [-Werror]
> >   (void) (&_max1 == &_max2);  \
> >  ^~
> > libbpf.c:776:12: note: in expansion of macro ‘max’
> >   new_cap = max(4ul, obj->maps_cap * 3 / 2);
> > ^~~
> >
> > So I bisected it and came down to this patch.
> > Commit bf82927125dd25003d76ed5541da704df21de57a
> >
> > Full verbose bisect script https://hastebin.com/odoyujofav.coffeescript
> >
> > Is this a case that perf code needs updating to match the change, or
> > is the change broken?
>
> Hi Matt,
>
> Thanks for reporting. This issue was already fixed in
> https://patchwork.ozlabs.org/patch/1122673/, so just pull latest
> bpf-next.

Thanks, I see that patch has hit linux-next so perf is building again.

> >
> >
> >
> > On Tue, 25 Jun 2019 at 16:53, Andrii Nakryiko  wrote:
> > >
> > > User and global data maps initialization has gotten pretty complicated
> > > and unnecessarily convoluted. This patch splits out the logic for global
> > > data map and user-defined map initialization. It also removes the
> > > restriction of pre-calculating how many maps will be initialized,
> > > instead allowing to keep adding new maps as they are discovered, which
> > > will be used later for BTF-defined map definitions.
> > >
> > > Signed-off-by: Andrii Nakryiko 
> > > ---
> > >  tools/lib/bpf/libbpf.c | 247 ++---
> > >  1 file changed, 133 insertions(+), 114 deletions(-)
> > >
> > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> > > index 7ee44d8877c5..88609dca4f7d 100644
> > > --- a/tools/lib/bpf/libbpf.c
> > > +++ b/tools/lib/bpf/libbpf.c
> > > @@ -234,6 +234,7 @@ struct bpf_object {
> > > size_t nr_programs;
> > > struct bpf_map *maps;
> > > size_t nr_maps;
> > > +   size_t maps_cap;
> > > struct bpf_secdata sections;
> > >
> > > bool loaded;
> > > @@ -763,21 +764,51 @@ int bpf_object__variable_offset(const struct 
> > > bpf_object *obj, const char *name,
> > > return -ENOENT;
> > >  }
> > >
> > > -static bool bpf_object__has_maps(const struct bpf_object *obj)
> > > +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
> > >  {
> > > -   return obj->efile.maps_shndx >= 0 ||
> > > -  obj->efile.data_shndx >= 0 ||
> > > -  obj->efile.rodata_shndx >= 0 ||
> > > -  obj->efile.bss_shndx >= 0;
> > > +   struct bpf_map *new_maps;
> > > +   size_t new_cap;
> > > +   int i;
> > > +
> > > +   if (obj->nr_maps < obj->maps_cap)
> > > +   return &obj->maps[obj->nr_maps++];
> > > +
> > > +   new_cap = max(4ul, obj->maps_cap * 3 / 2);
> > > +   new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
> > > +   if (!new_maps) {
> > > +   pr_warning("alloc maps for object failed\n");
> > > +   return ERR_PTR(-ENOMEM);
> > > +   }
> > > +
> > > +   obj->maps_cap = new_cap;
> > > +   obj->maps = new_maps;
> > > +
> > > +   /* zero out new maps */
> > > +   memset(obj->maps + obj->nr_maps, 0,
> > > +  (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
> > > +   /*
> > > +* fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
> > > +* when failure (zclose won't close negative fd)).
> > > +*/
> > > +   for (i = obj->nr_maps; i < obj->maps_cap; i++) {
> > > +   obj->maps[i].fd = -1;
> > > +   obj->maps[i].inner_map_fd = -1;
> > > +   }
> > > +
> > > +   return &obj->maps[obj->nr_maps++];
> > >  }
> > >
> > >  static int
> > > -bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map 
> > > *map,
> > > - enum libbpf_map_type type, Elf_Data *data,
> > > - void **data_buff)
> > > +bpf_object__init_internal_map(struct bpf_object *obj, enum 
> > > libbpf_map_type type,
> > > + Elf_Data *data, void **data_buff)
> > >  {
> > > -   struct bpf_map_def *def = &map->def;
> > > char map_name[BPF_OBJ_NAME_LEN];
> > > +   struct bpf_map_def *def;
> > > +   struct bpf_map *map;
> > > +
> > > +   map = bpf_object__add_map(obj);
> > > +   if (IS_ERR(map))
> > > +   return PTR_ERR(map);
> > >
> > > map->libbpf_type = type;
> > > map->offset = ~(typeof(map->offset))0;
> > > @@ -789,6 +820,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, 
> > > struct

[PATCH net] selftests: rtnetlink: skip ipsec offload tests if netdevsim isn't present

2019-06-27 Thread Florian Westphal

running the script on systems without netdevsim now prints:

SKIP: ipsec_offload can't load netdevsim

instead of error message & failed status.

Signed-off-by: Florian Westphal 
---
 Feel free to apply to -next, it's not a bug fix per se.

 tools/testing/selftests/net/rtnetlink.sh | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/rtnetlink.sh 
b/tools/testing/selftests/net/rtnetlink.sh
index b25c9fe019d2..a7a443bdbdd9 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -699,13 +699,17 @@ kci_test_ipsec_offload()
sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+   probed=false
 
# setup netdevsim since dummydev doesn't have offload support
-   modprobe netdevsim
-   check_err $?
-   if [ $ret -ne 0 ]; then
-   echo "FAIL: ipsec_offload can't load netdevsim"
-   return 1
+   if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+   modprobe -q netdevsim
+   check_err $?
+   if [ $ret -ne 0 ]; then
+   echo "SKIP: ipsec_offload can't load netdevsim"
+   return $ksft_skip
+   fi
+   probed=true
fi
 
echo "0" > /sys/bus/netdevsim/new_device
@@ -785,7 +789,7 @@ EOF
fi
 
# clean up any leftovers
-   rmmod netdevsim
+   $probed && rmmod netdevsim
 
if [ $ret -ne 0 ]; then
echo "FAIL: ipsec_offload"
-- 
2.21.0

Re: [RFC] longer netdev names proposal

2019-06-27 Thread Stephen Hemminger

On Thu, 27 Jun 2019 11:43:27 +0200
Jiri Pirko  wrote:

> Hi all.
> 
> In the past, the IFNAMSIZ (16) limit for netdevice name length was
> repeatedly discussed. Now that we have PF and VF representors
> with port names like "pfXvfY", it became quite common to hit this limit:
> 0123456789012345
> enp131s0f1npf0vf6
> enp131s0f1npf0vf22
> 
> Since IFLA_NAME is just a string, I thought it might be possible to use
> it to carry longer names as it is. However, the userspace tools, like
> iproute2, are doing checks before print out. So for example in output of
> "ip addr" when IFLA_NAME is longer than IFNAMSIZE, the netdevice is
> completely avoided.
> 
> So here is a proposal that might work:
> 1) Add a new attribute IFLA_NAME_EXT that could carry names longer than
>IFNAMSIZE, say 64 bytes. The max size should be only defined in kernel,
>user should be prepared for any string size.
> 2) Add a file in sysfs that would indicate that NAME_EXT is supported by
>the kernel.
> 3) Udev is going to look for the sysfs indication file. In case when
>kernel supports long names, it will do rename to longer name, setting
>IFLA_NAME_EXT. If not, it does what it does now - fail.
> 4) There are two cases that can happen during rename:
>A) The name is shorter than IFNAMSIZ
>   -> both IFLA_NAME and IFLA_NAME_EXT would contain the same string:  
>  original IFLA_NAME = eth0
>  original IFLA_NAME_EXT = eth0
>  renamed  IFLA_NAME = enp5s0f1npf0vf1
>  renamed  IFLA_NAME_EXT = enp5s0f1npf0vf1
>B) The name is longer than IFNAMSIZ
>   -> IFLA_NAME would contain the original one, IFLA_NAME_EXT would   
>  contain the new one:
>  original IFLA_NAME = eth0
>  original IFLA_NAME_EXT = eth0
>  renamed  IFLA_NAME = eth0
>  renamed  IFLA_NAME_EXT = enp131s0f1npf0vf22
> 
> This would allow the old tools to work with "eth0" and the new
> tools would work with "enp131s0f1npf0vf22". In sysfs, there would
> be symlink from one name to another.
>   
> Also, there might be a warning added to kernel if someone works
> with IFLA_NAME that the userspace tool should be upgraded.
> 
> Eventually, only IFLA_NAME_EXT is going to be used by everyone.
> 
> I'm aware there are other places where similar new attribute
> would have to be introduced too (ip rule for example).
> I'm not saying this is a simple work.
> 
> Question is what to do with the ioctl api (get ifindex etc). I would
> probably leave it as is and push tools to use rtnetlink instead.
> 
> Any ideas why this would not work? Any ideas how to solve this
> differently?
> 
> Thanks!
> 
> Jiri
>  

I looked into this in the past, but then rejected it because
there are so many tools that use names, not just iproute2.
Plus long names are very user unfriendly.

[PATCH v4 07/13] dt-bindings: net: sun4i-mdio: Convert the binding to a schemas

2019-06-27 Thread Maxime Ripard

Switch our Allwinner A10 MDIO controller binding to a YAML schema to enable
the DT validation.

Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml | 70 
++
 Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt  | 27 
---
 2 files changed, 70 insertions(+), 27 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
 delete mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt

diff --git 
a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml 
b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
new file mode 100644
index 000000000000..df24d9d969f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun4i-a10-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 MDIO Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai 
+  - Maxime Ripard 
+
+allOf:
+  - $ref: "mdio.yaml#"
+
+# Select every compatible, including the deprecated ones. This way, we
+# will be able to report a warning when we have that compatible, since
+# we will validate the node thanks to the select, but won't report it
+# as a valid value in the compatible property description
+select:
+  properties:
+compatible:
+  enum:
+- allwinner,sun4i-a10-mdio
+
+# Deprecated
+- allwinner,sun4i-mdio
+
+  required:
+- compatible
+
+properties:
+  "#address-cells":
+const: 1
+
+  "#size-cells":
+const: 0
+
+  compatible:
+const: allwinner,sun4i-a10-mdio
+
+  reg:
+maxItems: 1
+
+  phy-supply:
+description: PHY regulator
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+mdio@1c0b080 {
+compatible = "allwinner,sun4i-a10-mdio";
+reg = <0x01c0b080 0x14>;
+#address-cells = <1>;
+#size-cells = <0>;
+phy-supply = <&reg_emac_3v3>;
+
+phy0: ethernet-phy@0 {
+reg = <0>;
+};
+};
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt 
b/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt
deleted file mode 100644
index ab5b8613b0ef..000000000000
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Allwinner A10 MDIO Ethernet Controller interface
-
-Required properties:
-- compatible: should be "allwinner,sun4i-a10-mdio"
-  (Deprecated: "allwinner,sun4i-mdio").
-- reg: address and length of the register set for the device.
-
-Optional properties:
-- phy-supply: phandle to a regulator if the PHY needs one
-
-Example at the SoC level:
-mdio@1c0b080 {
-   compatible = "allwinner,sun4i-a10-mdio";
-   reg = <0x01c0b080 0x14>;
-   #address-cells = <1>;
-   #size-cells = <0>;
-};
-
-And at the board level:
-
-mdio@1c0b080 {
-   phy-supply = <&reg_emac_3v3>;
-
-   phy0: ethernet-phy@0 {
-   reg = <0>;
-   };
-};
-- 
git-series 0.9.1

[PATCH v4 05/13] dt-bindings: net: phy: The interrupt property is not mandatory

2019-06-27 Thread Maxime Ripard

Unlike what was initially claimed in the PHY binding, the interrupt
property of a PHY can be omitted, and the OS will turn to polling instead.

Document that.

Reviewed-by: Andrew Lunn 
Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/ethernet-phy.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml 
b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index 81d2016d7232..c77f97cbd54b 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -156,7 +156,6 @@ properties:
 
 required:
   - reg
-  - interrupts
 
 examples:
   - |
-- 
git-series 0.9.1

[PATCH v4 04/13] MAINTAINERS: Add Ethernet PHY YAML file

2019-06-27 Thread Maxime Ripard

While the Ethernet PHY framework was marked as maintained, the device tree
bindings associated with that framework were not listed under the maintained
files. Fix that.

Reviewed-by: Andrew Lunn 
Signed-off-by: Maxime Ripard 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3e3b9738f2e6..087821e507f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6009,6 +6009,7 @@ M:Heiner Kallweit 
 L: netdev@vger.kernel.org
 S: Maintained
 F: Documentation/ABI/testing/sysfs-bus-mdio
+F: Documentation/devicetree/bindings/net/ethernet-phy.yaml
 F: Documentation/devicetree/bindings/net/mdio*
 F: Documentation/networking/phy.rst
 F: drivers/net/phy/
-- 
git-series 0.9.1

[PATCH v4 00/13] net: Add generic and Allwinner YAML bindings

2019-06-27 Thread Maxime Ripard

Hi,

This is an attempt at getting the main generic DT bindings for the ethernet
(and related) devices, and converting some DT bindings for the Allwinner DTs
to YAML as well.

This should provide some DT validation coverage.

Let me know if you have any questions,
Maxime

Changes from v3:
  - Added a cover letter
  - Dropped the phy-mode deprecation, and the DT changes moving to
phy-connection-type
  - Fixed the mdio example node name
  - Deprecated the fixed-link array property, in favor of the fixed-link
subnode

Changes from v2:
  - Switched to the deprecated keyword to describe deprecated properties
  - Deprecated phy-mode, phy and phy-handle
  - Added patches to switch to phy-connection-type and phy-device for
Allwinner DTs
  - Changed the A83t GMAC delays to use multipleOf instead of an enum
  - Fix the snps,*pbl properties types
  - Add a generic MDIO YAML schemas

Changes from v1:
  - Move the DWMAC SoC specific bindings to separate documents
  - Mark snps,reset-gpio (and related) as deprecated and fixed the
Allwinner DTs accordingly
  - Restrict snps,tso to only a couple of compatibles
  - Use an enum for the compatibles
  - Add a custom select statement with the compatibles of all the generic
compatibles, including the deprecated ones. Remove the deprecated ones
from the valid compatible values to issue a warning when used.
  - Add a patch to MAINTAINERS for the PHY YAML binding
  - Add missing compatible options for the PHY, and missing phy speeds
  - Add a custom select clause to make the PHY binding validate all phy
nodes, and not just the ones with a compatible
  - Validate the fixed-link array elements
  - Removed deprecated properties (phy-mode, phy, phy-device)
  - Restrict the number of items under link-gpios to 1

Maxime Ripard (13):
  dt-bindings: net: Add YAML schemas for the generic Ethernet options
  dt-bindings: net: Add a YAML schemas for the generic PHY options
  dt-bindings: net: Add a YAML schemas for the generic MDIO options
  MAINTAINERS: Add Ethernet PHY YAML file
  dt-bindings: net: phy: The interrupt property is not mandatory
  dt-bindings: net: sun4i-emac: Convert the binding to a schemas
  dt-bindings: net: sun4i-mdio: Convert the binding to a schemas
  dt-bindings: net: stmmac: Convert the binding to a schemas
  dt-bindings: net: sun7i-gmac: Convert the binding to a schemas
  dt-bindings: net: sun8i-emac: Convert the binding to a schemas
  dt-bindings: net: dwmac: Deprecate the PHY reset properties
  ARM: dts: sunxi: Switch to the generic PHY properties
  ARM: dts: sunxi: Switch from phy to phy-handle

 Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml  |  55 
++-
 Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml  |  70 
-
 Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt   |  19 +---
 Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt   |  27 
+-
 Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt   |  27 
+-
 Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml  |  66 
-
 Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml | 321 
-
 Documentation/devicetree/bindings/net/dwmac-sun8i.txt| 201 
+---
 Documentation/devicetree/bindings/net/ethernet-controller.yaml   | 204 
-
 Documentation/devicetree/bindings/net/ethernet-phy.yaml  | 178 
+++-
 Documentation/devicetree/bindings/net/ethernet.txt   |  69 
+
 Documentation/devicetree/bindings/net/fixed-link.txt |  55 
+--
 Documentation/devicetree/bindings/net/mdio.txt   |  38 
+---
 Documentation/devicetree/bindings/net/mdio.yaml  |  51 
+-
 Documentation/devicetree/bindings/net/phy.txt|  80 
+--
 Documentation/devicetree/bindings/net/snps,dwmac.yaml| 410 
-
 Documentation/devicetree/bindings/net/stmmac.txt | 179 
+---
 MAINTAINERS  |   1 +-
 arch/arm/boot/dts/sun4i-a10-a1000.dts|   2 +-
 arch/arm/boot/dts/sun4i-a10-ba10-tvbox.dts   |   2 +-
 arch/arm/boot/dts/sun4i-a10-cubieboard.dts   |   2 +-
 arch/arm/boot/dts/sun4i-a10-hackberry.dts|   2 +-
 arch/arm/boot/dts/sun4i-a10-itead-iteaduino-plus.dts |   2 +-
 arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts   |   2 +-
 arch/arm/boot/dts/sun4i-a10-marsboard.dts|   2 +-
 arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts

[PATCH v4 01/13] dt-bindings: net: Add YAML schemas for the generic Ethernet options

2019-06-27 Thread Maxime Ripard

The Ethernet controllers have a good number of generic options that can be
needed in a device tree. Add a YAML schemas for those.

Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/ethernet-controller.yaml | 204 +++-
 Documentation/devicetree/bindings/net/ethernet.txt |  69 +--
 Documentation/devicetree/bindings/net/fixed-link.txt   |  55 +--
 3 files changed, 206 insertions(+), 122 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/net/ethernet-controller.yaml

diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml 
b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
new file mode 100644
index 000000000000..77ee2aa5d29e
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ethernet-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ethernet Controller Generic Binding
+
+maintainers:
+  - David S. Miller 
+
+properties:
+  $nodename:
+pattern: "^ethernet(@.*)?$"
+
+  local-mac-address:
+allOf:
+  - $ref: /schemas/types.yaml#definitions/uint8-array
+  - minItems: 6
+maxItems: 6
+description:
+  Specifies the MAC address that was assigned to the network device.
+
+  mac-address:
+allOf:
+  - $ref: /schemas/types.yaml#definitions/uint8-array
+  - minItems: 6
+maxItems: 6
+description:
+  Specifies the MAC address that was last used by the boot
+  program; should be used in cases where the MAC address assigned
+  to the device by the boot program is different from the
+  local-mac-address property.
+
+  max-frame-size:
+$ref: /schemas/types.yaml#definitions/uint32
+description:
+  Maximum transfer unit (IEEE defined MTU), rather than the
+  maximum frame size (there\'s contradiction in the Devicetree
+  Specification).
+
+  max-speed:
+$ref: /schemas/types.yaml#definitions/uint32
+description:
+  Specifies maximum speed in Mbit/s supported by the device.
+
+  nvmem-cells:
+maxItems: 1
+description:
+  Reference to an nvmem node for the MAC address
+
+  nvmem-cells-names:
+const: mac-address
+
+  phy-connection-type:
+description:
+  Operation mode of the PHY interface
+enum:
+  # There is not a standard bus between the MAC and the PHY,
+  # something proprietary is being used to embed the PHY in the
+  # MAC.
+  - internal
+  - mii
+  - gmii
+  - sgmii
+  - qsgmii
+  - tbi
+  - rev-mii
+  - rmii
+
+  # RX and TX delays are added by the MAC when required
+  - rgmii
+
+  # RGMII with internal RX and TX delays provided by the PHY,
+  # the MAC should not add the RX or TX delays in this case
+  - rgmii-id
+
+  # RGMII with internal RX delay provided by the PHY, the MAC
+  # should not add an RX delay in this case
+  - rgmii-rxid
+
+  # RGMII with internal TX delay provided by the PHY, the MAC
+  # should not add an TX delay in this case
+  - rgmii-txid
+  - rtbi
+  - smii
+  - xgmii
+  - trgmii
+  - 1000base-x
+  - 2500base-x
+  - rxaui
+  - xaui
+
+  # 10GBASE-KR, XFI, SFI
+  - 10gbase-kr
+  - usxgmii
+
+  phy-mode:
+$ref: "#/properties/phy-connection-type"
+
+  phy-handle:
+$ref: /schemas/types.yaml#definitions/phandle
+description:
+  Specifies a reference to a node representing a PHY device.
+
+  phy:
+$ref: "#/properties/phy-handle"
+deprecated: true
+
+  phy-device:
+$ref: "#/properties/phy-handle"
+deprecated: true
+
+  rx-fifo-depth:
+$ref: /schemas/types.yaml#definitions/uint32
+description:
+  The size of the controller\'s receive fifo in bytes. This is used
+  for components that can have configurable receive fifo sizes,
+  and is useful for determining certain configuration settings
+  such as flow control thresholds.
+
+  tx-fifo-depth:
+$ref: /schemas/types.yaml#definitions/uint32
+description:
+  The size of the controller\'s transmit fifo in bytes. This
+  is used for components that can have configurable fifo sizes.
+
+  managed:
+allOf:
+  - $ref: /schemas/types.yaml#definitions/string
+  - default: auto
+enum:
+  - auto
+  - in-band-status
+description:
+  Specifies the PHY management type. If auto is set and fixed-link
+  is not specified, it uses MDIO for management.
+
+  fixed-link:
+allOf:
+  - if:
+  type: array
+then:
+  deprecated: true
+  minItems: 1
+  maxItems: 1
+  items:
+items:
+  - minimum: 0
+maximum: 31
+description:
+  Emulated PHY ID, choose any but unique to the all
+

[PATCH v4 09/13] dt-bindings: net: sun7i-gmac: Convert the binding to a schemas

2019-06-27 Thread Maxime Ripard

Switch our Allwinner A20 GMAC controller binding to a YAML schema to enable
the DT validation. Since that controller is based on a Synopsys IP, let's
add the validation to that schemas with a bunch of conditionals.

Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt  | 27 
---
 Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml | 66 
++
 Documentation/devicetree/bindings/net/snps,dwmac.yaml   |  3 +++
 3 files changed, 69 insertions(+), 27 deletions(-)
 delete mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
 create mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml

diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt 
b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
deleted file mode 100644
index 8b3f953656e3..000000000000
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-* Allwinner GMAC ethernet controller
-
-This device is a platform glue layer for stmmac.
-Please see stmmac.txt for the other unchanged properties.
-
-Required properties:
- - compatible:  Should be "allwinner,sun7i-a20-gmac"
- - clocks: Should contain the GMAC main clock, and tx clock
-   The tx clock type should be "allwinner,sun7i-a20-gmac-clk"
- - clock-names: Should contain the clock names "stmmaceth",
-   and "allwinner_gmac_tx"
-
-Optional properties:
-- phy-supply: phandle to a regulator if the PHY needs one
-
-Examples:
-
-   gmac: ethernet@1c50000 {
-   compatible = "allwinner,sun7i-a20-gmac";
-   reg = <0x01c50000 0x10000>,
- <0x01c20164 0x4>;
-   interrupts = <0 85 1>;
-   interrupt-names = "macirq";
-   clocks = <&ahb_gates 49>, <&gmac_tx>;
-   clock-names = "stmmaceth", "allwinner_gmac_tx";
-   phy-mode = "mii";
-   };
diff --git 
a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml 
b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
new file mode 100644
index 000000000000..38f6a2a73f46
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun7i-a20-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A20 GMAC Device Tree Bindings
+
+allOf:
+  - $ref: "snps,dwmac.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai 
+  - Maxime Ripard 
+
+properties:
+  compatible:
+const: allwinner,sun7i-a20-gmac
+
+  interrupts:
+maxItems: 1
+
+  interrupt-names:
+const: macirq
+
+  clocks:
+items:
+  - description: GMAC main clock
+  - description: TX clock
+
+  clock-names:
+items:
+  - const: stmmaceth
+  - const: allwinner_gmac_tx
+
+  phy-supply:
+description:
+  PHY regulator
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+  - phy-connection-type
+
+examples:
+  - |
+gmac: ethernet@1c50000 {
+compatible = "allwinner,sun7i-a20-gmac";
+reg = <0x01c50000 0x10000>,
+  <0x01c20164 0x4>;
+interrupts = <0 85 1>;
+interrupt-names = "macirq";
+clocks = <&ahb_gates 49>, <&gmac_tx>;
+clock-names = "stmmaceth", "allwinner_gmac_tx";
+phy-connection-type = "mii";
+};
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml 
b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 30e2ff7a2dcb..fed623a81dcd 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -44,6 +44,7 @@ properties:
   compatible:
 contains:
   enum:
+- allwinner,sun7i-a20-gmac
 - snps,dwmac
 - snps,dwmac-3.50a
 - snps,dwmac-3.610
@@ -265,6 +266,7 @@ allOf:
 compatible:
   contains:
 enum:
+  - allwinner,sun7i-a20-gmac
   - snps,dwxgmac
   - snps,dwxgmac-2.10
   - st,spear600-gmac
@@ -305,6 +307,7 @@ allOf:
 compatible:
   contains:
 enum:
+  - allwinner,sun7i-a20-gmac
   - snps,dwmac-4.00
   - snps,dwmac-4.10a
   - snps,dwxgmac
-- 
git-series 0.9.1

[PATCH v4 08/13] dt-bindings: net: stmmac: Convert the binding to a schemas

2019-06-27 Thread Maxime Ripard

Switch the STMMAC / Synopsys DesignWare MAC controller binding to a YAML
schema to enable the DT validation.

Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 389 +++-
 Documentation/devicetree/bindings/net/stmmac.txt  | 179 +-
 2 files changed, 390 insertions(+), 178 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/snps,dwmac.yaml

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml 
b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
new file mode 100644
index 000000000000..30e2ff7a2dcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -0,0 +1,389 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/snps,dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare MAC Device Tree Bindings
+
+maintainers:
+  - Alexandre Torgue 
+  - Giuseppe Cavallaro 
+  - Jose Abreu 
+
+# Select every compatible, including the deprecated ones. This way, we
+# will be able to report a warning when we have that compatible, since
+# we will validate the node thanks to the select, but won't report it
+# as a valid value in the compatible property description
+select:
+  properties:
+compatible:
+  contains:
+enum:
+  - snps,dwmac
+  - snps,dwmac-3.50a
+  - snps,dwmac-3.610
+  - snps,dwmac-3.70a
+  - snps,dwmac-3.710
+  - snps,dwmac-4.00
+  - snps,dwmac-4.10a
+  - snps,dwxgmac
+  - snps,dwxgmac-2.10
+
+  # Deprecated
+  - st,spear600-gmac
+
+  required:
+- compatible
+
+properties:
+
+  # We need to include all the compatibles from schemas that will
+  # include that schemas, otherwise compatible won't validate for
+  # those.
+  compatible:
+contains:
+  enum:
+- snps,dwmac
+- snps,dwmac-3.50a
+- snps,dwmac-3.610
+- snps,dwmac-3.70a
+- snps,dwmac-3.710
+- snps,dwmac-4.00
+- snps,dwmac-4.10a
+- snps,dwxgmac
+- snps,dwxgmac-2.10
+
+  reg:
+maxItems: 1
+
+  interrupts:
+minItems: 1
+maxItems: 3
+items:
+  - description: Combined signal for various interrupt events
+  - description: The interrupt to manage the remote wake-up packet 
detection
+  - description: The interrupt that occurs when Rx exits the LPI state
+
+  interrupt-names:
+minItems: 1
+maxItems: 3
+items:
+  - const: macirq
+  - const: eth_wake_irq
+  - const: eth_lpi
+
+  clocks:
+minItems: 1
+maxItems: 3
+items:
+  - description: GMAC main clock
+  - description: Peripheral registers interface clock
+  - description:
+  PTP reference clock. This clock is used for programming the
+  Timestamp Addend Register. If not passed then the system
+  clock will be used and this is fine on some platforms.
+
+  clock-names:
+additionalItems: true
+contains:
+  enum:
+- stmmaceth
+- pclk
+- ptp_ref
+
+  resets:
+maxItems: 1
+description:
+  MAC Reset signal.
+
+  reset-names:
+const: stmmaceth
+
+  snps,axi-config:
+$ref: /schemas/types.yaml#definitions/phandle
+description:
+  AXI BUS Mode parameters. Phandle to a node that can contain the
+  following properties
+* snps,lpi_en, enable Low Power Interface
+* snps,xit_frm, unlock on WoL
+* snps,wr_osr_lmt, max write outstanding req. limit
+* snps,rd_osr_lmt, max read outstanding req. limit
+* snps,kbbe, do not cross 1KiB boundary.
+* snps,blen, this is a vector of supported burst length.
+* snps,fb, fixed-burst
+* snps,mb, mixed-burst
+* snps,rb, rebuild INCRx Burst
+
+  snps,mtl-rx-config:
+$ref: /schemas/types.yaml#definitions/phandle
+description:
+  Multiple RX Queues parameters. Phandle to a node that can
+  contain the following properties
+* snps,rx-queues-to-use, number of RX queues to be used in the
+  driver
+* Choose one of these RX scheduling algorithms
+  * snps,rx-sched-sp, Strict priority
+  * snps,rx-sched-wsp, Weighted Strict priority
+* For each RX queue
+  * Choose one of these modes
+* snps,dcb-algorithm, Queue to be enabled as DCB
+* snps,avb-algorithm, Queue to be enabled as AVB
+  * snps,map-to-dma-channel, Channel to map
+  * Specify specific packet routing
+* snps,route-avcp, AV Untagged Control packets
+* snps,route-ptp, PTP Packets
+* snps,route-dcbcp, DCB Control Packets
+* snps,route-up, Untagged Packets
+* snps,route-multi-broad, Multicast & Broadcast Packets
+  * snps,priority, RX queue priority (Range 0x0 to 0xF)
+
+  snps,mtl-tx-config:
+$ref: /schemas/types.yaml#de

[PATCH v4 03/13] dt-bindings: net: Add a YAML schemas for the generic MDIO options

2019-06-27 Thread Maxime Ripard

The MDIO buses have a number of available device tree properties that can
be used in their device tree node. Add a YAML schemas for those.

Suggested-by: Andrew Lunn 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/mdio.txt  | 38 +-
 Documentation/devicetree/bindings/net/mdio.yaml | 51 ++-
 2 files changed, 52 insertions(+), 37 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/mdio.yaml

diff --git a/Documentation/devicetree/bindings/net/mdio.txt 
b/Documentation/devicetree/bindings/net/mdio.txt
index e3e1603f256c..cf8a0105488e 100644
--- a/Documentation/devicetree/bindings/net/mdio.txt
+++ b/Documentation/devicetree/bindings/net/mdio.txt
@@ -1,37 +1 @@
-Common MDIO bus properties.
-
-These are generic properties that can apply to any MDIO bus.
-
-Optional properties:
-- reset-gpios: One GPIO that control the RESET lines of all PHYs on that MDIO
-  bus.
-- reset-delay-us: RESET pulse width in microseconds.
-
-A list of child nodes, one per device on the bus is expected. These
-should follow the generic phy.txt, or a device specific binding document.
-
-The 'reset-delay-us' indicates the RESET signal pulse width in microseconds and
-applies to all PHY devices. It must therefore be appropriately determined based
-on all PHY requirements (maximum value of all per-PHY RESET pulse widths).
-
-Example :
-This example shows these optional properties, plus other properties
-required for the TI Davinci MDIO driver.
-
-   davinci_mdio: ethernet@5c030000 {
-   compatible = "ti,davinci_mdio";
-   reg = <0x5c030000 0x1000>;
-   #address-cells = <1>;
-   #size-cells = <0>;
-
-   reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
-   reset-delay-us = <2>;
-
-   ethphy0: ethernet-phy@1 {
-   reg = <1>;
-   };
-
-   ethphy1: ethernet-phy@3 {
-   reg = <3>;
-   };
-   };
+This file has moved to mdio.yaml.
diff --git a/Documentation/devicetree/bindings/net/mdio.yaml 
b/Documentation/devicetree/bindings/net/mdio.yaml
new file mode 100644
index 000000000000..b8fa8251c4bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MDIO Bus Generic Binding
+
+maintainers:
+  - Andrew Lunn 
+  - Florian Fainelli 
+  - Heiner Kallweit 
+
+description:
+  These are generic properties that can apply to any MDIO bus. Any
+  MDIO bus must have a list of child nodes, one per device on the
+  bus. These should follow the generic ethernet-phy.yaml document, or
+  a device specific binding document.
+
+properties:
+  reset-gpios:
+maxItems: 1
+description:
+  The phandle and specifier for the GPIO that controls the RESET
+  lines of all PHYs on that MDIO bus.
+
+  reset-delay-us:
+description:
+  RESET pulse width in microseconds. It applies to all PHY devices
+  and must therefore be appropriately determined based on all PHY
+  requirements (maximum value of all per-PHY RESET pulse widths).
+
+examples:
+  - |
+davinci_mdio: mdio@5c030000 {
+compatible = "ti,davinci_mdio";
+reg = <0x5c030000 0x1000>;
+#address-cells = <1>;
+#size-cells = <0>;
+
+reset-gpios = <&gpio2 5 1>;
+reset-delay-us = <2>;
+
+ethphy0: ethernet-phy@1 {
+reg = <1>;
+};
+
+ethphy1: ethernet-phy@3 {
+reg = <3>;
+};
+};
-- 
git-series 0.9.1

[PATCH v4 13/13] ARM: dts: sunxi: Switch from phy to phy-handle

2019-06-27 Thread Maxime Ripard

The phy device tree property has been deprecated in favor of phy-handle,
let's replace it.

Signed-off-by: Maxime Ripard 
---
 arch/arm/boot/dts/sun4i-a10-a1000.dts| 2 +-
 arch/arm/boot/dts/sun4i-a10-ba10-tvbox.dts   | 2 +-
 arch/arm/boot/dts/sun4i-a10-cubieboard.dts   | 2 +-
 arch/arm/boot/dts/sun4i-a10-hackberry.dts| 2 +-
 arch/arm/boot/dts/sun4i-a10-itead-iteaduino-plus.dts | 2 +-
 arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts   | 2 +-
 arch/arm/boot/dts/sun4i-a10-marsboard.dts| 2 +-
 arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts   | 2 +-
 arch/arm/boot/dts/sun4i-a10-pcduino.dts  | 2 +-
 arch/arm/boot/dts/sun5i-a10s-olinuxino-micro.dts | 2 +-
 arch/arm/boot/dts/sun5i-a10s-wobo-i5.dts | 2 +-
 arch/arm/boot/dts/sun6i-a31-colombus.dts | 2 +-
 arch/arm/boot/dts/sun6i-a31-hummingbird.dts  | 2 +-
 arch/arm/boot/dts/sun6i-a31-i7.dts   | 2 +-
 arch/arm/boot/dts/sun6i-a31-m9.dts   | 2 +-
 arch/arm/boot/dts/sun6i-a31-mele-a1000g-quad.dts | 2 +-
 arch/arm/boot/dts/sun6i-a31s-cs908.dts   | 2 +-
 arch/arm/boot/dts/sun6i-a31s-sina31s.dts | 2 +-
 arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts | 2 +-
 arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts | 2 +-
 arch/arm/boot/dts/sun7i-a20-bananapi.dts | 2 +-
 arch/arm/boot/dts/sun7i-a20-bananapro.dts| 2 +-
 arch/arm/boot/dts/sun7i-a20-cubieboard2.dts  | 2 +-
 arch/arm/boot/dts/sun7i-a20-cubietruck.dts   | 2 +-
 arch/arm/boot/dts/sun7i-a20-hummingbird.dts  | 2 +-
 arch/arm/boot/dts/sun7i-a20-i12-tvbox.dts| 2 +-
 arch/arm/boot/dts/sun7i-a20-icnova-swac.dts  | 2 +-
 arch/arm/boot/dts/sun7i-a20-itead-ibox.dts   | 2 +-
 arch/arm/boot/dts/sun7i-a20-m3.dts   | 2 +-
 arch/arm/boot/dts/sun7i-a20-olimex-som-evb.dts   | 2 +-
 arch/arm/boot/dts/sun7i-a20-olimex-som204-evb.dts| 2 +-
 arch/arm/boot/dts/sun7i-a20-olinuxino-lime.dts   | 2 +-
 arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts  | 2 +-
 arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts  | 2 +-
 arch/arm/boot/dts/sun7i-a20-orangepi-mini.dts| 2 +-
 arch/arm/boot/dts/sun7i-a20-orangepi.dts | 2 +-
 arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts| 2 +-
 arch/arm/boot/dts/sun7i-a20-pcduino3.dts | 2 +-
 arch/arm/boot/dts/sun7i-a20-wits-pro-a20-dkt.dts | 2 +-
 arch/arm/boot/dts/sun9i-a80-cubieboard4.dts  | 2 +-
 arch/arm/boot/dts/sun9i-a80-optimus.dts  | 2 +-
 41 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/arch/arm/boot/dts/sun4i-a10-a1000.dts 
b/arch/arm/boot/dts/sun4i-a10-a1000.dts
index 6c254ec4c85b..8692b11a83c3 100644
--- a/arch/arm/boot/dts/sun4i-a10-a1000.dts
+++ b/arch/arm/boot/dts/sun4i-a10-a1000.dts
@@ -125,7 +125,7 @@
 };
 
 &emac {
-   phy = <&phy1>;
+   phy-handle = <&phy1>;
status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/sun4i-a10-ba10-tvbox.dts 
b/arch/arm/boot/dts/sun4i-a10-ba10-tvbox.dts
index 38a2c4134952..816d534ac093 100644
--- a/arch/arm/boot/dts/sun4i-a10-ba10-tvbox.dts
+++ b/arch/arm/boot/dts/sun4i-a10-ba10-tvbox.dts
@@ -68,7 +68,7 @@
 };
 
 &emac {
-   phy = <&phy1>;
+   phy-handle = <&phy1>;
status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/sun4i-a10-cubieboard.dts 
b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts
index 7306c65df88a..6ca02e824acc 100644
--- a/arch/arm/boot/dts/sun4i-a10-cubieboard.dts
+++ b/arch/arm/boot/dts/sun4i-a10-cubieboard.dts
@@ -114,7 +114,7 @@
 };
 
 &emac {
-   phy = <&phy1>;
+   phy-handle = <&phy1>;
status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/sun4i-a10-hackberry.dts 
b/arch/arm/boot/dts/sun4i-a10-hackberry.dts
index cc988ccd5ca7..47dea0922501 100644
--- a/arch/arm/boot/dts/sun4i-a10-hackberry.dts
+++ b/arch/arm/boot/dts/sun4i-a10-hackberry.dts
@@ -80,7 +80,7 @@
 };
 
 &emac {
-   phy = <&phy0>;
+   phy-handle = <&phy0>;
status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/sun4i-a10-itead-iteaduino-plus.dts 
b/arch/arm/boot/dts/sun4i-a10-itead-iteaduino-plus.dts
index 80ecd78247ac..d4e319d16aae 100644
--- a/arch/arm/boot/dts/sun4i-a10-itead-iteaduino-plus.dts
+++ b/arch/arm/boot/dts/sun4i-a10-itead-iteaduino-plus.dts
@@ -58,7 +58,7 @@
 &emac {
pinctrl-names = "default";
pinctrl-0 = <&emac_pins>;
-   phy = <&phy1>;
+   phy-handle = <&phy1>;
status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts 
b/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts
index 247fa27ef717..8a7b4c53d278 100644
--- a/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts
+++ b/arch/arm/boot/dts/sun4i-a10-jesurun-q5.dts
@@ -94,7 +94,7 @@
 };
 
 &emac {
-   phy = <&phy1>;
+   phy-handle = <&phy1>;
status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/sun4i-a10-marsboard.dts 
b/arch/arm/boot/dts/sun4i-a10-marsboa

[PATCH v4 06/13] dt-bindings: net: sun4i-emac: Convert the binding to a schemas

2019-06-27 Thread Maxime Ripard

Switch our Allwinner A10 EMAC controller binding to a YAML schema to enable
the DT validation.

Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml | 55 
+++
 Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt  | 19 
---
 2 files changed, 55 insertions(+), 19 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
 delete mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt

diff --git 
a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml 
b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
new file mode 100644
index ..2ff9e605cd26
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun4i-a10-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A10 EMAC Ethernet Controller Device Tree Bindings
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+  - Chen-Yu Tsai 
+  - Maxime Ripard 
+
+properties:
+  compatible:
+const: allwinner,sun4i-a10-emac
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  clocks:
+maxItems: 1
+
+  allwinner,sram:
+description: Phandle to the device SRAM
+$ref: /schemas/types.yaml#/definitions/phandle-array
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - phy-handle
+  - allwinner,sram
+
+examples:
+  - |
+emac: ethernet@1c0b000 {
+compatible = "allwinner,sun4i-a10-emac";
+reg = <0x01c0b000 0x1000>;
+interrupts = <55>;
+clocks = <&ahb_gates 17>;
+phy = <&phy0>;
+};
+
+# FIXME: We should set it, but it would report all the generic
+# properties as additional properties.
+# additionalProperties: false
+
+...
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt 
b/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt
deleted file mode 100644
index e98118aef5f6..
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* Allwinner EMAC ethernet controller
-
-Required properties:
-- compatible: should be "allwinner,sun4i-a10-emac" (Deprecated:
-  "allwinner,sun4i-emac")
-- reg: address and length of the register set for the device.
-- interrupts: interrupt for the device
-- phy: see ethernet.txt file in the same directory.
-- clocks: A phandle to the reference clock for this device
-
-Example:
-
-emac: ethernet@1c0b000 {
-   compatible = "allwinner,sun4i-a10-emac";
-   reg = <0x01c0b000 0x1000>;
-   interrupts = <55>;
-   clocks = <&ahb_gates 17>;
-   phy = <&phy0>;
-};
-- 
git-series 0.9.1

[PATCH v4 02/13] dt-bindings: net: Add a YAML schemas for the generic PHY options

2019-06-27 Thread Maxime Ripard

The networking PHYs have a number of available device tree properties that
can be used in their device tree node. Add a YAML schemas for those.

Reviewed-by: Andrew Lunn 
Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/ethernet-phy.yaml | 179 +-
 Documentation/devicetree/bindings/net/phy.txt   |  80 +
 2 files changed, 180 insertions(+), 79 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/net/ethernet-phy.yaml

diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml 
b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
new file mode 100644
index ..81d2016d7232
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ethernet-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ethernet PHY Generic Binding
+
+maintainers:
+  - Andrew Lunn 
+  - Florian Fainelli 
+  - Heiner Kallweit 
+
+# The dt-schema tools will generate a select statement first by using
+# the compatible, and second by using the node name if any. In our
+# case, the node name is the one we want to match on, while the
+# compatible is optional.
+select:
+  properties:
+$nodename:
+  pattern: "^ethernet-phy(@[a-f0-9]+)?$"
+
+  required:
+- $nodename
+
+properties:
+  $nodename:
+pattern: "^ethernet-phy(@[a-f0-9]+)?$"
+
+  compatible:
+oneOf:
+  - const: ethernet-phy-ieee802.3-c22
+description: PHYs that implement IEEE802.3 clause 22
+  - const: ethernet-phy-ieee802.3-c45
+description: PHYs that implement IEEE802.3 clause 45
+  - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$"
+description:
+  If the PHY reports an incorrect ID (or none at all) then the
+  compatible list may contain an entry with the correct PHY ID
+  in the above form.
+  The first group of digits is the 16 bit Phy Identifier 1
+  register, this is the chip vendor OUI bits 3:18. The
+  second group of digits is the Phy Identifier 2 register,
+  this is the chip vendor OUI bits 19:24, followed by 10
+  bits of a vendor specific ID.
+  - items:
+  - pattern: "^ethernet-phy-id[a-f0-9]{4}\\.[a-f0-9]{4}$"
+  - const: ethernet-phy-ieee802.3-c45
+
+  reg:
+maxItems: 1
+minimum: 0
+maximum: 31
+description:
+  The ID number for the PHY.
+
+  interrupts:
+maxItems: 1
+
+  max-speed:
+enum:
+  - 10
+  - 100
+  - 1000
+  - 2500
+  - 5000
+  - 10000
+  - 20000
+  - 25000
+  - 40000
+  - 50000
+  - 56000
+  - 100000
+  - 200000
+description:
+  Maximum PHY supported speed in Mbits / seconds.
+
+  broken-turn-around:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  If set, indicates the PHY device does not correctly release
+  the turn around line low at the end of a MDIO transaction.
+
+  enet-phy-lane-swap:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  If set, indicates the PHY will swap the TX/RX lanes to
+  compensate for the board being designed with the lanes
+  swapped.
+
+  eee-broken-100tx:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  Mark the corresponding energy efficient ethernet mode as
+  broken and request the ethernet to stop advertising it.
+
+  eee-broken-1000t:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  Mark the corresponding energy efficient ethernet mode as
+  broken and request the ethernet to stop advertising it.
+
+  eee-broken-10gt:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  Mark the corresponding energy efficient ethernet mode as
+  broken and request the ethernet to stop advertising it.
+
+  eee-broken-1000kx:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  Mark the corresponding energy efficient ethernet mode as
+  broken and request the ethernet to stop advertising it.
+
+  eee-broken-10gkx4:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  Mark the corresponding energy efficient ethernet mode as
+  broken and request the ethernet to stop advertising it.
+
+  eee-broken-10gkr:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  Mark the corresponding energy efficient ethernet mode as
+  broken and request the ethernet to stop advertising it.
+
+  phy-is-integrated:
+$ref: /schemas/types.yaml#definitions/flag
+description:
+  If set, indicates that the PHY is integrated into the same
+  physical package as the Ethernet MAC. If needed, muxers
+  should be configured to ensure the integrated PHY is
+  used. The absence of this property indicates the muxers
+  should be configured so that the extern

[PATCH v4 10/13] dt-bindings: net: sun8i-emac: Convert the binding to a schemas

2019-06-27 Thread Maxime Ripard

Switch our Allwinner H3 EMAC controller binding to a YAML schema to enable
the DT validation. Since that controller is based on a Synopsys IP, let's
add the validation to that schemas with a bunch of conditionals.

Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml | 321 
-
 Documentation/devicetree/bindings/net/dwmac-sun8i.txt| 201 
+-
 Documentation/devicetree/bindings/net/snps,dwmac.yaml|  15 +++-
 3 files changed, 336 insertions(+), 201 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
 delete mode 100644 Documentation/devicetree/bindings/net/dwmac-sun8i.txt

diff --git 
a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml 
b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
new file mode 100644
index ..6f68c7f5fc34
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -0,0 +1,321 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/allwinner,sun8i-a83t-gmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A83t EMAC Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai 
+  - Maxime Ripard 
+
+properties:
+  compatible:
+oneOf:
+  - const: allwinner,sun8i-a83t-emac
+  - const: allwinner,sun8i-h3-emac
+  - const: allwinner,sun8i-r40-emac
+  - const: allwinner,sun8i-v3s-emac
+  - const: allwinner,sun50i-a64-emac
+  - items:
+- const: allwinner,sun50i-h6-emac
+- const: allwinner,sun50i-a64-emac
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  interrupt-names:
+const: macirq
+
+  clocks:
+maxItems: 1
+
+  clock-names:
+const: stmmaceth
+
+  syscon:
+$ref: /schemas/types.yaml#definitions/phandle
+description:
+  Phandle to the device containing the EMAC or GMAC clock
+  register
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - phy-connection-type
+  - phy-handle
+  - syscon
+
+allOf:
+  - $ref: "snps,dwmac.yaml#"
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - allwinner,sun8i-a83t-emac
+  - allwinner,sun8i-h3-emac
+  - allwinner,sun8i-v3s-emac
+  - allwinner,sun50i-a64-emac
+
+then:
+  properties:
+allwinner,tx-delay-ps:
+  default: 0
+  minimum: 0
+  maximum: 700
+  multipleOf: 100
+  description:
+External RGMII PHY TX clock delay chain value in ps.
+
+allwinner,rx-delay-ps:
+  default: 0
+  minimum: 0
+  maximum: 3100
+  multipleOf: 100
+  description:
+External RGMII PHY RX clock delay chain value in ps.
+
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - allwinner,sun8i-r40-emac
+
+then:
+  properties:
+allwinner,rx-delay-ps:
+  default: 0
+  minimum: 0
+  maximum: 700
+  multipleOf: 100
+  description:
+External RGMII PHY RX clock delay chain value in ps.
+
+  - if:
+  properties:
+compatible:
+  contains:
+enum:
+  - allwinner,sun8i-h3-emac
+  - allwinner,sun8i-v3s-emac
+
+then:
+  properties:
+allwinner,leds-active-low:
+  $ref: /schemas/types.yaml#definitions/flag
+  description:
+EPHY LEDs are active low.
+
+mdio-mux:
+  type: object
+
+  properties:
+compatible:
+  const: allwinner,sun8i-h3-mdio-mux
+
+mdio-parent-bus:
+  $ref: /schemas/types.yaml#definitions/phandle
+  description:
+Phandle to EMAC MDIO.
+
+mdio@1:
+  type: object
+  description: Internal MDIO Bus
+
+  properties:
+"#address-cells":
+  const: 1
+
+"#size-cells":
+  const: 0
+
+compatible:
+  const: allwinner,sun8i-h3-mdio-internal
+
+reg:
+  const: 1
+
+  patternProperties:
+"^ethernet-phy@[0-9a-f]$":
+  type: object
+  description:
+Integrated PHY node
+
+  properties:
+clocks:
+  maxItems: 1
+
+resets:
+  maxItems: 1
+
+  required:
+- clocks
+- resets
+
+
+mdio@2:
+

[PATCH v4 12/13] ARM: dts: sunxi: Switch to the generic PHY properties

2019-06-27 Thread Maxime Ripard

The DWMAC specific properties to manage the PHY have been superseded by
the generic PHY properties. Let's move to it.

Reviewed-by: Andrew Lunn 
Tested-by: Chen-Yu Tsai 
Signed-off-by: Maxime Ripard 
---
 arch/arm/boot/dts/sun6i-a31-hummingbird.dts   |  6 +++---
 arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts  |  6 +++---
 arch/arm/boot/dts/sun7i-a20-hummingbird.dts   |  9 -
 arch/arm/boot/dts/sun7i-a20-olimex-som204-evb.dts |  8 
 4 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts 
b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
index 09832b4e8fc8..2652d737fe7c 100644
--- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
+++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
@@ -155,13 +155,13 @@
pinctrl-0 = <&gmac_rgmii_pins>;
phy = <&phy1>;
phy-mode = "rgmii";
-   snps,reset-gpio = <&pio 0 21 GPIO_ACTIVE_HIGH>;
-   snps,reset-active-low;
-   snps,reset-delays-us = <0 1 3>;
status = "okay";
 
phy1: ethernet-phy@1 {
reg = <1>;
+   reset-gpios = <&pio 0 21 GPIO_ACTIVE_LOW>;
+   reset-assert-us = <1>;
+   reset-deassert-us = <3>;
};
 };
 
diff --git a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts 
b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts
index 8e724c52feff..7899712400b2 100644
--- a/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts
+++ b/arch/arm/boot/dts/sun6i-a31s-sinovoip-bpi-m2.dts
@@ -95,13 +95,13 @@
phy = <&phy1>;
phy-mode = "rgmii";
phy-supply = <&reg_dldo1>;
-   snps,reset-gpio = <&pio 0 21 GPIO_ACTIVE_HIGH>; /* PA21 */
-   snps,reset-active-low;
-   snps,reset-delays-us = <0 1 3>;
status = "okay";
 
phy1: ethernet-phy@1 {
reg = <1>;
+   reset-gpios = <&pio 0 21 GPIO_ACTIVE_LOW>; /* PA21 */
+   reset-assert-us = <1>;
+   reset-deassert-us = <3>;
};
 };
 
diff --git a/arch/arm/boot/dts/sun7i-a20-hummingbird.dts 
b/arch/arm/boot/dts/sun7i-a20-hummingbird.dts
index fd0153f65685..b01d91d025ec 100644
--- a/arch/arm/boot/dts/sun7i-a20-hummingbird.dts
+++ b/arch/arm/boot/dts/sun7i-a20-hummingbird.dts
@@ -103,15 +103,14 @@
phy = <&phy1>;
phy-mode = "rgmii";
phy-supply = <&reg_gmac_vdd>;
-   /* phy reset config */
-   snps,reset-gpio = <&pio 0 17 GPIO_ACTIVE_HIGH>; /* PA17 */
-   snps,reset-active-low;
-   /* wait 1s after reset, otherwise fail to read phy id */
-   snps,reset-delays-us = <0 1 100>;
status = "okay";
 
phy1: ethernet-phy@1 {
reg = <1>;
+   reset-gpios = <&pio 0 17 GPIO_ACTIVE_LOW>; /* PA17 */
+   reset-assert-us = <1>;
+   /* wait 1s after reset, otherwise fail to read phy id */
+   reset-deassert-us = <100>;
};
 };
 
diff --git a/arch/arm/boot/dts/sun7i-a20-olimex-som204-evb.dts 
b/arch/arm/boot/dts/sun7i-a20-olimex-som204-evb.dts
index c34a83f666c7..ca12cee27072 100644
--- a/arch/arm/boot/dts/sun7i-a20-olimex-som204-evb.dts
+++ b/arch/arm/boot/dts/sun7i-a20-olimex-som204-evb.dts
@@ -108,14 +108,14 @@
phy = <&phy3>;
phy-mode = "rgmii";
phy-supply = <&reg_vcc3v3>;
-
-   snps,reset-gpio = <&pio 0 17 GPIO_ACTIVE_HIGH>;
-   snps,reset-active-low;
-   snps,reset-delays-us = <0 1 100>;
status = "okay";
 
phy3: ethernet-phy@3 {
reg = <3>;
+   reset-gpios = <&pio 0 17 GPIO_ACTIVE_LOW>; /* PA17 */
+   reset-assert-us = <1>;
+   /* wait 1s after reset, otherwise fail to read phy id */
+   reset-deassert-us = <100>;
};
 };
 
-- 
git-series 0.9.1

[PATCH v4 11/13] dt-bindings: net: dwmac: Deprecate the PHY reset properties

2019-06-27 Thread Maxime Ripard

Even though the DWMAC driver uses some driver specific properties, the PHY
core has a bunch of generic properties and can deal with them nicely.

Let's deprecate our specific properties.

Reviewed-by: Martin Blumenstingl 
Reviewed-by: Rob Herring 
Signed-off-by: Maxime Ripard 
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml 
b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 956308806c33..0bf322408500 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -172,16 +172,19 @@ properties:
   * snps,priority, TX queue priority (Range 0x0 to 0xF)
 
   snps,reset-gpio:
+deprecated: true
 maxItems: 1
 description:
   PHY Reset GPIO
 
   snps,reset-active-low:
+deprecated: true
 $ref: /schemas/types.yaml#definitions/flag
 description:
   Indicates that the PHY Reset is active low
 
   snps,reset-delays-us:
+deprecated: true
 allOf:
   - $ref: /schemas/types.yaml#definitions/uint32-array
   - minItems: 3
-- 
git-series 0.9.1

Fw: [Bug 204005] New: Code in __mkroute_input isn't full correct

2019-06-27 Thread Stephen Hemminger

This is on a very old kernel, and looks like not a valid bug.
But forwarding to list anyway since others may want to provide
input.

Begin forwarded message:

Date: Thu, 27 Jun 2019 09:33:27 +0000
From: bugzilla-dae...@bugzilla.kernel.org
To: step...@networkplumber.org
Subject: [Bug 204005] New: Code in __mkroute_input isn't full correct

https://bugzilla.kernel.org/show_bug.cgi?id=204005

Bug ID: 204005
   Summary: Code in __mkroute_input isn't full correct
   Product: Networking
   Version: 2.5
Kernel Version: 3.10.0-862
  Hardware: All
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: IPV4
  Assignee: step...@networkplumber.org
  Reporter: cliff.c...@nokia-sbell.com
Regression: No

In function __mkroute_input(), there is issue in below code:
..
rt_cache:
if (rt_cache_valid(rth)) { <<==
skb_dst_set_noref(skb, &rth->dst);
goto out;
}

..
Once a route lookup fails, rth.rt_type is set to unreachable (7).
However, once the route becomes valid again, the condition
rt_cache_valid(rth) only compares the rt_genid in the cache with the one in the
net namespace, so even after the route has recovered, the failed cached route
is always returned.
one test env.
1) host1:
add ip1 on interface x

2) host2(proxy arp)
2.1) add ip2 on interface y1 with 32 prefix
2.2) add no IP on interface y2
Notes: x, y1 and y2 are in the same layer 2 network
set forwarding on y1 interface
set ip3 as arp proxy on interface y1

2.3) add ip3 on an interface z that isn't in the same layer 2 network as
interfaces y1 and y2.

3)run below test on host1 to check whether arp is back.
arping -I x -s ip1 ip3

The possible reason analysis:
since ARP is broadcast, then interface y2 can get this ARP request first,
because forwarding isn't set on on y2, then route failed. this is correct.
however, when ARP is received on y1, the route is always failed even the result
from fib_lookup is successfully. All these because the condition
rt_cache_valid(rth).
because, the rt_genid in cache isn't changed, and
 rg_genid in network space isn't changed, too.
therefore, it will never OK until, I 
down y2, or
ip route flush cache
to increase rt_genid in network space.

thanks
Cliff

-- 
You are receiving this mail because:
You are the assignee for the bug.

Re: [PATCH v4 01/13] dt-bindings: net: Add YAML schemas for the generic Ethernet options

2019-06-27 Thread Andrew Lunn

On Thu, Jun 27, 2019 at 05:31:43PM +0200, Maxime Ripard wrote:
> The Ethernet controllers have a good number of generic options that can be
> needed in a device tree. Add a YAML schemas for those.
> 
> Reviewed-by: Rob Herring 
> Signed-off-by: Maxime Ripard 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH v4 03/13] dt-bindings: net: Add a YAML schemas for the generic MDIO options

2019-06-27 Thread Andrew Lunn

On Thu, Jun 27, 2019 at 05:31:45PM +0200, Maxime Ripard wrote:
> The MDIO buses have a number of available device tree properties that can
> be used in their device tree node. Add a YAML schemas for those.
> 
> Suggested-by: Andrew Lunn 
> Signed-off-by: Maxime Ripard 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH v4 11/13] dt-bindings: net: dwmac: Deprecate the PHY reset properties

2019-06-27 Thread Andrew Lunn

On Thu, Jun 27, 2019 at 05:31:53PM +0200, Maxime Ripard wrote:
> Even though the DWMAC driver uses some driver specific properties, the PHY
> core has a bunch of generic properties and can deal with them nicely.
> 
> Let's deprecate our specific properties.
> 
> Reviewed-by: Martin Blumenstingl 
> Reviewed-by: Rob Herring 
> Signed-off-by: Maxime Ripard 

Reviewed-by: Andrew Lunn 

Andrew

Re: [PATCH v4 13/13] ARM: dts: sunxi: Switch from phy to phy-handle

2019-06-27 Thread Andrew Lunn

On Thu, Jun 27, 2019 at 05:31:55PM +0200, Maxime Ripard wrote:
> The phy device tree property has been deprecated in favor of phy-handle,
> let's replace it.
> 
> Signed-off-by: Maxime Ripard 

Reviewed-by: Andrew Lunn 

Andrew

Re: 4.19: Traced deadlock during xfrm_user module load

2019-06-27 Thread Thomas Jarosch

Hi Florian,

You wrote on Tue, Jun 25, 2019 at 06:53:44PM +0200:
> Thanks for this detailed analysis.
> In this specific case I think this is enough:
> 
> diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
> index 92077d459109..61ba92415480 100644
> --- a/net/netfilter/nfnetlink.c
> +++ b/net/netfilter/nfnetlink.c
> @@ -578,7 +578,8 @@ static int nfnetlink_bind(struct net *net, int group)
> ss = nfnetlink_get_subsys(type << 8);
> rcu_read_unlock();
> if (!ss)
> -   request_module("nfnetlink-subsys-%d", type);
> +   request_module_nowait("nfnetlink-subsys-%d", type);
> return 0;
>  }
>  #endif

thanks for the patch! We finally found an easy way to reproduce the deadlock,
the following commands instantly trigger the problem on our machines:

rmmod nf_conntrack_netlink
rmmod xfrm_user
conntrack -e NEW -E & modprobe -v xfrm_user

Note: the "-e" filter is needed to trigger the problematic
code path in the kernel.

We were worried that using "_nowait" would introduce other race conditions,
since the requested service might not be available by the time it is required.

On the other hand, if we understand correctly, it seems that after
"nfnetlink_bind()", the caller will listen on the socket for messages
regardless whether the needed modules are loaded, loading or unloaded.
To verify this we added a three second sleep during the initialisation of
nf_conntrack_netlink. The events started to appear after
the delayed init was completed.

If this is the case, then using "_nowait" should suffice as a fix
for the problem. Could you please confirm these assumptions
and give us some peace of mind?

Best regards,
Juliana Rodrigueiro and Thomas Jarosch

Re: [PATCH v4 03/13] dt-bindings: net: Add a YAML schemas for the generic MDIO options

2019-06-27 Thread Rob Herring

On Thu, Jun 27, 2019 at 9:32 AM Maxime Ripard  wrote:
>
> The MDIO buses have a number of available device tree properties that can
> be used in their device tree node. Add a YAML schemas for those.
>
> Suggested-by: Andrew Lunn 
> Signed-off-by: Maxime Ripard 
> ---
>  Documentation/devicetree/bindings/net/mdio.txt  | 38 +-
>  Documentation/devicetree/bindings/net/mdio.yaml | 51 ++-
>  2 files changed, 52 insertions(+), 37 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/net/mdio.yaml

Reviewed-by: Rob Herring 

However, some comments for a follow-up...

> diff --git a/Documentation/devicetree/bindings/net/mdio.yaml 
> b/Documentation/devicetree/bindings/net/mdio.yaml
> new file mode 100644
> index ..b8fa8251c4bc
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/net/mdio.yaml
> @@ -0,0 +1,51 @@
> +# SPDX-License-Identifier: GPL-2.0
> +%YAML 1.2
> +---
> +$id: http://devicetree.org/schemas/net/mdio.yaml#
> +$schema: http://devicetree.org/meta-schemas/core.yaml#
> +
> +title: MDIO Bus Generic Binding
> +
> +maintainers:
> +  - Andrew Lunn 
> +  - Florian Fainelli 
> +  - Heiner Kallweit 
> +
> +description:
> +  These are generic properties that can apply to any MDIO bus. Any
> +  MDIO bus must have a list of child nodes, one per device on the
> +  bus. These should follow the generic ethernet-phy.yaml document, or
> +  a device specific binding document.
> +
> +properties:
> +  reset-gpios:
> +maxItems: 1
> +description:
> +  The phandle and specifier for the GPIO that controls the RESET
> +  lines of all PHYs on that MDIO bus.
> +
> +  reset-delay-us:
> +description:
> +  RESET pulse width in microseconds. It applies to all PHY devices
> +  and must therefore be appropriately determined based on all PHY
> +  requirements (maximum value of all per-PHY RESET pulse widths).
> +
> +examples:
> +  - |
> +davinci_mdio: mdio@5c03 {

Can we enforce nodename to be mdio? That may not work for muxes.
You'll probably have to implement it and see.

> +compatible = "ti,davinci_mdio";
> +reg = <0x5c03 0x1000>;
> +#address-cells = <1>;
> +#size-cells = <0>;

These 2 should have a schema.

> +
> +reset-gpios = <&gpio2 5 1>;
> +reset-delay-us = <2>;
> +
> +ethphy0: ethernet-phy@1 {
> +reg = <1>;

Need a child node schema to validate the unit-address and reg property.

> +};
> +
> +ethphy1: ethernet-phy@3 {
> +reg = <3>;
> +};
> +};
> --
> git-series 0.9.1

Re: 4.19: Traced deadlock during xfrm_user module load

2019-06-27 Thread Florian Westphal

Thomas Jarosch  wrote:
> You wrote on Tue, Jun 25, 2019 at 06:53:44PM +0200:
> > Thanks for this detailed analysis.
> > In this specific case I think this is enough:
> > 
> > diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
> > index 92077d459109..61ba92415480 100644
> > --- a/net/netfilter/nfnetlink.c
> > +++ b/net/netfilter/nfnetlink.c
> > @@ -578,7 +578,8 @@ static int nfnetlink_bind(struct net *net, int group)
> > ss = nfnetlink_get_subsys(type << 8);
> > rcu_read_unlock();
> > if (!ss)
> > -   request_module("nfnetlink-subsys-%d", type);
> > +   request_module_nowait("nfnetlink-subsys-%d", type);
> > return 0;
> >  }
> >  #endif
> 
> thanks for the patch! We finally found an easy way to reproduce the deadlock,
> the following commands instantly trigger the problem on our machines:
> 
> rmmod nf_conntrack_netlink
> rmmod xfrm_user
> conntrack -e NEW -E & modprobe -v xfrm_user
> 
> Note: the "-e" filter is needed to trigger the problematic
> code path in the kernel.
> 
> We were worried that using "_nowait" would introduce other race conditions,
> since the requested service might not be available by the time it is required.

Then this code would be buggy too, there is no guarantee that a
request_module() succeeds.

> "nfnetlink_bind()", the caller will listen on the socket for messages
> regardless whether the needed modules are loaded, loading or unloaded.
> To verify this we added a three second sleep during the initialisation of
> nf_conntrack_netlink. The events started to appear after
> the delayed init was completed.
> 
> If this is the case, then using "_nowait" should suffice as a fix
> for the problem. Could you please confirm these assumptions
> and give us some peace of mind?

Yes, _nowait is safe here (and needed, as you found out).
I'm away for a few hours but I plan to submit this patch officially
soon.

Re: [PATCH v4 03/13] dt-bindings: net: Add a YAML schemas for the generic MDIO options

2019-06-27 Thread Maxime Ripard

Hi Rob,

On Thu, Jun 27, 2019 at 09:48:06AM -0600, Rob Herring wrote:
> On Thu, Jun 27, 2019 at 9:32 AM Maxime Ripard  
> wrote:
> >
> > The MDIO buses have a number of available device tree properties that can
> > be used in their device tree node. Add a YAML schemas for those.
> >
> > Suggested-by: Andrew Lunn 
> > Signed-off-by: Maxime Ripard 
> > ---
> >  Documentation/devicetree/bindings/net/mdio.txt  | 38 +-
> >  Documentation/devicetree/bindings/net/mdio.yaml | 51 ++-
> >  2 files changed, 52 insertions(+), 37 deletions(-)
> >  create mode 100644 Documentation/devicetree/bindings/net/mdio.yaml
>
> Reviewed-by: Rob Herring 
>
> However, some comments for a follow-up...
>
> > diff --git a/Documentation/devicetree/bindings/net/mdio.yaml 
> > b/Documentation/devicetree/bindings/net/mdio.yaml
> > new file mode 100644
> > index ..b8fa8251c4bc
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/net/mdio.yaml
> > @@ -0,0 +1,51 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +%YAML 1.2
> > +---
> > +$id: http://devicetree.org/schemas/net/mdio.yaml#
> > +$schema: http://devicetree.org/meta-schemas/core.yaml#
> > +
> > +title: MDIO Bus Generic Binding
> > +
> > +maintainers:
> > +  - Andrew Lunn 
> > +  - Florian Fainelli 
> > +  - Heiner Kallweit 
> > +
> > +description:
> > +  These are generic properties that can apply to any MDIO bus. Any
> > +  MDIO bus must have a list of child nodes, one per device on the
> > +  bus. These should follow the generic ethernet-phy.yaml document, or
> > +  a device specific binding document.
> > +
> > +properties:
> > +  reset-gpios:
> > +maxItems: 1
> > +description:
> > +  The phandle and specifier for the GPIO that controls the RESET
> > +  lines of all PHYs on that MDIO bus.
> > +
> > +  reset-delay-us:
> > +description:
> > +  RESET pulse width in microseconds. It applies to all PHY devices
> > +  and must therefore be appropriately determined based on all PHY
> > +  requirements (maximum value of all per-PHY RESET pulse widths).
> > +
> > +examples:
> > +  - |
> > +davinci_mdio: mdio@5c03 {
>
> Can we enforce nodename to be mdio? That may not work for muxes.
> You'll probably have to implement it and see.

Ok, I'll send a follow-up patch for this.

> > +compatible = "ti,davinci_mdio";
> > +reg = <0x5c03 0x1000>;
> > +#address-cells = <1>;
> > +#size-cells = <0>;
>
> These 2 should have a schema.

Indeed, I'll do it for that too.

> > +
> > +reset-gpios = <&gpio2 5 1>;
> > +reset-delay-us = <2>;
> > +
> > +ethphy0: ethernet-phy@1 {
> > +reg = <1>;
>
> Need a child node schema to validate the unit-address and reg property.

This should be already covered by the ethernet-phy.yaml schemas
earlier in this series.

Were you expecting something else?

Maxime

--
Maxime Ripard, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


signature.asc
Description: PGP signature

Re: [PATCH net-next 10/18] ionic: Add management of rx filters

2019-06-27 Thread Shannon Nelson


On 6/26/19 8:52 AM, Shannon Nelson wrote:

On 6/25/19 4:37 PM, Jakub Kicinski wrote:

On Thu, 20 Jun 2019 13:24:16 -0700, Shannon Nelson wrote:

+int ionic_rx_filter_save(struct lif *lif, u32 flow_id, u16 rxq_index,
+ u32 hash, struct ionic_admin_ctx *ctx)
+{
+    struct device *dev = lif->ionic->dev;
+    struct hlist_head *head;
+    struct rx_filter *f;
+    unsigned int key;
+
+    f = devm_kzalloc(dev, sizeof(*f), GFP_KERNEL);
+    if (!f)
+    return -ENOMEM;
+
+    f->flow_id = flow_id;
+    f->filter_id = le32_to_cpu(ctx->comp.rx_filter_add.filter_id);
+    f->rxq_index = rxq_index;
+    memcpy(&f->cmd, &ctx->cmd, sizeof(f->cmd));
+
+    INIT_HLIST_NODE(&f->by_hash);
+    INIT_HLIST_NODE(&f->by_id);
+
+    switch (le16_to_cpu(f->cmd.match)) {
+    case RX_FILTER_MATCH_VLAN:
+    key = le16_to_cpu(f->cmd.vlan.vlan) & RX_FILTER_HLISTS_MASK;
+    break;
+    case RX_FILTER_MATCH_MAC:
+    key = *(u32 *)f->cmd.mac.addr & RX_FILTER_HLISTS_MASK;
+    break;
+    case RX_FILTER_MATCH_MAC_VLAN:
+    key = le16_to_cpu(f->cmd.mac_vlan.vlan) & RX_FILTER_HLISTS_MASK;

+    break;
+    default:

I know you use devm_kzalloc() but can't this potentially keep arbitrary
amounts of memory held until the device is removed (and it's the entire
device not just a LIF)?


Yes, but we're freeing this memory when objects are deleted. We're 
trying to be tidy with our allocations, but used devm_kzalloc to be 
more sure that things went away when the device did.


... except, of course, in this error case.  Yes, I'll add a free here.

sln

Re: [PATCH net-next v3 1/4] net: sched: em_ipt: match only on ip/ipv6 traffic

2019-06-27 Thread Eyal Birger

Hi Nik,

On Thu, 27 Jun 2019 11:10:44 +0300
Nikolay Aleksandrov  wrote:

> Restrict matching only to ip/ipv6 traffic and make sure we can use the
> headers, otherwise matches will be attempted on any protocol which can
> be unexpected by the xt matches. Currently policy supports only
> ipv4/6.
> 
> Signed-off-by: Nikolay Aleksandrov 
> ---
> v3: no change
> v2: no change
> 
>  net/sched/em_ipt.c | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
> index 243fd22f2248..64dbafe4e94c 100644
> --- a/net/sched/em_ipt.c
> +++ b/net/sched/em_ipt.c
> @@ -185,6 +185,19 @@ static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
>   struct nf_hook_state state;
>   int ret;
>  
> + switch (tc_skb_protocol(skb)) {
> + case htons(ETH_P_IP):
> + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
> + return 0;
> + break;
> + case htons(ETH_P_IPV6):
> + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
> + return 0;
> + break;
> + default:
> + return 0;
> + }
> +

I just realized that I didn't consider the egress direction in my review.
Don't we need an skb_pull() in that direction to make the skb->data point
to L3? I see this is done e.g. in em_ipset.

Eyal.

1 2 3 >

1 - 100 of 262 matches

Mail list logo