On 06/17/2016 04:43 PM, Saeed Mahameed wrote:
From: Maor Gottlieb <ma...@mellanox.com>
Add a new packet type that skips kernel-specific protocol handlers.
This is needed so device drivers can pass packets up to user space
(af_packet/tcpdump, etc.) without the need for them to go through
the whole kernel data path.
Signed-off-by: Maor Gottlieb <ma...@mellanox.com>
CC: David S. Miller <da...@davemloft.net>
CC: Patrick McHardy <ka...@trash.net>
CC: Eric Dumazet <eduma...@google.com>
Signed-off-by: Saeed Mahameed <sae...@mellanox.com>
---
include/linux/skbuff.h | 6 +++---
include/uapi/linux/if_packet.h | 1 +
net/core/dev.c | 4 ++++
3 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dc0fca7..359724e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -694,14 +694,14 @@ struct sk_buff {
/* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
-#define PKT_TYPE_MAX (7 << 5)
+#define PKT_TYPE_MAX (8 << 5)
#else
-#define PKT_TYPE_MAX 7
+#define PKT_TYPE_MAX 8
#endif
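Ahem ... did you actually test this with BPF?!
PKT_TYPE_MAX is a mask, not a maximum value (the naming could be better, no
doubt): changing it from 7 (0b111) to 8 (0b1000) makes the BPF_AND below keep
a single bit instead of the whole pkt_type field. See also function
convert_skb_access():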
[...]
case SKF_AD_PKTTYPE:
*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg,
PKT_TYPE_OFFSET());
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
break;
[...]
Also, I'm not sure it's worth burning an skb bit for one driver.
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
__u8 __pkt_type_offset[0];
- __u8 pkt_type:3;
+ __u8 pkt_type:4;
__u8 pfmemalloc:1;
__u8 ignore_df:1;
__u8 nfctinfo:3;
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 9e7edfd..93a9f13 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -29,6 +29,7 @@ struct sockaddr_ll {
#define PACKET_LOOPBACK 5 /* MC/BRD frame looped
back */
#define PACKET_USER 6 /* To user space */
#define PACKET_KERNEL 7 /* To kernel space */
+#define PACKET_OFFLOAD_KERNEL 8 /* Offload NET stack */
/* Unused, PACKET_FASTROUTE and PACKET_LOOPBACK are invisible to user space */
#define PACKET_FASTROUTE 6 /* Fastrouted frame */
diff --git a/net/core/dev.c b/net/core/dev.c
index d40593b..f300f1a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4113,6 +4113,9 @@ another_round:
pt_prev = ptype;
}
+ if (unlikely(skb->pkt_type == PACKET_OFFLOAD_KERNEL))
+ goto done;
+
skip_taps:
#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
@@ -4190,6 +4193,7 @@ ncls:
&skb->dev->ptype_specific);
}
+done:
if (pt_prev) {
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;