[dpdk-dev] [PATCH] net/memif: fix chained mbuf determination

2021-09-09 Thread Junxiao Shi
Previously, TX functions called rte_pktmbuf_is_contiguous to determine
whether an mbuf is chained. However, rte_pktmbuf_is_contiguous is
designed to work on the first mbuf of a packet only. In case a packet
contains three or more segment mbufs in a chain, it may cause truncated
packets or rte_mbuf_sanity_check panics.

This patch updates TX functions to determine chained mbufs using
mbuf_head->nb_segs field, which works in all cases. Moreover, it
maintains that the second cacheline is only accessed when chained mbuf
is actually present.

Signed-off-by: Junxiao Shi 
---
 drivers/net/memif/rte_eth_memif.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index de6becd45e..fd9e877c3d 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -199,6 +199,7 @@ memif_dev_info(struct rte_eth_dev *dev __rte_unused, struct 
rte_eth_dev_info *de
dev_info->max_rx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
dev_info->max_tx_queues = ETH_MEMIF_MAX_NUM_Q_PAIRS;
dev_info->min_rx_bufsize = 0;
+   dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS;
 
return 0;
 }
@@ -567,7 +568,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t 
nb_pkts)
rte_eth_devices[mq->in_port].process_private;
memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
uint16_t slot, saved_slot, n_free, ring_size, mask, n_tx_pkts = 0;
-   uint16_t src_len, src_off, dst_len, dst_off, cp_len;
+   uint16_t src_len, src_off, dst_len, dst_off, cp_len, nb_segs;
memif_ring_type_t type = mq->type;
memif_desc_t *d0;
struct rte_mbuf *mbuf;
@@ -615,6 +616,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t 
nb_pkts)
 
while (n_tx_pkts < nb_pkts && n_free) {
mbuf_head = *bufs++;
+   nb_segs = mbuf_head->nb_segs;
mbuf = mbuf_head;
 
saved_slot = slot;
@@ -659,7 +661,7 @@ eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t 
nb_pkts)
d0->length = dst_off;
}
 
-   if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
+   if (--nb_segs > 0) {
mbuf = mbuf->next;
goto next_in_chain;
}
@@ -696,6 +698,7 @@ memif_tx_one_zc(struct pmd_process_private *proc_private, 
struct memif_queue *mq
uint16_t slot, uint16_t n_free)
 {
memif_desc_t *d0;
+   uint16_t nb_segs = mbuf->nb_segs;
int used_slots = 1;
 
 next_in_chain:
@@ -716,7 +719,7 @@ memif_tx_one_zc(struct pmd_process_private *proc_private, 
struct memif_queue *mq
d0->flags = 0;
 
/* check if buffer is chained */
-   if (rte_pktmbuf_is_contiguous(mbuf) == 0) {
+   if (--nb_segs > 0) {
if (n_free < 2)
return 0;
/* mark buffer as chained */
-- 
2.17.1



[dpdk-dev] [PATCH] net/memif: allocate socket hash on any NUMA socket

2021-09-28 Thread Junxiao Shi
Previously, the memif socket hash was always allocated on NUMA socket 0.
If the application is entirely running on another NUMA socket and EAL
--socket-limit prevents memory allocation on NUMA socket 0, memif
creation fails with "HASH: memory allocation failed" error.

This patch allows allocating memif socket hash on any NUMA socket.

Signed-off-by: Junxiao Shi 
---
 drivers/net/memif/memif_socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index f58ff4c0cb..364e818d65 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -946,6 +946,7 @@ memif_create_socket_hash(void)
params.key_len = MEMIF_SOCKET_UN_SIZE;
params.hash_func = rte_jhash;
params.hash_func_init_val = 0;
+   params.socket_id = SOCKET_ID_ANY;
return rte_hash_create(&params);
 }
 
-- 
2.17.1



[PATCH] net/memif: allow stopping and closing device

2021-11-18 Thread Junxiao Shi
Bugzilla ID: 888
Fixes: febc855b358e ("ethdev: forbid closing started device")

Signed-off-by: Junxiao Shi 
---
 drivers/net/memif/rte_eth_memif.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index 43d7378329..e3d523af57 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -1260,6 +1260,13 @@ memif_dev_start(struct rte_eth_dev *dev)
return ret;
 }
 
+static int
+memif_dev_stop(struct rte_eth_dev *dev)
+{
+   memif_disconnect(dev);
+   return 0;
+}
+
 static int
 memif_dev_close(struct rte_eth_dev *dev)
 {
@@ -1268,7 +1275,6 @@ memif_dev_close(struct rte_eth_dev *dev)
 
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
memif_msg_enq_disconnect(pmd->cc, "Device closed", 0);
-   memif_disconnect(dev);
 
for (i = 0; i < dev->data->nb_rx_queues; i++)
(*dev->dev_ops->rx_queue_release)(dev, i);
@@ -1276,8 +1282,6 @@ memif_dev_close(struct rte_eth_dev *dev)
(*dev->dev_ops->tx_queue_release)(dev, i);
 
memif_socket_remove_device(dev);
-   } else {
-   memif_disconnect(dev);
}
 
rte_free(dev->process_private);
@@ -1515,6 +1519,7 @@ memif_rx_queue_intr_disable(struct rte_eth_dev *dev, 
uint16_t qid __rte_unused)
 
 static const struct eth_dev_ops ops = {
.dev_start = memif_dev_start,
+   .dev_stop = memif_dev_stop,
.dev_close = memif_dev_close,
.dev_infos_get = memif_dev_info,
.dev_configure = memif_dev_configure,
-- 
2.17.1



[RFC PATCH] net/memif: change socket listener owner uid/gid

2022-11-15 Thread Junxiao Shi
This allows a DPDK application running with root privilege to create a
memif socket listener with non-root owner uid and gid, which can be
connected from client applications running without root privilege.

Signed-off-by: Junxiao Shi 
---
 doc/guides/nics/memif.rst |  2 ++
 drivers/net/memif/memif_socket.c  | 13 +--
 drivers/net/memif/rte_eth_memif.c | 56 +--
 drivers/net/memif/rte_eth_memif.h |  2 ++
 4 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
index aca843640b..8a8141aa72 100644
--- a/doc/guides/nics/memif.rst
+++ b/doc/guides/nics/memif.rst
@@ -44,6 +44,8 @@ client.
"rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", 
"10", "1-14"
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 
108"
"socket-abstract=no", "Set usage of abstract socket address", "yes", 
"yes|no"
+   "owner-uid=1000", "Set socket listener owner uid. Only relevant to server 
with socket-abstract=no", "unchanged", "uid_t"
+   "owner-gid=1000", "Set socket listener owner gid. Only relevant to server 
with socket-abstract=no", "unchanged", "gid_t"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if 
specified, must be matched by peer", "", "string len 24"
"zero-copy=yes", "Enable/disable zero-copy client mode. Only relevant to 
client, requires '--single-file-segments' eal argument", "no", "yes|no"
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index 7886644412..bedb0637a9 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -889,7 +889,7 @@ memif_listener_handler(void *arg)
 }
 
 static struct memif_socket *
-memif_socket_create(char *key, uint8_t listener, bool is_abstract)
+memif_socket_create(char *key, uint8_t listener, bool is_abstract, uid_t 
owner_uid, gid_t owner_gid)
 {
struct memif_socket *sock;
struct sockaddr_un un = { 0 };
@@ -941,6 +941,14 @@ memif_socket_create(char *key, uint8_t listener, bool 
is_abstract)
 
MIF_LOG(DEBUG, "Memif listener socket %s created.", 
sock->filename);
 
+   if (!is_abstract && (owner_uid != (uid_t)-1 || owner_gid != 
(gid_t)-1)) {
+   ret = chown(sock->filename, owner_uid, owner_gid);
+   if (ret < 0) {
+   MIF_LOG(ERR, "Failed to change listener socket 
owner %d", errno);
+   goto error;
+   }
+   }
+
/* Allocate interrupt instance */
sock->intr_handle =
rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
@@ -1017,7 +1025,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char 
*socket_filename)
if (ret < 0) {
socket = memif_socket_create(key,
(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
-   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
+   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT,
+   pmd->owner_uid, pmd->owner_gid);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index dd951b8296..f72a53bc03 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -37,6 +37,8 @@
 #define ETH_MEMIF_RING_SIZE_ARG"rsize"
 #define ETH_MEMIF_SOCKET_ARG   "socket"
 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG  "socket-abstract"
+#define ETH_MEMIF_OWNER_UID_ARG"owner-uid"
+#define ETH_MEMIF_OWNER_GID_ARG"owner-gid"
 #define ETH_MEMIF_MAC_ARG  "mac"
 #define ETH_MEMIF_ZC_ARG   "zero-copy"
 #define ETH_MEMIF_SECRET_ARG   "secret"
@@ -48,6 +50,8 @@ static const char * const valid_arguments[] = {
ETH_MEMIF_RING_SIZE_ARG,
ETH_MEMIF_SOCKET_ARG,
ETH_MEMIF_SOCKET_ABSTRACT_ARG,
+   ETH_MEMIF_OWNER_UID_ARG,
+   ETH_MEMIF_OWNER_GID_ARG,
ETH_MEMIF_MAC_ARG,
ETH_MEMIF_ZC_ARG,
ETH_MEMIF_SECRET_ARG,
@@ -1515,7 +1519,7 @@ static const struct eth_dev_ops ops = {
 static int
 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
 memif_interface_id_t id,

[RFC PATCH v2] net/memif: change socket listener owner uid/gid

2022-11-16 Thread Junxiao Shi
This allows a DPDK application running with root privilege to create a
memif socket listener with non-root owner uid and gid, which can be
connected from client applications running without root privilege.

Signed-off-by: Junxiao Shi 
---
 doc/guides/nics/memif.rst |  2 ++
 drivers/net/memif/memif_socket.c  | 13 +++--
 drivers/net/memif/rte_eth_memif.c | 46 +--
 drivers/net/memif/rte_eth_memif.h |  2 ++
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
index aca843640b..8a8141aa72 100644
--- a/doc/guides/nics/memif.rst
+++ b/doc/guides/nics/memif.rst
@@ -44,6 +44,8 @@ client.
"rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", 
"10", "1-14"
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 
108"
"socket-abstract=no", "Set usage of abstract socket address", "yes", 
"yes|no"
+   "owner-uid=1000", "Set socket listener owner uid. Only relevant to server 
with socket-abstract=no", "unchanged", "uid_t"
+   "owner-gid=1000", "Set socket listener owner gid. Only relevant to server 
with socket-abstract=no", "unchanged", "gid_t"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if 
specified, must be matched by peer", "", "string len 24"
"zero-copy=yes", "Enable/disable zero-copy client mode. Only relevant to 
client, requires '--single-file-segments' eal argument", "no", "yes|no"
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index 7886644412..c2b038d01a 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -889,7 +889,7 @@ memif_listener_handler(void *arg)
 }
 
 static struct memif_socket *
-memif_socket_create(char *key, uint8_t listener, bool is_abstract)
+memif_socket_create(char *key, uint8_t listener, bool is_abstract, uid_t 
owner_uid, gid_t owner_gid)
 {
struct memif_socket *sock;
struct sockaddr_un un = { 0 };
@@ -941,6 +941,14 @@ memif_socket_create(char *key, uint8_t listener, bool 
is_abstract)
 
MIF_LOG(DEBUG, "Memif listener socket %s created.", 
sock->filename);
 
+   if (!is_abstract && (owner_uid != (uid_t)-1 || owner_gid != 
(gid_t)-1)) {
+   ret = chown(sock->filename, owner_uid, owner_gid);
+   if (ret < 0) {
+   MIF_LOG(ERR, "Failed to change listener socket 
owner %d", errno);
+   goto error;
+   }
+   }
+
/* Allocate interrupt instance */
sock->intr_handle =
rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
@@ -1017,7 +1025,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char 
*socket_filename)
if (ret < 0) {
socket = memif_socket_create(key,
(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
-   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
+   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT,
+   pmd->owner_uid, pmd->owner_gid);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index dd951b8296..d69f0e823f 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -37,6 +37,8 @@
 #define ETH_MEMIF_RING_SIZE_ARG"rsize"
 #define ETH_MEMIF_SOCKET_ARG   "socket"
 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG  "socket-abstract"
+#define ETH_MEMIF_OWNER_UID_ARG"owner-uid"
+#define ETH_MEMIF_OWNER_GID_ARG"owner-gid"
 #define ETH_MEMIF_MAC_ARG  "mac"
 #define ETH_MEMIF_ZC_ARG   "zero-copy"
 #define ETH_MEMIF_SECRET_ARG   "secret"
@@ -48,6 +50,8 @@ static const char * const valid_arguments[] = {
ETH_MEMIF_RING_SIZE_ARG,
ETH_MEMIF_SOCKET_ARG,
ETH_MEMIF_SOCKET_ABSTRACT_ARG,
+   ETH_MEMIF_OWNER_UID_ARG,
+   ETH_MEMIF_OWNER_GID_ARG,
ETH_MEMIF_MAC_ARG,
ETH_MEMIF_ZC_ARG,
ETH_MEMIF_SECRET_ARG,
@@ -1515,7 +1519,7 @@ static const struct eth_dev_ops ops = {
 static int
 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
 memif_interface_id_t id,

[RFC PATCH v3] net/memif: change socket listener owner uid/gid

2022-11-16 Thread Junxiao Shi
This allows a DPDK application running with root privilege to create a
memif socket listener with non-root owner uid and gid, which can be
connected from client applications running without root privilege.

Signed-off-by: Junxiao Shi 
---
 doc/guides/nics/memif.rst |  2 ++
 drivers/net/memif/memif_socket.c  | 13 +++--
 drivers/net/memif/rte_eth_memif.c | 48 +--
 drivers/net/memif/rte_eth_memif.h |  2 ++
 4 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
index aca843640b..8a8141aa72 100644
--- a/doc/guides/nics/memif.rst
+++ b/doc/guides/nics/memif.rst
@@ -44,6 +44,8 @@ client.
"rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", 
"10", "1-14"
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 
108"
"socket-abstract=no", "Set usage of abstract socket address", "yes", 
"yes|no"
+   "owner-uid=1000", "Set socket listener owner uid. Only relevant to server 
with socket-abstract=no", "unchanged", "uid_t"
+   "owner-gid=1000", "Set socket listener owner gid. Only relevant to server 
with socket-abstract=no", "unchanged", "gid_t"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if 
specified, must be matched by peer", "", "string len 24"
"zero-copy=yes", "Enable/disable zero-copy client mode. Only relevant to 
client, requires '--single-file-segments' eal argument", "no", "yes|no"
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index 7886644412..c2b038d01a 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -889,7 +889,7 @@ memif_listener_handler(void *arg)
 }
 
 static struct memif_socket *
-memif_socket_create(char *key, uint8_t listener, bool is_abstract)
+memif_socket_create(char *key, uint8_t listener, bool is_abstract, uid_t 
owner_uid, gid_t owner_gid)
 {
struct memif_socket *sock;
struct sockaddr_un un = { 0 };
@@ -941,6 +941,14 @@ memif_socket_create(char *key, uint8_t listener, bool 
is_abstract)
 
MIF_LOG(DEBUG, "Memif listener socket %s created.", 
sock->filename);
 
+   if (!is_abstract && (owner_uid != (uid_t)-1 || owner_gid != 
(gid_t)-1)) {
+   ret = chown(sock->filename, owner_uid, owner_gid);
+   if (ret < 0) {
+   MIF_LOG(ERR, "Failed to change listener socket 
owner %d", errno);
+   goto error;
+   }
+   }
+
/* Allocate interrupt instance */
sock->intr_handle =
rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
@@ -1017,7 +1025,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char 
*socket_filename)
if (ret < 0) {
socket = memif_socket_create(key,
(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
-   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
+   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT,
+   pmd->owner_uid, pmd->owner_gid);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index dd951b8296..092f1cbc92 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -37,6 +37,8 @@
 #define ETH_MEMIF_RING_SIZE_ARG"rsize"
 #define ETH_MEMIF_SOCKET_ARG   "socket"
 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG  "socket-abstract"
+#define ETH_MEMIF_OWNER_UID_ARG"owner-uid"
+#define ETH_MEMIF_OWNER_GID_ARG"owner-gid"
 #define ETH_MEMIF_MAC_ARG  "mac"
 #define ETH_MEMIF_ZC_ARG   "zero-copy"
 #define ETH_MEMIF_SECRET_ARG   "secret"
@@ -48,6 +50,8 @@ static const char * const valid_arguments[] = {
ETH_MEMIF_RING_SIZE_ARG,
ETH_MEMIF_SOCKET_ARG,
ETH_MEMIF_SOCKET_ABSTRACT_ARG,
+   ETH_MEMIF_OWNER_UID_ARG,
+   ETH_MEMIF_OWNER_GID_ARG,
ETH_MEMIF_MAC_ARG,
ETH_MEMIF_ZC_ARG,
ETH_MEMIF_SECRET_ARG,
@@ -1515,7 +1519,7 @@ static const struct eth_dev_ops ops = {
 static int
 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
 memif_interface_id_t id,

[PATCH] net/memif: change socket listener owner uid/gid

2022-12-05 Thread Junxiao Shi
This allows a DPDK application running with root privilege to create a
memif socket listener with non-root owner uid and gid, which can be
connected from client applications running without root privilege.

Signed-off-by: Junxiao Shi 
---
 doc/guides/nics/memif.rst |  2 ++
 drivers/net/memif/memif_socket.c  | 13 +++--
 drivers/net/memif/rte_eth_memif.c | 48 +--
 drivers/net/memif/rte_eth_memif.h |  2 ++
 4 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
index aca843640b..8a8141aa72 100644
--- a/doc/guides/nics/memif.rst
+++ b/doc/guides/nics/memif.rst
@@ -44,6 +44,8 @@ client.
"rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", 
"10", "1-14"
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 
108"
"socket-abstract=no", "Set usage of abstract socket address", "yes", 
"yes|no"
+   "owner-uid=1000", "Set socket listener owner uid. Only relevant to server 
with socket-abstract=no", "unchanged", "uid_t"
+   "owner-gid=1000", "Set socket listener owner gid. Only relevant to server 
with socket-abstract=no", "unchanged", "gid_t"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if 
specified, must be matched by peer", "", "string len 24"
"zero-copy=yes", "Enable/disable zero-copy client mode. Only relevant to 
client, requires '--single-file-segments' eal argument", "no", "yes|no"
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index 4700ce2e77..34f861afdd 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -889,7 +889,7 @@ memif_listener_handler(void *arg)
 }
 
 static struct memif_socket *
-memif_socket_create(char *key, uint8_t listener, bool is_abstract)
+memif_socket_create(char *key, uint8_t listener, bool is_abstract, uid_t 
owner_uid, gid_t owner_gid)
 {
struct memif_socket *sock;
struct sockaddr_un un = { 0 };
@@ -941,6 +941,14 @@ memif_socket_create(char *key, uint8_t listener, bool 
is_abstract)
 
MIF_LOG(DEBUG, "Memif listener socket %s created.", 
sock->filename);
 
+   if (!is_abstract && (owner_uid != (uid_t)-1 || owner_gid != 
(gid_t)-1)) {
+   ret = chown(sock->filename, owner_uid, owner_gid);
+   if (ret < 0) {
+   MIF_LOG(ERR, "Failed to change listener socket 
owner %d", errno);
+   goto error;
+   }
+   }
+
/* Allocate interrupt instance */
sock->intr_handle =
rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
@@ -1017,7 +1025,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char 
*socket_filename)
if (ret < 0) {
socket = memif_socket_create(key,
(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
-   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
+   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT,
+   pmd->owner_uid, pmd->owner_gid);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index 1b1c1a652b..f82f4bccb8 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -37,6 +37,8 @@
 #define ETH_MEMIF_RING_SIZE_ARG"rsize"
 #define ETH_MEMIF_SOCKET_ARG   "socket"
 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG  "socket-abstract"
+#define ETH_MEMIF_OWNER_UID_ARG"owner-uid"
+#define ETH_MEMIF_OWNER_GID_ARG"owner-gid"
 #define ETH_MEMIF_MAC_ARG  "mac"
 #define ETH_MEMIF_ZC_ARG   "zero-copy"
 #define ETH_MEMIF_SECRET_ARG   "secret"
@@ -48,6 +50,8 @@ static const char * const valid_arguments[] = {
ETH_MEMIF_RING_SIZE_ARG,
ETH_MEMIF_SOCKET_ARG,
ETH_MEMIF_SOCKET_ABSTRACT_ARG,
+   ETH_MEMIF_OWNER_UID_ARG,
+   ETH_MEMIF_OWNER_GID_ARG,
ETH_MEMIF_MAC_ARG,
ETH_MEMIF_ZC_ARG,
ETH_MEMIF_SECRET_ARG,
@@ -1515,7 +1519,7 @@ static const struct eth_dev_ops ops = {
 static int
 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
 memif_interface_id_t id,

[PATCH v2] net/memif: change socket listener owner uid/gid

2022-12-07 Thread Junxiao Shi
This allows a DPDK application running with root privilege to create a
memif socket listener with non-root owner uid and gid, which can be
connected from client applications running without root privilege.

Signed-off-by: Junxiao Shi 
---
 doc/guides/nics/memif.rst |   2 +
 drivers/net/memif/memif_socket.c  |  13 ++-
 drivers/net/memif/rte_eth_memif.c | 129 --
 drivers/net/memif/rte_eth_memif.h |   2 +
 4 files changed, 102 insertions(+), 44 deletions(-)

diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
index aca843640b..afc574fdaa 100644
--- a/doc/guides/nics/memif.rst
+++ b/doc/guides/nics/memif.rst
@@ -44,6 +44,8 @@ client.
"rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", 
"10", "1-14"
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 
108"
"socket-abstract=no", "Set usage of abstract socket address", "yes", 
"yes|no"
+   "owner-uid=1000", "Set socket listener owner uid. Only relevant to server 
with socket-abstract=no", "unchanged", "uid_t"
+   "owner-gid=1000", "Set socket listener owner gid. Only relevant to server 
with socket-abstract=no", "unchanged", "gid_t"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if 
specified, must be matched by peer", "", "string len 24"
"zero-copy=yes", "Enable/disable zero-copy client mode. Only relevant to 
client, requires '--single-file-segments' eal argument", "no", "yes|no"
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index 4700ce2e77..649f8d0e61 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -889,7 +889,7 @@ memif_listener_handler(void *arg)
 }
 
 static struct memif_socket *
-memif_socket_create(char *key, uint8_t listener, bool is_abstract)
+memif_socket_create(char *key, uint8_t listener, bool is_abstract, uid_t 
owner_uid, gid_t owner_gid)
 {
struct memif_socket *sock;
struct sockaddr_un un = { 0 };
@@ -941,6 +941,14 @@ memif_socket_create(char *key, uint8_t listener, bool 
is_abstract)
 
MIF_LOG(DEBUG, "Memif listener socket %s created.", 
sock->filename);
 
+   if (!is_abstract && (owner_uid != (uid_t)-1 || owner_gid != 
(gid_t)-1)) {
+   ret = chown(sock->filename, owner_uid, owner_gid);
+   if (ret < 0) {
+   MIF_LOG(ERR, "Failed to change listener socket 
owner");
+   goto error;
+   }
+   }
+
/* Allocate interrupt instance */
sock->intr_handle =
rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
@@ -1017,7 +1025,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char 
*socket_filename)
if (ret < 0) {
socket = memif_socket_create(key,
(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
-   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
+   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT,
+   pmd->owner_uid, pmd->owner_gid);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index 1b1c1a652b..871a2bd7d3 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -37,6 +37,8 @@
 #define ETH_MEMIF_RING_SIZE_ARG"rsize"
 #define ETH_MEMIF_SOCKET_ARG   "socket"
 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG  "socket-abstract"
+#define ETH_MEMIF_OWNER_UID_ARG"owner-uid"
+#define ETH_MEMIF_OWNER_GID_ARG"owner-gid"
 #define ETH_MEMIF_MAC_ARG  "mac"
 #define ETH_MEMIF_ZC_ARG   "zero-copy"
 #define ETH_MEMIF_SECRET_ARG   "secret"
@@ -48,6 +50,8 @@ static const char * const valid_arguments[] = {
ETH_MEMIF_RING_SIZE_ARG,
ETH_MEMIF_SOCKET_ARG,
ETH_MEMIF_SOCKET_ABSTRACT_ARG,
+   ETH_MEMIF_OWNER_UID_ARG,
+   ETH_MEMIF_OWNER_GID_ARG,
ETH_MEMIF_MAC_ARG,
ETH_MEMIF_ZC_ARG,
ETH_MEMIF_SECRET_ARG,
@@ -1515,7 +1519,7 @@ static const struct eth_dev_ops ops = {
 static int
 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
 memif_interface_id_t id,

[PATCH v3] net/memif: change socket listener owner uid/gid

2022-12-07 Thread Junxiao Shi
This allows a DPDK application running with root privilege to create a
memif socket listener with non-root owner uid and gid, which can be
connected from client applications running without root privilege.

Signed-off-by: Junxiao Shi 
---
 doc/guides/nics/memif.rst |  2 ++
 drivers/net/memif/memif_socket.c  | 13 +++--
 drivers/net/memif/rte_eth_memif.c | 48 +--
 drivers/net/memif/rte_eth_memif.h |  2 ++
 4 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/doc/guides/nics/memif.rst b/doc/guides/nics/memif.rst
index aca843640b..afc574fdaa 100644
--- a/doc/guides/nics/memif.rst
+++ b/doc/guides/nics/memif.rst
@@ -44,6 +44,8 @@ client.
"rsize=11", "Log2 of ring size. If rsize is 10, actual ring size is 1024", 
"10", "1-14"
"socket=/tmp/memif.sock", "Socket filename", "/tmp/memif.sock", "string len 
108"
"socket-abstract=no", "Set usage of abstract socket address", "yes", 
"yes|no"
+   "owner-uid=1000", "Set socket listener owner uid. Only relevant to server 
with socket-abstract=no", "unchanged", "uid_t"
+   "owner-gid=1000", "Set socket listener owner gid. Only relevant to server 
with socket-abstract=no", "unchanged", "gid_t"
"mac=01:23:45:ab:cd:ef", "Mac address", "01:ab:23:cd:45:ef", ""
"secret=abc123", "Secret is an optional security option, which if 
specified, must be matched by peer", "", "string len 24"
"zero-copy=yes", "Enable/disable zero-copy client mode. Only relevant to 
client, requires '--single-file-segments' eal argument", "no", "yes|no"
diff --git a/drivers/net/memif/memif_socket.c b/drivers/net/memif/memif_socket.c
index 4700ce2e77..649f8d0e61 100644
--- a/drivers/net/memif/memif_socket.c
+++ b/drivers/net/memif/memif_socket.c
@@ -889,7 +889,7 @@ memif_listener_handler(void *arg)
 }
 
 static struct memif_socket *
-memif_socket_create(char *key, uint8_t listener, bool is_abstract)
+memif_socket_create(char *key, uint8_t listener, bool is_abstract, uid_t 
owner_uid, gid_t owner_gid)
 {
struct memif_socket *sock;
struct sockaddr_un un = { 0 };
@@ -941,6 +941,14 @@ memif_socket_create(char *key, uint8_t listener, bool 
is_abstract)
 
MIF_LOG(DEBUG, "Memif listener socket %s created.", 
sock->filename);
 
+   if (!is_abstract && (owner_uid != (uid_t)-1 || owner_gid != 
(gid_t)-1)) {
+   ret = chown(sock->filename, owner_uid, owner_gid);
+   if (ret < 0) {
+   MIF_LOG(ERR, "Failed to change listener socket 
owner");
+   goto error;
+   }
+   }
+
/* Allocate interrupt instance */
sock->intr_handle =
rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_SHARED);
@@ -1017,7 +1025,8 @@ memif_socket_init(struct rte_eth_dev *dev, const char 
*socket_filename)
if (ret < 0) {
socket = memif_socket_create(key,
(pmd->role == MEMIF_ROLE_CLIENT) ? 0 : 1,
-   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT);
+   pmd->flags & ETH_MEMIF_FLAG_SOCKET_ABSTRACT,
+   pmd->owner_uid, pmd->owner_gid);
if (socket == NULL)
return -1;
ret = rte_hash_add_key_data(hash, key, socket);
diff --git a/drivers/net/memif/rte_eth_memif.c 
b/drivers/net/memif/rte_eth_memif.c
index 1b1c1a652b..f82f4bccb8 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -37,6 +37,8 @@
 #define ETH_MEMIF_RING_SIZE_ARG"rsize"
 #define ETH_MEMIF_SOCKET_ARG   "socket"
 #define ETH_MEMIF_SOCKET_ABSTRACT_ARG  "socket-abstract"
+#define ETH_MEMIF_OWNER_UID_ARG"owner-uid"
+#define ETH_MEMIF_OWNER_GID_ARG"owner-gid"
 #define ETH_MEMIF_MAC_ARG  "mac"
 #define ETH_MEMIF_ZC_ARG   "zero-copy"
 #define ETH_MEMIF_SECRET_ARG   "secret"
@@ -48,6 +50,8 @@ static const char * const valid_arguments[] = {
ETH_MEMIF_RING_SIZE_ARG,
ETH_MEMIF_SOCKET_ARG,
ETH_MEMIF_SOCKET_ABSTRACT_ARG,
+   ETH_MEMIF_OWNER_UID_ARG,
+   ETH_MEMIF_OWNER_GID_ARG,
ETH_MEMIF_MAC_ARG,
ETH_MEMIF_ZC_ARG,
ETH_MEMIF_SECRET_ARG,
@@ -1515,7 +1519,7 @@ static const struct eth_dev_ops ops = {
 static int
 memif_create(struct rte_vdev_device *vdev, enum memif_role_t role,
 memif_interface_id_t id,

Re: [PATCH v2] net/memif: change socket listener owner uid/gid

2022-12-07 Thread Junxiao Shi
Hi Ferruh

> On 12/7/2022 2:41 PM, Junxiao Shi wrote:
> > This allows a DPDK application running with root privilege to create a
> > memif socket listener with non-root owner uid and gid, which can be
> > connected from client applications running without root privilege.
> >
>
> Do you have an easy way to test unprivileged memif client?

This has been tested with NDN-DPDK software.
https://github.com/usnistgov/ndn-dpdk revision
311de078aa4dc3ea28db5f8858e70a1bef7b9ccd

The systemd service is running as root and it uses DPDK with the owner-uid
and owner-gid args.
The ndndpdk-godemo command is running as unprivileged process.
Directory /run/ndn still needs to be created by root.

These commands can perform a full test:

git clone https://github.com/usnistgov/ndn-dpdk.git
cd ndn-dpdk
./docs/ndndpdk-depends.sh --dpdk-patch=26031
corepack pnpm install
make
sudo make install
sudo dpdk-hugepages.py --setup 8G
sudo ndndpdk-ctrl systemd start
jq -n {} | ndndpdk-ctrl activate-forwarder
sudo mkdir -p /run/ndn
ndndpdk-godemo pingserver --name /A
ndndpdk-godemo pingclient --name /A

You can see packets flowing through.
Run `ls -l /run/ndn` and check the uid:gid of socket files too.


>
> > Signed-off-by: Junxiao Shi 
>
> <...>
>
> > @@ -1827,47 +1859,58 @@ rte_pmd_memif_probe(struct rte_vdev_device
*vdev)
> >   flags |= ETH_MEMIF_FLAG_SOCKET_ABSTRACT;
> >
> >   kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev),
valid_arguments);
> > + if (kvlist == NULL) {
> > + MIF_LOG(ERR, "Invalid kvargs key");
> > + ret = -EINVAL;
> > + goto exit;
> > + }
>
> Thanks Junxiao for updating this, but since it is not really related to
> this patch, can you please separate it to another patch?

These are reverted and will be submitted separately in the future.


[PATCH] net/af_xdp: allow operation when multiprocess is disabled

2022-02-16 Thread Junxiao Shi
If EAL multiprocess feature has been disabled via rte_mp_disable()
function, AF_XDP driver may not be able to register its IPC callback.
Previously this led to probe failure.
This commit adds a check for this condition so that AF_XDP can still be
used even if multiprocess is disabled.

Fixes: 9876cf8316b3 ("net/af_xdp: re-enable secondary process support")

Signed-off-by: Junxiao Shi 
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c 
b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ac710c6bd..7f23097c5f 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -1995,7 +1995,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
/* Register IPC callback which shares xsk fds from primary to secondary 
*/
if (!afxdp_dev_count) {
ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, 
afxdp_mp_send_fds);
-   if (ret < 0) {
+   if (ret < 0 || rte_errno != ENOTSUP) {
AF_XDP_LOG(ERR, "%s: Failed to register multi-process 
IPC callback: %s",
   name, strerror(rte_errno));
return -1;
-- 
2.17.1



[PATCH v2] net/af_xdp: allow operation when multiprocess is disabled

2022-02-17 Thread Junxiao Shi
If EAL multiprocess feature has been disabled via rte_mp_disable()
function, AF_XDP driver may not be able to register its IPC callback.
Previously this led to probe failure.
This commit adds a check for this condition so that AF_XDP can still be
used even if multiprocess is disabled.

Fixes: 9876cf8316b3 ("net/af_xdp: re-enable secondary process support")

Signed-off-by: Junxiao Shi 
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c 
b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 6ac710c6bd..2163df7c5c 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -1995,7 +1995,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
/* Register IPC callback which shares xsk fds from primary to secondary 
*/
if (!afxdp_dev_count) {
ret = rte_mp_action_register(ETH_AF_XDP_MP_KEY, 
afxdp_mp_send_fds);
-   if (ret < 0) {
+   if (ret < 0 && rte_errno != ENOTSUP) {
AF_XDP_LOG(ERR, "%s: Failed to register multi-process 
IPC callback: %s",
   name, strerror(rte_errno));
return -1;
-- 
2.17.1



[PATCH] net/af_xdp: fix custom program loading with multiple queues

2022-03-09 Thread Junxiao Shi
When the PMD is configured to load a custom XDP program, it sets
XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD flag to prevent libbpf from
loading its default XDP program. However, when queue_count is set to
greater than 1, this flag is only set for the first XSK socket but not
for subsequent XSK sockets. This causes XSK socket creation failure.

This commit ensures that XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD flag is
set for all XSK socket creations when custom XDP program is being used.

Fixes: 01fa83c94d7e ("net/af_xdp: workaround custom program loading")

Signed-off-by: Junxiao Shi 
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c 
b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 65479138d3..9920f49870 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -1307,18 +1307,19 @@ xsk_configure(struct pmd_internals *internals, struct 
pkt_rx_queue *rxq,
cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
 #endif
 
-   if (strnlen(internals->prog_path, PATH_MAX) &&
-   !internals->custom_prog_configured) {
-   ret = load_custom_xdp_prog(internals->prog_path,
-  internals->if_index,
-  &internals->map);
-   if (ret) {
-   AF_XDP_LOG(ERR, "Failed to load custom XDP program 
%s\n",
-   internals->prog_path);
-   goto out_umem;
+   if (strnlen(internals->prog_path, PATH_MAX)) {
+   if (!internals->custom_prog_configured) {
+   ret = load_custom_xdp_prog(internals->prog_path,
+   internals->if_index,
+   &internals->map);
+   if (ret) {
+   AF_XDP_LOG(ERR, "Failed to load custom XDP 
program %s\n",
+   internals->prog_path);
+   goto out_umem;
+   }
+   internals->custom_prog_configured = 1;
}
-   internals->custom_prog_configured = 1;
-   cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
+   cfg.libbpf_flags |= XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
}
 
if (internals->shared_umem)
-- 
2.17.1



[dpdk-dev] [PATCH] crypto: recognize OP_TYPE_UNDEFINED in rte_crypto_op_pool_create

2018-10-02 Thread Junxiao Shi
Signed-off-by: Junxiao Shi 
---
 lib/librte_cryptodev/rte_cryptodev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_cryptodev/rte_cryptodev.c 
b/lib/librte_cryptodev/rte_cryptodev.c
index 63ae23f..3d6f474 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -1477,6 +1477,8 @@ rte_crypto_op_pool_create(const char *name, enum 
rte_crypto_op_type type,
elt_size += sizeof(struct rte_crypto_sym_op);
} else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
elt_size += sizeof(struct rte_crypto_asym_op);
+   } else if (type == RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+   elt_size += RTE_MAX(sizeof(struct rte_crypto_sym_op), 
sizeof(struct rte_crypto_asym_op));
} else {
CDEV_LOG_ERR("Invalid op_type\n");
return NULL;
-- 
2.7.4



[dpdk-dev] [PATCH] crypto: recognize OP_TYPE_UNDEFINED in rte_crypto_op_pool_create

2018-10-02 Thread Junxiao Shi
Signed-off-by: Junxiao Shi 
---
 lib/librte_cryptodev/rte_cryptodev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_cryptodev/rte_cryptodev.c 
b/lib/librte_cryptodev/rte_cryptodev.c
index 63ae23f..3d6f474 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -1477,6 +1477,8 @@ rte_crypto_op_pool_create(const char *name, enum 
rte_crypto_op_type type,
elt_size += sizeof(struct rte_crypto_sym_op);
} else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
elt_size += sizeof(struct rte_crypto_asym_op);
+   } else if (type == RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+   elt_size += RTE_MAX(sizeof(struct rte_crypto_sym_op), 
sizeof(struct rte_crypto_asym_op));
} else {
CDEV_LOG_ERR("Invalid op_type\n");
return NULL;
-- 
2.7.4



[dpdk-dev] [PATCH v2] crypto: fix element size for undefined crypto op

2018-10-09 Thread Junxiao Shi
The documentation of rte_crypto_op_pool_create indicates that
specifying RTE_CRYPTO_OP_TYPE_UNDEFINED would create a pool that
supports all operation types. This change makes the code
consistent with documentation.

Fixes: c0f87eb5252b ("cryptodev: change burst API to be crypto op
oriented")

Signed-off-by: Junxiao Shi 
---
 lib/librte_cryptodev/rte_cryptodev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/librte_cryptodev/rte_cryptodev.c 
b/lib/librte_cryptodev/rte_cryptodev.c
index 63ae23f00..608323fdd 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -1477,6 +1477,9 @@ rte_crypto_op_pool_create(const char *name, enum 
rte_crypto_op_type type,
elt_size += sizeof(struct rte_crypto_sym_op);
} else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
elt_size += sizeof(struct rte_crypto_asym_op);
+   } else if (type == RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+   elt_size += RTE_MAX(sizeof(struct rte_crypto_sym_op),
+   sizeof(struct rte_crypto_asym_op));
} else {
CDEV_LOG_ERR("Invalid op_type\n");
return NULL;
-- 
2.15.0.windows.1



[dpdk-dev] [PATCH] drivers/crypto: fix set_sym_session_private_data error in sessionless mode

2020-01-27 Thread Junxiao Shi
When OpenSSL or AESNI-MB cryptodev is being used in sessionless mode
for symmetric crypto operation (e.g. SHA256 hash), the driver prints
error message:

CRYPTODEV: set_sym_session_private_data() line 489:
   Set private data for driver 0 not allowed

Then, AESNI-MB driver segfaults in post_process_mb_job().

Bugzilla ID: 377
Fixes: b3bbd9e ("cryptodev: support device independent sessions")
Fixes: c68d7aa ("crypto/aesni_mb: use architecture independent macros")

Signed-off-by: Junxiao Shi 
---
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 4 ++--
 drivers/crypto/openssl/rte_openssl_pmd.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c 
b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
index 97d9f81..33f4167 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
@@ -729,10 +729,10 @@ get_session(struct aesni_mb_qp *qp, struct rte_crypto_op 
*op)
op->sym->session,
cryptodev_driver_id);
} else {
-   void *_sess = NULL;
+   void *_sess = rte_cryptodev_sym_session_create(qp->sess_mp);
void *_sess_private_data = NULL;

-   if (rte_mempool_get(qp->sess_mp, (void **)&_sess))
+   if (_sess == NULL)
return NULL;

if (rte_mempool_get(qp->sess_mp_priv,
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c 
b/drivers/crypto/openssl/rte_openssl_pmd.c
index 91f0283..199097b 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -762,10 +762,10 @@ get_session(struct openssl_qp *qp, struct rte_crypto_op 
*op)
return NULL;

/* provide internal session */
-   void *_sess = NULL;
+   void *_sess = rte_cryptodev_sym_session_create(qp->sess_mp);
void *_sess_private_data = NULL;

-   if (rte_mempool_get(qp->sess_mp, (void **)&_sess))
+   if (_sess == NULL)
return NULL;

if (rte_mempool_get(qp->sess_mp_priv,
--
2.7.4



[dpdk-dev] [PATCH] bpf: properly install headers in meson build

2020-01-27 Thread Junxiao Shi
Previously, when librte_bpf was built with meson+ninja, its
headers such as bpf_def.h were not installed to the system.
This commit fixes this problem.

Fixes: 94972f3 ("bpf: add BPF loading and execution framework")

Signed-off-by: Junxiao Shi 
---
 lib/librte_bpf/meson.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
index 13fc02d..52cfaf9 100644
--- a/lib/librte_bpf/meson.build
+++ b/lib/librte_bpf/meson.build
@@ -14,7 +14,7 @@ elif dpdk_conf.has('RTE_ARCH_ARM64')
sources += files('bpf_jit_arm64.c')
 endif
 
-install_headers = files('bpf_def.h',
+install_headers('bpf_def.h',
'rte_bpf.h',
'rte_bpf_ethdev.h')
 
-- 
2.7.4



[dpdk-dev] [RFC PATCH v2] kni: properly translate pa2va for cloned mbuf

2020-01-28 Thread Junxiao Shi
Previously, the KNI kernel module used the difference between m->buf_addr
and m->buf_iova to calculate the userspace virtual address from the
physical address. This works for direct mbufs, but does not work for
indirect (cloned) mbufs that come from another mempool. Transmitting a
cloned mbuf may cause a segmentation fault in userspace.

Now, userspace KNI library writes the virtual address of each mbuf
in m->userdata field, and KNI kernel module uses this field to restore
virtual address before putting mbuf into free_q. This approach works
for both direct and indirect mbufs.

NOTE TO REVIEWER - DO NOT MERGE
The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
I only modified kni_net_rx_normal function.
If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.

Bugzilla ID: 183

Signed-off-by: Junxiao Shi 
---
 kernel/linux/kni/kni_net.c| 4 ++--
 lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
 lib/librte_kni/rte_kni.c  | 8 ++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 97fe85b..d783545 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -377,7 +377,7 @@ kni_net_rx_normal(struct kni_dev *kni)
kva = get_kva(kni, kni->pa[i]);
len = kva->pkt_len;
data_kva = get_data_kva(kni, kva);
-   kni->va[i] = pa2va(kni->pa[i], kva);
+   kni->va[i] = kva->va;
 
skb = netdev_alloc_skb(dev, len);
if (!skb) {
@@ -403,7 +403,7 @@ kni_net_rx_normal(struct kni_dev *kni)
kva = pa2kva(kva->next);
data_kva = kva2data_kva(kva);
/* Convert physical address to virtual address 
*/
-   prev_kva->next = pa2va(prev_kva->next, kva);
+   prev_kva->next = kva->va;
}
}
 
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h 
b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 7313ef5..c694a1d 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -86,7 +86,8 @@ struct rte_kni_mbuf {
uint16_t data_len;  /**< Amount of data in segment buffer. */
 
/* fields on second cache line */
-   char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+   void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+/**< Virtual address of this mbuf in userspace (overwrites 
userdata). */
void *pool;
void *next; /**< Physical address of next mbuf in kernel. */
 };
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index e388751..463485f 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -359,13 +359,15 @@ va2pa(struct rte_mbuf *m)
 static void *
 va2pa_all(struct rte_mbuf *mbuf)
 {
-   void *phy_mbuf = va2pa(mbuf);
+   void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
struct rte_mbuf *next = mbuf->next;
while (next) {
-   mbuf->next = va2pa(next);
+   mbuf->userdata = mbuf;
+   mbuf->next = (void*)rte_mempool_virt2iova(next);
mbuf = next;
next = mbuf->next;
}
+   mbuf->userdata = mbuf;
return phy_mbuf;
 }
 
@@ -652,6 +654,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
 offsetof(struct rte_kni_mbuf, buf_addr));
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
 offsetof(struct rte_kni_mbuf, next));
+   RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
+offsetof(struct rte_kni_mbuf, va));
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
 offsetof(struct rte_kni_mbuf, data_off));
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-- 
2.7.4



[dpdk-dev] [PATCH] cryptodev: fix set_sym_session_private_data error in sessionless mode

2019-12-10 Thread Junxiao Shi
Bugzilla ID: 377

Signed-off-by: Junxiao Shi 
---
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 4 ++--
 drivers/crypto/openssl/rte_openssl_pmd.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c 
b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
index 97d9f81..33f4167 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
@@ -729,10 +729,10 @@ get_session(struct aesni_mb_qp *qp, struct rte_crypto_op 
*op)
op->sym->session,
cryptodev_driver_id);
} else {
-   void *_sess = NULL;
+   void *_sess = rte_cryptodev_sym_session_create(qp->sess_mp);
void *_sess_private_data = NULL;
 
-   if (rte_mempool_get(qp->sess_mp, (void **)&_sess))
+   if (_sess == NULL)
return NULL;
 
if (rte_mempool_get(qp->sess_mp_priv,
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c 
b/drivers/crypto/openssl/rte_openssl_pmd.c
index 91f0283..199097b 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -762,10 +762,10 @@ get_session(struct openssl_qp *qp, struct rte_crypto_op 
*op)
return NULL;
 
/* provide internal session */
-   void *_sess = NULL;
+   void *_sess = rte_cryptodev_sym_session_create(qp->sess_mp);
void *_sess_private_data = NULL;
 
-   if (rte_mempool_get(qp->sess_mp, (void **)&_sess))
+   if (_sess == NULL)
return NULL;
 
if (rte_mempool_get(qp->sess_mp_priv,
-- 
2.7.4



[dpdk-dev] [PATCH v2] cryptodev: free memzone when releasing cryptodev

2019-06-28 Thread Junxiao Shi
When a cryptodev is created in a primary process,
rte_cryptodev_data_alloc reserves a memzone.
However, this memzone was not released when the cryptodev
was uninitialized. After that, a new cryptodev could not be
created due to a memzone name conflict.

This commit frees the memzone when a cryptodev is
uninitialized, fixing this bug. This approach is chosen
instead of keeping and reusing the old memzone, because
the new cryptodev could belong to a different NUMA socket.

Also, rte_cryptodev_data pointer is now properly recorded
in cryptodev_globals.data array.

Bugzilla ID: 105

Signed-off-by: Junxiao Shi 
---
 lib/librte_cryptodev/rte_cryptodev.c | 46 ++--
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/lib/librte_cryptodev/rte_cryptodev.c 
b/lib/librte_cryptodev/rte_cryptodev.c
index 00c2cf4..a7a0d4b 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -627,7 +627,7 @@ static inline int
 rte_cryptodev_data_alloc(uint8_t dev_id, struct rte_cryptodev_data **data,
int socket_id)
 {
-   char mz_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+   char mz_name[RTE_MEMZONE_NAMESIZE];
const struct rte_memzone *mz;
int n;
 
@@ -653,6 +653,31 @@ rte_cryptodev_data_alloc(uint8_t dev_id, struct 
rte_cryptodev_data **data,
return 0;
 }
 
+static inline int
+rte_cryptodev_data_free(uint8_t dev_id, struct rte_cryptodev_data **data)
+{
+   char mz_name[RTE_MEMZONE_NAMESIZE];
+   const struct rte_memzone *mz;
+   int n;
+
+   /* generate memzone name */
+   n = snprintf(mz_name, sizeof(mz_name), "rte_cryptodev_data_%u", dev_id);
+   if (n >= (int)sizeof(mz_name))
+   return -EINVAL;
+
+   mz = rte_memzone_lookup(mz_name);
+   if (mz == NULL)
+   return -ENOMEM;
+
+   RTE_ASSERT(*data == mz->addr);
+   *data = NULL;
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   return rte_memzone_free(mz);
+
+   return 0;
+}
+
 static uint8_t
 rte_cryptodev_find_free_device_index(void)
 {
@@ -687,16 +712,16 @@ rte_cryptodev_pmd_allocate(const char *name, int 
socket_id)
cryptodev = rte_cryptodev_pmd_get_dev(dev_id);
 
if (cryptodev->data == NULL) {
-   struct rte_cryptodev_data *cryptodev_data =
-   cryptodev_globals.data[dev_id];
+   struct rte_cryptodev_data **cryptodev_data =
+   &cryptodev_globals.data[dev_id];
 
-   int retval = rte_cryptodev_data_alloc(dev_id, &cryptodev_data,
+   int retval = rte_cryptodev_data_alloc(dev_id, cryptodev_data,
socket_id);
 
-   if (retval < 0 || cryptodev_data == NULL)
+   if (retval < 0 || *cryptodev_data == NULL)
return NULL;
 
-   cryptodev->data = cryptodev_data;
+   cryptodev->data = *cryptodev_data;
 
strlcpy(cryptodev->data->name, name,
RTE_CRYPTODEV_NAME_MAX_LEN);
@@ -720,17 +745,24 @@ int
 rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev)
 {
int ret;
+   uint8_t dev_id;
 
if (cryptodev == NULL)
return -EINVAL;
 
+   dev_id = cryptodev->data->dev_id;
+
/* Close device only if device operations have been set */
if (cryptodev->dev_ops) {
-   ret = rte_cryptodev_close(cryptodev->data->dev_id);
+   ret = rte_cryptodev_close(dev_id);
if (ret < 0)
return ret;
}
 
+   ret = rte_cryptodev_data_free(dev_id, &cryptodev_globals.data[dev_id]);
+   if (ret < 0)
+   return ret;
+
cryptodev->attached = RTE_CRYPTODEV_DETACHED;
cryptodev_globals.nb_devs--;
return 0;
-- 
2.7.4



Re: [dpdk-dev] [v3] kni: fix possible kernel crash with va2pa

2019-07-02 Thread Junxiao Shi
I am battling a related problem as reported on
https://bugs.dpdk.org/show_bug.cgi?id=183 and this patch seems
relevant, so I applied this patch on 196a46fab6eeb3ce2039e3bcaca80f8ba43ffc8d

However, this patch does not work for me:
with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled, kni_free_mbufs's invocation of
rte_pktmbuf_free throws "bad mbuf pool" error.

While all mbufs and segments in kni->rx_q now have physical addresses,
the mbufs and segments placed back to kni->free_q still have (mis-)calculated
virtual addresses. The pa2va function is not working properly.

Consequently, the userspace side passes a wrong pointer to rte_pktmbuf_free,
so the application crashes with CONFIG_RTE_LIBRTE_MBUF_DEBUG enabled.


[dpdk-dev] [PATCH] cryptodev: free memzone when releasing cryptodev

2019-05-30 Thread Junxiao Shi
When a cryptodev is created in a primary process,
rte_cryptodev_data_alloc reserves a memzone.
However, this memzone was not released when the cryptodev
was uninitialized. After that, a new cryptodev could not be
created due to a memzone name conflict.

This commit frees the memzone when a cryptodev is
uninitialized, fixing this bug. This approach is chosen
instead of keeping and reusing the old memzone, because
the new cryptodev could belong to a different NUMA socket.

Also, rte_cryptodev_data pointer is now properly recorded
in cryptodev_globals.data array.

Bugzilla ID: 105

Signed-off-by: Junxiao Shi 
---
 lib/librte_cryptodev/rte_cryptodev.c | 44 +++-
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/lib/librte_cryptodev/rte_cryptodev.c 
b/lib/librte_cryptodev/rte_cryptodev.c
index 00c2cf4..666dfea 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -653,6 +653,31 @@ rte_cryptodev_data_alloc(uint8_t dev_id, struct 
rte_cryptodev_data **data,
return 0;
 }
 
+static inline int
+rte_cryptodev_data_free(uint8_t dev_id, struct rte_cryptodev_data **data)
+{
+   char mz_name[RTE_CRYPTODEV_NAME_MAX_LEN];
+   const struct rte_memzone *mz;
+   int n;
+
+   /* generate memzone name */
+   n = snprintf(mz_name, sizeof(mz_name), "rte_cryptodev_data_%u", dev_id);
+   if (n >= (int)sizeof(mz_name))
+   return -EINVAL;
+
+   mz = rte_memzone_lookup(mz_name);
+   if (mz == NULL)
+   return -ENOMEM;
+
+   RTE_ASSERT(*data == mz->addr);
+   *data = NULL;
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+   return rte_memzone_free(mz);
+
+   return 0;
+}
+
 static uint8_t
 rte_cryptodev_find_free_device_index(void)
 {
@@ -687,16 +712,16 @@ rte_cryptodev_pmd_allocate(const char *name, int 
socket_id)
cryptodev = rte_cryptodev_pmd_get_dev(dev_id);
 
if (cryptodev->data == NULL) {
-   struct rte_cryptodev_data *cryptodev_data =
-   cryptodev_globals.data[dev_id];
+   struct rte_cryptodev_data **cryptodev_data =
+   &cryptodev_globals.data[dev_id];
 
-   int retval = rte_cryptodev_data_alloc(dev_id, &cryptodev_data,
+   int retval = rte_cryptodev_data_alloc(dev_id, cryptodev_data,
socket_id);
 
-   if (retval < 0 || cryptodev_data == NULL)
+   if (retval < 0 || *cryptodev_data == NULL)
return NULL;
 
-   cryptodev->data = cryptodev_data;
+   cryptodev->data = *cryptodev_data;
 
strlcpy(cryptodev->data->name, name,
RTE_CRYPTODEV_NAME_MAX_LEN);
@@ -724,13 +749,20 @@ rte_cryptodev_pmd_release_device(struct rte_cryptodev 
*cryptodev)
if (cryptodev == NULL)
return -EINVAL;
 
+   uint8_t dev_id = cryptodev->data->dev_id;
+
/* Close device only if device operations have been set */
if (cryptodev->dev_ops) {
-   ret = rte_cryptodev_close(cryptodev->data->dev_id);
+   ret = rte_cryptodev_close(dev_id);
if (ret < 0)
return ret;
}
 
+   struct rte_cryptodev_data **cryptodev_data = 
&cryptodev_globals.data[dev_id];
+   ret = rte_cryptodev_data_free(dev_id, cryptodev_data);
+   if (ret < 0)
+   return ret;
+
cryptodev->attached = RTE_CRYPTODEV_DETACHED;
cryptodev_globals.nb_devs--;
return 0;
-- 
2.7.4



[dpdk-dev] [RFC PATCH] kni: properly translate pa2va for cloned mbuf

2019-09-10 Thread Junxiao Shi
Previously, the KNI kernel module used the difference between m->buf_addr
and m->buf_iova to calculate the userspace virtual address from the
physical address. This works for direct mbufs, but does not work for
indirect (cloned) mbufs that come from another mempool. Transmitting a
cloned mbuf may cause a segmentation fault in userspace.

Now, userspace KNI library writes the virtual address of each mbuf
in m->userdata field, and KNI kernel module uses this field to restore
virtual address before putting mbuf into free_q. This approach works
for both direct and indirect mbufs.

NOTE TO REVIEWER - DO NOT MERGE
The idea of this change is at https://bugs.dpdk.org/show_bug.cgi?id=183#c4
Test case is at https://bugs.dpdk.org/show_bug.cgi?id=183#c5
I only modified kni_net_rx_normal function.
If this approach is acceptable, I will modify kni_net_rx_lo_fifo,
kni_net_rx_lo_fifo_skb, and kni_fifo_trans_pa2va(rx_q) as well.

Bugzilla ID: 183

Signed-off-by: Junxiao Shi 
---
 kernel/linux/kni/kni_net.c| 4 ++--
 lib/librte_eal/linux/eal/include/rte_kni_common.h | 3 ++-
 lib/librte_kni/rte_kni.c  | 8 ++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index 7bd3a9f1e..b34ed2ed4 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -341,7 +341,7 @@ kni_net_rx_normal(struct kni_dev *kni)
kva = pa2kva(kni->pa[i]);
len = kva->pkt_len;
data_kva = kva2data_kva(kva);
-   kni->va[i] = pa2va(kni->pa[i], kva);
+   kni->va[i] = kva->va;
 
skb = netdev_alloc_skb(dev, len);
if (!skb) {
@@ -367,7 +367,7 @@ kni_net_rx_normal(struct kni_dev *kni)
kva = pa2kva(kva->next);
data_kva = kva2data_kva(kva);
/* Convert physical address to virtual address 
*/
-   prev_kva->next = pa2va(prev_kva->next, kva);
+   prev_kva->next = kva->va;
}
}
 
diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h 
b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 37d9ee8f0..bc92c0067 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -84,7 +84,8 @@ struct rte_kni_mbuf {
uint16_t data_len;  /**< Amount of data in segment buffer. */
 
/* fields on second cache line */
-   char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+   void *va __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE)));
+/**< Virtual address of this mbuf in userspace (overwrites 
userdata). */
void *pool;
void *next; /**< Physical address of next mbuf in kernel. */
 };
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 4b51fb4fe..96a6f6af2 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -352,13 +352,15 @@ va2pa(struct rte_mbuf *m)
 static void *
 va2pa_all(struct rte_mbuf *mbuf)
 {
-   void *phy_mbuf = va2pa(mbuf);
+   void *phy_mbuf = (void*)rte_mempool_virt2iova(mbuf);
struct rte_mbuf *next = mbuf->next;
while (next) {
-   mbuf->next = va2pa(next);
+   mbuf->userdata = mbuf;
+   mbuf->next = (void*)rte_mempool_virt2iova(next);
mbuf = next;
next = mbuf->next;
}
+   mbuf->userdata = mbuf;
return phy_mbuf;
 }
 
@@ -609,6 +611,8 @@ kni_allocate_mbufs(struct rte_kni *kni)
 offsetof(struct rte_kni_mbuf, buf_addr));
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) !=
 offsetof(struct rte_kni_mbuf, next));
+   RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, userdata) !=
+offsetof(struct rte_kni_mbuf, va));
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) !=
 offsetof(struct rte_kni_mbuf, data_off));
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
-- 
2.17.1