[dpdk-dev] [PATCH] lpm: extend IPv6 next hop field

2017-02-19 Thread Vladyslav Buslov
This patch extends the next_hop field from 8 bits to 21 bits in the LPM library
for IPv6.

Added versioning symbols to functions and updated
library and applications that have a dependency on LPM library.

Signed-off-by: Vladyslav Buslov 
---
 app/test/test_lpm6.c| 114 ++--
 app/test/test_lpm6_perf.c   |   4 +-
 doc/guides/prog_guide/lpm6_lib.rst  |   2 +-
 doc/guides/rel_notes/release_17_05.rst  |   5 +
 examples/ip_fragmentation/main.c|  16 +--
 examples/ip_reassembly/main.c   |  16 +--
 examples/ipsec-secgw/ipsec-secgw.c  |   2 +-
 examples/l3fwd/l3fwd_lpm_sse.h  |  20 ++--
 examples/performance-thread/l3fwd-thread/main.c |   9 +-
 lib/librte_lpm/rte_lpm6.c   | 133 +---
 lib/librte_lpm/rte_lpm6.h   |  29 +-
 lib/librte_lpm/rte_lpm_version.map  |  10 ++
 lib/librte_table/rte_table_lpm_ipv6.c   |   9 +-
 13 files changed, 282 insertions(+), 87 deletions(-)

diff --git a/app/test/test_lpm6.c b/app/test/test_lpm6.c
index 61134f7..2950aae 100644
--- a/app/test/test_lpm6.c
+++ b/app/test/test_lpm6.c
@@ -79,6 +79,7 @@ static int32_t test24(void);
 static int32_t test25(void);
 static int32_t test26(void);
 static int32_t test27(void);
+static int32_t test28(void);
 
 rte_lpm6_test tests6[] = {
 /* Test Cases */
@@ -110,6 +111,7 @@ rte_lpm6_test tests6[] = {
test25,
test26,
test27,
+   test28,
 };
 
 #define NUM_LPM6_TESTS(sizeof(tests6)/sizeof(tests6[0]))
@@ -354,7 +356,7 @@ test6(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t next_hop_return = 0;
+   uint32_t next_hop_return = 0;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -392,7 +394,7 @@ test7(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[10][16];
-   int16_t next_hop_return[10];
+   int32_t next_hop_return[10];
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -469,7 +471,8 @@ test9(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 16, next_hop_add = 100, next_hop_return = 0;
+   uint8_t depth = 16;
+   uint32_t next_hop_add = 100, next_hop_return = 0;
int32_t status = 0;
uint8_t i;
 
@@ -513,7 +516,8 @@ test10(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
int i;
 
@@ -557,7 +561,8 @@ test11(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -617,7 +622,8 @@ test12(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -655,7 +661,8 @@ test13(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
 
config.max_rules = 2;
@@ -702,7 +709,8 @@ test14(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 25, next_hop_add = 100;
+   uint8_t depth = 25;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
int i;
 
@@ -748,7 +756,8 @@ test15(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 24, next_hop_add = 100, next_hop_return = 0;
+   uint8_t depth = 24;
+   uint32_t next_hop_add = 100, next_hop_return = 0;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -784,7 +793,8 @@ test16(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {12,12,1,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 128, next_hop_add = 100, next_hop_return = 0;
+   uint8_t depth = 128;
+   uint32_t next_hop_add = 100, next_hop_return = 0;
int32_t status = 0

Re: [dpdk-dev] [PATCH] lpm: extend IPv6 next hop field

2017-02-21 Thread Vladyslav Buslov
Hello Bruce,

Thanks for reviewing my code.

I noticed errors generated by checkpatch.
However, all of them are due to the original formatting of the files (e.g. code 
copy-pasted by me as-is from the same file, or modifications to original lines 
unrelated to formatting).
I decided to preserve the original formatting of the files instead of introducing 
inconsistencies.
I'll create V2 according to code guidelines if it is your preferred approach.

Regards,
Vlad

> -Original Message-
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> Sent: Tuesday, February 21, 2017 1:24 PM
> To: Vladyslav Buslov
> Cc: dev@dpdk.org
> Subject: Re: [PATCH] lpm: extend IPv6 next hop field
> 
> On Sun, Feb 19, 2017 at 07:14:59PM +0200, Vladyslav Buslov wrote:
> > This patch extend next_hop field from 8-bits to 21-bits in LPM library
> > for IPv6.
> >
> > Added versioning symbols to functions and updated library and
> > applications that have a dependency on LPM library.
> >
> > Signed-off-by: Vladyslav Buslov 
> > ---
> Looks good to me. There are a number of checkpatch errors flagged. Can you
> perhaps do a V2 with those fixed. (You can use the checkpatches.sh wrapper
> script in the devtools directory to help).
> 
> Once checkpatch errors are cleared:
> 
> Acked-by: Bruce Richardson 
> 
> Regards,
> /Bruce


[dpdk-dev] [PATCH v2] lpm: extend IPv6 next hop field

2017-02-21 Thread Vladyslav Buslov
This patch extends the next_hop field from 8 bits to 21 bits in the LPM library
for IPv6.

Added versioning symbols to functions and updated
library and applications that have a dependency on LPM library.

Signed-off-by: Vladyslav Buslov 
---
 app/test/test_lpm6.c| 115 ++--
 app/test/test_lpm6_perf.c   |   4 +-
 doc/guides/prog_guide/lpm6_lib.rst  |   2 +-
 doc/guides/rel_notes/release_17_05.rst  |   5 +
 examples/ip_fragmentation/main.c|  17 +--
 examples/ip_reassembly/main.c   |  17 +--
 examples/ipsec-secgw/ipsec-secgw.c  |   2 +-
 examples/l3fwd/l3fwd_lpm_sse.h  |  24 ++---
 examples/performance-thread/l3fwd-thread/main.c |  11 +-
 lib/librte_lpm/rte_lpm6.c   | 134 +---
 lib/librte_lpm/rte_lpm6.h   |  32 +-
 lib/librte_lpm/rte_lpm_version.map  |  10 ++
 lib/librte_table/rte_table_lpm_ipv6.c   |   9 +-
 13 files changed, 292 insertions(+), 90 deletions(-)

diff --git a/app/test/test_lpm6.c b/app/test/test_lpm6.c
index 61134f7..e0e7bf0 100644
--- a/app/test/test_lpm6.c
+++ b/app/test/test_lpm6.c
@@ -79,6 +79,7 @@ static int32_t test24(void);
 static int32_t test25(void);
 static int32_t test26(void);
 static int32_t test27(void);
+static int32_t test28(void);
 
 rte_lpm6_test tests6[] = {
 /* Test Cases */
@@ -110,6 +111,7 @@ rte_lpm6_test tests6[] = {
test25,
test26,
test27,
+   test28,
 };
 
 #define NUM_LPM6_TESTS(sizeof(tests6)/sizeof(tests6[0]))
@@ -354,7 +356,7 @@ test6(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t next_hop_return = 0;
+   uint32_t next_hop_return = 0;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -392,7 +394,7 @@ test7(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[10][16];
-   int16_t next_hop_return[10];
+   int32_t next_hop_return[10];
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -469,7 +471,8 @@ test9(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 16, next_hop_add = 100, next_hop_return = 0;
+   uint8_t depth = 16;
+   uint32_t next_hop_add = 100, next_hop_return = 0;
int32_t status = 0;
uint8_t i;
 
@@ -513,7 +516,8 @@ test10(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
int i;
 
@@ -557,7 +561,8 @@ test11(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -617,7 +622,8 @@ test12(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -655,7 +661,8 @@ test13(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth, next_hop_add = 100;
+   uint8_t depth;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
 
config.max_rules = 2;
@@ -702,7 +709,8 @@ test14(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 25, next_hop_add = 100;
+   uint8_t depth = 25;
+   uint32_t next_hop_add = 100;
int32_t status = 0;
int i;
 
@@ -748,7 +756,8 @@ test15(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 24, next_hop_add = 100, next_hop_return = 0;
+   uint8_t depth = 24;
+   uint32_t next_hop_add = 100, next_hop_return = 0;
int32_t status = 0;
 
config.max_rules = MAX_RULES;
@@ -784,7 +793,8 @@ test16(void)
struct rte_lpm6 *lpm = NULL;
struct rte_lpm6_config config;
uint8_t ip[] = {12,12,1,0,0,0,0,0,0,0,0,0,0,0,0,0};
-   uint8_t depth = 128, next_hop_add = 100, next_hop_return = 0;
+   uint8_t depth = 128;
+   uint32_t next_hop_add = 100, next_hop_return = 0;
int32_t status = 0

Re: [dpdk-dev] [PATCH v2] lpm: extend IPv6 next hop field

2017-02-22 Thread Vladyslav Buslov
Hi Bruce,

Yes, v2 patch is same as v1 + indentation changes according to checkpatch 
warnings.

Regards,
Vlad

> -Original Message-
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> Sent: Wednesday, February 22, 2017 2:03 PM
> To: Vladyslav Buslov
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2] lpm: extend IPv6 next hop field
> 
> On Tue, Feb 21, 2017 at 04:46:39PM +0200, Vladyslav Buslov wrote:
> > This patch extend next_hop field from 8-bits to 21-bits in LPM library
> > for IPv6.
> >
> > Added versioning symbols to functions and updated library and
> > applications that have a dependency on LPM library.
> >
> > Signed-off-by: Vladyslav Buslov 
> Acked-by: Bruce Richardson 
> 
> Hi Vlad,
> 
> Thanks for the V2.
> I assume that this is just the V1 with the checkpatch issues fixed, right? For
> future use, please include just below the cut line "---" a short list of what 
> has
> changed since the previous version, so that we know what to look for if we
> already have reviewed the previous version.
> For minor changes in code, like in this case, you can also keep the ack on the
> resubmission, putting it just below the signoff like above.
> 
> /Bruce



[dpdk-dev] [PATCH] net/i40e: add packet prefetch

2017-03-01 Thread Vladyslav Buslov
Prefetch both cache lines of mbuf and first cache line of payload
if CONFIG_RTE_PMD_PACKET_PREFETCH is set.

Signed-off-by: Vladyslav Buslov 
---
 drivers/net/i40e/i40e_rxtx.c | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 48429cc..2b4e5c9 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -100,6 +100,12 @@
 #define I40E_TX_OFFLOAD_NOTSUP_MASK \
(PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK)
 
+#ifdef RTE_PMD_PACKET_PREFETCH
+#define rte_packet_prefetch(p) rte_prefetch0(p)
+#else
+#define rte_packet_prefetch(p) do {} while (0)
+#endif
+
 static uint16_t i40e_xmit_pkts_simple(void *tx_queue,
  struct rte_mbuf **tx_pkts,
  uint16_t nb_pkts);
@@ -495,6 +501,9 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
/* Translate descriptor info to mbuf parameters */
for (j = 0; j < nb_dd; j++) {
mb = rxep[j].mbuf;
+   rte_packet_prefetch(
+   RTE_PTR_ADD(mb->buf_addr,
+   RTE_PKTMBUF_HEADROOM));
qword1 = rte_le_to_cpu_64(\
rxdp[j].wb.qword1.status_error_len);
pkt_len = ((qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -578,9 +587,11 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)
 
rxdp = &rxq->rx_ring[alloc_idx];
for (i = 0; i < rxq->rx_free_thresh; i++) {
-   if (likely(i < (rxq->rx_free_thresh - 1)))
+   if (likely(i < (rxq->rx_free_thresh - 1))) {
/* Prefetch next mbuf */
-   rte_prefetch0(rxep[i + 1].mbuf);
+   rte_packet_prefetch(rxep[i + 1].mbuf->cacheline0);
+   rte_packet_prefetch(rxep[i + 1].mbuf->cacheline1);
+   }
 
mb = rxep[i].mbuf;
rte_mbuf_refcnt_set(mb, 1);
@@ -752,7 +763,8 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, 
uint16_t nb_pkts)
I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - rxq->crc_len;
 
rxm->data_off = RTE_PKTMBUF_HEADROOM;
-   rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+   rte_packet_prefetch(RTE_PTR_ADD(rxm->buf_addr,
+   RTE_PKTMBUF_HEADROOM));
rxm->nb_segs = 1;
rxm->next = NULL;
rxm->pkt_len = rx_packet_len;
@@ -939,7 +951,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
first_seg->ol_flags |= pkt_flags;
 
/* Prefetch data of first segment, if configured to do so. */
-   rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr,
+   rte_packet_prefetch(RTE_PTR_ADD(first_seg->buf_addr,
first_seg->data_off));
rx_pkts[nb_rx++] = first_seg;
first_seg = NULL;
-- 
2.1.4



[dpdk-dev] [PATCH v3] lpm: extend IPv6 next hop field

2017-03-14 Thread Vladyslav Buslov
This patch extends the next_hop field from 8 bits to 21 bits in the LPM library
for IPv6.

Added versioning symbols to functions and updated
library and applications that have a dependency on LPM library.

Signed-off-by: Vladyslav Buslov 
Acked-by: Bruce Richardson 
---

Fixed compilation error in l3fwd_lpm.h

 doc/guides/prog_guide/lpm6_lib.rst  |   2 +-
 doc/guides/rel_notes/release_17_05.rst  |   5 +
 examples/ip_fragmentation/main.c|  17 +--
 examples/ip_reassembly/main.c   |  17 +--
 examples/ipsec-secgw/ipsec-secgw.c  |   2 +-
 examples/l3fwd/l3fwd_lpm.h  |   2 +-
 examples/l3fwd/l3fwd_lpm_sse.h  |  24 ++---
 examples/performance-thread/l3fwd-thread/main.c |  11 +-
 lib/librte_lpm/rte_lpm6.c   | 134 +---
 lib/librte_lpm/rte_lpm6.h   |  32 +-
 lib/librte_lpm/rte_lpm_version.map  |  10 ++
 lib/librte_table/rte_table_lpm_ipv6.c   |   9 +-
 test/test/test_lpm6.c   | 115 ++--
 test/test/test_lpm6_perf.c  |   4 +-
 14 files changed, 293 insertions(+), 91 deletions(-)

diff --git a/doc/guides/prog_guide/lpm6_lib.rst 
b/doc/guides/prog_guide/lpm6_lib.rst
index 0aea5c5..f791507 100644
--- a/doc/guides/prog_guide/lpm6_lib.rst
+++ b/doc/guides/prog_guide/lpm6_lib.rst
@@ -53,7 +53,7 @@ several thousand IPv6 rules, but the number can vary 
depending on the case.
 An LPM prefix is represented by a pair of parameters (128-bit key, depth), 
with depth in the range of 1 to 128.
 An LPM rule is represented by an LPM prefix and some user data associated with 
the prefix.
 The prefix serves as the unique identifier for the LPM rule.
-In this implementation, the user data is 1-byte long and is called "next hop",
+In this implementation, the user data is 21-bits long and is called "next hop",
 which corresponds to its main use of storing the ID of the next hop in a 
routing table entry.
 
 The main methods exported for the LPM component are:
diff --git a/doc/guides/rel_notes/release_17_05.rst 
b/doc/guides/rel_notes/release_17_05.rst
index 4b90036..918f483 100644
--- a/doc/guides/rel_notes/release_17_05.rst
+++ b/doc/guides/rel_notes/release_17_05.rst
@@ -41,6 +41,9 @@ New Features
  Also, make sure to start the actual text at the margin.
  =
 
+* **Increased number of next hops for LPM IPv6 to 2^21.**
+
+  The next_hop field is extended from 8 bits to 21 bits for IPv6.
 
 * **Added powerpc support in pci probing for vfio-pci devices.**
 
@@ -114,6 +117,8 @@ API Changes
Also, make sure to start the actual text at the margin.
=
 
+* The LPM ``next_hop`` field is extended from 8 bits to 21 bits for IPv6
+  while keeping ABI compatibility.
 
 ABI Changes
 ---
diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c
index 9e9ecae..1b005b5 100644
--- a/examples/ip_fragmentation/main.c
+++ b/examples/ip_fragmentation/main.c
@@ -265,8 +265,8 @@ l3fwd_simple_forward(struct rte_mbuf *m, struct 
lcore_queue_conf *qconf,
uint8_t queueid, uint8_t port_in)
 {
struct rx_queue *rxq;
-   uint32_t i, len, next_hop_ipv4;
-   uint8_t next_hop_ipv6, port_out, ipv6;
+   uint32_t i, len, next_hop;
+   uint8_t port_out, ipv6;
int32_t len2;
 
ipv6 = 0;
@@ -290,9 +290,9 @@ l3fwd_simple_forward(struct rte_mbuf *m, struct 
lcore_queue_conf *qconf,
ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);
 
/* Find destination port */
-   if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop_ipv4) == 0 &&
-   (enabled_port_mask & 1 << next_hop_ipv4) != 0) {
-   port_out = next_hop_ipv4;
+   if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
+   (enabled_port_mask & 1 << next_hop) != 0) {
+   port_out = next_hop;
 
/* Build transmission burst for new port */
len = qconf->tx_mbufs[port_out].len;
@@ -326,9 +326,10 @@ l3fwd_simple_forward(struct rte_mbuf *m, struct 
lcore_queue_conf *qconf,
ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *);
 
/* Find destination port */
-   if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, 
&next_hop_ipv6) == 0 &&
-   (enabled_port_mask & 1 << next_hop_ipv6) != 0) {
-   port_out = next_hop_ipv6;
+   if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
+   &next_hop) == 0 &&
+   (enabled_port_mask & 1 &

[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-02 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode
by setting core_id and force_bind config parameters.

Signed-off-by: Vladyslav Buslov 
---
 lib/librte_eal/linuxapp/kni/kni_misc.c | 48 ++
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca..5e7cf21 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -100,6 +101,7 @@ static int kni_net_id;

 struct kni_net {
unsigned long device_in_use; /* device in use flag */
+   struct mutex kni_kthread_lock;
struct task_struct *kni_kthread;
struct rw_semaphore kni_list_lock;
struct list_head kni_list_head;
@@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net *net)
/* Clear the bit of device in use */
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

+   mutex_init(&knet->kni_kthread_lock);
+   knet->kni_kthread = NULL;
+
init_rwsem(&knet->kni_list_lock);
INIT_LIST_HEAD(&knet->kni_list_head);

@@ -139,9 +144,9 @@ static int __net_init kni_init_net(struct net *net)

 static void __net_exit kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
-
+   mutex_destroy(&knet->kni_kthread_lock);
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
kfree(knet);
 #endif
 }
@@ -236,16 +241,9 @@ kni_open(struct inode *inode, struct file *file)
return -EBUSY;

/* Create kernel thread for single mode */
-   if (multiple_kthread_on == 0) {
+   if (multiple_kthread_on == 0)
KNI_PRINT("Single kernel thread for all KNI devices\n");
-   /* Create kernel thread for RX */
-   knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-   "kni_single");
-   if (IS_ERR(knet->kni_kthread)) {
-   KNI_ERR("Unable to create kernel threaed\n");
-   return PTR_ERR(knet->kni_kthread);
-   }
-   } else
+   else
KNI_PRINT("Multiple kernel thread mode enabled\n");

file->private_data = get_net(net);
@@ -263,9 +261,13 @@ kni_release(struct inode *inode, struct file *file)

/* Stop kernel thread for single mode */
if (multiple_kthread_on == 0) {
+   mutex_lock(&knet->kni_kthread_lock);
/* Stop kernel thread */
-   kthread_stop(knet->kni_kthread);
-   knet->kni_kthread = NULL;
+   if (knet->kni_kthread != NULL) {
+   kthread_stop(knet->kni_kthread);
+   knet->kni_kthread = NULL;
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
@@ -415,10 +417,9 @@ kni_ioctl_create(struct net *net,
}

/**
-* Check if the cpu core id is valid for binding,
-* for multiple kernel thread mode.
+* Check if the cpu core id is valid for binding.
 */
-   if (multiple_kthread_on && dev_info.force_bind &&
+   if (dev_info.force_bind &&
!cpu_online(dev_info.core_id)) {
KNI_ERR("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
@@ -581,6 +582,21 @@ kni_ioctl_create(struct net *net,
if (dev_info.force_bind)
kthread_bind(kni->pthread, kni->core_id);
wake_up_process(kni->pthread);
+   } else {
+   mutex_lock(&knet->kni_kthread_lock);
+   if (knet->kni_kthread == NULL) {
+   knet->kni_kthread = kthread_create(kni_thread_single,
+   (void 
*)knet,
+   
"kni_single");
+   if (IS_ERR(knet->kni_kthread)) {
+   kni_dev_remove(kni);
+   return -ECANCELED;
+   }
+   if (dev_info.force_bind)
+   kthread_bind(knet->kni_kthread, kni->core_id);
+   wake_up_process(knet->kni_kthread);
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
-- 
2.8.3



[dpdk-dev] [PATCH] add support for core_id param in single threaded mode

2016-09-06 Thread Vladyslav Buslov
Hi Ferruh,

Following your suggestions, I implemented the dynamic KNI thread affinity
patch for single-threaded mode.

It reuses core_id and force_bind config parameters from multi threaded
mode.

Regards,
Vladyslav

Vladyslav Buslov (1):
  kni: add support for core_id param in single threaded mode

 lib/librte_eal/linuxapp/kni/kni_misc.c | 48 ++
 1 file changed, 32 insertions(+), 16 deletions(-)

-- 
2.8.3



[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-06 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode
by setting core_id and force_bind config parameters.

Signed-off-by: Vladyslav Buslov 
---
 lib/librte_eal/linuxapp/kni/kni_misc.c | 48 ++
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca..5e7cf21 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -100,6 +101,7 @@ static int kni_net_id;

 struct kni_net {
unsigned long device_in_use; /* device in use flag */
+   struct mutex kni_kthread_lock;
struct task_struct *kni_kthread;
struct rw_semaphore kni_list_lock;
struct list_head kni_list_head;
@@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net *net)
/* Clear the bit of device in use */
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

+   mutex_init(&knet->kni_kthread_lock);
+   knet->kni_kthread = NULL;
+
init_rwsem(&knet->kni_list_lock);
INIT_LIST_HEAD(&knet->kni_list_head);

@@ -139,9 +144,9 @@ static int __net_init kni_init_net(struct net *net)

 static void __net_exit kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
-
+   mutex_destroy(&knet->kni_kthread_lock);
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
kfree(knet);
 #endif
 }
@@ -236,16 +241,9 @@ kni_open(struct inode *inode, struct file *file)
return -EBUSY;

/* Create kernel thread for single mode */
-   if (multiple_kthread_on == 0) {
+   if (multiple_kthread_on == 0)
KNI_PRINT("Single kernel thread for all KNI devices\n");
-   /* Create kernel thread for RX */
-   knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-   "kni_single");
-   if (IS_ERR(knet->kni_kthread)) {
-   KNI_ERR("Unable to create kernel threaed\n");
-   return PTR_ERR(knet->kni_kthread);
-   }
-   } else
+   else
KNI_PRINT("Multiple kernel thread mode enabled\n");

file->private_data = get_net(net);
@@ -263,9 +261,13 @@ kni_release(struct inode *inode, struct file *file)

/* Stop kernel thread for single mode */
if (multiple_kthread_on == 0) {
+   mutex_lock(&knet->kni_kthread_lock);
/* Stop kernel thread */
-   kthread_stop(knet->kni_kthread);
-   knet->kni_kthread = NULL;
+   if (knet->kni_kthread != NULL) {
+   kthread_stop(knet->kni_kthread);
+   knet->kni_kthread = NULL;
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
@@ -415,10 +417,9 @@ kni_ioctl_create(struct net *net,
}

/**
-* Check if the cpu core id is valid for binding,
-* for multiple kernel thread mode.
+* Check if the cpu core id is valid for binding.
 */
-   if (multiple_kthread_on && dev_info.force_bind &&
+   if (dev_info.force_bind &&
!cpu_online(dev_info.core_id)) {
KNI_ERR("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
@@ -581,6 +582,21 @@ kni_ioctl_create(struct net *net,
if (dev_info.force_bind)
kthread_bind(kni->pthread, kni->core_id);
wake_up_process(kni->pthread);
+   } else {
+   mutex_lock(&knet->kni_kthread_lock);
+   if (knet->kni_kthread == NULL) {
+   knet->kni_kthread = kthread_create(kni_thread_single,
+   (void 
*)knet,
+   
"kni_single");
+   if (IS_ERR(knet->kni_kthread)) {
+   kni_dev_remove(kni);
+   return -ECANCELED;
+   }
+   if (dev_info.force_bind)
+   kthread_bind(knet->kni_kthread, kni->core_id);
+   wake_up_process(knet->kni_kthread);
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
-- 
2.8.3



[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-06 Thread Vladyslav Buslov
Ferruh,

Thanks for suggestions.
I'll try to provide new patch this week.

Regards,
Vladyslav


-Original Message-
From: Ferruh Yigit [mailto:ferruh.yi...@intel.com] 
Sent: Tuesday, September 06, 2016 5:31 PM
To: Vladyslav Buslov
Cc: dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH] kni: add support for core_id param in single 
threaded mode

On 9/6/2016 3:14 PM, Ferruh Yigit wrote:
> On 9/6/2016 12:25 PM, Vladyslav Buslov wrote:
>> Allow binding KNI thread to specific core in single threaded mode by 
>> setting core_id and force_bind config parameters.
> 
> Thanks, patch does exactly what we talked, and as I tested it works fine.
> 
> 1) There are a few comments, can you please find them inline.
> 
> 2) Would you mind adding some document related this new feature?
> Document path is: doc/guides/prog_guide/kernel_nic_interface.rst
> 
>>
>> Signed-off-by: Vladyslav Buslov 
>> ---
>>  lib/librte_eal/linuxapp/kni/kni_misc.c | 48 
>> ++
>>  1 file changed, 32 insertions(+), 16 deletions(-)
>>
>> diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
>> b/lib/librte_eal/linuxapp/kni/kni_misc.c
>> index 59d15ca..5e7cf21 100644
>> --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
>> +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
>> @@ -30,6 +30,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -100,6 +101,7 @@ static int kni_net_id;
>>  
>>  struct kni_net {
>>  unsigned long device_in_use; /* device in use flag */
>> +struct mutex kni_kthread_lock;
>>  struct task_struct *kni_kthread;
>>  struct rw_semaphore kni_list_lock;
>>  struct list_head kni_list_head;
>> @@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net *net)
>>  /* Clear the bit of device in use */
>>  clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
>>  
>> +mutex_init(&knet->kni_kthread_lock);
>> +knet->kni_kthread = NULL;
>> +
>>  init_rwsem(&knet->kni_list_lock);
>>  INIT_LIST_HEAD(&knet->kni_list_head);
>>  
>> @@ -139,9 +144,9 @@ static int __net_init kni_init_net(struct net 
>> *net)
>>  
>>  static void __net_exit kni_exit_net(struct net *net)  { -#ifndef 
>> HAVE_SIMPLIFIED_PERNET_OPERATIONS
>>  struct kni_net *knet = net_generic(net, kni_net_id);
>> -
>> +mutex_destroy(&knet->kni_kthread_lock);
>> +#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
>>  kfree(knet);
>>  #endif
>>  }
>> @@ -236,16 +241,9 @@ kni_open(struct inode *inode, struct file *file)
>>  return -EBUSY;
>>  
>>  /* Create kernel thread for single mode */
>> -if (multiple_kthread_on == 0) {
>> +if (multiple_kthread_on == 0)
>>  KNI_PRINT("Single kernel thread for all KNI devices\n");
>> -/* Create kernel thread for RX */
>> -knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
>> -"kni_single");
>> -if (IS_ERR(knet->kni_kthread)) {
>> -KNI_ERR("Unable to create kernel threaed\n");
>> -return PTR_ERR(knet->kni_kthread);
>> -}
>> -} else
>> +else
>>  KNI_PRINT("Multiple kernel thread mode enabled\n");
> 
> Can move logs to where threads actually created (kni_ioctl_create)

On second thought, moving the logs to kni_ioctl_create() could be too verbose since it 
is called multiple times, but we need this log only once.

Keeping it in the open() cb isn't a good option either; what do you think about moving it into 
kni_init(), after kni_parse_kthread_mode? I think it fits better there since this is 
a module param...



[dpdk-dev] [PATCH v2 1/2] kni: add support for core_id param in single threaded mode

2016-09-10 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode
by setting core_id and force_bind config parameters.

Signed-off-by: Vladyslav Buslov 
---
 lib/librte_eal/linuxapp/kni/kni_misc.c | 48 ++
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca..5e7cf21 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -100,6 +101,7 @@ static int kni_net_id;

 struct kni_net {
unsigned long device_in_use; /* device in use flag */
+   struct mutex kni_kthread_lock;
struct task_struct *kni_kthread;
struct rw_semaphore kni_list_lock;
struct list_head kni_list_head;
@@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net *net)
/* Clear the bit of device in use */
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

+   mutex_init(&knet->kni_kthread_lock);
+   knet->kni_kthread = NULL;
+
init_rwsem(&knet->kni_list_lock);
INIT_LIST_HEAD(&knet->kni_list_head);

@@ -139,9 +144,9 @@ static int __net_init kni_init_net(struct net *net)

 static void __net_exit kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
-
+   mutex_destroy(&knet->kni_kthread_lock);
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
kfree(knet);
 #endif
 }
@@ -236,16 +241,9 @@ kni_open(struct inode *inode, struct file *file)
return -EBUSY;

/* Create kernel thread for single mode */
-   if (multiple_kthread_on == 0) {
+   if (multiple_kthread_on == 0)
KNI_PRINT("Single kernel thread for all KNI devices\n");
-   /* Create kernel thread for RX */
-   knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-   "kni_single");
-   if (IS_ERR(knet->kni_kthread)) {
-   KNI_ERR("Unable to create kernel threaed\n");
-   return PTR_ERR(knet->kni_kthread);
-   }
-   } else
+   else
KNI_PRINT("Multiple kernel thread mode enabled\n");

file->private_data = get_net(net);
@@ -263,9 +261,13 @@ kni_release(struct inode *inode, struct file *file)

/* Stop kernel thread for single mode */
if (multiple_kthread_on == 0) {
+   mutex_lock(&knet->kni_kthread_lock);
/* Stop kernel thread */
-   kthread_stop(knet->kni_kthread);
-   knet->kni_kthread = NULL;
+   if (knet->kni_kthread != NULL) {
+   kthread_stop(knet->kni_kthread);
+   knet->kni_kthread = NULL;
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
@@ -415,10 +417,9 @@ kni_ioctl_create(struct net *net,
}

/**
-* Check if the cpu core id is valid for binding,
-* for multiple kernel thread mode.
+* Check if the cpu core id is valid for binding.
 */
-   if (multiple_kthread_on && dev_info.force_bind &&
+   if (dev_info.force_bind &&
!cpu_online(dev_info.core_id)) {
KNI_ERR("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
@@ -581,6 +582,21 @@ kni_ioctl_create(struct net *net,
if (dev_info.force_bind)
kthread_bind(kni->pthread, kni->core_id);
wake_up_process(kni->pthread);
+   } else {
+   mutex_lock(&knet->kni_kthread_lock);
+   if (knet->kni_kthread == NULL) {
+   knet->kni_kthread = kthread_create(kni_thread_single,
+   (void 
*)knet,
+   
"kni_single");
+   if (IS_ERR(knet->kni_kthread)) {
+   kni_dev_remove(kni);
+   return -ECANCELED;
+   }
+   if (dev_info.force_bind)
+   kthread_bind(knet->kni_kthread, kni->core_id);
+   wake_up_process(knet->kni_kthread);
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
-- 
2.8.3



[dpdk-dev] [PATCH v2 2/2] kni: add support for core_id param in single threaded mode

2016-09-10 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode
by setting core_id and force_bind config parameters.

Signed-off-by: Vladyslav Buslov 
---

v2:
* Fixed formatting.
* Refactored kthread create/bind functionality into separate function.
* Moved thread mode print into kni_init.
* Added short description to KNI Programmer's Guide doc.
* Fixed outdated mbuf processing description in KNI Programmer's Guide doc.

 doc/guides/prog_guide/kernel_nic_interface.rst |  5 +-
 lib/librte_eal/linuxapp/kni/kni_misc.c | 72 +-
 2 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst 
b/doc/guides/prog_guide/kernel_nic_interface.rst
index fac1960..0fdc307 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -102,6 +102,9 @@ Refer to rte_kni_common.h in the DPDK source code for more 
details.

 The physical addresses will be re-mapped into the kernel address space and 
stored in separate KNI contexts.

+The affinity of kernel RX thread (both single and multi-threaded modes) is 
controlled by force_bind and
+core_id config parameters.
+
 The KNI interfaces can be deleted by a DPDK application dynamically after 
being created.
 Furthermore, all those KNI interfaces not deleted will be deleted on the 
release operation
 of the miscellaneous device (when the DPDK application is closed).
@@ -128,7 +131,7 @@ Use Case: Ingress
 On the DPDK RX side, the mbuf is allocated by the PMD in the RX thread context.
 This thread will enqueue the mbuf in the rx_q FIFO.
 The KNI thread will poll all KNI active devices for the rx_q.
-If an mbuf is dequeued, it will be converted to a sk_buff and sent to the net 
stack via netif_rx().
+If an mbuf is dequeued, it will be converted to a sk_buff and sent to the net 
stack via netif_rx_ni().
 The dequeued mbuf must be freed, so the same pointer is sent back in the 
free_q FIFO.

 The RX thread, in the same main loop, polls this FIFO and frees the mbuf after 
dequeuing it.
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 5e7cf21..c79f5a8 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -172,6 +172,11 @@ kni_init(void)
return -EINVAL;
}

+   if (multiple_kthread_on == 0)
+   KNI_PRINT("Single kernel thread for all KNI devices\n");
+   else
+   KNI_PRINT("Multiple kernel thread mode enabled\n");
+
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
rc = register_pernet_subsys(&kni_net_ops);
 #else
@@ -240,12 +245,6 @@ kni_open(struct inode *inode, struct file *file)
if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
return -EBUSY;

-   /* Create kernel thread for single mode */
-   if (multiple_kthread_on == 0)
-   KNI_PRINT("Single kernel thread for all KNI devices\n");
-   else
-   KNI_PRINT("Multiple kernel thread mode enabled\n");
-
file->private_data = get_net(net);
KNI_PRINT("/dev/kni opened\n");

@@ -391,6 +390,32 @@ kni_check_param(struct kni_dev *kni, struct 
rte_kni_device_info *dev)
return 0;
 }

+__printf(5, 6) static struct task_struct *
+kni_run_thread(int (*threadfn)(void *data),
+   void *data,
+   uint8_t force_bind,
+   unsigned core_id,
+   const char namefmt[], ...)
+{
+   struct task_struct *kni_thread = NULL;
+   char task_comm[TASK_COMM_LEN];
+   va_list args;
+
+   va_start(args, namefmt);
+   vsnprintf(task_comm, sizeof(task_comm), namefmt, args);
+   va_end(args);
+
+   kni_thread = kthread_create(threadfn, data, task_comm);
+   if (IS_ERR(kni_thread))
+   return NULL;
+
+   if (force_bind)
+   kthread_bind(kni_thread, core_id);
+   wake_up_process(kni_thread);
+
+   return kni_thread;
+}
+
 static int
 kni_ioctl_create(struct net *net,
unsigned int ioctl_num, unsigned long ioctl_param)
@@ -419,8 +444,7 @@ kni_ioctl_create(struct net *net,
/**
 * Check if the cpu core id is valid for binding.
 */
-   if (dev_info.force_bind &&
-   !cpu_online(dev_info.core_id)) {
+   if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
KNI_ERR("cpu %u is not online\n", dev_info.core_id);
return -EINVAL;
}
@@ -572,31 +596,29 @@ kni_ioctl_create(struct net *net,
 * and finally wake it up.
 */
if (multiple_kthread_on) {
-   kni->pthread = kthread_create(kni_thread_multiple,
- (void *)kni,
- "kni_%s", kni->name);
-   if (IS_ERR(kni

[dpdk-dev] [PATCH v2 2/2] kni: add support for core_id param in single threaded mode

2016-09-13 Thread Vladyslav Buslov
Hi Ferruh,

Thanks for reviewing my code.

Regarding creating the kthread inside the mutex-locked section: the code is executed in
the context of an unlocked ioctl, so I assume we could otherwise have a race condition if
multiple threads in the same task try to create a KNI interface simultaneously.
My kernel programming knowledge is limited so correct me if I'm wrong.

Regards,
Vlad

-Original Message-
From: Ferruh Yigit [mailto:ferruh.yi...@intel.com] 
Sent: Monday, September 12, 2016 8:08 PM
To: Vladyslav Buslov; dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH v2 2/2] kni: add support for core_id param in 
single threaded mode

On 9/10/2016 2:50 PM, Vladyslav Buslov wrote:
> Allow binding KNI thread to specific core in single threaded mode by 
> setting core_id and force_bind config parameters.
> 
> Signed-off-by: Vladyslav Buslov 
> ---
> 
> v2:
> * Fixed formatting.
> * Refactored kthread create/bind functionality into separate function.
> * Moved thread mode print into kni_init.
> * Added short description to KNI Programmer's Gude doc.
> * Fixed outdated mbuf processing description in KNI Programmer's Gude doc.
> 
>  doc/guides/prog_guide/kernel_nic_interface.rst |  5 +-
>  lib/librte_eal/linuxapp/kni/kni_misc.c | 72 
> +-
>  2 files changed, 51 insertions(+), 26 deletions(-)
> 
> diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst 
> b/doc/guides/prog_guide/kernel_nic_interface.rst
> index fac1960..0fdc307 100644
> --- a/doc/guides/prog_guide/kernel_nic_interface.rst
> +++ b/doc/guides/prog_guide/kernel_nic_interface.rst
> @@ -102,6 +102,9 @@ Refer to rte_kni_common.h in the DPDK source code for 
> more details.
>  
>  The physical addresses will be re-mapped into the kernel address space and 
> stored in separate KNI contexts.
>  
> +The affinity of kernel RX thread (both single and multi-threaded 
> +modes) is controlled by force_bind and core_id config parameters.
> +
>  The KNI interfaces can be deleted by a DPDK application dynamically after 
> being created.
>  Furthermore, all those KNI interfaces not deleted will be deleted on 
> the release operation  of the miscellaneous device (when the DPDK application 
> is closed).
> @@ -128,7 +131,7 @@ Use Case: Ingress
>  On the DPDK RX side, the mbuf is allocated by the PMD in the RX thread 
> context.
>  This thread will enqueue the mbuf in the rx_q FIFO.
>  The KNI thread will poll all KNI active devices for the rx_q.
> -If an mbuf is dequeued, it will be converted to a sk_buff and sent to the 
> net stack via netif_rx().
> +If an mbuf is dequeued, it will be converted to a sk_buff and sent to the 
> net stack via netif_rx_ni().

Although this is small and correct modification, unrelated to this patch. It is 
good to remove from this patch, and feel free to create a separate patch if you 
want.

>  The dequeued mbuf must be freed, so the same pointer is sent back in the 
> free_q FIFO.
>  
>  The RX thread, in the same main loop, polls this FIFO and frees the mbuf 
> after dequeuing it.
> diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
> b/lib/librte_eal/linuxapp/kni/kni_misc.c
> index 5e7cf21..c79f5a8 100644
> --- a/lib/librte_eal/linuxapp/kni/kni_misc.c
> +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
> @@ -172,6 +172,11 @@ kni_init(void)
>   return -EINVAL;
>   }
>  
> + if (multiple_kthread_on == 0)
> + KNI_PRINT("Single kernel thread for all KNI devices\n");
> + else
> + KNI_PRINT("Multiple kernel thread mode enabled\n");
> +

Instead of fixing these in a second patch, why not do the correct one in first 
patch? Or I think it is better to have a single patch instead of two. What 
about squashing second patch into first?

>  #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
>   rc = register_pernet_subsys(&kni_net_ops);
>  #else
> @@ -240,12 +245,6 @@ kni_open(struct inode *inode, struct file *file)
>   if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
>   return -EBUSY;
>  
> - /* Create kernel thread for single mode */
> - if (multiple_kthread_on == 0)
> - KNI_PRINT("Single kernel thread for all KNI devices\n");
> - else
> - KNI_PRINT("Multiple kernel thread mode enabled\n");
> -
>   file->private_data = get_net(net);
>   KNI_PRINT("/dev/kni opened\n");
>  
> @@ -391,6 +390,32 @@ kni_check_param(struct kni_dev *kni, struct 
> rte_kni_device_info *dev)
>   return 0;
>  }
>  
> +__printf(5, 6) static struct task_struct * kni_run_thread(int 
> +(*threadfn)(void *data),
> + void *data,
> + uint8_t force_bind,
> + unsigned core_id,
> + co

[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-20 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode
by setting core_id and force_bind config parameters.

Signed-off-by: Vladyslav Buslov 
---
 doc/guides/prog_guide/kernel_nic_interface.rst |   3 +
 lib/librte_eal/linuxapp/kni/kni_misc.c | 101 -
 2 files changed, 67 insertions(+), 37 deletions(-)

diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst 
b/doc/guides/prog_guide/kernel_nic_interface.rst
index fac1960..eb16e2e 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -102,6 +102,9 @@ Refer to rte_kni_common.h in the DPDK source code for more 
details.

 The physical addresses will be re-mapped into the kernel address space and 
stored in separate KNI contexts.

+The affinity of kernel RX thread (both single and multi-threaded modes) is 
controlled by force_bind and
+core_id config parameters.
+
 The KNI interfaces can be deleted by a DPDK application dynamically after 
being created.
 Furthermore, all those KNI interfaces not deleted will be deleted on the 
release operation
 of the miscellaneous device (when the DPDK application is closed).
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca..056e13b 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -100,6 +101,7 @@ static int kni_net_id;

 struct kni_net {
unsigned long device_in_use; /* device in use flag */
+   struct mutex kni_kthread_lock;
struct task_struct *kni_kthread;
struct rw_semaphore kni_list_lock;
struct list_head kni_list_head;
@@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net *net)
/* Clear the bit of device in use */
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

+   mutex_init(&knet->kni_kthread_lock);
+   knet->kni_kthread = NULL;
+
init_rwsem(&knet->kni_list_lock);
INIT_LIST_HEAD(&knet->kni_list_head);

@@ -139,9 +144,9 @@ static int __net_init kni_init_net(struct net *net)

 static void __net_exit kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
-
+   mutex_destroy(&knet->kni_kthread_lock);
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
kfree(knet);
 #endif
 }
@@ -167,6 +172,11 @@ kni_init(void)
return -EINVAL;
}

+   if (multiple_kthread_on == 0)
+   KNI_PRINT("Single kernel thread for all KNI devices\n");
+   else
+   KNI_PRINT("Multiple kernel thread mode enabled\n");
+
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
rc = register_pernet_subsys(&kni_net_ops);
 #else
@@ -235,19 +245,6 @@ kni_open(struct inode *inode, struct file *file)
if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
return -EBUSY;

-   /* Create kernel thread for single mode */
-   if (multiple_kthread_on == 0) {
-   KNI_PRINT("Single kernel thread for all KNI devices\n");
-   /* Create kernel thread for RX */
-   knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-   "kni_single");
-   if (IS_ERR(knet->kni_kthread)) {
-   KNI_ERR("Unable to create kernel threaed\n");
-   return PTR_ERR(knet->kni_kthread);
-   }
-   } else
-   KNI_PRINT("Multiple kernel thread mode enabled\n");
-
file->private_data = get_net(net);
KNI_PRINT("/dev/kni opened\n");

@@ -263,9 +260,13 @@ kni_release(struct inode *inode, struct file *file)

/* Stop kernel thread for single mode */
if (multiple_kthread_on == 0) {
+   mutex_lock(&knet->kni_kthread_lock);
/* Stop kernel thread */
-   kthread_stop(knet->kni_kthread);
-   knet->kni_kthread = NULL;
+   if (knet->kni_kthread != NULL) {
+   kthread_stop(knet->kni_kthread);
+   knet->kni_kthread = NULL;
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&knet->kni_list_lock);
@@ -390,6 +391,47 @@ kni_check_param(struct kni_dev *kni, struct 
rte_kni_device_info *dev)
 }

 static int
+kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
+{
+   /**
+* Create a new kernel thread for multiple mode, set its core affinity,
+* and finally wake it up.
+*/
+   if (multiple_kthread_on) {
+   kni->pthread = kthread_cre

[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-21 Thread Vladyslav Buslov
> On 9/20/2016 7:36 PM, Stephen Hemminger wrote:
> > On Tue, 20 Sep 2016 21:16:37 +0300
> > Vladyslav Buslov  wrote:
> >
> >> @@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net *net)
> >>/* Clear the bit of device in use */
> >>clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
> >>
> >> +  mutex_init(&knet->kni_kthread_lock);
> >> +  knet->kni_kthread = NULL;
> >> +
> >
> > Why not just use kzalloc() here? You would still need to init the
> > mutex etc, but it would be safer.
> >
> 
> Hi Vladyslav,
> 
> This is good suggestion, if you send a new version for this update, please
> keep my Ack.
> 
> Thanks,
> ferruh

Hi Ferruh, Stephen,

Could you please elaborate on using kzalloc for this code.
Currently kni_thread_lock is value member of kni_net structure and never 
explicitly allocated or deallocated.
Kni_kthread is pointer member of kni_net and is implicitly created and 
destroyed by kthread_run, kthread_stop functions.
Which one of those do you suggest to allocate with kzalloc() and how would it 
improve safety?

Sorry for not being able to follow your code review but my Kernel programming 
experience is somewhat limited.

Thanks,
Vladyslav


[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-22 Thread Vladyslav Buslov
> On Wed, 21 Sep 2016 19:23:47 +0100
> Ferruh Yigit  wrote:
> 
> > On 9/21/2016 6:15 PM, Vladyslav Buslov wrote:
> > >> On 9/20/2016 7:36 PM, Stephen Hemminger wrote:
> > >>> On Tue, 20 Sep 2016 21:16:37 +0300 Vladyslav Buslov
> > >>>  wrote:
> > >>>
> > >>>> @@ -123,6 +125,9 @@ static int __net_init kni_init_net(struct net
> *net)
> > >>>>/* Clear the bit of device in use */
> > >>>>clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet-
> >device_in_use);
> > >>>>
> > >>>> +  mutex_init(&knet->kni_kthread_lock);
> > >>>> +  knet->kni_kthread = NULL;
> > >>>> +
> > >>>
> > >>> Why not just use kzalloc() here? You would still need to init the
> > >>> mutex etc, but it would be safer.
> > >>>
> > >>
> > >> Hi Vladyslav,
> > >>
> > >> This is good suggestion, if you send a new version for this update,
> > >> please keep my Ack.
> > >>
> > >> Thanks,
> > >> ferruh
> > >
> > > Hi Ferruh, Stephen,
> > >
> > > Could you please elaborate on using kzalloc for this code.
> > > Currently kni_thread_lock is value member of kni_net structure and
> never explicitly allocated or deallocated.
> > > Kni_kthread is pointer member of kni_net and is implicitly created and
> destroyed by kthread_run, kthread_stop functions.
> > > Which one of those do you suggest to allocate with kzalloc() and how
> would it improve safety?
> > >
> >
> > Currently:
> >
> > kni_init_net() {
> > knet = kmalloc(..);
> > ..
> > mutex_init(..);
> > knet->kni_thread = NULL;
> > }
> >
> > If you allocate knet via kzalloc(), no need to assign NULL to
> > kni_thread. Also this is safer because any uninitialized knet field
> > will be zero instead of random value.
> >
> > This is what I understood at least J
> 
> Also any additional fields in knet will be set, avoiding any present or future
> uninitialized memory bugs.
> 

What about net_generic which is used instead of kmalloc in KNI code for newer 
kernels?
Quick skim through Linux code indicates that it doesn't zero out memory and 
people memset it manually.
Just add memset(0) in HAVE_SIMPLIFIED_PERNET_OPERATIONS code?


[dpdk-dev] [PATCH] kni: add support for core_id param in single threaded mode

2016-09-24 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode
by setting core_id and force_bind config parameters.

Signed-off-by: Vladyslav Buslov 
Acked-by: Ferruh Yigit 
---
 doc/guides/prog_guide/kernel_nic_interface.rst |   3 +
 lib/librte_eal/linuxapp/kni/kni_misc.c | 103 -
 2 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst 
b/doc/guides/prog_guide/kernel_nic_interface.rst
index fac1960..eb16e2e 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -102,6 +102,9 @@ Refer to rte_kni_common.h in the DPDK source code for more 
details.

 The physical addresses will be re-mapped into the kernel address space and 
stored in separate KNI contexts.

+The affinity of kernel RX thread (both single and multi-threaded modes) is 
controlled by force_bind and
+core_id config parameters.
+
 The KNI interfaces can be deleted by a DPDK application dynamically after 
being created.
 Furthermore, all those KNI interfaces not deleted will be deleted on the 
release operation
 of the miscellaneous device (when the DPDK application is closed).
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca..7ef2d3e 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -100,6 +101,7 @@ static int kni_net_id;

 struct kni_net {
unsigned long device_in_use; /* device in use flag */
+   struct mutex kni_kthread_lock;
struct task_struct *kni_kthread;
struct rw_semaphore kni_list_lock;
struct list_head kni_list_head;
@@ -109,11 +111,12 @@ static int __net_init kni_init_net(struct net *net)
 {
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
+   memset(knet, 0, sizeof(*knet));
 #else
struct kni_net *knet;
int ret;

-   knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL);
+   knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
if (!knet) {
ret = -ENOMEM;
return ret;
@@ -123,6 +126,8 @@ static int __net_init kni_init_net(struct net *net)
/* Clear the bit of device in use */
clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

+   mutex_init(&knet->kni_kthread_lock);
+
init_rwsem(&knet->kni_list_lock);
INIT_LIST_HEAD(&knet->kni_list_head);

@@ -139,9 +144,9 @@ static int __net_init kni_init_net(struct net *net)

 static void __net_exit kni_exit_net(struct net *net)
 {
-#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
struct kni_net *knet = net_generic(net, kni_net_id);
-
+   mutex_destroy(&knet->kni_kthread_lock);
+#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
kfree(knet);
 #endif
 }
@@ -167,6 +172,11 @@ kni_init(void)
return -EINVAL;
}

+   if (multiple_kthread_on == 0)
+   KNI_PRINT("Single kernel thread for all KNI devices\n");
+   else
+   KNI_PRINT("Multiple kernel thread mode enabled\n");
+
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
rc = register_pernet_subsys(&kni_net_ops);
 #else
@@ -235,19 +245,6 @@ kni_open(struct inode *inode, struct file *file)
if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
return -EBUSY;

-   /* Create kernel thread for single mode */
-   if (multiple_kthread_on == 0) {
-   KNI_PRINT("Single kernel thread for all KNI devices\n");
-   /* Create kernel thread for RX */
-   knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
-   "kni_single");
-   if (IS_ERR(knet->kni_kthread)) {
-   KNI_ERR("Unable to create kernel threaed\n");
-   return PTR_ERR(knet->kni_kthread);
-   }
-   } else
-   KNI_PRINT("Multiple kernel thread mode enabled\n");
-
file->private_data = get_net(net);
KNI_PRINT("/dev/kni opened\n");

@@ -263,9 +260,13 @@ kni_release(struct inode *inode, struct file *file)

/* Stop kernel thread for single mode */
if (multiple_kthread_on == 0) {
+   mutex_lock(&knet->kni_kthread_lock);
/* Stop kernel thread */
-   kthread_stop(knet->kni_kthread);
-   knet->kni_kthread = NULL;
+   if (knet->kni_kthread != NULL) {
+   kthread_stop(knet->kni_kthread);
+   knet->kni_kthread = NULL;
+   }
+   mutex_unlock(&knet->kni_kthread_lock);
}

down_write(&

Re: [dpdk-dev] [PATCH] net/i40e: add packet prefetch

2017-04-06 Thread Vladyslav Buslov
Ferruh,

In our case patch significantly improves application performance. (~40% more 
PPS on load balancer core)
Using DPDK examples I can only reproduce perf improvements with similar design 
apps like load_balancer.
With applications that send and free packets on the same core where they are
received, performance is not improved.

It seems that this patch doesn't do anything (or even actually slightly degrades
performance) for run-to-completion apps, so we will have to continue maintaining
it as part of our internal branch.

Regards,
Vlad

> -Original Message-
> From: Ferruh Yigit [mailto:ferruh.yi...@intel.com]
> Sent: Monday, April 03, 2017 1:31 PM
> To: Pei, Yulong; Vladyslav Buslov; Zhang, Helin; Wu, Jingjing
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/i40e: add packet prefetch
> 
> On 4/1/2017 3:01 AM, Pei, Yulong wrote:
> > Hi All
> >
> > In Non-vector mode, without this patch, single core performance can
> > reach 37.576Mpps with 64Byte packet, But after applied this patch , single
> core performance downgrade to 34.343Mpps with 64Byte packet.
> 
> Thanks Yulong for the test.
> 
> Vladyslav,
> 
> Is above test results match with your result?
> 
> If there is a degradation, this patch should not be merged.
> 
> Thanks,
> ferruh
> 
> >
> > Best Regards
> > Yulong Pei
> >
> > -Original Message-
> > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Vladyslav Buslov
> > Sent: Wednesday, March 1, 2017 6:57 PM
> > To: Zhang, Helin ; Wu, Jingjing
> > ; Yigit, Ferruh 
> > Cc: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH] net/i40e: add packet prefetch
> >
> > Prefetch both cache lines of mbuf and first cache line of payload if
> CONFIG_RTE_PMD_PACKET_PREFETCH is set.
> >
> > Signed-off-by: Vladyslav Buslov 
> <..>


[dpdk-dev] Putting packets into specific rx-queue based on VLAN id

2015-12-14 Thread Vladyslav Buslov
Hello,

I've been researching how to configure FlowDirector to split packets based on 
just VLAN id.
It seems that ixgbe and i40 drivers support only specific l3 flow options. 
(Frag/nonfrag IPV4 and IPV6 flows)
Am I missing something? Are there any other APIs to classify packets into rx-queues
based on VLAN id only?

Thanks,
Vlad Buslov


[dpdk-dev] [PATCH] Add missing prefetches to i40e bulk rx path

2016-07-14 Thread Vladyslav Buslov
Hello,

Recently I tried to use bulk rx function to reduce CPU usage of 
rte_eth_rx_burst.
However application performance with i40e_recv_pkts_bulk_alloc was 
significantly worse than with i40e_recv_pkts. (3m less PPS, 0.5 IPC on 
receiving core)

Quick investigation revealed two problems:
 - First payload cacheline is prefetched in i40e_recv_pkts but not in 
i40e_recv_pkts_bulk_alloc.
 - Only the first cache line of the next mbuf is prefetched during mbuf init in 
i40e_rx_alloc_bufs. This causes a cache miss when setting the 'next' field 
(located in mbuf cacheline1) to NULL.

Fixing these two small issues significantly reduced CPU time spent in 
rte_eth_rx_burst and improved PPS compared to both original 
i40e_recv_pkts_bulk_alloc and i40e_recv_pkts.

Regards,

Vladyslav Buslov (1):
  net/i40e: add additional prefetch instructions for bulk rx

 drivers/net/i40e/i40e_rxtx.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

-- 
2.8.3



[dpdk-dev] [PATCH] net/i40e: add additional prefetch instructions for bulk rx

2016-07-14 Thread Vladyslav Buslov
Added prefetch of first packet payload cacheline in i40e_rx_scan_hw_ring
Added prefetch of second mbuf cacheline in i40e_rx_alloc_bufs

Signed-off-by: Vladyslav Buslov 
---
 drivers/net/i40e/i40e_rxtx.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index d3cfb98..e493fb4 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -1003,6 +1003,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq)
/* Translate descriptor info to mbuf parameters */
for (j = 0; j < nb_dd; j++) {
mb = rxep[j].mbuf;
+   rte_prefetch0(RTE_PTR_ADD(mb->buf_addr, 
RTE_PKTMBUF_HEADROOM));
qword1 = rte_le_to_cpu_64(\
rxdp[j].wb.qword1.status_error_len);
pkt_len = ((qword1 & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
@@ -1086,9 +1087,11 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq)

rxdp = &rxq->rx_ring[alloc_idx];
for (i = 0; i < rxq->rx_free_thresh; i++) {
-   if (likely(i < (rxq->rx_free_thresh - 1)))
+   if (likely(i < (rxq->rx_free_thresh - 1))) {
/* Prefetch next mbuf */
-   rte_prefetch0(rxep[i + 1].mbuf);
+   rte_prefetch0(&rxep[i + 1].mbuf->cacheline0);
+   rte_prefetch0(&rxep[i + 1].mbuf->cacheline1);
+   }

mb = rxep[i].mbuf;
rte_mbuf_refcnt_set(mb, 1);
-- 
2.8.3



[dpdk-dev] [PATCH] bonding: fix incorrect loop boundary condition

2016-04-01 Thread Vladyslav Buslov
Loop that calculates total number of tx descriptors in slave tx queues should 
iterate up to nb_tx_queues, not nb_rx_queues.

Signed-off-by: Vladyslav Buslov 
---
 drivers/net/bonding/rte_eth_bond_8023ad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c 
b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 1b7e93a..c4448b7 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -890,7 +890,7 @@ bond_mode_8023ad_activate_slave(struct rte_eth_dev 
*bond_dev, uint8_t slave_id)
/* The size of the mempool should be at least:
 * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */
total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS;
-   for (q_id = 0; q_id < bond_dev->data->nb_rx_queues; q_id++) {
+   for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) {
bd_tx_q = (struct 
bond_tx_queue*)bond_dev->data->tx_queues[q_id];
total_tx_desc += bd_tx_q->nb_tx_desc;
}
-- 
2.8.0



[dpdk-dev] [PATCH] Performance optimization of ACL build process

2016-08-16 Thread Vladyslav Buslov
Hello,

In our application we need to be able to allocate tens of thousands of ACLs at 
runtime.
Testing revealed significant performance problems. We were able to track them 
to memset in calloc function which caused multiple page walks per invocation.
Modifying tb_mem to use huge page memory resulted in a ~2x performance gain for that 
operation.

Regards,
Vladyslav

Vladyslav Buslov (1):
  acl: use rte_calloc for temporary memory allocation

 lib/librte_acl/tb_mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

-- 
2.8.3



[dpdk-dev] [PATCH] acl: use rte_calloc for temporary memory allocation

2016-08-16 Thread Vladyslav Buslov
Acl build process uses significant amount of memory
which degrades performance by causing page walks when memory
is allocated on regular heap using libc calloc.

This commit changes tb_mem to allocate temporary memory on huge pages
with rte_calloc.

Signed-off-by: Vladyslav Buslov 
---
 lib/librte_acl/tb_mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_acl/tb_mem.c b/lib/librte_acl/tb_mem.c
index 157e608..c373673 100644
--- a/lib/librte_acl/tb_mem.c
+++ b/lib/librte_acl/tb_mem.c
@@ -52,7 +52,7 @@ tb_pool(struct tb_mem_pool *pool, size_t sz)
size_t size;

size = sz + pool->alignment - 1;
-   block = calloc(1, size + sizeof(*pool->block));
+   block = rte_calloc("ACL_TBMEM_BLOCK", 1, size + sizeof(*pool->block), 
0);
if (block == NULL) {
RTE_LOG(ERR, MALLOC, "%s(%zu)\n failed, currently allocated "
"by pool: %zu bytes\n", __func__, sz, pool->alloc);
-- 
2.8.3



[dpdk-dev] [PATCH] kni: add module parameter 'bind_to_core'

2016-08-16 Thread Vladyslav Buslov
Allow binding KNI thread to specific core in single threaded mode.

Signed-off-by: Vladyslav Buslov 
---
 lib/librte_eal/linuxapp/kni/kni_misc.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c 
b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 59d15ca..e98f4a9 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -93,6 +93,7 @@ static char *lo_mode = NULL;
 /* Kernel thread mode */
 static char *kthread_mode = NULL;
 static unsigned multiple_kthread_on = 0;
+static int bind_to_core = -1;

 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */

@@ -239,12 +240,17 @@ kni_open(struct inode *inode, struct file *file)
if (multiple_kthread_on == 0) {
KNI_PRINT("Single kernel thread for all KNI devices\n");
/* Create kernel thread for RX */
-   knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet,
+   knet->kni_kthread = kthread_create(kni_thread_single, (void 
*)knet,
"kni_single");
if (IS_ERR(knet->kni_kthread)) {
KNI_ERR("Unable to create kernel threaed\n");
return PTR_ERR(knet->kni_kthread);
}
+   if (bind_to_core >= 0) {
+   KNI_PRINT("Bind main thread to core %d\n", 
bind_to_core);
+   kthread_bind(knet->kni_kthread, bind_to_core);
+   }
+   wake_up_process(knet->kni_kthread);
} else
KNI_PRINT("Multiple kernel thread mode enabled\n");

@@ -698,3 +704,8 @@ MODULE_PARM_DESC(kthread_mode,
 "multiple  Multiple kernel thread mode enabled.\n"
 "\n"
 );
+
+module_param(bind_to_core, int, S_IRUGO);
+MODULE_PARM_DESC(bind_to_core,
+"Bind KNI main kernel thread to specific core (default=-1(disabled)):\n"
+);
\ No newline at end of file
-- 
2.8.3



[dpdk-dev] [PATCH] kni: add module parameter 'bind_to_core'

2016-08-28 Thread Vladyslav Buslov
Hi Ferruh,

I agree that your solution is more flexible.
I'll work on moving single thread creation to kni_ioctl_create next week and 
get back to you with results.

Regards,
Vladyslav

-Original Message-
From: Ferruh Yigit [mailto:ferruh.yi...@intel.com] 
Sent: Thursday, August 25, 2016 5:47 PM
To: Vladyslav Buslov
Cc: dev at dpdk.org
Subject: Re: [PATCH] kni: add module parameter 'bind_to_core'

Hi Vladyslav,

On 8/16/2016 7:24 PM, Vladyslav Buslov wrote:
> Allow binding KNI thread to specific core in single threaded mode.

I think this is good idea.
But I am not sure about making this a module parameter, setting core can be 
more dynamic.

There is already a kni->core_id field, which is only used for multiple_kthread 
mode.
What do you think moving single thread creation into kni_ioctl_create() and use 
first kni->core_id to bind the thread? If there is no
kni->core_id set, it will behave as it is now.

> 
> Signed-off-by: Vladyslav Buslov 
> ---
<...>



[dpdk-dev] [PATCH] acl: use rte_calloc for temporary memory allocation

2016-08-31 Thread Vladyslav Buslov
Hi Konstantin,

Thanks for your feedback.

Would you accept this change as config file compile-time parameter with libc 
calloc as default?
It is one line change only so it is easy to ifdef.

Regards,
Vlad

-Original Message-
From: Ananyev, Konstantin [mailto:konstantin.anan...@intel.com] 
Sent: Wednesday, August 31, 2016 4:28 AM
To: Vladyslav Buslov
Cc: dev at dpdk.org
Subject: RE: [PATCH] acl: use rte_calloc for temporary memory allocation

Hi Vladyslav,

> -Original Message-
> From: Vladyslav Buslov [mailto:vladyslav.buslov at harmonicinc.com]
> Sent: Tuesday, August 16, 2016 3:01 PM
> To: Ananyev, Konstantin 
> Cc: dev at dpdk.org
> Subject: [PATCH] acl: use rte_calloc for temporary memory allocation
> 
> Acl build process uses significant amount of memory which degrades 
> performance by causing page walks when memory is allocated on regular heap 
> using libc calloc.
> 
> This commit changes tb_mem to allocate temporary memory on huge pages with 
> rte_calloc.

We deliberately used standard system memory allocation routines (calloc/free) 
here.
With current design build phase was never considered to be an 'RT' phase 
operation.
It is pretty cpu and memory expensive.
So if we'll use RTE memory for build phase it could easily consume all (or 
most) of it, and might cause DPDK process failure or degradation.
If you really feel that you (and other users) would benefit from using 
rte_calloc here (I personally still in doubt), then at least it should be a new 
field inside rte_acl_config, that would allow user to control that behavior.
Though, as I said above, librte_acl was never designed to ' to allocate tens of 
thousands of ACLs at runtime'.
To add the ability to add/delete rules at runtime while keeping lookup time 
reasonably low, some new approach needs to be introduced.  
Konstantin

> 
> Signed-off-by: Vladyslav Buslov 
> ---
>  lib/librte_acl/tb_mem.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/librte_acl/tb_mem.c b/lib/librte_acl/tb_mem.c index 
> 157e608..c373673 100644
> --- a/lib/librte_acl/tb_mem.c
> +++ b/lib/librte_acl/tb_mem.c
> @@ -52,7 +52,7 @@ tb_pool(struct tb_mem_pool *pool, size_t sz)
>   size_t size;
> 
>   size = sz + pool->alignment - 1;
> - block = calloc(1, size + sizeof(*pool->block));
> + block = rte_calloc("ACL_TBMEM_BLOCK", 1, size + 
> +sizeof(*pool->block), 0);
>   if (block == NULL) {
>   RTE_LOG(ERR, MALLOC, "%s(%zu)\n failed, currently allocated "
>   "by pool: %zu bytes\n", __func__, sz, pool->alloc);
> --
> 2.8.3



[dpdk-dev] [PATCH] net/i40e: add additional prefetch instructions for bulk rx

2016-11-15 Thread Vladyslav Buslov
> -Original Message-
> From: Ferruh Yigit [mailto:ferruh.yigit at intel.com]
> Sent: Tuesday, November 15, 2016 2:19 PM
> To: Ananyev, Konstantin; Richardson, Bruce
> Cc: Vladyslav Buslov; Wu, Jingjing; Zhang, Helin; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> instructions for bulk rx
> 
> On 10/13/2016 11:30 AM, Ananyev, Konstantin wrote:
> 
> <...>
> 
> >>>>
> >>>> Actually I can see some valid use cases where it is beneficial to have 
> >>>> this
> prefetch in driver.
> >>>> In our sw distributor case it is trivial to just prefetch next packet on
> each iteration because packets are processed one by one.
> >>>> However when we move this functionality to hw by means of
> >>>> RSS/vfunction/FlowDirector(our long term goal) worker threads will
> >> receive
> >>>> packets directly from rx queues of NIC.
> >>>> First operation of worker thread is to perform bulk lookup in hash
> >>>> table by destination MAC. This will cause cache miss on accessing
> >> each
> >>>> eth header and can't be easily mitigated in application code.
> >>>> I assume it is ubiquitous use case for DPDK.
> >>>
> >>> Yes it is a quite common use-case.
> >>> Though in many cases it is possible to reorder user code to hide (or
> minimize) that data-access latency.
> >>> From other side there are scenarios where this prefetch is excessive and
> can cause some drop in performance.
> >>> Again, as I know, none of PMDs for Intel devices prefetches packet's
> data in  simple (single segment) RX mode.
> >>> Another thing that some people may argue then - why only one cache
> >>> line is prefetched, in some use-cases might need to look at 2-nd one.
> >>>
> >> There is a build-time config setting for this behaviour for exactly
> >> the reasons called out here - in some apps you get a benefit, in
> >> others you see a perf hit. The default is "on", which makes sense for most
> cases, I think.
> >> From common_base:
> >>
> >> CONFIG_RTE_PMD_PACKET_PREFETCH=y$
> >
> > Yes, but right now i40e and ixgbe non-scattered RX (both vector and scalar)
> just ignore that flag.
> > Though yes, might be a good thing to make them to obey that flag
> properly.
> 
> Hi Vladyslav,
> 
> According Konstantin's comment, what do you think updating patch to do
> prefetch within CONFIG_RTE_PMD_PACKET_PREFETCH ifdef?
> 
> But since config option is enabled by default, performance concern is still
> valid and needs to be investigated.
> 
> Thanks,
> ferruh

Hi Ferruh,

I'll update my patch according to code review suggestions.

Regards,
Vlad


[dpdk-dev] [PATCH] net/i40e: add additional prefetch instructions for bulk rx

2016-10-10 Thread Vladyslav Buslov
> -Original Message-
> From: Wu, Jingjing [mailto:jingjing.wu at intel.com]
> Sent: Monday, October 10, 2016 4:26 PM
> To: Yigit, Ferruh; Vladyslav Buslov; Zhang, Helin
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> instructions for bulk rx
> 
> 
> 
> > -Original Message-
> > From: Yigit, Ferruh
> > Sent: Wednesday, September 14, 2016 9:25 PM
> > To: Vladyslav Buslov ; Zhang, Helin
> > ; Wu, Jingjing 
> > Cc: dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> > instructions for bulk rx
> >
> > On 7/14/2016 6:27 PM, Vladyslav Buslov wrote:
> > > Added prefetch of first packet payload cacheline in
> > > i40e_rx_scan_hw_ring Added prefetch of second mbuf cacheline in
> > > i40e_rx_alloc_bufs
> > >
> > > Signed-off-by: Vladyslav Buslov 
> > > ---
> > >  drivers/net/i40e/i40e_rxtx.c | 7 +--
> > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > b/drivers/net/i40e/i40e_rxtx.c index d3cfb98..e493fb4 100644
> > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > @@ -1003,6 +1003,7 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue
> *rxq)
> > > /* Translate descriptor info to mbuf parameters */
> > > for (j = 0; j < nb_dd; j++) {
> > > mb = rxep[j].mbuf;
> > > +   rte_prefetch0(RTE_PTR_ADD(mb->buf_addr,
> > RTE_PKTMBUF_HEADROOM));
> 
> Why did prefetch here? I think if application need to deal with packet, it is
> more suitable to put it in application.
> 
> > > qword1 = rte_le_to_cpu_64(\
> > > rxdp[j].wb.qword1.status_error_len);
> > > pkt_len = ((qword1 &
> > I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
> > > @@ -1086,9 +1087,11 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue
> *rxq)
> > >
> > > rxdp = &rxq->rx_ring[alloc_idx];
> > > for (i = 0; i < rxq->rx_free_thresh; i++) {
> > > -   if (likely(i < (rxq->rx_free_thresh - 1)))
> > > +   if (likely(i < (rxq->rx_free_thresh - 1))) {
> > > /* Prefetch next mbuf */
> > > -   rte_prefetch0(rxep[i + 1].mbuf);
> > > +   rte_prefetch0(&rxep[i + 1].mbuf->cacheline0);
> > > +   rte_prefetch0(&rxep[i + 1].mbuf->cacheline1);
> > > +   }
> Agree with this change. And when I test it by testpmd with iofwd, no
> performance increase is observed but minor decrease.
> Can you share with us when it will benefit the performance in your scenario?
> 
> 
> Thanks
> Jingjing

Hello Jingjing,

Thanks for code review.

My use case: We have simple distributor thread that receives packets from port 
and distributes them among worker threads according to VLAN and MAC address 
hash. 

While working on performance optimization we determined that most of CPU usage 
of this thread is in DPDK.
As an optimization we decided to switch to the rx burst alloc function; however, 
that caused additional performance degradation compared to scatter rx mode.
In profiler two major culprits were:
  1. Access to packet data Eth header in application code. (cache miss)
  2. Setting next packet descriptor field to NULL in DPDK i40e_rx_alloc_bufs 
code. (this field is in second descriptor cache line that was not prefetched)
After applying my fixes performance improved compared to scatter rx mode.

I assumed that prefetch of first cache line of packet data belongs to DPDK 
because it is done in scatter rx mode. (in i40e_recv_scattered_pkts)
It can be moved to application side but IMO it is better to be consistent 
across all rx modes.

Regards,
Vladyslav


[dpdk-dev] [PATCH] net/i40e: add additional prefetch instructions for bulk rx

2016-10-11 Thread Vladyslav Buslov
Hi Konstantin,

> -Original Message-
> From: Ananyev, Konstantin [mailto:konstantin.ananyev at intel.com]
> Sent: Tuesday, October 11, 2016 11:51 AM
> To: Vladyslav Buslov; Wu, Jingjing; Yigit, Ferruh; Zhang, Helin
> Cc: dev at dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> instructions for bulk rx
> 
> Hi Vladyslav,
> 
> > -Original Message-
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Vladyslav Buslov
> > Sent: Monday, October 10, 2016 6:06 PM
> > To: Wu, Jingjing ; Yigit, Ferruh
> > ; Zhang, Helin 
> > Cc: dev at dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> > instructions for bulk rx
> >
> > > -Original Message-
> > > From: Wu, Jingjing [mailto:jingjing.wu at intel.com]
> > > Sent: Monday, October 10, 2016 4:26 PM
> > > To: Yigit, Ferruh; Vladyslav Buslov; Zhang, Helin
> > > Cc: dev at dpdk.org
> > > Subject: RE: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> > > instructions for bulk rx
> > >
> > >
> > >
> > > > -Original Message-
> > > > From: Yigit, Ferruh
> > > > Sent: Wednesday, September 14, 2016 9:25 PM
> > > > To: Vladyslav Buslov ; Zhang,
> > > > Helin ; Wu, Jingjing
> > > > 
> > > > Cc: dev at dpdk.org
> > > > Subject: Re: [dpdk-dev] [PATCH] net/i40e: add additional prefetch
> > > > instructions for bulk rx
> > > >
> > > > On 7/14/2016 6:27 PM, Vladyslav Buslov wrote:
> > > > > Added prefetch of first packet payload cacheline in
> > > > > i40e_rx_scan_hw_ring Added prefetch of second mbuf cacheline in
> > > > > i40e_rx_alloc_bufs
> > > > >
> > > > > Signed-off-by: Vladyslav Buslov
> > > > > 
> > > > > ---
> > > > >  drivers/net/i40e/i40e_rxtx.c | 7 +--
> > > > >  1 file changed, 5 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > b/drivers/net/i40e/i40e_rxtx.c index d3cfb98..e493fb4 100644
> > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > @@ -1003,6 +1003,7 @@ i40e_rx_scan_hw_ring(struct
> i40e_rx_queue
> > > *rxq)
> > > > > /* Translate descriptor info to mbuf parameters */
> > > > > for (j = 0; j < nb_dd; j++) {
> > > > > mb = rxep[j].mbuf;
> > > > > +   rte_prefetch0(RTE_PTR_ADD(mb->buf_addr,
> > > > RTE_PKTMBUF_HEADROOM));
> > >
> > > Why did prefetch here? I think if application need to deal with
> > > packet, it is more suitable to put it in application.
> > >
> > > > > qword1 = rte_le_to_cpu_64(\
> > > > > rxdp[j].wb.qword1.status_error_len);
> > > > > pkt_len = ((qword1 &
> > > > I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
> > > > > @@ -1086,9 +1087,11 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue
> > > *rxq)
> > > > >
> > > > > rxdp = &rxq->rx_ring[alloc_idx];
> > > > > for (i = 0; i < rxq->rx_free_thresh; i++) {
> > > > > -   if (likely(i < (rxq->rx_free_thresh - 1)))
> > > > > +   if (likely(i < (rxq->rx_free_thresh - 1))) {
> > > > > /* Prefetch next mbuf */
> > > > > -   rte_prefetch0(rxep[i + 1].mbuf);
> > > > > +   rte_prefetch0(&rxep[i + 1].mbuf->cacheline0);
> > > > > +   rte_prefetch0(&rxep[i +
> > > > > + 1].mbuf->cacheline1);
> 
> I think there are rte_mbuf_prefetch_part1/part2 defined in rte_mbuf.h,
> specially for that case.

Thanks for pointing that out.
I'll submit new patch if you decide to move forward with this development.

> 
> > > > > +   }
> > > Agree with this change. And when I test it by testpmd with iofwd, no
> > > performance increase is observed but minor decrease.
> > > Can you share with us when it will benefit the performance in your
> scenario?
> > >
> > >
> > > Thanks
> > > Jingjing
> >
> > Hello Jingjing,
> >
>