[dpdk-dev] [PATCH] lib/librte_ip_frag: Fix typos

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_ip_frag/ip_frag_internal.c| 2 +-
 lib/librte_ip_frag/rte_ip_frag.h | 6 +++---
 lib/librte_ip_frag/rte_ipv4_reassembly.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/librte_ip_frag/ip_frag_internal.c 
b/lib/librte_ip_frag/ip_frag_internal.c
index 09b755c..46c44ff 100644
--- a/lib/librte_ip_frag/ip_frag_internal.c
+++ b/lib/librte_ip_frag/ip_frag_internal.c
@@ -160,7 +160,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct 
rte_ip_frag_death_row *dr,
}
 
/*
-* errorneous packet: either exceeed max allowed number of fragments,
+* erroneous packet: either exceed max allowed number of fragments,
 * or duplicate first/last fragment encountered.
 */
if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {
diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
index 35d0ecc..9f8cede 100644
--- a/lib/librte_ip_frag/rte_ip_frag.h
+++ b/lib/librte_ip_frag/rte_ip_frag.h
@@ -70,7 +70,7 @@ struct ip_frag {
struct rte_mbuf *mb;   /**< fragment mbuf */
 };
 
-/** @internal  to uniquely indetify fragmented 
datagram. */
+/** @internal  to uniquely identify fragmented 
datagram. */
 struct ip_frag_key {
uint64_t src_dst[4];  /**< src address, first 8 bytes used for IPv4 
*/
uint32_t id;   /**< dst address */
@@ -118,7 +118,7 @@ struct rte_ip_frag_tbl {
uint32_t entry_mask;  /**< hash value mask. */
uint32_t max_entries; /**< max entries allowed. */
uint32_t use_entries; /**< entries in use. */
-   uint32_t bucket_entries;  /**< hash assocaitivity. */
+   uint32_t bucket_entries;  /**< hash associativity. */
uint32_t nb_entries;  /**< total size of the table. */
uint32_t nb_buckets;  /**< num of associativity lines. 
*/
struct ip_frag_pkt *last; /**< last used entry. */
@@ -303,7 +303,7 @@ int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
  * @param ip_hdr
  *   Pointer to the IPV4 header inside the fragment.
  * @return
- *   Pointer to mbuf for reassebled packet, or NULL if:
+ *   Pointer to mbuf for reassembled packet, or NULL if:
  *   - an error occurred.
  *   - not all fragments of the packet are collected yet.
  */
diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c 
b/lib/librte_ip_frag/rte_ipv4_reassembly.c
index b133089..040bd70 100644
--- a/lib/librte_ip_frag/rte_ipv4_reassembly.c
+++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c
@@ -93,7 +93,7 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp)
/* update mbuf fields for reassembled packet. */
m->ol_flags |= PKT_TX_IP_CKSUM;
 
-   /* update ipv4 header for the reassmebled packet */
+   /* update ipv4 header for the reassembled packet */
ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
 
ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
@@ -117,7 +117,7 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp)
  * @param ip_hdr
  *   Pointer to the IPV4 header inside the fragment.
  * @return
- *   Pointer to mbuf for reassebled packet, or NULL if:
+ *   Pointer to mbuf for reassembled packet, or NULL if:
  *   - an error occurred.
  *   - not all fragments of the packet are collected yet.
  */
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_jobstats: Fix a typo

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_jobstats/rte_jobstats.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_jobstats/rte_jobstats.h 
b/lib/librte_jobstats/rte_jobstats.h
index 70e034c..e159156 100644
--- a/lib/librte_jobstats/rte_jobstats.h
+++ b/lib/librte_jobstats/rte_jobstats.h
@@ -313,7 +313,7 @@ rte_jobstats_set_max(struct rte_jobstats *job, uint64_t 
period);
  *
  * @param job
  *  Job object.
- * @param update_pedriod_cb
+ * @param update_period_cb
  *  Callback to set. If NULL restore default update function.
  */
 void
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_kni: Fix typos

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_kni/rte_kni.c  | 4 ++--
 lib/librte_kni/rte_kni.h  | 2 +-
 lib/librte_kni/rte_kni_fifo.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 5ee38e9..8eca8c0 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -340,7 +340,7 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
/* Get an available slot from the pool */
slot = kni_memzone_pool_alloc();
if (!slot) {
-   RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; 
increase the number of max_kni_ifaces(current %d) or release unusued ones.\n",
+   RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; 
increase the number of max_kni_ifaces(current %d) or release unused ones.\n",
kni_memzone_pool.max_ifaces);
return NULL;
}
@@ -659,7 +659,7 @@ kni_allocate_mbufs(struct rte_kni *kni)
phys[i] = va2pa(pkts[i]);
}
 
-   /* No pkt mbuf alocated */
+   /* No pkt mbuf allocated */
if (i <= 0)
return;
 
diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h
index d195079..d43b5b2 100644
--- a/lib/librte_kni/rte_kni.h
+++ b/lib/librte_kni/rte_kni.h
@@ -228,7 +228,7 @@ const char *rte_kni_get_name(const struct rte_kni *kni);
  * @param kni
  *  pointer to struct rte_kni.
  * @param ops
- *  ponter to struct rte_kni_ops.
+ *  pointer to struct rte_kni_ops.
  *
  * @return
  *  On success: 0
diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h
index c7cd5c2..6f2c3cb 100644
--- a/lib/librte_kni/rte_kni_fifo.h
+++ b/lib/librte_kni/rte_kni_fifo.h
@@ -73,7 +73,7 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned 
num)
 }
 
 /**
- * Get up to num elements from the fifo. Return the number actully read
+ * Get up to num elements from the fifo. Return the number actually read
  */
 static inline unsigned
 kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_mbuf: Fix typos in documentation

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_mbuf/rte_mbuf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 6d91f7d..7e326bb 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -850,10 +850,10 @@ rte_mbuf_sanity_check(const struct rte_mbuf *m, int 
is_header);
 } while (0)
 
 /**
- * Allocate an unitialized mbuf from mempool *mp*.
+ * Allocate an uninitialized mbuf from mempool *mp*.
  *
  * This function can be used by PMDs (especially in RX functions) to
- * allocate an unitialized mbuf. The driver is responsible of
+ * allocate an uninitialized mbuf. The driver is responsible of
  * initializing all the required fields. See rte_pktmbuf_reset().
  * For standard needs, prefer rte_pktmbuf_alloc().
  *
@@ -1778,7 +1778,7 @@ const void *__rte_pktmbuf_read(const struct rte_mbuf *m, 
uint32_t off,
  * @param len
  *   The amount of bytes to read.
  * @param buf
- *   The buffer where data is copied if it is not contigous in mbuf
+ *   The buffer where data is copied if it is not contiguous in mbuf
  *   data. Its length should be at least equal to the len parameter.
  * @return
  *   The pointer to the data, either in the mbuf if it is contiguous,
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_net: Fix typos in documentation

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_net/net_crc_neon.h | 2 +-
 lib/librte_net/net_crc_sse.h  | 2 +-
 lib/librte_net/rte_ip.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_net/net_crc_neon.h b/lib/librte_net/net_crc_neon.h
index 201b2c8..cb8f63d 100644
--- a/lib/librte_net/net_crc_neon.h
+++ b/lib/librte_net/net_crc_neon.h
@@ -64,7 +64,7 @@ struct crc_pmull_ctx crc16_ccitt_pmull __rte_aligned(16);
  * FOLD = XOR(T1, T2, DATA)
  *
  * @param data_block 16 byte data block
- * @param precomp precomputed rk1 constanst
+ * @param precomp precomputed rk1 constant
  * @param fold running 16 byte folded data
  *
  * @return New 16 byte folded data
diff --git a/lib/librte_net/net_crc_sse.h b/lib/librte_net/net_crc_sse.h
index ac93637..7eae147 100644
--- a/lib/librte_net/net_crc_sse.h
+++ b/lib/librte_net/net_crc_sse.h
@@ -66,7 +66,7 @@ struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq 
__rte_aligned(16);
  * @param data_block
  *   16 byte data block
  * @param precomp
- *   Precomputed rk1 constanst
+ *   Precomputed rk1 constant
  * @param fold
  *   Current16 byte folded data
  *
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 4491b86..73ec398 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -237,7 +237,7 @@ rte_raw_cksum(const void *buf, size_t len)
  * @param off
  *   The offset in bytes to start the checksum.
  * @param len
- *   The length in bytes of the data to ckecksum.
+ *   The length in bytes of the data to checksum.
  * @param cksum
  *   A pointer to the checksum, filled on success.
  * @return
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_pdump: Fix typos

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_pdump/rte_pdump.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index e6182d3..e1695de 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -225,7 +225,7 @@ pdump_tx(uint16_t port __rte_unused, uint16_t qidx 
__rte_unused,
 }
 
 static int
-pdump_regitser_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
+pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
struct rte_ring *ring, struct rte_mempool *mp,
uint16_t operation)
 {
@@ -279,7 +279,7 @@ pdump_regitser_rx_callbacks(uint16_t end_q, uint16_t port, 
uint16_t queue,
 }
 
 static int
-pdump_regitser_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
+pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
struct rte_ring *ring, struct rte_mempool *mp,
uint16_t operation)
 {
@@ -400,7 +400,7 @@ set_pdump_rxtx_cbs(struct pdump_request *p)
/* register RX callback */
if (flags & RTE_PDUMP_FLAG_RX) {
end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1;
-   ret = pdump_regitser_rx_callbacks(end_q, port, queue, ring, mp,
+   ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp,
operation);
if (ret < 0)
return ret;
@@ -409,7 +409,7 @@ set_pdump_rxtx_cbs(struct pdump_request *p)
/* register TX callback */
if (flags & RTE_PDUMP_FLAG_TX) {
end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1;
-   ret = pdump_regitser_tx_callbacks(end_q, port, queue, ring, mp,
+   ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp,
operation);
if (ret < 0)
return ret;
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_pipeline: Fix a typo

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_pipeline/rte_pipeline.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_pipeline/rte_pipeline.h 
b/lib/librte_pipeline/rte_pipeline.h
index f366348..fdc44a7 100644
--- a/lib/librte_pipeline/rte_pipeline.h
+++ b/lib/librte_pipeline/rte_pipeline.h
@@ -483,7 +483,7 @@ int rte_pipeline_table_entry_delete(struct rte_pipeline *p,
  * @param keys
  *   Array containing table entry keys
  * @param entries
- *   Array containung new contents for every table entry identified by key
+ *   Array containing new contents for every table entry identified by key
  * @param n_keys
  *   Number of keys to add
  * @param key_found
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_power: Fix typos

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_power/rte_power_acpi_cpufreq.c | 4 ++--
 lib/librte_power/rte_power_acpi_cpufreq.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_power/rte_power_acpi_cpufreq.c 
b/lib/librte_power/rte_power_acpi_cpufreq.c
index 01ac5ac..6b0cdb2 100644
--- a/lib/librte_power/rte_power_acpi_cpufreq.c
+++ b/lib/librte_power/rte_power_acpi_cpufreq.c
@@ -267,7 +267,7 @@ power_get_available_freqs(struct rte_power_info *pi)
}
 
ret = 0;
-   POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n",
+   POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
count, pi->lcore_id);
 out:
fclose(f);
@@ -359,7 +359,7 @@ rte_power_acpi_cpufreq_init(unsigned lcore_id)
}
 
RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
-   "power manamgement\n", lcore_id);
+   "power management\n", lcore_id);
rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
 
return 0;
diff --git a/lib/librte_power/rte_power_acpi_cpufreq.h 
b/lib/librte_power/rte_power_acpi_cpufreq.h
index eee0ca0..bc20dfd 100644
--- a/lib/librte_power/rte_power_acpi_cpufreq.h
+++ b/lib/librte_power/rte_power_acpi_cpufreq.h
@@ -180,7 +180,7 @@ int rte_power_acpi_cpufreq_freq_max(unsigned lcore_id);
  *
  * @return
  *  - 1 on success with frequency changed.
- *  - 0 on success without frequency chnaged.
+ *  - 0 on success without frequency changed.
  *  - Negative on error.
  */
 int rte_power_acpi_cpufreq_freq_min(unsigned lcore_id);
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_reorder: Fix a typo in documentation

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_reorder/rte_reorder.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h
index 4cd8de7..dc83f8e 100644
--- a/lib/librte_reorder/rte_reorder.h
+++ b/lib/librte_reorder/rte_reorder.h
@@ -147,9 +147,9 @@ rte_reorder_free(struct rte_reorder_buffer *b);
  *   -1 on error
  *   On error case, rte_errno will be set appropriately:
  *- ENOSPC - Cannot move existing mbufs from reorder buffer to accommodate
- *  ealry mbuf, but it can be accommodated by performing drain and then 
insert.
+ *  early mbuf, but it can be accommodated by performing drain and then 
insert.
  *- ERANGE - Too early or late mbuf which is vastly out of range of 
expected
- *  window should be ingnored without any handling.
+ *  window should be ignored without any handling.
  */
 int
 rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf);
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_sched: Fix typos

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_sched/rte_red.h   | 4 ++--
 lib/librte_sched/rte_sched.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h
index ca12227..6edf914 100644
--- a/lib/librte_sched/rte_red.h
+++ b/lib/librte_sched/rte_red.h
@@ -139,7 +139,7 @@ rte_red_config_init(struct rte_red_config *red_cfg,
 /**
  * @brief Generate random number for RED
  *
- * Implemenetation based on:
+ * Implementation based on:
  * 
http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
  *
  * 10 bit shift has been found through empirical tests (was 16).
@@ -200,7 +200,7 @@ __rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m)
 * Now using basic math we compute 2^n:
 *   2^(f+n) = 2^f * 2^n
 *   2^f - we use lookup table
-*   2^n - can be replaced with bit shift right oeprations
+*   2^n - can be replaced with bit shift right operations
 */
 
f = (n >> 6) & 0xf;
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index a2d0d68..7252f85 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -1020,7 +1020,7 @@ rte_sched_subport_read_stats(struct rte_sched_port *port,
memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats));
memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats));
 
-   /* Subport TC ovesubscription status */
+   /* Subport TC oversubscription status */
*tc_ov = s->tc_ov;
 
return 0;
-- 
2.7.4



[dpdk-dev] [PATCH] lib/librte_timer: Fix a typo

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_timer/rte_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 28decc3..88826f5 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -195,7 +195,7 @@ timer_set_running_state(struct rte_timer *tim)
 
 /*
  * Return a skiplist level for a new entry.
- * This probabalistically gives a level with p=1/4 that an entry at level n
+ * This probabilistically gives a level with p=1/4 that an entry at level n
  * will also appear at level n+1.
  */
 static uint32_t
-- 
2.7.4



[dpdk-dev] [PATCH] usertools/dpdk-devbind.py: Fix a typo

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 usertools/dpdk-devbind.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/usertools/dpdk-devbind.py b/usertools/dpdk-devbind.py
index a539995..f9f7aee 100755
--- a/usertools/dpdk-devbind.py
+++ b/usertools/dpdk-devbind.py
@@ -539,7 +539,7 @@ def bind_all(dev_list, driver, force=False):
 for d in dev_list:
 bind_one(d, driver, force)
 
-# For kenels < 3.15 when binding devices to a generic driver
+# For kernels < 3.15 when binding devices to a generic driver
 # (i.e. one that doesn't have a PCI ID table) using new_id, some devices
 # that are not bound to any other driver could be bound even if no one has
 # asked them to. hence, we check the list of drivers again, and see if
-- 
2.7.4



Re: [dpdk-dev] [PATCH 1/2] doc: update QEDE pmd nic guide

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 10:52 PM, Rasesh Mody wrote:
> Signed-off-by: Rasesh Mody 

Series applied to dpdk/master, thanks.


[dpdk-dev] [PATCH] lib/librte_eal: Fix typos

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_eal/common/eal_common_log.c | 2 +-
 lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h | 2 +-
 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h | 2 +-
 lib/librte_eal/common/include/rte_eal.h| 2 +-
 lib/librte_eal/common/include/rte_log.h| 2 +-
 lib/librte_eal/common/malloc_elem.c| 2 +-
 lib/librte_eal/common/rte_service.c| 2 +-
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 2 +-
 lib/librte_eal/linuxapp/eal/eal_timer.c| 2 +-
 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c  | 2 +-
 10 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/librte_eal/common/eal_common_log.c 
b/lib/librte_eal/common/eal_common_log.c
index be40413..e894b75 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -249,7 +249,7 @@ static const struct logtype logtype_strings[] = {
{RTE_LOGTYPE_USER8,  "user8"}
 };
 
-/* Logging should be first initialzer (before drivers and bus) */
+/* Logging should be first initializer (before drivers and bus) */
 RTE_INIT_PRIO(rte_log_init, 101);
 static void
 rte_log_init(void)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h 
b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index c3a2619..e4dafda 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -225,7 +225,7 @@ rte_memcpy_func(void *dst, const void *src, size_t n)
 * We split the remaining bytes (which will be less than 256) into
 * 64byte (2^6) chunks.
 * Using incrementing integers in the case labels of a switch statement
-* enourages the compiler to use a jump table. To get incrementing
+* encourages the compiler to use a jump table. To get incrementing
 * integers, we shift the 2 relevant bits to the LSB position to first
 * get decrementing integers, and then subtract.
 */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h 
b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
index ca9d1dc..75f7489 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h
@@ -164,7 +164,7 @@ rte_memcpy_func(void *dst, const void *src, size_t n)
 * We split the remaining bytes (which will be less than 256) into
 * 64byte (2^6) chunks.
 * Using incrementing integers in the case labels of a switch statement
-* enourages the compiler to use a jump table. To get incrementing
+* encourages the compiler to use a jump table. To get incrementing
 * integers, we shift the 2 relevant bits to the LSB position to first
 * get decrementing integers, and then subtract.
 */
diff --git a/lib/librte_eal/common/include/rte_eal.h 
b/lib/librte_eal/common/include/rte_eal.h
index 09b6681..8e4e71c 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -217,7 +217,7 @@ int rte_eal_primary_proc_alive(const char 
*config_file_path);
 /**
  * Usage function typedef used by the application usage function.
  *
- * Use this function typedef to define and call rte_set_applcation_usage_hook()
+ * Use this function typedef to define and call 
rte_set_application_usage_hook()
  * routine.
  */
 typedef void   (*rte_usage_hook_t)(const char * prgname);
diff --git a/lib/librte_eal/common/include/rte_log.h 
b/lib/librte_eal/common/include/rte_log.h
index 16564d4..6c2d356 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -218,7 +218,7 @@ int rte_log_cur_msg_logtype(void);
  *   The string identifying the log type.
  * @return
  *   - >0: success, the returned value is the log type identifier.
- *   - (-ENONEM): cannot allocate memory.
+ *   - (-ENOMEM): cannot allocate memory.
  */
 int rte_log_register(const char *name);
 
diff --git a/lib/librte_eal/common/malloc_elem.c 
b/lib/librte_eal/common/malloc_elem.c
index 889dffd..98bcd37 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -252,7 +252,7 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, 
unsigned align,
 }
 
 /*
- * joing two struct malloc_elem together. elem1 and elem2 must
+ * join two struct malloc_elem together. elem1 and elem2 must
  * be contiguous in memory.
  */
 static inline void
diff --git a/lib/librte_eal/common/rte_service.c 
b/lib/librte_eal/common/rte_service.c
index 09b758c..ae97e6b 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -153,7 +153,7 @@ service_valid(uint32_t id)
service = &rte_services[id];\
 } while (0)
 
-/* returns 1 if statistics should be colleced for service
+/* returns 1 if statistics should be 

[dpdk-dev] [PATCH] net/mlx4: fix last Tx wqe stamping lack

2017-11-10 Thread Matan Azrad
When Tx packet HW processing is done, SW should stamp all the completion
burst WQEs.

Stamp missed last completion burst WQE.

Fixes: c3c977bbecbd ("net/mlx4: add Tx bypassing Verbs")

Signed-off-by: Matan Azrad 
---
 drivers/net/mlx4/mlx4_rxtx.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

I think this is a critical bug fix that should be added to 17.11 version.
No performance impact was seen.

diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
index 3985e06..44edeac 100644
--- a/drivers/net/mlx4/mlx4_rxtx.c
+++ b/drivers/net/mlx4/mlx4_rxtx.c
@@ -336,6 +336,7 @@ struct pv {
 {
unsigned int elts_comp = txq->elts_comp;
unsigned int elts_tail = txq->elts_tail;
+   unsigned int sq_tail = sq->tail;
struct mlx4_cq *cq = &txq->mcq;
volatile struct mlx4_cqe *cqe;
uint32_t cons_index = cq->cons_index;
@@ -372,13 +373,13 @@ struct pv {
rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask;
do {
/* Free next descriptor. */
-   nr_txbbs +=
+   sq_tail += nr_txbbs;
+   nr_txbbs =
mlx4_txq_stamp_freed_wqe(sq,
-(sq->tail + nr_txbbs) & sq->txbb_cnt_mask,
-!!((sq->tail + nr_txbbs) & sq->txbb_cnt));
+sq_tail & sq->txbb_cnt_mask,
+!!(sq_tail & sq->txbb_cnt));
pkts++;
-   } while (((sq->tail + nr_txbbs) & sq->txbb_cnt_mask) !=
-new_index);
+   } while ((sq_tail & sq->txbb_cnt_mask) != new_index);
cons_index++;
} while (1);
if (unlikely(pkts == 0))
@@ -386,7 +387,7 @@ struct pv {
/* Update CQ. */
cq->cons_index = cons_index;
*cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK);
-   sq->tail = sq->tail + nr_txbbs;
+   sq->tail = sq_tail + nr_txbbs;
/* Update the list of packets posted for transmission. */
elts_comp -= pkts;
assert(elts_comp <= txq->elts_comp);
-- 
1.8.3.1



Re: [dpdk-dev] [PATCH] lib/librte_cryptodev: Fix a typo: rte_cyptodev_names -> rte_cryptodev_names

2017-11-10 Thread De Lara Guarch, Pablo
Hi Pavel,

> -----Original Message-----
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pavel Shirshov
> Sent: Friday, November 10, 2017 7:56 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] lib/librte_cryptodev: Fix a typo:
> rte_cyptodev_names -> rte_cryptodev_names
> 

Actually, this variable is not used anywhere, so it can be deleted.

By the way, for future reference:
In the commit title, do not use "lib/librte...", but directly the library name.
Also, start with lowercase, and try to be as short as possible, but still 
meaningful.
For this case, it is not necessary to describe the typo in the title.

So, this would be: "cryptodev: fix typo"

Thanks,
Pablo



[dpdk-dev] [PATCH v2] lib/librte_distributor: Fix typos in comments

2017-11-10 Thread Pavel Shirshov
Signed-off-by: Pavel Shirshov 
---
 lib/librte_distributor/rte_distributor.c | 6 +++---
 lib/librte_distributor/rte_distributor.h | 2 +-
 lib/librte_distributor/rte_distributor_private.h | 2 +-
 lib/librte_distributor/rte_distributor_v20.c | 3 ++-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/librte_distributor/rte_distributor.c 
b/lib/librte_distributor/rte_distributor.c
index 57ad339..6ad2301 100644
--- a/lib/librte_distributor/rte_distributor.c
+++ b/lib/librte_distributor/rte_distributor.c
@@ -442,7 +442,7 @@ rte_distributor_process_v1705(struct rte_distributor *d,
 
/*
 * Uncommenting the next line will cause the find_match
-* function to be optimised out, making this function
+* function to be optimized out, making this function
 * do parallel (non-atomic) distribution
 */
/* matches[j] = 0; */
@@ -536,7 +536,7 @@ MAP_STATIC_SYMBOL(int rte_distributor_returned_pkts(struct 
rte_distributor *d,
 
 /*
  * Return the number of packets in-flight in a distributor, i.e. packets
- * being workered on or queued up in a backlog.
+ * being worked on or queued up in a backlog.
  */
 static inline unsigned int
 total_outstanding(const struct rte_distributor *d)
@@ -663,7 +663,7 @@ rte_distributor_create_v1705(const char *name,
 #endif
 
/*
-* Set up the backog tags so they're pointing at the second cache
+* Set up the backlog tags so they're pointing at the second cache
 * line for performance during flow matching
 */
for (i = 0 ; i < num_workers ; i++)
diff --git a/lib/librte_distributor/rte_distributor.h 
b/lib/librte_distributor/rte_distributor.h
index 9b9efdb..cbeed04 100644
--- a/lib/librte_distributor/rte_distributor.h
+++ b/lib/librte_distributor/rte_distributor.h
@@ -71,7 +71,7 @@ struct rte_mbuf;
  * @param alg_type
  *   Call the legacy API, or use the new burst API. legacy uses 32-bit
  *   flow ID, and works on a single packet at a time. Latest uses 15-
- *   bit flow ID and works on up to 8 packets at a time to worers.
+ *   bit flow ID and works on up to 8 packets at a time to workers.
  * @return
  *   The newly created distributor instance
  */
diff --git a/lib/librte_distributor/rte_distributor_private.h 
b/lib/librte_distributor/rte_distributor_private.h
index 250b23e..24f41b9 100644
--- a/lib/librte_distributor/rte_distributor_private.h
+++ b/lib/librte_distributor/rte_distributor_private.h
@@ -90,7 +90,7 @@ union rte_distributor_buffer_v20 {
 
 /*
  * Transfer up to 8 mbufs at a time to/from workers, and
- * flow matching algorithm optimised for 8 flow IDs at a time
+ * flow matching algorithm optimized for 8 flow IDs at a time
  */
 #define RTE_DIST_BURST_SIZE 8
 
diff --git a/lib/librte_distributor/rte_distributor_v20.c 
b/lib/librte_distributor/rte_distributor_v20.c
index 9adda52..5be6efd 100644
--- a/lib/librte_distributor/rte_distributor_v20.c
+++ b/lib/librte_distributor/rte_distributor_v20.c
@@ -345,7 +345,8 @@ rte_distributor_returned_pkts_v20(struct 
rte_distributor_v20 *d,
 VERSION_SYMBOL(rte_distributor_returned_pkts, _v20, 2.0);
 
 /* return the number of packets in-flight in a distributor, i.e. packets
- * being workered on or queued up in a backlog. */
+ * being worked on or queued up in a backlog.
+ */
 static inline unsigned
 total_outstanding(const struct rte_distributor_v20 *d)
 {
-- 
2.7.4



Re: [dpdk-dev] [PATCH] net/mlx5: fix flow director rules comparison

2017-11-10 Thread Ferruh Yigit
On 11/9/2017 4:43 AM, Nelio Laranjeiro wrote:
> When deleting/replacing a flow director rule the first rule of the list was
> always the one replaced instead of the corresponding one.
> 
> Fixes: 4c3e9bcdd52e ("net/mlx5: support flow director")
> 
> Signed-off-by: Nelio Laranjeiro 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [PATCH] net/nfp: initialize stats struct

2017-11-10 Thread Ferruh Yigit
On 11/9/2017 8:05 PM, Stephen Hemminger wrote:
> On Thu, 9 Nov 2017 17:10:16 -0800
> Ferruh Yigit  wrote:
> 
>> On 11/8/2017 3:59 AM, Alejandro Lucero wrote:
>>> Not all struct fields will be written and random data could
>>> confuse readers.
>>>
>>> Fixes: 92aa491b881e ("nfp: add statistics")
>>> Coverity: 140755  
>>
>> Hi Alejandro,
>>
>> Thank you for coverity fixes, but they will be considered for next release,
>> since trying to limit rc4 only for critical fixes.
>>
>> Thanks,
>> ferruh
>>
>>>
>>> Signed-off-by: Alejandro Lucero   
>>
> 
> This looks like a bug fix. because the stats are on the stack and will
> be garbage.

Yes it is, also other nfp patches are fixes.

This is part of an effort to close the release; eventually it needs to stop
somewhere, and technically the period after rc3 is only for critical bug fixes,
so this is the time to stop taking these kinds of patches.

That said, given that the scope of the patches is PMD-only and that they are
sent by driver maintainers, I will take them, noting Thomas' right to drop
them if he disagrees.


Re: [dpdk-dev] [dpdk-stable] [PATCH] net/mlx5: fix rxq interrupt memory corruption

2017-11-10 Thread Ferruh Yigit
On 11/9/2017 5:26 AM, Adrien Mazarguil wrote:
> On Thu, Nov 09, 2017 at 03:10:14PM +0200, Shahaf Shuler wrote:
>> intr_vec allocation size was wrong causing a memory corruption.
>>
>> Fixes: e1016cb73383 ("net/mlx5: fix Rx interrupts management")
>> Cc: adrien.mazarg...@6wind.com
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Shahaf Shuler 
> 
> Acked-by: Adrien Mazarguil 

Applied to dpdk/master, thanks.


[dpdk-dev] 18.02 Intel Roadmap

2017-11-10 Thread O'Driscoll, Tim
With the 17.11 release almost complete, it's time to start thinking about 
18.02. These are the features that we plan to contribute:

Add Classification and Metering to SoftNIC PMD: The SoftNIC PMD provides 
software equivalency for Ethernet device (ethdev) features. Currently, only the 
Traffic Management (rte_tm) API is supported. Support will be added for flow 
classification (rte_flow API) and QoS metering & policing (rte_mtr API).

Port Representor: A Port Representor PMD will be created which will provide a 
logical representation of any port, either physical or virtual. This logical 
port provides a mechanism for control and monitoring. The primary purpose of 
port representors in DPDK is to support the configuration, management and 
monitoring of virtual functions whose physical function is also bound to a DPDK 
control plane application.

Baseband Device (BBDev) API and Turbo Encoding/Decoding: A Baseband Device 
(BBDev) API will be created to support wireless baseband accelerators. The 
initial accelerator which will be supported will be for turbo encoding/decoding 
(used for forward error correction).

Eventdev OPDL PMD: A new PMD for the Optimized Packet Distribution Library 
(OPDL) will be added to the eventdev API. OPDL provides a lockless, ring-based 
architecture that is suitable for certain types of stateless packet processing 
pipelines.

PCIe Hardware Hotplug: This will allow users to plug a NIC in/out during 
runtime, and have that event handled by DPDK.

AVF PMD: The Adaptive Virtual Function (AVF) specification provides a 
consistent interface to VFs for future generations of Intel NICs. An AVF PMD 
will be created to support future devices which comply with this specification.

I40E Support for GTP-U IPv6: Support for GTP-U with an IPv4 payload was added 
in 17.11. This will be enhanced to support an IPv6 payload.

I40E MAC Loopback: MAC loopback (Rx -> MAC loopback -> Tx) will be supported on 
I40E.

I40E Performance Improvement: Performance of the I40E PMD will be increased 
through improved use of vector instructions.

GRO VxLAN Support: Generic Receive Offload (GRO) is a popular software 
technique to reduce per-packet overhead for applications by aggregating small 
packets into large ones. Currently, DPDK GRO only supports TCP/IPv4 packets. 
GRO support for VxLAN packets that contain outer IPv4 and inner TCP/IPv4 
headers will be added.

Set VF Queue Number at Run Time: Provides the ability to set the maximum number 
of queues for a VF at run time instead of at compile time.

Support Virtual Devices in Secondary Process: Support will be added for running 
virtual devices in secondary processes. See the patch set for further details 
(http://dpdk.org/ml/archives/dev/2017-August/073532.html).

Virtio Interrupt Coalescence: The Virtio spec has two methods to implement 
interrupt/notification suppression. If the VIRTIO_F_EVENT_IDX feature bit is 
not negotiated, the flags field in the available ring offers a crude mechanism 
for the driver to inform the device that it doesn't want interrupts when 
buffers are used. Otherwise used_event is a more performant alternative where 
the driver specifies how far the device can progress before interrupting. The 
former has been done in DPDK, the latter will be supported here.

Support Virtio Features in Vhost Lib for Live Migration Between DPDK vhost-user 
and Kernel vhost-net: DPDK vhost-user backend needs to support the default 
negotiated feature set in Kernel vhost-net to fix the gap for live migration. 
These features will be supported: VIRTIO_NET_F_GSO, VIRTIO_NET_F_GUEST_UFO, 
VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_HOST_ECN.

Support Guest Announce in Virtio PMD for Live Migration: Guest Announce feature 
enables guest driver to send RARP packets when live migration is done. 
Supporting it enables wider use scenarios for the virtio PMD.

Support AES_CCM in the AESNI_MB PMD: The AESNI_MB PMD will be enhanced to 
support AES_CCM.

DPDK Build System Redesign: The build system will be updated to use meson and 
ninja, as discussed during the DPDK Summit Userspace event. The existing build 
system will be retained for backwards compatibility.


In addition, we plan to issue the following RFCs in the 18.02 timeframe in 
order to get early feedback from the community:

Userspace eBPF: A userspace implementation of eBPF, which would provide the 
ability to execute BPF code within DPDK.

Support RSS in rte_flow: The rte_flow API is defined to include RSS but so far 
RSS hasn't been supported.

FPGA Bus Driver: A bus driver to discover and probe PCI devices.

vHost Data Path Acceleration: vHost Data Path Acceleration (vDPA) is a software 
framework to support heterogeneous Virtio accelerations.

Virtio Crypto: The vhost-crypto driver will act as a translator between 
virtio-crypto requests and the dpdk cryptodev crypto operations. The driver 
will dequeue from vrings and reassemble to cryptodev operations and then 
enqueue th

Re: [dpdk-dev] [dpdk-stable] [PATCH] net/mlx4: fix rxq interrupt memory corruption

2017-11-10 Thread Ferruh Yigit
On 11/9/2017 5:57 AM, Adrien Mazarguil wrote:
> On Thu, Nov 09, 2017 at 03:50:49PM +0200, Shahaf Shuler wrote:
>> intr_vec allocation size was wrong causing a memory corruption.
>>
>> Fixes: 0a2ae703192c ("net/mlx4: fix Rx interrupts management")
>> Cc: adrien.mazarg...@6wind.com
>> Cc: sta...@dpdk.org
>>
>> Signed-off-by: Shahaf Shuler 
> 
> Acked-by: Adrien Mazarguil 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop

2017-11-10 Thread Adrien Mazarguil
Hi Marcelo,

On Fri, Nov 10, 2017 at 04:02:10AM -0200, Marcelo Tosatti wrote:
> 
> This patch allows a configurable pair of values to be set, which
> controls
> the frequency and length of a nanosleep call performed at test-pmd's
> iofwd main loop.
> 
> The problem is the following: it is necessary to execute code
> on isolated CPUs which is not part of the packet forwarding load.
> 
> For example:
> 
>  "echo val > /sys/kernel/debug/tracing/buffer_size_kb"
> 
> hangs the process, because the DPDK thread has higher 
> priority than the workqueue thread which executes the flush from 
> CPU local tracebuffer to CPU global trace buffer [the workitem
> in case].
> 
> There are more serious issues than the trace-cmd bug, such as XFS 
> workitems failing to execute causing filesystem corruption.
> 
> To workaround this problem, until a proper kernel
> solution is developed, allow DPDK to nanosleep 
> (hopefully with a small enough frequency and interval 
> so that the performance is within acceptable levels).

I understand the need to do something about it, however the nanosleep()
approach seems questionable to me.

Testpmd's forwarding modes (particularly I/O) are used for benchmarking
purposes by many and are therefore sensitive to change. This code path is
currently free from system calls for that reason and nanosleep() is an
expensive one by definition. Even if optional or called at a low frequency,
the presence of this new code has an impact.

Since testpmd is a development tool not supposed to run in a production
environment, is there really a need for it to be patched to work around a
(temporary) Linux kernel bug?

If so, why is I/O the only forwarding mode impacted?

If it's used in a production environment and such a fix can't wait, have
other workarounds been considered:

- Replacing testpmd in I/O mode with a physical cable or switch?

- Using proper options on the kernel command line as described in [1], such
  as isolcpus, rcu_nocbs, nohz_full?

[1] doc/guides/howto/pvp_reference_benchmark.rst

> 
> The new parameters are:
> 
> *  --delay-hz: sets nanosleep frequency in Hz.
> *  --delay-length: sets nanosleep length in ns.
> 
> Results for delay-hz=100,delay-length=1 (which allows 
> the buffer_size_kb change to complete):
> 
> Baseline run-1:
> [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 49505, Average:
> 19008.7 ns, StdDev: 2501.0 ns, Quartiles: 17293.0/18330.0/19901.0 ns
> 
> Baseline run-2:
> [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 49606, Average:
> 19036.4 ns, StdDev: 2485.2 ns, Quartiles: 17318.0/18349.0/19936.0 ns
> 
> Baseline run-3:
> [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 49627, Average:
> 19019.2 ns, StdDev: 2503.7 ns, Quartiles: 17323.0/18355.0/19940.0 ns
> 
> 
> 
> (10.000us, 100HZ)
> 
> Run-1:
> [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 7284, Average:
> 20830.6 ns, StdDev: 12023.0 ns, Quartiles: 17309.0/18394.0/20233.0 ns
> 
> Run-2:
> [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 6272, Average:
> 20897.1 ns, StdDev: 12057.2 ns, Quartiles: 17389.0/18457.0/20266.0 ns
> 
> Run-3:
> [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 4843, Average:
> 20535.2 ns, StdDev: 9827.3 ns, Quartiles: 17389.0/18441.0/20269.0 ns
> 
> 
> Signed-off-by: Marcelo Tosatti 
> 
> 
> diff -Nur dpdk-17.08.orig/app/test-pmd/iofwd.c dpdk-17.08/app/test-pmd/iofwd.c
> --- dpdk-17.08.orig/app/test-pmd/iofwd.c  2017-10-30 22:45:37.829492673 
> -0200
> +++ dpdk-17.08/app/test-pmd/iofwd.c   2017-10-30 22:45:48.321522581 -0200
> @@ -64,9 +64,30 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include "testpmd.h"
>  
> +uint32_t nanosleep_interval;
> +
> +static void calc_nanosleep_interval(int hz)
> +{
> + uint64_t cycles_per_sec = rte_get_timer_hz();
> + nanosleep_interval = cycles_per_sec/hz;
> +}
> +
> +static void do_nanosleep(void)
> +{
> + struct timespec req;
> +
> + req.tv_sec = 0;
> + req.tv_nsec = nanosleep_length;
> +
> + nanosleep(&req, NULL);
> +
> + return;
> +}
> +
>  /*
>   * Forwarding of packets in I/O mode.
>   * Forward packets "as-is".
> @@ -81,6 +102,10 @@
>   uint16_t nb_tx;
>   uint32_t retry;
>  
> +
> + if (nanosleep_interval == 0 && nanosleep_frequency > 0)
> + calc_nanosleep_interval(nanosleep_frequency);
> +
>  #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
>   uint64_t start_tsc;
>   uint64_t end_tsc;
> @@ -91,6 +116,12 @@
>   start_tsc = rte_rdtsc();
>  #endif
>  
> + if (nanosleep_frequency > 0 &&
> + rte_get_timer_cycles() > fs->next_nanosleep) {
> + do_nanosleep();
> + fs->next_nanosleep = rte_get_timer_cycles() + 
> nanosleep_interval;
> + }
> +
>   /*
>* Receive a burst of packets and forward them.
>*/
> diff -Nur dpdk-17.08.orig/app/test-pmd/parameters.c 
> dpdk-17.08/app/test-pmd/parameters.c
> --- dpdk-17.08.orig/app/test-pmd/parameters.c 2017-10-30 

Re: [dpdk-dev] [PATCH] drivers/net: fix for incomplete nb_segs type change

2017-11-10 Thread Ferruh Yigit
On 11/9/2017 11:29 AM, Ferruh Yigit wrote:
> On 11/9/2017 5:54 AM, Ilya Matveychikov wrote:
>> Fixes: 97cb466d ("mbuf: use 2 bytes for port and nb segments")
>> Signed-off-by: Ilya V. Matveychikov 
> 
> Reviewed-by: Ferruh Yigit 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [PATCH] net/nfp: initialize stats struct

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 3:59 AM, Alejandro Lucero wrote:
> Not all struct fields will be written and random data could
> confuse readers.
> 
> Fixes: 92aa491b881e ("nfp: add statistics")
> Coverity: 140755
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [dpdk-stable] [PATCH] net/nfp: fix null pointer check

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 6:36 AM, Alejandro Lucero wrote:
> First, the received pointer was not checked before. Then the pointer
> from malloc was not the one used in the existing check.
> 
> Fixes: ad60bca34899 ("net/nfp: read PF port MAC addr using NSP")
> Coverity: 195027
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [dpdk-stable] [PATCH] net/nfp: release memory before exit

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 6:42 AM, Alejandro Lucero wrote:
> Memory allocated was not being released in any exit path.
> 
> Fixes: 48e2255f1b63 ("net/nfp: add NSP support for HW link configuration")
> Coverity: 195030
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [dpdk-stable] [PATCH] net/nfp: check BAR size is above a safe size

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 4:28 AM, Alejandro Lucero wrote:
> We do not know how big can the BAR be, but we know anything less
> than 1MB is an error. This BAR needs to be big enough for accessing
> most of NFP internals.
> 
> Fixes: d12206e00590 ("net/nfp: add NSP user space interface")
> Coverity: 195024
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [PATCH] net/nfp: fix resource leak

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 4:14 AM, Alejandro Lucero wrote:
> File descriptor is not released in any potential exit path
> inside the function.
> 
> Fixes: f37d8a4b67b2 ("net/nfp: add NSP FW upload command")
> Coverity: 195018
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [dpdk-stable] [PATCH] net/nfp: check function return value

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 4:19 AM, Alejandro Lucero wrote:
> The fstat function could return a value that indicates an error condition.
> If this is not checked, the error condition may not be handled correctly.
> 
> Fixes: f37d8a4b67b2 ("net/nfp: add NSP FW upload command")
> Coverity: 195019
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.



Re: [dpdk-dev] [PATCH] net/nfp: fix memory allocation

2017-11-10 Thread Ferruh Yigit
On 11/8/2017 4:07 AM, Alejandro Lucero wrote:
> If the function actually returns a null value, a null pointer
> dereference will occur.
> 
> Fixes: dd63df2bfff3 ("net/nfp: add NSP symbol resolution command")
> Coverity: 195013
> 
> Signed-off-by: Alejandro Lucero 

Applied to dpdk/master, thanks.


Re: [dpdk-dev] [PATCH 1/2] net/bnxt: fix duplicate creation of ntuple filter

2017-11-10 Thread Ferruh Yigit
On 11/9/2017 9:46 AM, Ajit Khaparde wrote:
> Prevent the creation of duplicate 5tuple filters.
> Fixes: b7435d660a8c ("net/bnxt: add ntuple filtering support")
> 
> Signed-off-by: Ajit Khaparde 

Series applied to dpdk/master, thanks.


Re: [dpdk-dev] [PATCH v5 1/1] ring: guarantee load/load order in enqueue and dequeue

2017-11-10 Thread Ananyev, Konstantin


> -Original Message-
> From: Jia He [mailto:hejia...@gmail.com]
> Sent: Friday, November 10, 2017 1:51 AM
> To: jerin.ja...@caviumnetworks.com; dev@dpdk.org; olivier.m...@6wind.com
> Cc: Ananyev, Konstantin ; Richardson, Bruce 
> ; jianbo@arm.com;
> hemant.agra...@nxp.com; Jia He ; Jia He 
> ; jie2@hxt-semitech.com; bing.zhao@hxt-
> semitech.com
> Subject: [PATCH v5 1/1] ring: guarantee load/load order in enqueue and dequeue
> 
> We watched a rte panic of mbuf_autotest in our qualcomm arm64 server.
> In __rte_ring_move_cons_head()
> ...
> do {
> /* Restore n as it may change every loop */
> n = max;
> 
> *old_head = r->cons.head;//1st load
> const uint32_t prod_tail = r->prod.tail; //2nd load
> 
> cpu1(producer)  cpu2(consumer)  cpu3(consumer)
> load r->prod.tail
> in enqueue:
> load r->cons.tail
> load r->prod.head
> 
> store r->prod.tail
> 
> load r->cons.head
> load r->prod.tail
> ...
> store r->cons.{head,tail}
> load r->cons.head
> 
> In weak memory order architectures (powerpc, arm), the 2nd load might be
> reordered before the 1st load, which makes *entries bigger than we
> wanted. This nasty reordering messes up enqueue/dequeue. Then, r->cons.head
> will be bigger than prod_tail, which makes *entries very big, and the
> consumer will go forward incorrectly.
> 
> After this patch, even with the above context switches, the old cons.head
> will be recalculated after failure of rte_atomic32_cmpset. So no race
> conditions are left.
> 
> There is no such issue on X86, because X86 is strong memory order model.
> But rte_smp_rmb() doesn't have impact on runtime performance on X86, so
> keep the same code without architectures specific concerns.
> 
> Signed-off-by: Jia He 
> Signed-off-by: jie2@hxt-semitech.com
> Signed-off-by: bing.z...@hxt-semitech.com
> ---
>  lib/librte_ring/rte_ring.h | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
> index 5e9b3b7..3e8085a 100644
> --- a/lib/librte_ring/rte_ring.h
> +++ b/lib/librte_ring/rte_ring.h
> @@ -409,6 +409,11 @@ __rte_ring_move_prod_head(struct rte_ring *r, int is_sp,
>   n = max;
> 
>   *old_head = r->prod.head;
> +
> + /* add rmb barrier to avoid load/load reorder in weak
> +  * memory model. It is noop on x86 */
> + rte_smp_rmb();
> +
>   const uint32_t cons_tail = r->cons.tail;
>   /*
>*  The subtraction is done between two unsigned 32bits value
> @@ -517,6 +522,11 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
>   n = max;
> 
>   *old_head = r->cons.head;
> +
> + /* add rmb barrier to avoid load/load reorder in weak
> +  * memory model. It is noop on x86 */
> + rte_smp_rmb();
> +
>   const uint32_t prod_tail = r->prod.tail;
>   /* The subtraction is done between two unsigned 32bits value
>* (the result is always modulo 32 bits even if we have
> --

Acked-by: Konstantin Ananyev 

> 2.7.4



Re: [dpdk-dev] [PATCH 1/3] eal/arm64: remove the braces {} for dmb(), dsb()

2017-11-10 Thread Ananyev, Konstantin
Hi Jia,

> -Original Message-
> From: Jia He [mailto:hejia...@gmail.com]
> Sent: Friday, November 10, 2017 2:06 AM
> To: Ananyev, Konstantin ; Jianbo Liu 
> 
> Cc: Richardson, Bruce ; 
> jerin.ja...@caviumnetworks.com; dev@dpdk.org; olivier.m...@6wind.com;
> hemant.agra...@nxp.com; jia...@hxt-semitech.com
> Subject: Re: [PATCH 1/3] eal/arm64: remove the braces {} for dmb(),dsb()
> 
> 
> 
> On 11/9/2017 5:38 PM, Ananyev, Konstantin Wrote:
> >
> >> -Original Message-
> >> From: Jianbo Liu [mailto:jianbo@arm.com]
> >> Sent: Thursday, November 9, 2017 4:56 AM
> >> To: Jia He 
> >> Cc: Richardson, Bruce ; 
> >> jerin.ja...@caviumnetworks.com; dev@dpdk.org; olivier.m...@6wind.com;
> >> Ananyev, Konstantin ; 
> >> hemant.agra...@nxp.com; jia...@hxt-semitech.com
> >> Subject: Re: [PATCH 1/3] eal/arm64: remove the braces {} for dmb(),dsb()
> >>
> >> The 11/09/2017 12:43, Jia He wrote:
> >>> Hi Jianbo
> >>>
> >>>
> >>> On 11/9/2017 11:21 AM, Jianbo Liu Wrote:
>  The 11/09/2017 11:14, Jia He wrote:
> > On 11/9/2017 9:22 AM, Jia He Wrote:
> >> Hi Bruce
> >>
> >>
> >> On 11/8/2017 6:28 PM, Bruce Richardson Wrote:
> >>> On Wed, Nov 08, 2017 at 06:17:10AM +, Jia He wrote:
>  for the code as follows:
>  if (condition)
>   rte_smp_rmb();
>  else
>   rte_smp_wmb();
>  Without this patch, compiler will report this error:
>  error: 'else' without a previous 'if'
> 
>  Signed-off-by: Jia He 
>  Signed-off-by: jia...@hxt-semitech.com
>  ---
> lib/librte_eal/common/include/arch/arm/rte_atomic_64.h | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
> 
>  diff --git
>  a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
>  b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
>  index 0b70d62..38c3393 100644
>  --- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
>  +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
>  @@ -43,8 +43,8 @@ extern "C" {
>   #include "generic/rte_atomic.h"
> -#define dsb(opt)  { asm volatile("dsb " #opt : : : "memory"); }
>  -#define dmb(opt)  { asm volatile("dmb " #opt : : : "memory"); }
>  +#define dsb(opt) asm volatile("dsb " #opt : : : "memory");
>  +#define dmb(opt) asm volatile("dmb " #opt : : : "memory");
> >>> Need to remove the trailing ";" I too I think.
> >>> Alternatively, to keep the braces, the standard practice is to use
> >>> do { ... } while(0)
> >> If trailing ";" is not removed
> >> the code:
> >> if (condition)
> >>  rte_smp_rmb();
> >> else
> >>  anything();
> >>
>  Sorry, why not use two different functions as your conditions passed in
>  are fixed in the calling functions.
> >>> Do you mean to split update_tail() into update_tail_enqueue() and
> >>> update_tail_dequeue()?
> >> Yes. So it's not need to change dsb/dmb.
> > That's a good idea - but you still might hit the same problem in
> > Some different place in future...
> > Why not to convert these macros into 'always_inline' functions then?
> > Konstantin
> >
> It makes things more complex
> opt needs to be redefined with types
> such as : __attribute__((always_inline)) void dsb( char* opt)
> and the input parameter should be
> #define sy "sy"
> #define ld "ld"
> 
> And the "#" in asm codes needs to be considered more.
> 
> IMO, the kernel way is simple and clean, isn't it?
> #define dmb(opt) asm volatile("dmb " #opt : : : "memory")

Fine by me.
Konstantin

> Another choice is adding the do/while.
> 
> @Ananyev @Jianbo
> Any thoughts?
> 
> --
> Cheers,
> Jia



Re: [dpdk-dev] [PATCH] net/mlx5: fix number of segment calculation

2017-11-10 Thread Adrien Mazarguil
Hi Ori,

On Thu, Nov 09, 2017 at 06:04:32PM +0200, Ori Kam wrote:
> The CRC size should be taken into consideration when computing
> the number of mbuf segments for packet on the receive path.
> Large packets can be dropped due to extra CRC length.
> 
> Fixes: a1366b1a2be3 ("net/mlx5: add reference counter on DPDK Rx queues")
> Cc: sta...@dpdk.org
> Cc: nelio.laranje...@6wind.com
> 
> Signed-off-by: Ori Kam 

I don't think there's an issue to fix, there's actually a reason it's done
that way, perhaps I'm wrong but let me elaborate.

When applications request CRC to be written to mbuf (more precisely not to
be stripped), its extra 4 bytes are neither part of mbuf->pkt_len nor
mbuf->data_len. It just happens to be written past mbuf data if there's room
for it, where applications knowingly expect it based on how they configured
the PMD. That's the API.

This implies applications also size mbufs accordingly; if they don't provide
room for the CRC, it can't be written. This extra room is assumed to be part
of max_rx_pkt_len. When CRC stripping is requested, they do not have to
provide such room (IBV_WQ_FLAGS_SCATTER_FCS is not set on mlx5 Rx queues).

One problem with your proposal is that, assuming all segments are consumed
entirely during Rx and max_rx_pkt_len is reached, another segment with zero
data length gets appended just to hold the CRC. Applications may interpret
this as a bug.

Another problem is this doesn't solve the issue when Rx scatter is disabled
although it's no different from when packet data consumes all segments
entirely and there's no room left for the CRC. If it's that important, the
PMD should fail to create the Rx queue in that case as well.

> ---
>  drivers/net/mlx5/mlx5_rxq.c |7 +--
>  1 files changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 6b29aae..701925b 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -887,6 +887,8 @@ struct mlx5_rxq_ctrl*
>   const uint16_t desc_n =
>   desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
>   unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
> + uint8_t crc_size =
> + !!(dev->data->dev_conf.rxmode.hw_strip_crc == 0) << 2;
>  
>   tmpl = rte_calloc_socket("RXQ", 1,
>sizeof(*tmpl) +
> @@ -900,12 +902,13 @@ struct mlx5_rxq_ctrl*
>   /* Enable scattered packets support for this queue if necessary. */
>   assert(mb_len >= RTE_PKTMBUF_HEADROOM);
>   if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
> - (mb_len - RTE_PKTMBUF_HEADROOM)) {
> + (mb_len - RTE_PKTMBUF_HEADROOM - crc_size)) {
>   tmpl->rxq.sges_n = 0;
>   } else if (dev->data->dev_conf.rxmode.enable_scatter) {
>   unsigned int size =
>   RTE_PKTMBUF_HEADROOM +
> - dev->data->dev_conf.rxmode.max_rx_pkt_len;
> + dev->data->dev_conf.rxmode.max_rx_pkt_len +
> + crc_size;
>   unsigned int sges_n;
>  
>   /*
> -- 
> 1.7.1
> 

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop

2017-11-10 Thread Daniel Bristot de Oliveira
On 11/10/2017 10:12 AM, Adrien Mazarguil wrote:
> Since testpmd is a development tool not supposed to run in a production
> environment, is there really a need for it to be patched to work around a
> (temporary) Linux kernel bug?

From the kernel side... not even...

> If so, why is I/O the only forwarding mode impacted?
> 
> If it's used in a production environment and such a fix can't wait, have
> other workarounds been considered:
> 
> - Replacing testpmd in I/O mode with a physical cable or switch?

using proper options like:

> - Using proper options on the kernel command line as described in [1], such
>   as isolcpus, rcu_nocbs, nohz_full?

Guarantees you that a CPU is completely isolated. In the current state
of the art, it is not possible to assume that a CPU can be fully
isolated from OS housekeeping threads.

For example, some kernel sub-systems rely on executing on every CPU,
e.g., using kworkers, and they are not only tracing or debugging
options. The case Marcelo showed is just a straightforward use case.

If a busy-loop-isolated task runs with rt priority, it will end up
delaying such workers, causing the system to complain about hung tasks.

-- Daniel


Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop

2017-11-10 Thread Ananyev, Konstantin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Adrien Mazarguil
> Sent: Friday, November 10, 2017 9:12 AM
> To: Marcelo Tosatti 
> Cc: dev@dpdk.org; Luiz Capitulino ; Daniel Bristot de 
> Oliveira 
> Subject: Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop
> 
> Hi Marcelo,
> 
> On Fri, Nov 10, 2017 at 04:02:10AM -0200, Marcelo Tosatti wrote:
> >
> > This patch allows a configurable pair of values to be set, which
> > controls
> > the frequency and length of a nanosleep call performed at test-pmd's
> > iofwd main loop.
> >
> > The problem is the following: it is necessary to execute code
> > on isolated CPUs which is not part of the packet forwarding load.
> >
> > For example:
> >
> >  "echo val > /sys/kernel/debug/tracing/buffer_size_kb"
> >
> > hangs the process, because the DPDK thread has higher
> > priority than the workqueue thread which executes the flush from
> > CPU local tracebuffer to CPU global trace buffer [the workitem
> > in case].
> >
> > There are more serious issues than the trace-cmd bug, such as XFS
> > workitems failing to execute causing filesystem corruption.
> >
> > To workaround this problem, until a proper kernel
> > solution is developed, allow DPDK to nanosleep
> > (hopefully with a small enough frequency and interval
> > so that the performance is within acceptable levels).
> 
> I understand the need to do something about it, however the nanosleep()
> approach seems questionable to me.
> 
> Testpmd's forwarding modes (particularly I/O) are used for benchmarking
> purposes by many and are therefore sensitive to change. This code path is
> currently free from system calls for that reason and nanosleep() is an
> expensive one by definition. Even if optional or called at a low frequency,
> the presence of this new code has an impact.
> 
> Since testpmd is a development tool not supposed to run in a production
> environment, is there really a need for it to be patched to work around a
> (temporary) Linux kernel bug?
> 
> If so, why is I/O the only forwarding mode impacted?
> 
> If it's used in a production environment and such a fix can't wait, have
> other workarounds been considered:
> 
> - Replacing testpmd in I/O mode with a physical cable or switch?
> 
> - Using proper options on the kernel command line as described in [1], such
>   as isolcpus, rcu_nocbs, nohz_full?
> 
> [1] doc/guides/howto/pvp_reference_benchmark.rst


Agree with Adrien here - the patch doesn't fix the problem in any case,
while introducing an unnecessary slowdown in testpmd iofwd mode.
Please think up some other approach.
Konstantin

> 
> >
> > The new parameters are:
> >
> > *  --delay-hz: sets nanosleep frequency in Hz.
> > *  --delay-length: sets nanosleep length in ns.
> >
> > Results for delay-hz=100,delay-length=1 (which allows
> > the buffer_size_kb change to complete):
> >
> > Baseline run-1:
> > [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 49505, Average:
> > 19008.7 ns, StdDev: 2501.0 ns, Quartiles: 17293.0/18330.0/19901.0 ns
> >
> > Baseline run-2:
> > [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 49606, Average:
> > 19036.4 ns, StdDev: 2485.2 ns, Quartiles: 17318.0/18349.0/19936.0 ns
> >
> > Baseline run-3:
> > [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 49627, Average:
> > 19019.2 ns, StdDev: 2503.7 ns, Quartiles: 17323.0/18355.0/19940.0 ns
> >
> > 
> >
> > (10.000us, 100HZ)
> >
> > Run-1:
> > [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 7284, Average:
> > 20830.6 ns, StdDev: 12023.0 ns, Quartiles: 17309.0/18394.0/20233.0 ns
> >
> > Run-2:
> > [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 6272, Average:
> > 20897.1 ns, StdDev: 12057.2 ns, Quartiles: 17389.0/18457.0/20266.0 ns
> >
> > Run-3:
> > [Histogram port 0 to port 1 at rate 2.3 Mpps] Samples: 4843, Average:
> > 20535.2 ns, StdDev: 9827.3 ns, Quartiles: 17389.0/18441.0/20269.0 ns
> >
> >
> > Signed-off-by: Marcelo Tosatti 
> >
> >
> > diff -Nur dpdk-17.08.orig/app/test-pmd/iofwd.c 
> > dpdk-17.08/app/test-pmd/iofwd.c
> > --- dpdk-17.08.orig/app/test-pmd/iofwd.c2017-10-30 22:45:37.829492673 
> > -0200
> > +++ dpdk-17.08/app/test-pmd/iofwd.c 2017-10-30 22:45:48.321522581 -0200
> > @@ -64,9 +64,30 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #include "testpmd.h"
> >
> > +uint32_t nanosleep_interval;
> > +
> > +static void calc_nanosleep_interval(int hz)
> > +{
> > +   uint64_t cycles_per_sec = rte_get_timer_hz();
> > +   nanosleep_interval = cycles_per_sec/hz;
> > +}
> > +
> > +static void do_nanosleep(void)
> > +{
> > +   struct timespec req;
> > +
> > +   req.tv_sec = 0;
> > +   req.tv_nsec = nanosleep_length;
> > +
> > +   nanosleep(&req, NULL);
> > +
> > +   return;
> > +}
> > +
> >  /*
> >   * Forwarding of packets in I/O mode.
> >   * Forward packets "as-is".
> > @@ -81,6 +102,10 @@
> > uint16_t nb_tx;
> > uint32_t retry;
> >
> > +
> > +   if (nanosleep_interval == 0 && 

[dpdk-dev] [PATCH] app/testpmd: remove port status check from TM node add cli

2017-11-10 Thread Jasvinder Singh
Currently, the testpmd CLI doesn't permit adding leaf and non-leaf nodes when
the port is started. This doesn't work for i40e devices, as the DCB
configuration is deleted when the port is stopped. Therefore, remove the
port status check before invoking the leaf and non-leaf node APIs in the CLI.
If needed, a device can add a port status check at the driver layer.

Signed-off-by: Jasvinder Singh 
---
 app/test-pmd/cmdline_tm.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/app/test-pmd/cmdline_tm.c b/app/test-pmd/cmdline_tm.c
index 4acef98..803fae4 100644
--- a/app/test-pmd/cmdline_tm.c
+++ b/app/test-pmd/cmdline_tm.c
@@ -1599,12 +1599,6 @@ static void cmd_add_port_tm_nonleaf_node_parsed(void 
*parsed_result,
if (port_id_is_invalid(port_id, ENABLED_WARN))
return;
 
-   /* Port status */
-   if (port_is_started(port_id)) {
-   printf(" Port %u not stopped (error)\n", port_id);
-   return;
-   }
-
memset(&np, 0, sizeof(struct rte_tm_node_params));
 
/* Node parameters */
@@ -1759,12 +1753,6 @@ static void cmd_add_port_tm_leaf_node_parsed(void 
*parsed_result,
if (port_id_is_invalid(port_id, ENABLED_WARN))
return;
 
-   /* Port status */
-   if (port_is_started(port_id)) {
-   printf(" Port %u not stopped (error)\n", port_id);
-   return;
-   }
-
memset(&np, 0, sizeof(struct rte_tm_node_params));
 
/* Node parameters */
-- 
2.9.3



Re: [dpdk-dev] [PATCH] net/mlx5: fix number of segment calculation

2017-11-10 Thread Adrien Mazarguil
Hi Yongseok,

On Thu, Nov 09, 2017 at 02:30:30PM -0800, Yongseok Koh wrote:
> On Thu, Nov 09, 2017 at 06:04:32PM +0200, Ori Kam wrote:
> > The CRC size should be taken into consideration when computing
> > the number of mbuf segments for packet on the receive path.
> > Large packets can be dropped due to extra CRC length.
> > 
> > Fixes: a1366b1a2be3 ("net/mlx5: add reference counter on DPDK Rx queues")
> > Cc: sta...@dpdk.org
> > Cc: nelio.laranje...@6wind.com
> > 
> > Signed-off-by: Ori Kam 
> > ---
> >  drivers/net/mlx5/mlx5_rxq.c |7 +--
> >  1 files changed, 5 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> > index 6b29aae..701925b 100644
> > --- a/drivers/net/mlx5/mlx5_rxq.c
> > +++ b/drivers/net/mlx5/mlx5_rxq.c
> > @@ -887,6 +887,8 @@ struct mlx5_rxq_ctrl*
> > const uint16_t desc_n =
> > desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
> > unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
> > +   uint8_t crc_size =
> > +   !!(dev->data->dev_conf.rxmode.hw_strip_crc == 0) << 2;
> 
> How about making it more explicit with ETHER_CRC_LEN? E.g.
>   uint8_t crc_size = ETHER_CRC_LEN * 
>  (dev->data->dev_conf.rxmode.hw_strip_crc == 0);
> 
> >  
> > tmpl = rte_calloc_socket("RXQ", 1,
> >  sizeof(*tmpl) +
> > @@ -900,12 +902,13 @@ struct mlx5_rxq_ctrl*
> > /* Enable scattered packets support for this queue if necessary. */
> > assert(mb_len >= RTE_PKTMBUF_HEADROOM);
> 
> You might want to make the same change for this assert?
> 
> > if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
> > -   (mb_len - RTE_PKTMBUF_HEADROOM)) {
> > +   (mb_len - RTE_PKTMBUF_HEADROOM - crc_size)) {
> > tmpl->rxq.sges_n = 0;
> > } else if (dev->data->dev_conf.rxmode.enable_scatter) {
> > unsigned int size =
> > RTE_PKTMBUF_HEADROOM +
> > -   dev->data->dev_conf.rxmode.max_rx_pkt_len;
> > +   dev->data->dev_conf.rxmode.max_rx_pkt_len +
> > +   crc_size;
> 
> I think there's another bugs we didn't know. If scatter is required,
> RTE_PKTMBUF_HEADROOM is also reserved per every chained mbufs. So, it looks 
> like
> mb_len should be "rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM" when 
> it
> is declared in the beginning. Make sense?

RTE_PKTMBUF_HEADROOM is actually only reserved on the first segment,
i.e. once per mbuf chain, it should be fine.

> > /*
> >  * Determine the number of SGEs needed for a full packet
> >  * and round it to the next power of two.
> >  */
> > sges_n = log2above((size / mb_len) + !!(size % mb_len));
> > tmpl->rxq.sges_n = sges_n;
> 
> rxq.sges_n is 2bits, which means the max value is 3. So, if sges_n is larger
> than 3, it would just take the last 2bits and it will result in false error
> below. As we can't use sizeof() for bit-fields, this should be changed like:

The name is perhaps confusing, sges_n is documented as a log 2 value, 1 << 3
means 8 segments at most. Assuming default mbuf size, this allows up to
17280 bytes per packet excluding headroom.

You're right, exceeding 3 will remove the extra bits, and since sizeof() can't
be used, that's precisely the reason for the subsequent check, which makes
sure the stored value is enough for a max_rx_pkt_len-sized packet after
converting it back to a number of bytes.

>   
>   /* Check the maximum value of the bit-field. */
>   tmpl->rxq.sges_n = -1;
>   tmpl->rxq.sges_n = RTE_MIN(tmpl->rxq.sges_n, sges_n);
> 
> > /* Make sure rxq.sges_n did not overflow. */
> > size = mb_len * (1 << tmpl->rxq.sges_n);
> > size -= RTE_PKTMBUF_HEADROOM;
> > if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
> > ERROR("%p: too many SGEs (%u) needed to handle"
> >   " requested maximum packet size %u",
> >   (void *)dev,
> >   1 << sges_n,
> >   dev->data->dev_conf.rxmode.max_rx_pkt_len);
> > goto error;
> > }
> 
> This may be unnecessary if we make right changes?

I think it has to be kept as a safety check even if the max number of SGEs
is increased, at least as long as it's stored as a bit-field value.

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop

2017-11-10 Thread Daniel Bristot de Oliveira


On 11/10/2017 11:14 AM, Ananyev, Konstantin wrote:
> Agree with Adrian here - the patch doesn't fix the problem in any case,

I would agree with you if it were possible to assume one can fully
isolate a CPU on Linux... but it is not...

This:
https://lwn.net/Articles/659490/

is still an open issue, and the reason why it is an open issue is the
kernel threads that need to run on every CPU, mainly when using the
PREEMPT_RT, which turns almost everything into threads.

> while introducing an unnecessary slowdown in testpmd iofwd mode.
> Please think up some other approach.

The other approach is to increase the priority of all other threads that
run on the isolated CPU. But that is not a good idea at all, as the other
threads might preempt the busy-loop thread at the worst possible moment.

Using the knowledge of the thread about when it is the best time to give
a chance for other threads to run would be a smarter decision.

-- Daniel


Re: [dpdk-dev] [PATCH] lib/librte_sched: Fix typos

2017-11-10 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pavel Shirshov
> Sent: Friday, November 10, 2017 8:20 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] lib/librte_sched: Fix typos
> 
> Signed-off-by: Pavel Shirshov 
> ---
>  lib/librte_sched/rte_red.h   | 4 ++--
>  lib/librte_sched/rte_sched.c | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
Acked-by: Cristian Dumitrescu 




Re: [dpdk-dev] [PATCH] lib/librte_pipeline: Fix a typo

2017-11-10 Thread Dumitrescu, Cristian


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Pavel Shirshov
> Sent: Friday, November 10, 2017 8:17 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] lib/librte_pipeline: Fix a typo
> 
> Signed-off-by: Pavel Shirshov 
> ---
>  lib/librte_pipeline/rte_pipeline.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/lib/librte_pipeline/rte_pipeline.h
> b/lib/librte_pipeline/rte_pipeline.h
> index f366348..fdc44a7 100644
> --- a/lib/librte_pipeline/rte_pipeline.h
> +++ b/lib/librte_pipeline/rte_pipeline.h
> @@ -483,7 +483,7 @@ int rte_pipeline_table_entry_delete(struct
> rte_pipeline *p,
>   * @param keys
>   *   Array containing table entry keys
>   * @param entries
> - *   Array containung new contents for every table entry identified by key
> + *   Array containing new contents for every table entry identified by key
>   * @param n_keys
>   *   Number of keys to add
>   * @param key_found
> --
> 2.7.4

Acked-by: Cristian Dumitrescu 



Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop

2017-11-10 Thread Bruce Richardson
On Fri, Nov 10, 2017 at 11:42:56AM +0100, Daniel Bristot de Oliveira wrote:
> 
> 
> On 11/10/2017 11:14 AM, Ananyev, Konstantin wrote:
> > Agree with Adrian here - the patch doesn't fix the problem in any case,
> 
> I would agree with you if it were possible to assume one can fully
> isolate a CPU on Linux... but it is not...
> 
> This:
> https://lwn.net/Articles/659490/
> 
> is still an open issue, and the reason why it is an open issue is the
> kernel threads that need to run on every CPU, mainly when using the
> PREEMPT_RT, which turns almost everything on threads.
> 
> > while introducing an unnecessary slowdown in testpmd iofwd mode.
> > Please think up some other approach.
> 
> The other approach is to increase the priority of all other threads that
> run on the isolate CPU. But that is not a good idea at all, as the other
> threads might preempt the busy-loop thread at the worst possible moment.
> 
> Using the knowledge of the thread about when it is the best time to give
> a chance for other threads to run would be a smarter decision.
> 
I don't like having this in the main loop either, and to echo others I
wouldn't have thought that testpmd was actually used as anything other
than a testing app. Also, I would have thought that running it at
realtime priority wouldn't be a good idea, because of exactly this
problem.

On the specifics of the solution, would using sched_yield() rather than
nanosleep not be more suitable, given that the reason for this sleep is
just to give the CPU to other threads?

/Bruce


Re: [dpdk-dev] [RFC] Compression API in DPDK

2017-11-10 Thread Verma, Shally


> -Original Message-
> From: Trahe, Fiona [mailto:fiona.tr...@intel.com]
> Sent: 07 November 2017 16:54
> To: Verma, Shally ; dev@dpdk.org
> Cc: Athreya, Narayana Prasad ;
> Challa, Mahipal ; Trahe, Fiona
> 
> Subject: RE: [dpdk-dev] [RFC] Compression API in DPDK
> 
> Hi Shally,
> 
> ///snip///
> > [Shally] Ok. Then, just to confirm my understanding here. You mean PMD
> can figure out amount of
> > available space in dst mbuf by calling rte_pktmbuf_data_len() on each of its
> segment?
> [Fiona] exactly.
> 
> ///snip///
> > > > > > > +  * This indicates the buffer size and should be
> > > > > > > +  * set a little larger than the expected max source buffer 
> > > > > > > size.
> > > > > > > +  * if the output of static compression doesn't fit in the
> > > > > > > +  * intermediate buffer dynamic compression may not be
> possible,
> > > > > > > +  * in this case the accelerator may revert back to static
> > > compression.
> > [Shally] > > > > +  * in this case the accelerator may revert back to 
> > static
> compression.> > > > > +  */
> > Can you elaborate more on this? This looks to me as decision made during
> enqueue_burst() processing.
> > If yes and If application has chosen specific Huffman code i.e.
> RTE_COMP_DYNAMIC or
> > RTE_COMP_FIXED in rte_comp_compress_xform, then how this would
> work?
> [Fiona] yes, it would have to revert back on the enqueue. The compressed
> data would still conform to deflate standard, so any decompressor would be
> able to inflate it. The ratio would not be as good as hoped for but it would 
> be
> the best the compression engine could do with the resources it has.
> 
[Shally] Ok. However, I'm not sure how to use intermediate bufs here, as they
are not a requirement for us for this purpose.
So, it looks like this is a very device-specific requirement that some devices
may not need. I would therefore suggest that the API should provide a way to
indicate whether it's a requirement for a specific device, so that the app can
supply it at config time. Maybe a feature flag or capability.

Thanks
Shally

> ///snip///
> > [Shally] Sure. So just to align here. Except few questions posted above on
> this RFC (such as Dynamic Vs
> > Static or dst mbuf parsing), following (and any other) will further be
> covered as part of 'RFC doc'
> > discussion
> > - Hash support
> > - RTE_COMPDEV_FF_MULTI_PKT_CHECKSUM
> [Fiona] Agreed.


Re: [dpdk-dev] [PATCH] app/testpmd: remove port status check from TM node add cli

2017-11-10 Thread Lu, Wenzhuo
Hi,

> -Original Message-
> From: Singh, Jasvinder
> Sent: Friday, November 10, 2017 6:31 PM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo ; Pei, Yulong
> ; Yigit, Ferruh 
> Subject: [PATCH] app/testpmd: remove port status check from TM node add
> cli
> 
> Currently, testpmd CLI doesn't permit to add leaf and non-leaf node when
> port is started. It doesn't work in case of i40e device as DCB configuration 
> is
> deleted when port is stopped. Therefore, removes the port status check
> before invoking leaf and nonleaf node API in the cli.
> If needed, device can add port status check at the driver layer.
> 
> Signed-off-by: Jasvinder Singh 
Acked-by: Wenzhuo Lu 


[dpdk-dev] [PATCH v1] doc: update release notes for 17.11

2017-11-10 Thread John McNamara
Fix grammar, spelling and formatting of DPDK 17.11 release notes.

Signed-off-by: John McNamara 
---
 doc/guides/rel_notes/release_17_11.rst | 327 +++--
 1 file changed, 152 insertions(+), 175 deletions(-)

diff --git a/doc/guides/rel_notes/release_17_11.rst 
b/doc/guides/rel_notes/release_17_11.rst
index e6e4407..b31b6bc 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -43,40 +43,39 @@ New Features
 
 * **Extended port_id range from uint8_t to uint16_t.**
 
-  Increased port_id range from 8 bits to 16 bits in order to support more than
-  256 ports in dpdk. All ethdev APIs which have port_id as parameter are 
changed
-  in the meantime.
+  Increased the ``port_id`` range from 8 bits to 16 bits in order to support
+  more than 256 ports in DPDK. All ethdev APIs which have ``port_id`` as
+  parameter have been changed.
 
 * **Modified the return type of rte_eth_stats_reset.**
 
-  Changed return type of ``rte_eth_stats_reset`` from ``void`` to ``int``
-  so the caller may know whether a device supports the operation or not
+  Changed return type of ``rte_eth_stats_reset`` from ``void`` to ``int`` so
+  that the caller can determine whether a device supports the operation or not
   and if the operation was carried out.
 
 * **Added a new driver for Marvell Armada 7k/8k devices.**
 
-  Added the new mrvl net driver for Marvell Armada 7k/8k devices. See the
-  "Network Interface Controller Drivers" document for more details on this new
-  driver.
+  Added the new ``mrvl`` net driver for Marvell Armada 7k/8k devices. See the
+  :doc:`../nics/mrvl` NIC guide for more details on this new driver.
 
 * **Updated mlx5 driver.**
 
   Updated the mlx5 driver including the following changes:
 
-   * Enabled PMD to run on top of upstream linux kernel and rdma-core libs.
- By that removed the dependency on specific Mellanox OFED libraries.
+   * Enabled the PMD to run on top of upstream Linux kernel and rdma-core
+ libs, removing the dependency on specific Mellanox OFED libraries.
* Improved PMD latency performance.
* Improved PMD memory footprint.
-   * Supported vectorized Rx/Tx burst for ARMv8.
-   * Supported secondary process.
-   * Supported flow counters.
-   * Supported Rx hardware timestamp offload.
-   * Supported device removal event.
+   * Added support for vectorized Rx/Tx burst for ARMv8.
+   * Added support for secondary process.
+   * Added support for flow counters.
+   * Added support for Rx hardware timestamp offload.
+   * Added support for device removal event.
 
 * **Added SoftNIC PMD.**
 
-  Added new SoftNIC PMD. This virtual device offers applications a software
-  fallback support for traffic management.
+  Added a new SoftNIC PMD. This virtual device provides applications with
+  software fallback support for traffic management.
 
 * **Added support for NXP DPAA Devices.**
 
@@ -86,24 +85,23 @@ New Features
   * DPAA Mempool driver for supporting offloaded packet memory pool
   * DPAA PMD for DPAA devices
 
-  See the "Network Interface Controller Drivers" document for more details of
-  this new driver.
+  See the :doc:`../nics/dpaa` document for more details of this new driver.
 
 * **Updated support for Cavium OCTEONTX Device.**
 
-  Updated support for Cavium's OCTEONTX device(CN83xx). This includes:
+  Updated support for Cavium's OCTEONTX device (CN83xx). This includes:
 
   * OCTEONTX Mempool driver for supporting offloaded packet memory pool
   * OCTEONTX Ethdev PMD
   * OCTEONTX Eventdev-Ethdev Rx adapter
 
-  See the "Network Interface Controller Drivers" document for more details of
-  this new driver.
+  See the :doc:`../nics/octeontx` document for more details of this new driver.
 
-* **nfp: Added PF support.**
+* **Added PF support to the Netronome NFP PMD.**
 
-  Previously Netronome's NFP PMD had just support for VFs. PF support is
-  just as a basic DPDK port and has no VF management yet.
+  Added PF support to the Netronome NFP PMD. Previously the NFP PMD only
+  supported VFs. PF support is just as a basic DPDK port and has no VF
+  management yet.
 
   PF support comes with firmware upload support which allows the PMD to
   independently work from kernel netdev NFP drivers.
@@ -117,113 +115,115 @@ New Features
* Support for Flow API
* Support for Tx and Rx descriptor status functions
 
-* **Add bus agnostic functions to cryptodev for PMD initialisation**
+* **Added bus agnostic functions to cryptodev for PMD initialization**
 
-  Adds new PMD assist functions ``rte_cryptodev_pmd_parse_input_args()``,
-  ``rte_cryptodev_pmd_create()`` and ``rte_cryptodev_pmd_destroy()`` which
-  are bus independent for driver to manage creation and destruction of new
-  device instances.
+  Added new PMD assist, bus independent, functions
+  ``rte_cryptodev_pmd_parse_input_args()``, ``rte_cryptodev_pmd_create()`` and
+  ``rte_cryptodev_pmd_destroy()`` for drivers to manage creation and
+  de

[dpdk-dev] NXP's roadmap for DPDK 18.02

2017-11-10 Thread Hemant Agrawal
NXP is planning to introduce the following features/changes in the DPDK 18.02 release:

Introduce raw-device support: A generic device library for representing
accelerators and IP blocks and exposing some generic APIs. This can enable
various other non-standard devices to be deployed over the DPDK framework. NXP
plans to use it for several of its accelerator devices.

Crypto eventdev support: Introduce Crypto PMD (dpaa and dpaa2)  with crypto 
adapter implementation for eventdev.

Mempool enhancements to combine dpaa and dpaa2 configs into one.

Ethdev : add support to check optimal driver packet burst size capability.

IOVA mode : enable virtual address support in DPAA2 PMDs

DPAA SEC : add RTE Security Protocol offload support

DPAA2 eventdev :  add support for ordered and atomic queues.

DPAA eventdev: introduce eventdev driver with ethernet adapter implementation. 


Re: [dpdk-dev] [PATCH] testpmd: add nanosleep in main loop

2017-11-10 Thread Luiz Capitulino
On Fri, 10 Nov 2017 11:14:51 +
Bruce Richardson  wrote:

> On Fri, Nov 10, 2017 at 11:42:56AM +0100, Daniel Bristot de Oliveira wrote:
> > 
> > 
> > On 11/10/2017 11:14 AM, Ananyev, Konstantin wrote:  
> > > Agree with Adrian here - the patch doesn't fix the problem in any case,  
> > 
> > I would agree with you if it were possible to assume one can fully
> > isolate a CPU on Linux... but it is not...
> > 
> > This:
> > https://lwn.net/Articles/659490/
> > 
> > is still an open issue, and the reason why it is an open issue is the
> > kernel threads that need to run on every CPU, mainly when using the
> > PREEMPT_RT, which turns almost everything on threads.
> >   
> > > while introducing an unnecessary slowdown in testpmd iofwd mode.
> > > Please think up some other approach.  
> > 
> > The other approach is to increase the priority of all other threads that
> > run on the isolate CPU. But that is not a good idea at all, as the other
> > threads might preempt the busy-loop thread at the worst possible moment.
> > 
> > Using the knowledge of the thread about when it is the best time to give
> > a chance for other threads to run would be a smarter decision.
> >   
> I don't like having this in the main loop either, and to echo others I
> wouldn't have thought that testpmd was actually used as anything other
> than a testing app. 

That's why we're patching it. We want to be aware of the implications.
If it's not good for testpmd, it may not be good for production either.


[dpdk-dev] [PATCH 1/4] pdump: fix possible mbuf leak

2017-11-10 Thread Ilya V. Matveychikov
If pdump_pktmbuf_copy_data() fails, a segment can be leaked, because
rte_pktmbuf_free() only frees the m_dup chain and not the seg that was
just allocated but not yet chained.

Fixes: 278f9454 ("pdump: add new library for packet capture")
Signed-off-by: Ilya V. Matveychikov 
---
 lib/librte_pdump/rte_pdump.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index 729e79a..1ca709d 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -153,6 +153,7 @@ pdump_pktmbuf_copy(struct rte_mbuf *m, struct rte_mempool 
*mp)
do {
nseg++;
if (pdump_pktmbuf_copy_data(seg, m) < 0) {
+   if (seg != m_dup) rte_pktmbuf_free(seg);
rte_pktmbuf_free(m_dup);
return NULL;
}
-- 
2.7.4



[dpdk-dev] [PATCH] pdump: fix possible mbuf leak

2017-11-10 Thread Ilya V. Matveychikov
If pdump_pktmbuf_copy_data() fails, a segment can be leaked, because
rte_pktmbuf_free() only frees the m_dup chain and not the seg that was
just allocated but not yet chained.

Fixes: 278f945402c5 ("pdump: add new library for packet capture")
Cc: reshma.pat...@intel.com

Signed-off-by: Ilya V. Matveychikov 
---
 lib/librte_pdump/rte_pdump.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index e6182d35c..fec49b525 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -153,6 +153,8 @@ pdump_pktmbuf_copy(struct rte_mbuf *m, struct rte_mempool 
*mp)
do {
nseg++;
if (pdump_pktmbuf_copy_data(seg, m) < 0) {
+   if (seg != m_dup)
+   rte_pktmbuf_free_seg(seg);
rte_pktmbuf_free(m_dup);
return NULL;
}
-- 
2.14.2



[dpdk-dev] [PATCH] mbuf: fix for incomplete nb_segs types change

2017-11-10 Thread Ilya V. Matveychikov
Update types of variables to correspond to nb_segs type change from
uint8_t to uint16_t.

Fixes: 97cb466d65c9 ("mbuf: use 2 bytes for port and nb segments")
Cc: olivier.m...@6wind.com

Signed-off-by: Ilya V. Matveychikov 
---
 lib/librte_mbuf/rte_mbuf.c   |  4 ++--
 lib/librte_mbuf/rte_mbuf.h   | 11 +++
 lib/librte_pdump/rte_pdump.c |  2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 2e08b9e9c..7543662f7 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -203,7 +203,7 @@ void
 rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header)
 {
const struct rte_mbuf *m_seg;
-   unsigned nb_segs;
+   unsigned int nb_segs;
 
if (m == NULL)
rte_panic("mbuf is NULL\n");
@@ -239,7 +239,7 @@ void
 rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len)
 {
unsigned int len;
-   unsigned nb_segs;
+   unsigned int nb_segs;
 
__rte_mbuf_sanity_check(m, 1);
 
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 6d91f7d38..c9201561d 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -584,6 +584,9 @@ struct rte_mbuf {
 
 } __rte_cache_aligned;
 
+/**< Maximum number of @nb_segs allowed. */
+#define RTE_MBUF_MAX_NB_SEGS   UINT16_MAX
+
 /**
  * Prefetch the first part of the mbuf
  *
@@ -1447,7 +1450,7 @@ static inline struct rte_mbuf *rte_pktmbuf_clone(struct 
rte_mbuf *md,
 {
struct rte_mbuf *mc, *mi, **prev;
uint32_t pktlen;
-   uint8_t nseg;
+   uint16_t nseg;
 
if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL))
return NULL;
@@ -1807,14 +1810,14 @@ static inline const void *rte_pktmbuf_read(const struct 
rte_mbuf *m,
  *
  * @return
  *   - 0, on success.
- *   - -EOVERFLOW, if the chain is full (256 entries)
+ *   - -EOVERFLOW, if the chain segment limit exceeded
  */
 static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf 
*tail)
 {
struct rte_mbuf *cur_tail;
 
/* Check for number-of-segments-overflow */
-   if (head->nb_segs + tail->nb_segs >= 1 << (sizeof(head->nb_segs) * 8))
+   if (head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS)
return -EOVERFLOW;
 
/* Chain 'tail' onto the old tail */
@@ -1822,7 +1825,7 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf 
*head, struct rte_mbuf *tail
cur_tail->next = tail;
 
/* accumulate number of segments and total length. */
-   head->nb_segs = (uint8_t)(head->nb_segs + tail->nb_segs);
+   head->nb_segs += tail->nb_segs;
head->pkt_len += tail->pkt_len;
 
/* pkt_len is only set in the head */
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index fec49b525..456513573 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -139,7 +139,7 @@ pdump_pktmbuf_copy(struct rte_mbuf *m, struct rte_mempool 
*mp)
 {
struct rte_mbuf *m_dup, *seg, **prev;
uint32_t pktlen;
-   uint8_t nseg;
+   uint16_t nseg;
 
m_dup = rte_pktmbuf_alloc(mp);
if (unlikely(m_dup == NULL))
-- 
2.14.2



[dpdk-dev] [PATCH] examples: fix for incomplete nb_segs type change

2017-11-10 Thread Ilya V. Matveychikov
Fixes: 97cb466d65c9 ("mbuf: use 2 bytes for port and nb segments")
Cc: olivier.m...@6wind.com

Signed-off-by: Ilya V. Matveychikov 
---
 doc/guides/sample_app_ug/ipv4_multicast.rst | 2 +-
 examples/ipv4_multicast/main.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/guides/sample_app_ug/ipv4_multicast.rst 
b/doc/guides/sample_app_ug/ipv4_multicast.rst
index fd1af0066..7a8e7ebce 100644
--- a/doc/guides/sample_app_ug/ipv4_multicast.rst
+++ b/doc/guides/sample_app_ug/ipv4_multicast.rst
@@ -339,7 +339,7 @@ It is the mcast_out_pkt() function that performs the packet 
duplication (either
 /* update header's fields */
 
 hdr->pkt.pkt_len = (uint16_t)(hdr->pkt.data_len + pkt->pkt.pkt_len);
-hdr->pkt.nb_segs = (uint8_t)(pkt->pkt.nb_segs + 1);
+hdr->pkt.nb_segs = pkt->pkt.nb_segs + 1;
 
 /* copy metadata from source packet */
 
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
index 83ac0d808..1c5851654 100644
--- a/examples/ipv4_multicast/main.c
+++ b/examples/ipv4_multicast/main.c
@@ -289,7 +289,7 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
 
/* update header's fields */
hdr->pkt_len = (uint16_t)(hdr->data_len + pkt->pkt_len);
-   hdr->nb_segs = (uint8_t)(pkt->nb_segs + 1);
+   hdr->nb_segs = pkt->nb_segs + 1;
 
/* copy metadata from source packet*/
hdr->port = pkt->port;
-- 
2.14.2



[dpdk-dev] [PATCH] net/mlx5: fix use of bit numbers instead of masks

2017-11-10 Thread Edward Makarov
The constant ETHTOOL_LINK_MODE_1000baseT_Full_BIT and the others like
that in mlx5_link_update_unlocked_gs must be bit masks but unfortunately
they are bit numbers. This commit fixes the issue.

Fixes: 188408719888 ("net/mlx5: fix support for newer link speeds")
Cc: nelio.laranje...@6wind.com
Cc: sta...@dpdk.org

Signed-off-by: Edward Makarov 
---
 drivers/net/mlx5/mlx5_ethdev.c | 50 +-
 drivers/net/mlx5/mlx5_utils.h  |  4 
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index c31ea4b62..a3cef6891 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -865,39 +865,39 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int 
wait_to_complete)
sc = ecmd->link_mode_masks[0] |
((uint64_t)ecmd->link_mode_masks[1] << 32);
priv->link_speed_capa = 0;
-   if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT)
+   if (sc & MLX5_BITSHIFT(ETHTOOL_LINK_MODE_Autoneg_BIT))
priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
-   if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT |
- ETHTOOL_LINK_MODE_1000baseKX_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_1G;
-   if (sc & (ETHTOOL_LINK_MODE_1baseKX4_Full_BIT |
- ETHTOOL_LINK_MODE_1baseKR_Full_BIT |
- ETHTOOL_LINK_MODE_1baseR_FEC_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1baseKX4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1baseKR_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1baseR_FEC_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_10G;
-   if (sc & (ETHTOOL_LINK_MODE_2baseMLD2_Full_BIT |
- ETHTOOL_LINK_MODE_2baseKR2_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_2baseMLD2_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_2baseKR2_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_20G;
-   if (sc & (ETHTOOL_LINK_MODE_4baseKR4_Full_BIT |
- ETHTOOL_LINK_MODE_4baseCR4_Full_BIT |
- ETHTOOL_LINK_MODE_4baseSR4_Full_BIT |
- ETHTOOL_LINK_MODE_4baseLR4_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_4baseKR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_4baseCR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_4baseSR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_4baseLR4_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_40G;
-   if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT |
- ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT |
- ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT |
- ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_56G;
-   if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT |
- ETHTOOL_LINK_MODE_25000baseKR_Full_BIT |
- ETHTOOL_LINK_MODE_25000baseSR_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_25G;
-   if (sc & (ETHTOOL_LINK_MODE_5baseCR2_Full_BIT |
- ETHTOOL_LINK_MODE_5baseKR2_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_5baseCR2_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_5baseKR2_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_50G;
-   if (sc & (ETHTOOL_LINK_MODE_10baseKR4_Full_BIT |
- ETHTOOL_LINK_MODE_10baseSR4_Full_BIT |
- ETHTOOL_LINK_MODE_10baseCR4_Full_BIT |
- ETHTOOL_LINK_MODE_10baseLR4_ER4_Full_BIT))
+   if (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10baseKR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10baseSR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10baseCR4_Full_BIT) |
+ MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10baseLR4_ER4_Full_BIT)))
priv->link_speed_capa |= ETH_LINK_SPEED_100G;
dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
diff --git a/dr

Re: [dpdk-dev] [PATCH] net/mlx5: fix use of bit numbers instead of masks

2017-11-10 Thread Nelio Laranjeiro
On Fri, Nov 10, 2017 at 02:18:04PM +, Edward Makarov wrote:
> The constant ETHTOOL_LINK_MODE_1000baseT_Full_BIT and the others like
> that in mlx5_link_update_unlocked_gs must be bit masks but unfortunately
> they are bit numbers. This commit fixes the issue.
> 
> Fixes: 188408719888 ("net/mlx5: fix support for newer link speeds")
> Cc: nelio.laranje...@6wind.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Edward Makarov 

Acked-by: Nelio Laranjeiro 

Indeed, nice catch.

Thanks,

-- 
Nélio Laranjeiro
6WIND


[dpdk-dev] RSS when doing VF to VF on 82599 NICs

2017-11-10 Thread Olivier MATZ
Hi,

I have a setup where a host and a guest communicates via 2 VFs (Intel
Niantic):

++
|  VM|
| +---+  |
| |   |  |
| |   v   testpmd|
| icmpecho mode  |
|+-+  (replies to ping)  |
||VF1.2| |
|| | |
|+-+ |
||
| ^   |  |
+---  | - | -+   +-+
| |   vHOST  |   | |
|  . . . . . . . . . . . . . . . . . . . |   | traffic |
|  . +-+   . |   | generator   |
|  . |VF1.1|  < <-   +---+  <--  | |
|  . | | dpdk l3fwd  |PF0|   | send pings  |
|  . +-+  > ->   |   |  -->  | at high rate|
|  . +---+   | |
|  . . . . . . . . . . . . . . . . . . . |   | count   |
|   +--+ |   | received|
|   | PF1  | |   | responses   |
+---+--+-+   +-+
   unused


VF1.1 and VF1.2 are associated to PF1.

The dpdk application in the host manages PF0 and VF1.1, while PF1
is managed by the Linux kernel.

This test works, but we noticed that RSS is not working: only one queue
receives packets on VF1.1 or VF1.2.


Our understanding of the Intel 82599 datasheets is that RSS should work
in VFs, the limitation is that there is only one shared RETA table and
RSS hash/key for PF and VFs.

We didn't find anything saying that RSS does not work when doing VF to
VF. Is it supposed to work? If yes, is there anything specific to do to
enable it?

Thanks,
Olivier


Re: [dpdk-dev] [PATCH] net/mlx4: fix last Tx wqe stamping lack

2017-11-10 Thread Adrien Mazarguil
On Fri, Nov 10, 2017 at 08:27:18AM +, Matan Azrad wrote:
> When Tx packet HW processing is done, SW should stamp all the completion
> burst WQEs.
> 
> Stamp missed last completion burst WQE.
> 
> Fixes: c3c977bbecbd ("net/mlx4: add Tx bypassing Verbs")
> 
> Signed-off-by: Matan Azrad 

This reads like you were in a hurry :)

Took me a while to understand the problem and how you addressed it. So in
short, wqe_index is consumed but its TXBBs aren't stamped because the loop
stops at its index without processing it.

Patch looks good but could have been simpler by directly initializing
nr_txbbs to sq->tail, not use sq->tail as an offset afterward and get rid of
sq_tail. It's OK as this wouldn't have resulted in a smaller patch anyway.

Commit log rewording suggestion:

 net/mlx4: fix missing stamp during Tx completion

 After processing completed packets, the owner bit of each TXBB comprised
 in its WQEs must be invalidated. The loop stops short of processing the
 last WQE.

Other than that,

Acked-by: Adrien Mazarguil 

> ---
>  drivers/net/mlx4/mlx4_rxtx.c | 13 +++--
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> I think this is a critical bug fix that should be added to 17.11 version.
> No performance impact was seen.
> 
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index 3985e06..44edeac 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -336,6 +336,7 @@ struct pv {
>  {
>   unsigned int elts_comp = txq->elts_comp;
>   unsigned int elts_tail = txq->elts_tail;
> + unsigned int sq_tail = sq->tail;
>   struct mlx4_cq *cq = &txq->mcq;
>   volatile struct mlx4_cqe *cqe;
>   uint32_t cons_index = cq->cons_index;
> @@ -372,13 +373,13 @@ struct pv {
>   rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask;
>   do {
>   /* Free next descriptor. */
> - nr_txbbs +=
> + sq_tail += nr_txbbs;
> + nr_txbbs =
>   mlx4_txq_stamp_freed_wqe(sq,
> -  (sq->tail + nr_txbbs) & sq->txbb_cnt_mask,
> -  !!((sq->tail + nr_txbbs) & sq->txbb_cnt));
> +  sq_tail & sq->txbb_cnt_mask,
> +  !!(sq_tail & sq->txbb_cnt));
>   pkts++;
> - } while (((sq->tail + nr_txbbs) & sq->txbb_cnt_mask) !=
> -  new_index);
> + } while ((sq_tail & sq->txbb_cnt_mask) != new_index);
>   cons_index++;
>   } while (1);
>   if (unlikely(pkts == 0))
> @@ -386,7 +387,7 @@ struct pv {
>   /* Update CQ. */
>   cq->cons_index = cons_index;
>   *cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK);
> - sq->tail = sq->tail + nr_txbbs;
> + sq->tail = sq_tail + nr_txbbs;
>   /* Update the list of packets posted for transmission. */
>   elts_comp -= pkts;
>   assert(elts_comp <= txq->elts_comp);

-- 
Adrien Mazarguil
6WIND


Re: [dpdk-dev] rte_eth_bond 8023ad dedicated queues with i40e with vectorized rx does not work

2017-11-10 Thread Doherty, Declan

On 08/11/2017 7:21 PM, Kyle Larose wrote:

Hello,

I've been doing some testing using the 8023ad link bonding driver on a system 
with 4 10G i40e interfaces in the link bond. It's working fine, except that 
when any of the links are overloaded, it starts dropping the LACPDUs, which is 
rather unfortunate for many reasons.

While thinking about that problem, I noticed that the driver provides the 
ability to allocate dedicated queues for rx and tx of LACPDUs. This is great! 
Solves my problem (sort of - I'll send another email about that later)... Or so 
I thought. After enabling the dedicated queues, I noticed  a few things:
1. The link bond never started distributing
2. The slave interfaces started dropping frames on their dedicated 
control queues after some time
3. The connected interfaces reported both sending and receiving LACP 
PDUs.

After digging in to this, I found out that the call to rte_eth_rx_burst was 
returning 0 packets, despite there being many in the queue. It turns out that 
the i40e was using one of the vectorized rx_burst functions, which require that 
the user poll for more than 1 packet at a time. bond_mode_8023ad_periodic_cb 
was polling for exactly one.

I changed the code to read up to 16 at a time, and everything started working. 
I'm not sure this is the right fix, though, since the normal behaviour of 
processing one packet at a time maintains some hold offs/etc that may be nice, 
and I don't want to discard any packets past the first one.

Does anyone have some thoughts/comments on this? I can submit a patch with my 
current workaround, if desired.

Thanks,

Kyle



Hey Kyle,

I think this fix is fine, as it is possible that there would be more than 
one packet on the queue, with both control packets and marker packets 
possible.


Please send a patch so we can review and try out.

Thanks
Declan


Re: [dpdk-dev] rte_eth_bond 8023ad behaviour under congestion

2017-11-10 Thread Doherty, Declan

On 08/11/2017 7:33 PM, Kyle Larose wrote:

Hello,

I've been doing some testing using the 8023ad link bonding driver on a system 
with 4 10G i40e interfaces in the link bond. One thing I've noticed is that if 
any of the links are overloaded when I don't have dedicated control queues 
enabled, it starts dropping LACPDUs on transmit. I quickly realized that it's 
because of the following code in bond_ethdev_tx_burst_8023ad:



num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
slave_bufs[i], slave_nb_pkts[i]);

/* If tx burst fails drop slow packets */
for ( ; num_tx_slave < slave_slow_nb_pkts[i]; num_tx_slave++)
rte_pktmbuf_free(slave_bufs[i][num_tx_slave]);

This chunk of code basically treats the LACPDUs at a very low priority, since 
they are generated infrequently. I'd like to ensure that LACPDUs are 
transmitted when there's congestion in the case where dedicated queues are not 
supported.

I can think of a few options to resolve this:
  1) Store the LACPDUS for later sending in a per-slave buffer if tx fails, and 
make sure these are always at the front of the send buffer, so that when 
there's room, they're sent (I'm not quite sure what the best way to do this is).


Yes, this sounds like a good idea. Ideally we would use the same buffers 
which are used to pass the LACPDUs from the state machines to the slaves, 
but add the packet back to the head of the buffer. As the LACPDUs are 
generated at such a slow rate we could probably just re-enqueue to the 
existing ring we have today, if it was configured as multi-producer.



  2) Allow enabling the dedicated tx queue without enabling the dedicated rx 
queue.

I think both 1 & 2 are good solutions on their own, and should probably both be 
implemented. #2 is ideal, but doesn't cover all cases (like if there are 
insufficient tx queues to dedicate one to this).

How do people feel about these proposals?



I don't have any problems with independent enablement of the dedicated 
tx/rx queues, and it should be pretty straight forward to do, as I think 
they are pretty decoupled in the implementation but unfortunately it 
will require some new public APIs or breaking of the existing API, a new 
API isn't a big issue. I do think it's likely in most cases that both 
tx/rx dedicated queues would be either both enabled or both disabled?



Note: I understand that this is not ideal at all, since the lack of a dedicated 
rx queue means that lacpdus could drop on rx. But, in my use-case that's less 
likely than link congestion, so I'd like to at least be resilient here.

Thanks,

Kyle
  




Thanks
Declan


Re: [dpdk-dev] [PATCH] doc: update deprecation of ethdev offload API

2017-11-10 Thread Ananyev, Konstantin


> -Original Message-
> From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Shahaf Shuler
> Sent: Tuesday, October 17, 2017 3:24 PM
> To: nhor...@tuxdriver.com; tho...@monjalon.net
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] doc: update deprecation of ethdev offload API
> 
> Update deprecation notice for the new ethdev offloads API.
> Deprecation of the old offloads API is set to 18.05.
> 
> Signed-off-by: Shahaf Shuler 
> ---
>  doc/guides/rel_notes/deprecation.rst | 14 --
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/deprecation.rst 
> b/doc/guides/rel_notes/deprecation.rst
> index 52058f580..deb546a67 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -41,12 +41,14 @@ Deprecation Notices
>PKT_RX_QINQ_STRIPPED, that are better described. The old flags and
>their behavior will be kept until 17.08 and will be removed in 17.11.
> 
> -* ethdev: Tx offloads will no longer be enabled by default in 17.11.
> -  Instead, the ``rte_eth_txmode`` structure will be extended with
> -  bit field to enable each Tx offload.
> -  Besides of making the Rx/Tx configuration API more consistent for the
> -  application, PMDs will be able to provide a better out of the box 
> performance.
> -  As part of the work, ``ETH_TXQ_FLAGS_NO*`` will be superseded as well.
> +* ethdev: a new Tx and Rx offload API was introduced on 17.11.
> +  In the new API, offloads are divided into per-port and per-queue offloads.
> +  Offloads are disabled by default and enabled per application request.
> +  The old offloads API is target to be deprecated on 18.05. This includes:
> +
> +  - removal of ``ETH_TXQ_FLAGS_NO*`` flags.
> +  - removal of ``txq_flags`` field from ``rte_eth_txconf`` struct.
> +  - removal of the offloads bit-field from ``rte_eth_rxmode`` struct.
> 
>  * ethdev: the legacy filter API, including
>``rte_eth_dev_filter_supported()``, ``rte_eth_dev_filter_ctrl()`` as well
> --

Acked-by: Konstantin Ananyev 
> 2.12.0



Re: [dpdk-dev] [PATCH v5 1/3] doc: add platform guide

2017-11-10 Thread Mcnamara, John


> -Original Message-
> From: Santosh Shukla [mailto:santosh.shu...@caviumnetworks.com]
> Sent: Tuesday, November 7, 2017 7:00 AM
> To: dev@dpdk.org
> Cc: Mcnamara, John ; tho...@monjalon.net;
> jerin.ja...@caviumnetworks.com; Santosh Shukla
> 
> Subject: [PATCH v5 1/3] doc: add platform guide
> 
> This commit adds a section to the docs listing the platform guide for the
> PMDs.
> 
> It then adds the octeontx platform guide to the listed platform devices.
> 
> Patch also removes platform specific duplicate setup information from
> eventdev/octeontx.rst, nics/octeontx.rst and update to
> platform/octeontx.rst.

Acked-by: John McNamara 




Re: [dpdk-dev] [PATCH v5 2/3] doc: add mempool and octeontx mempool device

2017-11-10 Thread Mcnamara, John


> -Original Message-
> From: Santosh Shukla [mailto:santosh.shu...@caviumnetworks.com]
> Sent: Tuesday, November 7, 2017 7:00 AM
> To: dev@dpdk.org
> Cc: Mcnamara, John ; tho...@monjalon.net;
> jerin.ja...@caviumnetworks.com; Santosh Shukla
> 
> Subject: [PATCH v5 2/3] doc: add mempool and octeontx mempool device
> 
> This commit adds a section to the docs listing the mempool device PMDs
> available.
> 
> It then adds the octeontx fpavf mempool PMD to the listed mempool devices.

Acked-by: John McNamara 




Re: [dpdk-dev] [PATCH v5 3/3] doc: use correct mempool ops handle name

2017-11-10 Thread Mcnamara, John


> -Original Message-
> From: Santosh Shukla [mailto:santosh.shu...@caviumnetworks.com]
> Sent: Tuesday, November 7, 2017 7:00 AM
> To: dev@dpdk.org
> Cc: Mcnamara, John ; tho...@monjalon.net;
> jerin.ja...@caviumnetworks.com; Santosh Shukla
> 
> Subject: [PATCH v5 3/3] doc: use correct mempool ops handle name
> 
> Fixes: f820b5896631 ("doc: add octeontx ethdev driver documentation")
> 
> Signed-off-by: Santosh Shukla 
> Acked-by: Jerin Jacob 
> Acked-by: John McNamara 


Acked-by: John McNamara 




Re: [dpdk-dev] [PATCH] net/mlx5: fix number of segment calculation

2017-11-10 Thread Yongseok Koh
On Fri, Nov 10, 2017 at 11:06:25AM +0100, Adrien Mazarguil wrote:
> Hi Ori,
> 
> On Thu, Nov 09, 2017 at 06:04:32PM +0200, Ori Kam wrote:
> > The CRC size should be taken into consideration when computing
> > the number of mbuf segments for packet on the receive path.
> > Large packets can be dropped due to extra CRC length.
> > 
> > Fixes: a1366b1a2be3 ("net/mlx5: add reference counter on DPDK Rx queues")
> > Cc: sta...@dpdk.org
> > Cc: nelio.laranje...@6wind.com
> > 
> > Signed-off-by: Ori Kam 
> 
> I don't think there's an issue to fix, there's actually a reason it's done
> that way, perhaps I'm wrong but let me elaborate.
> 
> When applications request CRC to be written to mbuf (more precisely not to
> be stripped), its extra 4 bytes are neither part of mbuf->pkt_len nor
> mbuf->data_len. It just happens to be written past mbuf data if there's room
> for it, where applications knowingly expect it based on how they configured
> the PMD. That's the API.
>
> This implies applications also size mbufs accordingly; if they don't provide
> room for the CRC, it can't be written. This extra room is assumed to be part
> of max_rx_pkt_len. When CRC stripping is requested, they do not have to
> provide such room (IBV_WQ_FLAGS_SCATTER_FCS is not set on mlx5 Rx queues).

I looked around other driver/example codes as it is not documented (or too
obvious to do?), it looks like there's consensus that max_rx_pkt_len includes 4B FCS.
Then, I agree that PMD doesn't need to care about this.

> One problem with your proposal is assuming all segments are consumed
> entirely during Rx and max_rx_pkt_len is reached, another segment with zero
> data length gets appended just to hold the CRC. Applications may interpret
> this as a bug.

I don't think this patch causes the issue. It just unnecessarily reserves extra
4B room if CRC strip is disabled. And even apps should not interpret this as a
bug because apps requested to have CRC.
Currently mlx5_rx_burst() doesn't allow this situation (putting only 4B CRC in
the last segment) because it subtracts ETHER_CRC_LEN from pkt_len if CRC isn't
stripped. And it is done before looking for the next segment. I think this is a
problem to fix on the contrary - app wanted to see CRC but it's not there.
Right?

Thanks,
Yongseok


Re: [dpdk-dev] [PATCH] net/mlx5: fix number of segment calculation

2017-11-10 Thread Yongseok Koh
On Fri, Nov 10, 2017 at 11:22:06AM +0100, Adrien Mazarguil wrote:
> Hi Yongseok,
> 
> On Thu, Nov 09, 2017 at 02:30:30PM -0800, Yongseok Koh wrote:
> > On Thu, Nov 09, 2017 at 06:04:32PM +0200, Ori Kam wrote:
> > > The CRC size should be taken into consideration when computing
> > > the number of mbuf segments for packet on the receive path.
> > > Large packets can be dropped due to extra CRC length.
> > > 
> > > Fixes: a1366b1a2be3 ("net/mlx5: add reference counter on DPDK Rx queues")
> > > Cc: sta...@dpdk.org
> > > Cc: nelio.laranje...@6wind.com
> > > 
> > > Signed-off-by: Ori Kam 
> > > ---
> > >  drivers/net/mlx5/mlx5_rxq.c |7 +--
> > >  1 files changed, 5 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> > > index 6b29aae..701925b 100644
> > > --- a/drivers/net/mlx5/mlx5_rxq.c
> > > +++ b/drivers/net/mlx5/mlx5_rxq.c
> > > @@ -887,6 +887,8 @@ struct mlx5_rxq_ctrl*
> > >   const uint16_t desc_n =
> > >   desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
> > >   unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
> > > + uint8_t crc_size =
> > > + !!(dev->data->dev_conf.rxmode.hw_strip_crc == 0) << 2;
> > 
> > How about making it more explicit with ETHER_CRC_LEN? E.g.
> > uint8_t crc_size = ETHER_CRC_LEN * 
> >(dev->data->dev_conf.rxmode.hw_strip_crc == 0);
> > 
> > >  
> > >   tmpl = rte_calloc_socket("RXQ", 1,
> > >sizeof(*tmpl) +
> > > @@ -900,12 +902,13 @@ struct mlx5_rxq_ctrl*
> > >   /* Enable scattered packets support for this queue if necessary. */
> > >   assert(mb_len >= RTE_PKTMBUF_HEADROOM);
> > 
> > You might want to make the same change for this assert?
> > 
> > >   if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
> > > - (mb_len - RTE_PKTMBUF_HEADROOM)) {
> > > + (mb_len - RTE_PKTMBUF_HEADROOM - crc_size)) {
> > >   tmpl->rxq.sges_n = 0;
> > >   } else if (dev->data->dev_conf.rxmode.enable_scatter) {
> > >   unsigned int size =
> > >   RTE_PKTMBUF_HEADROOM +
> > > - dev->data->dev_conf.rxmode.max_rx_pkt_len;
> > > + dev->data->dev_conf.rxmode.max_rx_pkt_len +
> > > + crc_size;
> > 
> > > I think there's another bug we didn't know about. If scatter is required,
> > RTE_PKTMBUF_HEADROOM is also reserved per every chained mbufs. So, it looks 
> > like
> > mb_len should be "rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM" 
> > when it
> > is declared in the beginning. Make sense?
> 
> RTE_PKTMBUF_HEADROOM is actually only reserved on the first segment,
> i.e. once per mbuf chain, it should be fine.

Right, I got confused with Tx. mlx5's Rx overwrites m->data_offset when it
injects mbufs for extra segments.

> > >   /*
> > >* Determine the number of SGEs needed for a full packet
> > >* and round it to the next power of two.
> > >*/
> > >   sges_n = log2above((size / mb_len) + !!(size % mb_len));
> > >   tmpl->rxq.sges_n = sges_n;
> > 
> > rxq.sges_n is 2bits, which means the max value is 3. So, if sges_n is larger
> > than 3, it would just take the last 2bits and it will result in false error
> > below. As we can't use sizeof() for bit-fields, this should be changed like:
> 
> The name is perhaps confusing, sges_n is documented as a log 2 value, 1 << 3
> means 8 segments at most. Assuming default mbuf size, this allows up to
> 17280 bytes per packet excluding headroom.
> 
> You're right exceeding 3 will remove the extra bits and since sizeof() can't
> be used, that's precisely the reason for the subsequent check, which makes
> sure the stored value is enough for a max_rx_pkt_len-sized packet after
> converting it back to a number of bytes.

The name wasn't confusing, I wanted to make it clearer as I thought it could
have some false negatives. But, I misread something. The sanity check can
correctly filter those cases. No bug here!

Thanks,
Yongseok