[dpdk-dev] [PATCH v2 1/3] ether: Add API to support setting TX rate for queue and VF

2014-05-26 Thread Ouyang Changchun
This patch adds API to support setting TX rate for a queue and a VF.

Signed-off-by: Ouyang Changchun 
---
 lib/librte_ether/rte_ethdev.c | 71 +++
 lib/librte_ether/rte_ethdev.h | 51 +++
 2 files changed, 122 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index a5727dd..1ea61e1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1913,6 +1913,77 @@ rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t 
vlan_id,
vf_mask,vlan_on);
 }

+int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+   uint16_t tx_rate)
+{
+   struct rte_eth_dev *dev;
+   struct rte_eth_dev_info dev_info;
+   struct rte_eth_link link;
+
+   if (port_id >= nb_ports) {
+   PMD_DEBUG_TRACE("set queue rate limit:invalid port id=%d\n",
+   port_id);
+   return -ENODEV;
+   }
+
+   dev = &rte_eth_devices[port_id];
+   rte_eth_dev_info_get(port_id, &dev_info);
+   link = dev->data->dev_link;
+
+   if (queue_idx >= dev_info.max_tx_queues) {
+   PMD_DEBUG_TRACE("set queue rate limit:port %d: "
+   "invalid queue id=%d\n", port_id, queue_idx);
+   return -EINVAL;
+   }
+
+   if (tx_rate > link.link_speed) {
+   PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, "
+   "bigger than link speed= %d\n",
+   tx_rate, link.link_speed);
+   return -EINVAL;
+   }
+
+   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP);
+   return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate);
+}
+
+int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate,
+   uint64_t q_msk)
+{
+   struct rte_eth_dev *dev;
+   struct rte_eth_dev_info dev_info;
+   struct rte_eth_link link;
+
+   if (q_msk == 0)
+   return 0;
+
+   if (port_id >= nb_ports) {
+   PMD_DEBUG_TRACE("set VF rate limit:invalid port id=%d\n",
+   port_id);
+   return -ENODEV;
+   }
+
+   dev = &rte_eth_devices[port_id];
+   rte_eth_dev_info_get(port_id, &dev_info);
+   link = dev->data->dev_link;
+
+   if (vf >= dev_info.max_vfs) {
+   PMD_DEBUG_TRACE("set VF rate limit:port %d: "
+   "invalid vf id=%d\n", port_id, vf);
+   return -EINVAL;
+   }
+
+   if (tx_rate > link.link_speed) {
+   PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, "
+   "bigger than link speed= %d\n",
+   tx_rate, link.link_speed);
+   return -EINVAL;
+   }
+
+   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP);
+   return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk);
+}
+
 int
 rte_eth_mirror_rule_set(uint8_t port_id, 
struct rte_eth_vmdq_mirror_conf *mirror_conf,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d5ea46b..445d40a 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1012,6 +1012,17 @@ typedef int (*eth_set_vf_vlan_filter_t)(struct 
rte_eth_dev *dev,
  uint8_t vlan_on);
 /**< @internal Set VF VLAN pool filter */

+typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev,
+   uint16_t queue_idx,
+   uint16_t tx_rate);
+/**< @internal Set queue TX rate */
+
+typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev,
+   uint16_t vf,
+   uint16_t tx_rate,
+   uint64_t q_msk);
+/**< @internal Set VF TX rate */
+
 typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev,
  struct rte_eth_vmdq_mirror_conf *mirror_conf,
  uint8_t rule_id, 
@@ -1119,6 +1130,8 @@ struct eth_dev_ops {
eth_set_vf_rx_tset_vf_rx;  /**< enable/disable a VF receive 
*/
eth_set_vf_tx_tset_vf_tx;  /**< enable/disable a VF 
transmit */
eth_set_vf_vlan_filter_t   set_vf_vlan_filter;  /**< Set VF VLAN filter 
*/
+   eth_set_queue_rate_limit_t set_queue_rate_limit;   /**< Set queue rate 
limit */
+   eth_set_vf_rate_limit_tset_vf_rate_limit;   /**< Set VF rate limit 
*/

/** Add a signature filter. */
fdir_add_signature_filter_t fdir_add_signature_filter;
@@ -2561,6 +2574,44 @@ int rte_eth_mirror_rule_reset(uint8_t port_id,
 uint8_t rule_id);

 /**
+ * Set the rate limitation for a queue

[dpdk-dev] [PATCH v2 2/3] ixgbe: Implement the functionality of setting TX rate for queue or VF in IXGBE PMD

2014-05-26 Thread Ouyang Changchun
This patch implements the functionality of setting TX rate for queue or VF in 
IXGBE PMD.

Signed-off-by: Ouyang Changchun 
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 122 
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  13 +++-
 2 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c 
b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index c9b5fe4..643477a 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -87,6 +87,8 @@
 #define IXGBE_LINK_UP_CHECK_TIMEOUT   1000 /* ms */
 #define IXGBE_VMDQ_NUM_UC_MAC 4096 /* Maximum nb. of UC MAC addr. */

+#define IXGBE_MMW_SIZE_DEFAULT0x4
+#define IXGBE_MMW_SIZE_JUMBO_FRAME0x14

 #define IXGBEVF_PMD_NAME "rte_ixgbevf_pmd" /* PMD name */

@@ -182,6 +184,10 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
 static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
uint8_t rule_id);

+static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
+   uint16_t queue_idx, uint16_t tx_rate);
+static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
+   uint16_t tx_rate, uint64_t q_msk);
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
  */
@@ -280,6 +286,8 @@ static struct eth_dev_ops ixgbe_eth_dev_ops = {
.set_vf_rx= ixgbe_set_pool_rx,
.set_vf_tx= ixgbe_set_pool_tx,
.set_vf_vlan_filter   = ixgbe_set_pool_vlan_filter,
+   .set_queue_rate_limit = ixgbe_set_queue_rate_limit,
+   .set_vf_rate_limit= ixgbe_set_vf_rate_limit,
.fdir_add_signature_filter= ixgbe_fdir_add_signature_filter,
.fdir_update_signature_filter = ixgbe_fdir_update_signature_filter,
.fdir_remove_signature_filter = ixgbe_fdir_remove_signature_filter,
@@ -1288,10 +1296,13 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 {
struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   struct ixgbe_vf_info *vfinfo =
+   *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
int err, link_up = 0, negotiate = 0;
uint32_t speed = 0;
int mask = 0;
int status;
+   uint16_t vf, idx;

PMD_INIT_FUNC_TRACE();

@@ -1408,6 +1419,16 @@ skip_link_setup:
goto error;
}

+   /* Restore vf rate limit */
+   if (vfinfo != NULL) {
+   for (vf = 0; vf < dev->pci_dev->max_vfs; vf++)
+   for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++)
+   if (vfinfo[vf].tx_rate[idx] != 0)
+   ixgbe_set_vf_rate_limit(dev, vf,
+   vfinfo[vf].tx_rate[idx],
+   1 << idx);
+   }
+
ixgbe_restore_statistics_mapping(dev);

return (0);
@@ -3062,6 +3083,107 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, 
uint8_t rule_id)
return 0;
 }

+static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
+   uint16_t queue_idx, uint16_t tx_rate)
+{
+   struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   uint32_t rf_dec, rf_int;
+   uint32_t bcnrc_val;
+   uint16_t link_speed = dev->data->dev_link.link_speed;
+
+   if (queue_idx >= hw->mac.max_tx_queues)
+   return -EINVAL;
+
+   if (tx_rate != 0) {
+   /* Calculate the rate factor values to set */
+   rf_int = (uint32_t)link_speed / (uint32_t)tx_rate;
+   rf_dec = (uint32_t)link_speed % (uint32_t)tx_rate;
+   rf_dec = (rf_dec << IXGBE_RTTBCNRC_RF_INT_SHIFT) / tx_rate;
+
+   bcnrc_val = IXGBE_RTTBCNRC_RS_ENA;
+   bcnrc_val |= ((rf_int << IXGBE_RTTBCNRC_RF_INT_SHIFT) &
+   IXGBE_RTTBCNRC_RF_INT_MASK_M);
+   bcnrc_val |= (rf_dec & IXGBE_RTTBCNRC_RF_DEC_MASK);
+   } else {
+   bcnrc_val = 0;
+   }
+
+   /*
+* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
+* register. MMW_SIZE=0x014 if 9728-byte jumbo is supported, otherwise
+* set as 0x4.
+*/
+   if ((dev->data->dev_conf.rxmode.jumbo_frame == 1) &&
+   (dev->data->dev_conf.rxmode.max_rx_pkt_len >=
+   IXGBE_MAX_JUMBO_FRAME_SIZE))
+   IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRM,
+   IXGBE_MMW_SIZE_JUMBO_FRAME);
+   else
+   IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRM,
+   IXGBE_MMW_SIZE_DEFAULT);
+
+   /* Set RTTBCNRC of queue X */
+   IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, queue_idx);
+   IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
+   IXGBE_WRITE_FLUSH(hw);
+
+   return 0;
+}
+
+static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
+   uint16_t tx_

[dpdk-dev] [PATCH v2 3/3] testpmd: Add commands to test the functionality of setting TX rate for queue or VF

2014-05-26 Thread Ouyang Changchun
This patch adds commands in testpmd to test the functionality of setting TX 
rate for queue or VF.

Signed-off-by: Ouyang Changchun 
---
 app/test-pmd/cmdline.c | 159 -
 app/test-pmd/config.c  |  47 +++
 app/test-pmd/testpmd.h |   3 +
 3 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index b3824f9..83b2665 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -342,7 +342,14 @@ static void cmd_help_long_parsed(void *parsed_result,
"BAM:accepts broadcast packets;"
"MPE:accepts all multicast packets\n\n"
"Enable/Disable a VF receive mode of a port\n\n"
-   
+
+   "set port (port_id) queue (queue_id) rate (rate_num)\n"
+   "Set rate limit for a queue of a port\n\n"
+
+   "set port (port_id) vf (vf_id) rate (rate_num) "
+   "queue_mask (queue_mask_value)\n"
+   "Set rate limit for queues in VF of a port\n\n"
+
"set port (port_id) mirror-rule (rule_id)" 
"(pool-mirror|vlan-mirror)\n"
" (poolmask|vlanid[,vlanid]*) dst-pool (pool_id) 
(on|off)\n"
@@ -4790,6 +4797,154 @@ cmdline_parse_inst_t cmd_vf_rxvlan_filter = {
},
 };

+/* *** SET RATE LIMIT FOR A QUEUE OF A PORT *** */
+struct cmd_queue_rate_limit_result {
+   cmdline_fixed_string_t set;
+   cmdline_fixed_string_t port;
+   uint8_t port_num;
+   cmdline_fixed_string_t queue;
+   uint8_t queue_num;
+   cmdline_fixed_string_t rate;
+   uint16_t rate_num;
+};
+
+static void cmd_queue_rate_limit_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_queue_rate_limit_result *res = parsed_result;
+   int ret = 0;
+
+   if ((strcmp(res->set, "set") == 0) && (strcmp(res->port, "port") == 0)
+   && (strcmp(res->queue, "queue") == 0)
+   && (strcmp(res->rate, "rate") == 0))
+   ret = set_queue_rate_limit(res->port_num, res->queue_num,
+   res->rate_num);
+   if (ret < 0)
+   printf("queue_rate_limit_cmd error: (%s)\n", strerror(-ret));
+
+}
+
+cmdline_parse_token_string_t cmd_queue_rate_limit_set =
+   TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result,
+   set, "set");
+cmdline_parse_token_string_t cmd_queue_rate_limit_port =
+   TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result,
+   port, "port");
+cmdline_parse_token_num_t cmd_queue_rate_limit_portnum =
+   TOKEN_NUM_INITIALIZER(struct cmd_queue_rate_limit_result,
+   port_num, UINT8);
+cmdline_parse_token_string_t cmd_queue_rate_limit_queue =
+   TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result,
+   queue, "queue");
+cmdline_parse_token_num_t cmd_queue_rate_limit_queuenum =
+   TOKEN_NUM_INITIALIZER(struct cmd_queue_rate_limit_result,
+   queue_num, UINT8);
+cmdline_parse_token_string_t cmd_queue_rate_limit_rate =
+   TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result,
+   rate, "rate");
+cmdline_parse_token_num_t cmd_queue_rate_limit_ratenum =
+   TOKEN_NUM_INITIALIZER(struct cmd_queue_rate_limit_result,
+   rate_num, UINT16);
+
+cmdline_parse_inst_t cmd_queue_rate_limit = {
+   .f = cmd_queue_rate_limit_parsed,
+   .data = (void *)0,
+   .help_str = "set port X queue Y rate Z:(X = port number,"
+   "Y = queue number,Z = rate number)set rate limit for a queue on port X",
+   .tokens = {
+   (void *)&cmd_queue_rate_limit_set,
+   (void *)&cmd_queue_rate_limit_port,
+   (void *)&cmd_queue_rate_limit_portnum,
+   (void *)&cmd_queue_rate_limit_queue,
+   (void *)&cmd_queue_rate_limit_queuenum,
+   (void *)&cmd_queue_rate_limit_rate,
+   (void *)&cmd_queue_rate_limit_ratenum,
+   NULL,
+   },
+};
+
+
+/* *** SET RATE LIMIT FOR A VF OF A PORT *** */
+struct cmd_vf_rate_limit_result {
+   cmdline_fixed_string_t set;
+   cmdline_fixed_string_t port;
+   uint8_t port_num;
+   cmdline_fixed_string_t vf;
+   uint8_t vf_num;
+   cmdline_fixed_string_t rate;
+   uint16_t rate_num;
+   cmdline_fixed_string_t q_msk;
+   uint64_t q_msk_val;
+};
+
+static void cmd_vf_rate_limit_parsed(void *parsed_result,
+   __attribute__((unused)) struct cmdline *cl,
+   __attribute__((unused)) void *data)
+{
+   struct cmd_vf_rate_limit_result *res = parsed_resul

[dpdk-dev] [PATCH v2 0/3] Support setting TX rate for queue and VF

2014-05-26 Thread Ouyang Changchun
This patch v2 fixes some errors and warnings reported by checkpatch.pl.

This patch series also contain the 3 items:
1. Add API to support setting TX rate for a queue or a VF.
2. Implement the functionality of setting TX rate for queue or VF in IXGBE PMD.
3. Add commands in testpmd to test the functionality of setting TX rate for 
queue or VF.

Ouyang Changchun (3):
  Add API to support set TX rate for a queue and VF.
  Implement the functionality of setting TX rate for queue or VF in
IXGBE PMD.
  Add commands in testpmd to test the functionality of setting TX rate
for queue or VF.

 app/test-pmd/cmdline.c  | 159 +++-
 app/test-pmd/config.c   |  47 +++
 app/test-pmd/testpmd.h  |   3 +
 lib/librte_ether/rte_ethdev.c   |  71 
 lib/librte_ether/rte_ethdev.h   |  51 
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 122 +++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  13 ++-
 7 files changed, 462 insertions(+), 4 deletions(-)

-- 
1.9.0



[dpdk-dev] [PATCH 0/5] add mtu and flow control handlers

2014-05-26 Thread David Marchand
This patchset introduces 3 new ethdev operations: flow control parameters
retrieval and mtu get/set operations.

-- 
David Marchand

David Marchand (1):
  ethdev: add autoneg parameter in flow ctrl accessors

Ivan Boule (2):
  ixgbe: add get/set_mtu to ixgbevf
  app/testpmd: allow to configure mtu

Samuel Gauthier (1):
  ethdev: add mtu accessors

Zijie Pan (1):
  ethdev: retrieve flow control configuration

 app/test-pmd/cmdline.c  |   54 
 app/test-pmd/config.c   |   13 +++
 app/test-pmd/testpmd.h  |2 +-
 lib/librte_ether/rte_ethdev.c   |   47 ++
 lib/librte_ether/rte_ethdev.h   |   62 -
 lib/librte_pmd_e1000/e1000_ethdev.h |4 +
 lib/librte_pmd_e1000/em_ethdev.c|  111 +++
 lib/librte_pmd_e1000/em_rxtx.c  |   11 +++
 lib/librte_pmd_e1000/igb_ethdev.c   |  111 +++
 lib/librte_pmd_e1000/igb_rxtx.c |   10 +++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |  165 ++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |2 +
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   |   28 +-
 13 files changed, 614 insertions(+), 6 deletions(-)

-- 
1.7.10.4



[dpdk-dev] [PATCH 1/5] ethdev: retrieve flow control configuration

2014-05-26 Thread David Marchand
From: Zijie Pan 

This patch adds a new function in ethdev api to retrieve current flow control
configuration.
This operation has been implemented for rte_em_pmd, rte_igb_pmd and
rte_ixgbe_pmd.

Signed-off-by: Zijie Pan 
Signed-off-by: David Marchand 
---
 lib/librte_ether/rte_ethdev.c   |   16 ++
 lib/librte_ether/rte_ethdev.h   |   24 +--
 lib/librte_pmd_e1000/em_ethdev.c|   44 
 lib/librte_pmd_e1000/igb_ethdev.c   |   44 
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |   55 +--
 5 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index dabbdd2..31c18ef 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1456,6 +1456,22 @@ rte_eth_dev_fdir_set_masks(uint8_t port_id, struct 
rte_fdir_masks *fdir_mask)
 }

 int
+rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+{
+   struct rte_eth_dev *dev;
+
+   if (port_id >= nb_ports) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return (-ENODEV);
+   }
+
+   dev = &rte_eth_devices[port_id];
+   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP);
+   memset(fc_conf, 0, sizeof(*fc_conf));
+   return (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf);
+}
+
+int
 rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
 {
struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d839b8c..04533e5 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -955,8 +955,12 @@ typedef int (*fdir_set_masks_t)(struct rte_eth_dev *dev,
struct rte_fdir_masks *fdir_masks);
 /**< @internal Setup flow director masks on an Ethernet device */

+typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
+  struct rte_eth_fc_conf *fc_conf);
+/**< @internal Get current flow control parameter on an Ethernet device */
+
 typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev,
-   struct rte_eth_fc_conf *fc_conf);
+  struct rte_eth_fc_conf *fc_conf);
 /**< @internal Setup flow control parameter on an Ethernet device */

 typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev,
@@ -1120,6 +1124,7 @@ struct eth_dev_ops {
eth_queue_release_ttx_queue_release;/**< Release TX queue.*/
eth_dev_led_on_t   dev_led_on;/**< Turn on LED. */
eth_dev_led_off_t  dev_led_off;   /**< Turn off LED. */
+   flow_ctrl_get_tflow_ctrl_get; /**< Get flow control. */
flow_ctrl_set_tflow_ctrl_set; /**< Setup flow control. */
priority_flow_ctrl_set_t   priority_flow_ctrl_set; /**< Setup priority 
flow control.*/
eth_mac_addr_remove_t  mac_addr_remove; /**< Remove MAC address */
@@ -2308,6 +2313,21 @@ int  rte_eth_led_on(uint8_t port_id);
 int  rte_eth_led_off(uint8_t port_id);

 /**
+ * Get current status of the Ethernet link flow control for Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fc_conf
+ *   The pointer to the structure where to store the flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow control.
+ *   - (-ENODEV)  if *port_id* invalid.
+ */
+int rte_eth_dev_flow_ctrl_get(uint8_t port_id,
+ struct rte_eth_fc_conf *fc_conf);
+
+/**
  * Configure the Ethernet link flow control for Ethernet device
  *
  * @param port_id
@@ -2322,7 +2342,7 @@ int  rte_eth_led_off(uint8_t port_id);
  *   - (-EIO) if flow control setup failure
  */
 int rte_eth_dev_flow_ctrl_set(uint8_t port_id,
-   struct rte_eth_fc_conf *fc_conf);
+ struct rte_eth_fc_conf *fc_conf);

 /**
  * Configure the Ethernet priority flow control under DCB environment 
diff --git a/lib/librte_pmd_e1000/em_ethdev.c b/lib/librte_pmd_e1000/em_ethdev.c
index 2f0e1a0..74dc7e5 100644
--- a/lib/librte_pmd_e1000/em_ethdev.c
+++ b/lib/librte_pmd_e1000/em_ethdev.c
@@ -77,6 +77,8 @@ static void eth_em_stats_get(struct rte_eth_dev *dev,
 static void eth_em_stats_reset(struct rte_eth_dev *dev);
 static void eth_em_infos_get(struct rte_eth_dev *dev,
struct rte_eth_dev_info *dev_info);
+static int eth_em_flow_ctrl_get(struct rte_eth_dev *dev,
+   struct rte_eth_fc_conf *fc_conf);
 static int eth_em_flow_ctrl_set(struct rte_eth_dev *dev,
struct rte_eth_fc_conf *fc_conf);
 static int eth_em_interrupt_setup(struct rte_eth_dev *dev);
@@ -153,6 +155,7 @@ static struct eth_dev_ops eth_em_ops = {
.tx_queue_release = eth_em_tx_queue_release,
   

[dpdk-dev] [PATCH 3/5] ethdev: add mtu accessors

2014-05-26 Thread David Marchand
From: Samuel Gauthier 

This patch adds two new functions in ethdev api to retrieve current MTU and
change MTU of a port.
These operations have been implemented for rte_em_pmd, rte_igb_pmd and
rte_ixgbe_pmd.

Signed-off-by: Samuel Gauthier 
Signed-off-by: Ivan Boule 
Signed-off-by: David Marchand 
---
 lib/librte_ether/rte_ethdev.c   |   31 +
 lib/librte_ether/rte_ethdev.h   |   37 
 lib/librte_pmd_e1000/e1000_ethdev.h |4 +++
 lib/librte_pmd_e1000/em_ethdev.c|   64 +++
 lib/librte_pmd_e1000/em_rxtx.c  |   11 ++
 lib/librte_pmd_e1000/igb_ethdev.c   |   64 +++
 lib/librte_pmd_e1000/igb_rxtx.c |   10 ++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |   62 +
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |2 ++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   |   10 ++
 10 files changed, 295 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 31c18ef..ece2a68 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1058,6 +1058,37 @@ rte_eth_macaddr_get(uint8_t port_id, struct ether_addr 
*mac_addr)
ether_addr_copy(&dev->data->mac_addrs[0], mac_addr);
 }

+
+int
+rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
+{
+   struct rte_eth_dev *dev;
+
+   if (port_id >= nb_ports) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return (-ENODEV);
+   }
+
+   dev = &rte_eth_devices[port_id];
+   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_get, -ENOTSUP);
+   return (*dev->dev_ops->mtu_get)(dev, mtu);
+}
+
+int
+rte_eth_dev_set_mtu(uint8_t port_id, uint16_t *mtu)
+{
+   struct rte_eth_dev *dev;
+
+   if (port_id >= nb_ports) {
+   PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+   return (-ENODEV);
+   }
+
+   dev = &rte_eth_devices[port_id];
+   FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP);
+   return (*dev->dev_ops->mtu_set)(dev, mtu);
+}
+
 int
 rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 39351ea..177a6ec 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -890,6 +890,12 @@ typedef uint32_t (*eth_rx_queue_count_t)(struct 
rte_eth_dev *dev,
 typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset);
 /**< @Check DD bit of specific RX descriptor */

+typedef int (*mtu_get_t)(struct rte_eth_dev *dev, uint16_t *mtu);
+/**< @internal Get MTU. */
+
+typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t *mtu);
+/**< @internal Set MTU. */
+
 typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev,
  uint16_t vlan_id,
  int on);
@@ -1113,6 +1119,8 @@ struct eth_dev_ops {
eth_queue_stats_mapping_set_t queue_stats_mapping_set;
/**< Configure per queue stat counter mapping. */
eth_dev_infos_get_tdev_infos_get; /**< Get device info. */
+   mtu_get_t  mtu_get; /**< Get MTU. */
+   mtu_set_t  mtu_set; /**< Set MTU. */
vlan_filter_set_t  vlan_filter_set;  /**< Filter VLAN Setup. */
vlan_tpid_set_tvlan_tpid_set;  /**< Outer VLAN TPID 
Setup. */
vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on 
queue. */
@@ -1680,6 +1688,35 @@ extern void rte_eth_dev_info_get(uint8_t port_id,
 struct rte_eth_dev_info *dev_info);

 /**
+ * Retrieve the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A pointer to a uint16_t where the retrieved MTU is to be stored.
+ * @return
+ *   - (0) if successful.
 *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+extern int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
+
+/**
+ * Change the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A pointer to a uint16_t where the MTU to be applied is stored.
+ * @return
+ *   - (0) if successful.
 *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *mtu* invalid.
+ */
+extern int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t *mtu);
+
+/**
  * Enable/Disable hardware filtering by an Ethernet device of received
  * VLAN packets tagged with a given VLAN Tag Identifier.
  *
diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h 
b/lib/librte_pmd_e1000/e1000_ethdev.h
index 8790601..5cbf436 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -138,6 +138,8 @@ uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf 
**rx_pkts,
 uint16_t eth_igb_recv_scattered_pkts(void *rxq,
  

[dpdk-dev] [PATCH 2/5] ethdev: add autoneg parameter in flow ctrl accessors

2014-05-26 Thread David Marchand
Add autoneg field in flow control parameters.
This makes it easier to understand why changing some parameters does not always
have the expected result.

Changing autoneg is not supported at the moment.

Signed-off-by: David Marchand 
---
 lib/librte_ether/rte_ethdev.h   |1 +
 lib/librte_pmd_e1000/em_ethdev.c|3 +++
 lib/librte_pmd_e1000/igb_ethdev.c   |3 +++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |3 +++
 4 files changed, 10 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 04533e5..39351ea 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -550,6 +550,7 @@ struct rte_eth_fc_conf {
uint16_t send_xon;/**< Is XON frame need be sent */
enum rte_eth_fc_mode mode;  /**< Link flow control mode */
uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */
+   uint8_t autoneg;  /**< Use Pause autoneg */
 };

 /**
diff --git a/lib/librte_pmd_e1000/em_ethdev.c b/lib/librte_pmd_e1000/em_ethdev.c
index 74dc7e5..c148cbc 100644
--- a/lib/librte_pmd_e1000/em_ethdev.c
+++ b/lib/librte_pmd_e1000/em_ethdev.c
@@ -1378,6 +1378,7 @@ eth_em_flow_ctrl_get(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
fc_conf->high_water = hw->fc.high_water;
fc_conf->low_water = hw->fc.low_water;
fc_conf->send_xon = hw->fc.send_xon;
+   fc_conf->autoneg = hw->mac.autoneg;

/*
 * Return rx_pause and tx_pause status according to actual setting of
@@ -1422,6 +1423,8 @@ eth_em_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
uint32_t rctl;

hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   if (fc_conf->autoneg != hw->mac.autoneg)
+   return -ENOTSUP;
rx_buf_size = em_get_rx_buffer_size(hw);
PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x \n", rx_buf_size);

diff --git a/lib/librte_pmd_e1000/igb_ethdev.c 
b/lib/librte_pmd_e1000/igb_ethdev.c
index 6dc82c2..e15fe5a 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -1818,6 +1818,7 @@ eth_igb_flow_ctrl_get(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
fc_conf->high_water = hw->fc.high_water;
fc_conf->low_water = hw->fc.low_water;
fc_conf->send_xon = hw->fc.send_xon;
+   fc_conf->autoneg = hw->mac.autoneg;

/*
 * Return rx_pause and tx_pause status according to actual setting of
@@ -1862,6 +1863,8 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
uint32_t rctl;

hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   if (fc_conf->autoneg != hw->mac.autoneg)
+   return -ENOTSUP;
rx_buf_size = igb_get_rx_buffer_size(hw);
PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x \n", rx_buf_size);

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c 
b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index 4633654..c876c3e 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -2193,6 +2193,7 @@ ixgbe_flow_ctrl_get(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
fc_conf->high_water = hw->fc.high_water[0];
fc_conf->low_water = hw->fc.low_water[0];
fc_conf->send_xon = hw->fc.send_xon;
+   fc_conf->autoneg = !hw->fc.disable_fc_autoneg;

/*
 * Return rx_pause status according to actual setting of
@@ -2244,6 +2245,8 @@ ixgbe_flow_ctrl_set(struct rte_eth_dev *dev, struct 
rte_eth_fc_conf *fc_conf)
PMD_INIT_FUNC_TRACE();

hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   if (fc_conf->autoneg != !hw->fc.disable_fc_autoneg)
+   return -ENOTSUP;
rx_buf_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0));
PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x \n", rx_buf_size);

-- 
1.7.10.4



[dpdk-dev] [PATCH 4/5] ixgbe: add get/set_mtu to ixgbevf

2014-05-26 Thread David Marchand
From: Ivan Boule 

The support of jumbo frames in the ixgbevf Poll Mode Driver of 10GbE
82599 VF functions consists in the following enhancements:

- Implement the mtu_set function in the ixgbevf PMD, using the IXGBE_VF_SET_LPE
  request of the version 1.0 of the VF/PF mailbox API for this purpose.

- Implement the mtu_get function in the ixgbevf PMD for the sake of coherency.

- Add a detailed explanation on the VF/PF rx max frame len negotiation.

- To deal with Jumbo frames, force the receive function to handle scattered
  packets.

Signed-off-by: Ivan Boule 
Signed-off-by: David Marchand 
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c |   45 +++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   |   18 +-
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c 
b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index b9db1f4..230b758 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -194,6 +194,9 @@ static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev,
 uint32_t index, uint32_t pool);
 static void ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index);

+static int ixgbevf_dev_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu);
+static int ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t *mtu);
+
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
  */
@@ -334,6 +337,8 @@ static struct eth_dev_ops ixgbevf_eth_dev_ops = {
.stats_reset  = ixgbevf_dev_stats_reset,
.dev_close= ixgbevf_dev_close,
.dev_infos_get= ixgbe_dev_info_get,
+   .mtu_get  = ixgbevf_dev_get_mtu,
+   .mtu_set  = ixgbevf_dev_set_mtu,
.vlan_filter_set  = ixgbevf_vlan_filter_set,
.vlan_strip_queue_set = ixgbevf_vlan_strip_queue_set,
.vlan_offload_set = ixgbevf_vlan_offload_set,
@@ -3319,6 +3324,46 @@ ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, 
uint32_t index)
}
 }

+static int
+ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
+{
+   struct ixgbe_hw *hw;
+   uint16_t max_frame = *mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+
+   hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+   /* MTU < 68 is an error and causes problems on some kernels */
+   if ((*mtu < 68) || (max_frame > ETHER_MAX_JUMBO_FRAME_LEN))
+   return -EINVAL;
+
+   /*
+* When supported by the underlying PF driver, use the IXGBE_VF_SET_MTU
+* request of the version 2.0 of the mailbox API.
+* For now, use the IXGBE_VF_SET_LPE request of the version 1.0
+* of the mailbox API.
+* This call to IXGBE_SET_LPE action won't work with ixgbe pf drivers
+* prior to 3.11.33 which contains the following change:
+* "ixgbe: Enable jumbo frames support w/ SR-IOV"
+*/
+   ixgbevf_rlpml_set_vf(hw, max_frame);
+
+   /* update max frame size */
+   dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame;
+   return 0;
+}
+
+static int
+ixgbevf_dev_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
+{
+   /*
+* When available, use the IXGBE_VF_GET_MTU request
+* of the version 2.0 of the mailbox API.
+*/
+   *mtu = (uint16_t) (dev->data->dev_conf.rxmode.max_rx_pkt_len -
+  (ETHER_HDR_LEN + ETHER_CRC_LEN));
+   return 0;
+}
+
 static struct rte_driver rte_ixgbe_driver = {
.type = PMD_PDEV,
.init = rte_ixgbe_pmd_init,
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index b05c1ba..c797616 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3700,7 +3700,20 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
PMD_INIT_FUNC_TRACE();
hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

-   /* setup MTU */
+   /*
+* When the VF driver issues a IXGBE_VF_RESET request, the PF driver
+* disables the VF receipt of packets if the PF MTU is > 1500.
+* This is done to deal with 82599 limitations that imposes
+* the PF and all VFs to share the same MTU.
+* Then, the PF driver enables again the VF receipt of packet when
+* the VF driver issues a IXGBE_VF_SET_LPE request.
+* In the meantime, the VF device cannot be used, even if the VF driver
+* and the Guest VM network stack are ready to accept packets with a
+* size up to the PF MTU.
+* As a work-around to this PF behaviour, force the call to
+* ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way,
+* VF packets received can work in all cases.
+*/
ixgbevf_rlpml_set_vf(hw,
(uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len);

@@ -3783,6 +3796,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
}
}

+   /* Set RX function to d

[dpdk-dev] [PATCH 5/5] app/testpmd: allow to configure mtu

2014-05-26 Thread David Marchand
From: Ivan Boule 

Take advantage of the .set_mtu ethdev function and make it possible to configure
the MTU on devices using testpmd.

Signed-off-by: Ivan Boule 
Signed-off-by: David Marchand 
---
 app/test-pmd/cmdline.c |   54 
 app/test-pmd/config.c  |   13 
 app/test-pmd/testpmd.h |2 +-
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 68bcd7b..517944b 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -516,6 +516,8 @@ static void cmd_help_long_parsed(void *parsed_result,
"port config all (txfreet|txrst|rxfreet) (value)\n"
"Set free threshold for rx/tx, or set"
" tx rs bit threshold.\n\n"
+   "port config mtu X value\n"
+   "Set the MTU of port X to a given value\n\n"
);
}

@@ -1014,6 +1016,57 @@ cmdline_parse_inst_t cmd_config_max_pkt_len = {
},
 };

+/* *** configure port MTU *** */
+/*
+ * testpmd command: "port config mtu <port_id> <value>"
+ * Parses the port id and the requested MTU, rejects values below
+ * ETHER_MIN_LEN, and delegates to port_mtu_set().
+ */
+struct cmd_config_mtu_result {
+   cmdline_fixed_string_t port;
+   cmdline_fixed_string_t keyword;
+   cmdline_fixed_string_t mtu;
+   uint8_t port_id;
+   uint16_t value;
+};
+
+static void
+cmd_config_mtu_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+   struct cmd_config_mtu_result *res = parsed_result;
+
+   /* refuse MTU values smaller than the minimum Ethernet frame length */
+   if (res->value < ETHER_MIN_LEN) {
+   printf("mtu cannot be less than %d\n", ETHER_MIN_LEN);
+   return;
+   }
+   port_mtu_set(res->port_id, res->value);
+}
+
+cmdline_parse_token_string_t cmd_config_mtu_port =
+   TOKEN_STRING_INITIALIZER(struct cmd_config_mtu_result, port,
+"port");
+cmdline_parse_token_string_t cmd_config_mtu_keyword =
+   TOKEN_STRING_INITIALIZER(struct cmd_config_mtu_result, keyword,
+"config");
+/* fix: store the literal "mtu" in its own 'mtu' field, not in 'keyword'
+ * (the original reused the 'keyword' field, leaving 'mtu' unused) */
+cmdline_parse_token_string_t cmd_config_mtu_mtu =
+   TOKEN_STRING_INITIALIZER(struct cmd_config_mtu_result, mtu,
+"mtu");
+cmdline_parse_token_num_t cmd_config_mtu_port_id =
+   TOKEN_NUM_INITIALIZER(struct cmd_config_mtu_result, port_id, UINT8);
+cmdline_parse_token_num_t cmd_config_mtu_value =
+   TOKEN_NUM_INITIALIZER(struct cmd_config_mtu_result, value, UINT16);
+
+cmdline_parse_inst_t cmd_config_mtu = {
+   .f = cmd_config_mtu_parsed,
+   .data = NULL,
+   .help_str = "port config mtu value",
+   .tokens = {
+   (void *)&cmd_config_mtu_port,
+   (void *)&cmd_config_mtu_keyword,
+   (void *)&cmd_config_mtu_mtu,
+   (void *)&cmd_config_mtu_port_id,
+   (void *)&cmd_config_mtu_value,
+   NULL,
+   },
+};
+
 /* *** configure rx mode *** */
 struct cmd_config_rx_mode_flag {
cmdline_fixed_string_t port;
@@ -5366,6 +5419,7 @@ cmdline_parse_ctx_t main_ctx[] = {
(cmdline_parse_inst_t *)&cmd_config_speed_all,
(cmdline_parse_inst_t *)&cmd_config_speed_specific,
(cmdline_parse_inst_t *)&cmd_config_rx_tx,
+   (cmdline_parse_inst_t *)&cmd_config_mtu,
(cmdline_parse_inst_t *)&cmd_config_max_pkt_len,
(cmdline_parse_inst_t *)&cmd_config_rx_mode_flag,
(cmdline_parse_inst_t *)&cmd_config_rss,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index d3934e5..88a808c 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -503,6 +503,19 @@ port_reg_set(portid_t port_id, uint32_t reg_off, uint32_t 
reg_v)
display_port_reg_value(port_id, reg_off, reg_v);
 }

+/*
+ * Set the MTU of a port through the generic ethdev API and print a
+ * diagnostic if the driver rejects it.
+ * NOTE(review): rte_eth_dev_set_mtu() is called with &mtu here — confirm
+ * that the ethdev prototype introduced by this patch series takes a
+ * pointer rather than a plain uint16_t.
+ */
+void
+port_mtu_set(portid_t port_id, uint16_t mtu)
+{
+   int diag;
+
+   if (port_id_is_invalid(port_id))
+   return;
+   diag = rte_eth_dev_set_mtu(port_id, &mtu);
+   if (diag == 0)
+   return;
+   printf("Set MTU failed. diag=%d\n", diag);
+}
+
 /*
  * RX/TX ring descriptors display functions.
  */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 0cf5a92..9b7dcbb 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -454,7 +454,7 @@ void fwd_config_setup(void);
 void set_def_fwd_config(void);
 int init_fwd_streams(void);

-
+void port_mtu_set(portid_t port_id, uint16_t mtu);
 void port_reg_bit_display(portid_t port_id, uint32_t reg_off, uint8_t bit_pos);
 void port_reg_bit_set(portid_t port_id, uint32_t reg_off, uint8_t bit_pos,
  uint8_t bit_v);
-- 
1.7.10.4



[dpdk-dev] [PATCH v2 00/17] add TSO support

2014-05-26 Thread Olivier MATZ
Hi Venky,

>> my testpmd iofwd test with the txqflags option disabling many mbuf
>> features is not representative of a real world application.

> [Venky] I did see your test reports. I also made the point that the
> tests we have are insufficient for testing the impact. If you look at
> data_ofs, it actually has an impact on two sides - the driver and the
> upper layer. We do not at this time have a test for the upper
> layer/accessor. Secondly, there is a whole class of apps (fast path
> route for example) that aren't in your purview that do not need
> txqflags. Calling it not representative of a real world application is
> incorrect.

I was talking about "iofwd with txqflags", not "txqflags". The iofwd
mode means that the mbuf data is not accessed by the application,
so I think we should not rely on it.

I agree that "txqflags" could be useful in some situations, when the
user does not need multi-segment. Anyway, the tests I've posted on the
list do not show a real impact on this use case.

Finally, as stated in the initial TSO cover letter [1], I've tested
the upper layer impact that you are talking about with the 6WINDGate
stack and there is no visible regression.

> Secondly, your testpmd run baseline performance should be higher. At
> this point, it is about 40% off from the numbers we see on the baseline
> on the same CPU. If the baseline is incorrect, I cannot judge anything
> more on the performance. We need to get the baseline performance the
> same, and then compare impact.

I tested with the default testpmd configuration; I thought it was
close enough to the best performance.

> [Venky] I exclude VLAN because it is something explicitly set by the Rx
> side of the driver. Having Rx access a second cache line will generate a
> performance impact (can be mitigated by a prefetch, but it will cost
> more instructions, and cannot be deterministically controlled). The rest
> of the structure is on the transmit side - which is going to be cache
> hot - at least in LLC anyway. There are cases where this will not be in
> LLC - and we have a few of those. Those however, we can mitigate.

If we add another cache line to the mbuf, if the code accesses it
(whatever rx or tx side), it will at least increase the required memory
bandwidth, and in the worst case will result in an additional cache
miss. This is difficult to predict and depends on the use case. I
think each solution would have its drawbacks in specific cases.

I think the tests I've provided are representative enough to assume
that there is no need to search for an alternative, knowing the
fact that it also clean the mbuf structure and give more room for
later use.

> [Venky] I don't think reworking core data structures (especially
> regressing core data structures) is a good thing. We have kept this
> relatively stable over 5 releases, sometimes at the impact of
> performance, and churning data structures is not a good thing.

Sorry, but I don't think this is a good argument: we cannot say
"it was stable during 5 releases so we cannot modify it". I would say
that the code modifications should be guided by technical reasons.

Keeping the API stable could be a good technical reason, but as far
as I understood, the DPDK is not at this stage today. The mbuf
rework brings 9 additional bytes in the structure, which help for
TSO today, but may also help for next features. For instance, having
another checksum flag as proposed by Stephen and me [2].

Moreover, the patches make the rte_mbuf structure clearer: the current
separation rte_mbuf / rte_pktmbuf is a bit strange, for instance the
ol_flags field which is clearly related to a pktmuf is located in
the rte_mbuf.


Now, I'm ready to make some concessions and discuss about an alternative
solution.
Thomas, you are the maintainer ;) what are your plans?

Regards,
Olivier


[1] http://dpdk.org/ml/archives/dev/2014-May/002322.html
[2] http://dpdk.org/ml/archives/dev/2014-May/002339.html



[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

2014-05-26 Thread Olivier MATZ
Hi Walt,

> The purpose of this structure is to send commands, events or any other type
> of information between user application tasks (normally from a manager
> task).  It has been there since the beginning in the original design and
> it's up to the user to define what is in the data field and how they
> wish to use it.  It's one thing to fix a bug but to remove a structure
> like this because you don't see it use in the other parts is asking for
> trouble with customers.

To me, there is nothing that we cannot do without this structure:
depending on the use-case, it could be replaced with the same
functionalities by:

- a packet mbuf, in this case the user pointer would be stored in
   the packet data for instance. In the worst case, I would agree to
   add a flag telling that the mbuf carries control data.

- an application private structure which would contain the pointer,
   the data len (if any), plus any other field that could be useful
   for the application. This structure can be allocated in a mempool.

- nothing! I mean: if the application only wants to carry a pointer,
   why would it need an additional structure to point to it? The
   application can just give the pointer to its private data without
   allocating a control mbuf for that.

To be honest, I don't see in which case it is useful to have this
additional structure. This modification is motivated by a gain of
bytes in the mbuf and a rationalization of the rte_mbuf structure.

I can add a documentation, for instance in the commit log, about how
the rte_ctrlmbuf could be replaced by something equivalent in different
situations. Are you fine with this?

If we find use cases where rte_ctrlmbuf is required, another idea would
be to keep this structure, but make it independent of rte_mbuf.

Regards,
Olivier


[dpdk-dev] [PATCH v3] virtio: Support multiple queues feature in DPDK based virtio-net frontend.

2014-05-26 Thread Ouyang Changchun
This v3 patch continues fixing some errors and warnings reported by 
checkpatch.pl.

This patch supports multiple queues feature in DPDK based virtio-net frontend.
It firstly gets max queue number of virtio-net from virtio PCI configuration and
then send command to negotiate the queue number with backend; When receiving and
transmitting packets, it negotiates multiple virtio-net queues which serve 
RX/TX;
To utilize this feature, the backend also need support multiple queues feature
and enable it.

Signed-off-by: Ouyang Changchun 
---
 lib/librte_pmd_virtio/virtio_ethdev.c | 377 --
 lib/librte_pmd_virtio/virtio_ethdev.h |  40 ++--
 lib/librte_pmd_virtio/virtio_pci.h|   4 +-
 lib/librte_pmd_virtio/virtio_rxtx.c   |  92 +++--
 lib/librte_pmd_virtio/virtqueue.h |  61 --
 5 files changed, 458 insertions(+), 116 deletions(-)

diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c 
b/lib/librte_pmd_virtio/virtio_ethdev.c
index 49e236b..c2b4dfb 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -81,6 +81,12 @@ static void virtio_dev_stats_get(struct rte_eth_dev *dev, 
struct rte_eth_stats *
 static void virtio_dev_stats_reset(struct rte_eth_dev *dev);
 static void virtio_dev_free_mbufs(struct rte_eth_dev *dev);

+static int virtio_dev_queue_stats_mapping_set(
+   __rte_unused struct rte_eth_dev *eth_dev,
+   __rte_unused uint16_t queue_id,
+   __rte_unused uint8_t stat_idx,
+   __rte_unused uint8_t is_rx);
+
 /*
  * The set of PCI devices this driver supports
  */
@@ -92,6 +98,135 @@ static struct rte_pci_id pci_id_virtio_map[] = {
 { .vendor_id = 0, /* sentinel */ },
 };

+static int
+virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,
+   int *dlen, int pkt_num)
+{
+   uint32_t head = vq->vq_desc_head_idx, i;
+   int k, sum = 0;
+   virtio_net_ctrl_ack status = ~0;
+   struct virtio_pmd_ctrl result;
+
+   ctrl->status = status;
+
+   if (!vq->hw->cvq) {
+   PMD_INIT_LOG(ERR, "%s(): Control queue is "
+   "not supported by this device.\n", __func__);
+   return -1;
+   }
+
+   PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, "
+   "vq->hw->cvq = %p vq = %p\n",
+   vq->vq_desc_head_idx, status, vq->hw->cvq, vq);
+
+   if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1))
+   return -1;
+
+   memcpy(vq->virtio_net_hdr_mz->addr, ctrl,
+   sizeof(struct virtio_pmd_ctrl));
+
+   /*
+* Format is enforced in qemu code:
+* One TX packet for header;
+* At least one TX packet per argument;
+* One RX packet for ACK.
+*/
+   vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT;
+   vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr;
+   vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr);
+   vq->vq_free_cnt--;
+   i = vq->vq_ring.desc[head].next;
+
+   for (k = 0; k < pkt_num; k++) {
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT;
+   vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+   + sizeof(struct virtio_net_ctrl_hdr)
+   + sizeof(ctrl->status) + sizeof(uint8_t)*sum;
+   vq->vq_ring.desc[i].len = dlen[k];
+   sum += dlen[k];
+   vq->vq_free_cnt--;
+   i = vq->vq_ring.desc[i].next;
+   }
+
+   vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE;
+   vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr
+   + sizeof(struct virtio_net_ctrl_hdr);
+   vq->vq_ring.desc[i].len = sizeof(ctrl->status);
+   vq->vq_free_cnt--;
+
+   vq->vq_desc_head_idx = vq->vq_ring.desc[i].next;
+
+   vq_update_avail_ring(vq, head);
+   vq_update_avail_idx(vq);
+
+   PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d\n", vq->vq_queue_index);
+
+   virtqueue_notify(vq);
+
+   while (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
+   usleep(100);
+
+   while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
+   uint32_t idx, desc_idx, used_idx;
+   struct vring_used_elem *uep;
+
+   rmb();
+
+   used_idx = (uint32_t)(vq->vq_used_cons_idx
+   & (vq->vq_nentries - 1));
+   uep = &vq->vq_ring.used->ring[used_idx];
+   idx = (uint32_t) uep->id;
+   desc_idx = idx;
+
+   while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) {
+   desc_idx = vq->vq_ring.desc[desc_idx].next;
+   vq->vq_free_cnt++;
+   }
+
+   vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx;
+   vq->vq_desc_head_idx = idx;
+
+   vq->vq_used_cons_idx++;
+   vq->vq_free_cnt++;
+

[dpdk-dev] [PATCH v3] virtio: Support multiple queues feature in DPDK based virtio-net frontend.

2014-05-26 Thread Thomas Monjalon
2014-05-26 20:53, Ouyang Changchun:
> This v3 patch continues fixing some errors and warnings reported by
> checkpatch.pl.

Thank you for cleaning code.

> This patch supports multiple queues feature in DPDK based virtio-net

Please do not mix cleaning and feature in the same patch.
It makes difficult to read the changes implied by the new feature.

-- 
Thomas


[dpdk-dev] [PATCH] atomic: clarify use of memory barriers

2014-05-26 Thread Ananyev, Konstantin

Hi Oliver,

>> So with the following fragment of code:
>> extern int *x;
>> extern  __128i a, *p;
>> L0:
>> _mm_stream_si128( p, a);
>> rte_compiler_barrier();
>> L1:
>> *x = 0;
>>
>> There is no guarantee that store at L0 will always be finished
>> before store at L1.

>This code fragment looks very similar to what is done in
>__rte_ring_sp_do_enqueue():
>
>[...]
> ENQUEUE_PTRS(); /* I expect it is converted to an SSE store */
> rte_compiler_barrier();
> [...]
> r->prod.tail = prod_next;
>So, according to your previous explanation, I understand that
>this code would require a write memory barrier in place of the
>compiler barrier. Am I wrong?

No, right now compiler barrier is enough here.
ENQUEUE_PTRS() doesn't use Non-Temporal stores (MOVNT*), so write order should 
be guaranteed.
Though, if in future we'll change ENQUEUE_PTRS() to use non-temporal stores,
we'll have to use sfence (or mfence).

>Moreover, if I understand well, a real wmb() is needed only if
>a SSE store is issued. But the programmer may not control that,
>it's the job of the compiler.

'Normal' SIMD writes are not reordered.
So it is ok for the compiler to use them if appropriate.  

> > But now, there seems a confusion: everyone has to remember that
>> smp_mb() and smp_wmb() are 'real' fences, while smp_rmb() is not.
>> That's why my suggestion was to simply keep using compiler_barrier()
>> for all cases, when we don't need real fence.

>I'm not sure the programmer has to know which smp_*mb() is a real fence
>or not. He just expects that it generates the proper CPU instructions
>that guarantees the effectiveness of the memory barrier.

In most cases just a compiler barrier is enough, but there are few exceptions.
Always using fence instructions -  means introduce unnecessary slowdown for 
cases, when order is guaranteed.
No using fences in cases, when they are needed - means introduce race window 
and possible data corruption.
That's why right now people can use either rte_compiler_barrier() or mb/rmb/wmb 
- whatever is appropriate for particular case.

Konstantin


[dpdk-dev] [PATCH] atomic: clarify use of memory barriers

2014-05-26 Thread Olivier MATZ
Hi Konstantin,

On 05/26/2014 03:57 PM, Ananyev, Konstantin wrote:
> In most cases just a compiler barrier is enough, but there are few exceptions.
> Always using fence instructions -  means introduce unnecessary slowdown for 
> cases, when order is guaranteed.
> No using fences in cases, when they are needed - means introduce race window 
> and possible data corruption.
> That's why right now people can use either rte_compiler_barrier() or 
> mb/rmb/wmb - whatever is appropriate for particular case.

OK, so let's drop this patch for now.
Thank you for reviewing and commenting.

Regards,
Olivier


[dpdk-dev] [PATCH] cmdline: finish at EOF

2014-05-26 Thread Olivier MATZ
Hi Cristian,

On 05/23/2014 05:32 PM, Olivier MATZ wrote:
> On 05/23/2014 05:21 PM, Cristian Dumitrescu wrote:
>> Bug fix in cmdline library to allow return on EOF as opposed to
>> infinite loop.
>>
>> Signed-off-by: Cristian Dumitrescu 
>> ---
>>   lib/librte_cmdline/cmdline.c |2 +-
>>   1 files changed, 1 insertions(+), 1 deletions(-)
>>   mode change 100644 => 100755 lib/librte_cmdline/cmdline.c
>
> Acked-by: Olivier Matz 

By the way, I just noticed that this bug had already been fixed into
my cmdline project onto my personal website some time ago:
http://git.droids-corp.org/?p=libcmdline.git;a=commitdiff;h=52e1d5772d1f465a013e65b8ae6e43a027438ed6

Regards,
Olivier


[dpdk-dev] [PATCH] cmdline: finish at EOF

2014-05-26 Thread Thomas Monjalon
> > Bug fix in cmdline library to allow return on EOF as opposed to infinite
> > loop.
> > 
> > Signed-off-by: Cristian Dumitrescu 
> 
> Acked-by: Olivier Matz 

Applied for version 1.7.0 with title:
cmdline: fix infinite loop after EOF

Thanks
-- 
Thomas


[dpdk-dev] [PATCH v2 00/17] add TSO support

2014-05-26 Thread Ananyev, Konstantin

Hi Oliver,

 >> I don't see any big changes in the v2 of that patch.
 >>
 >> At least both things that I have concerns about, stay unchanged in
 >> the v2:
 >>
 >> 1) merge physaddr and buf_len in a bitfield - I still think we better
 >> keep physaddr as 64bit field (PATCH 5).

>As nobody reacted to our discussion thread [1] with other arguments, I
>stayed on my initial position:

>- 2 bytes more in the mbuf structure is always good to have
>- no performance impact detected (see my test reports)
>- the 48 bits physical address limit will be reached in several years,
>   and at this time, maybe the cache lines will be 128 bytes? (solving
>   our initial rte_mbuf issue). So let's just deal with current or near
>   future hardware.

As, I understand right now, you don't need these 2 bytes to enable TSO support.
>From other side, if we'll occupy them by some fields now, then in future
we wouldn't be able to expand our phys_addr without re-working mbuf structure 
again
and breaking backward compatibility.
No doubt, that there are a lot of extra things that needed(wanted) to be put 
into the mbuf.
And as new HW with extra offload capabilities will come out - that demand will 
only increase.
That's why I think we wouldn't be able to squeeze all future fields in 64B 
anyway.
So it seems to me that we would have to expand mbuf to 128B sooner or later and 
like we it or not.
So I still think we better keep phys_addr 64bits long.
At least for now. 

>> 2) fix_tcp_phdr_cksum() is inside ixgbe TX function, while I think it
>> should be moved out of it to the upper layer (PATCH 11).

>Thomas's answer on this question [2] was to do that way, so I did not
>modify the v1 patches.

> Thomas Monjalon wrote:
>We know at least 2 checksum methods:
>- the standard one
>- the special one for ixgbe TSO
>In Linux ixgbe, checksum is redone in the driver for TSO case.

I don't think we should use linux ixgbe driver as an etalon here.

>We want to compute checksum in the application/stack in order to prevent 
>driver from modifying packet data, that could cause cache miss.
>But the application cannot always know which checksum method to use because it 
>doesn't have to know which driver will process the packet.
>So we have to choose which checksum method can be done in the application 
>without driver processing. It's not an easy choice.

>It seems simpler and reasonable to choose the standard pseudo-header checksum 
>method as it is done in Linux.
>Having a stable and generic API is something important we must target.

As I said: with the current DPDK implementation the upper code would still be 
different for packets with TCP checksum (without segmentation) and TCP 
segmentation:
you need to setup different flags in mbuf, with TSO you need to setup l4_len 
and mss fields inside mbuf, with just checksum - you don't.
So right now upper layer code has to know is it doing TSO or just checksum 
anyway.
If that's the case why it also can't compute pseudo-checksum in a 2 different 
ways too?
Also, the patch introduces support TSO only for ixgbe.
I suppose that TSO support for igb can be added in the same way. 
Let's talk about generic API, when we'll have to expand it into different 
devices.

Konstantin



[dpdk-dev] [PATCH] timer bug fix

2014-05-26 Thread Thomas Monjalon
2014-05-23 16:52, Olivier MATZ:
> > Bug: when a periodic timer's callback is running, if another
> >  timer is manipulated, the periodic timer is not reloaded.
> > Solution: set the update flag only is the modified timer is
> >  in RUNNING state
> 
> Acked-by: Olivier Matz 

Applied in version 1.7.0 with title:
timer: fix reloading after changes

Thanks
-- 
Thomas


[dpdk-dev] [PATCH] timer bug fix.

2014-05-26 Thread Thomas Monjalon
> > Bug: When a timer is running
> > 
> >   - if rte_timer_stop is called, the pending decrement is
> >   skipped (decremented only if the timer is pending) and due
> >   to the update flag the future processing is skipped so the
> >   timer is counted as pending while it is stopped. - the same
> >   applies when rte_timer_reset is called but then the pending
> >   statistics is additionally incremented so the timer is
> >   counted pending twice.
> > 
> > Solution: decrement the pending
> > 
> >   statistics after returning from the callback. If
> >   rte_timer_stop was called, it skipped decrementing the
> >   pending statistics. If rte_time_reset was called, the
> >   pending statistics was incremented. If neither was called
> >   and the timer is periodic, the pending statistics is
> >   incremented when it is reloaded
> > 
> > Signed-off-by: Vadim Suraev 
> 
> Acked-by: Olivier Matz 

Applied for version 1.7.0 with title:
timer: fix pending counter

Thanks
-- 
Thomas


[dpdk-dev] [PATCH] fix for 2 consecutive rte_eth_dev_start() can cause a SIGSEGV

2014-05-26 Thread Thomas Monjalon
2014-05-23 12:08, Konstantin Ananyev:
> 1)If igb_alloc_rx_queue_mbufs() would fail to allocate an mbuf for RX queue,
> it calls igb_rx_queue_release(rxq).
> That causes rxq to be silently freed, without updating 
> dev->data->rx_queues[]. So any further reference to it will trigger the
> SIGSEGV.
> Same thing in em PMD too.
> To fix: igb_alloc_rx_queue_mbufs() should just return an error to the caller
> and let the upper layer deal with the problem.
> That's what ixgbe PMD doing right now.
> 2)In tx_queue_setup (for all 3 PMDs: ixgbe, igb, em) we call
> tx_queue_release(dev->data->tx_queues[queue_idx])
> without setting dev->data->tx_queues[queue_idx] = NULL
> afterwards.
> 3)Prevent rte_eth_dev_start/stop to call underneath dev_start/dev_stop
> for already started/stopped device.
> 4) fix compiler warning on PMD_DEBUG_TRACE() formats.

Please, only 1 fix per patch.
This way, we'll hopefully have a nice title for each fix.
It's simpler for changelog.

Thanks
-- 
Thomas


[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

2014-05-26 Thread Dumitrescu, Cristian
I am also using the rte_ctrlmbuf to send messages between cores in one of the 
Packet Framework sample apps that I am going to send as a patch tomorrow or 
later this week.

Removing rte_ctrlmbuf would require additional rework to this (complex) sample 
app. It can be done, but it is additional work very close to the code freeze 
cycle.

Thanks,
Cristian

-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Gilmore, Walter E
Sent: Sunday, May 25, 2014 10:39 PM
To: Olivier Matz; dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

Olivier you're making an assumption that customer application code running on 
the Intel DPDK isn't using the rte_ctrlmbuf structure. 
Remember there are more than 300 customers using the Intel DPDK and there is no 
way you can predict that this is not used by them. 
The purpose of this structure is to send commands, events or any other type of 
information between user application tasks (normally from a manager task).
It has been there since the beginning in the original design and it's up to the 
user to define what is in the data field and how they wish to use it. 
It's one thing to fix a bug but to remove a structure like this because you 
don't see it use in the other parts is asking for trouble with customers. 

-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Olivier Matz
Sent: Friday, May 09, 2014 10:51 AM
To: dev at dpdk.org
Subject: [dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

The initial role of rte_ctrlmbuf is to carry generic messages (data pointer + 
data length) but it's not used by the DPDK or it applications.
Keeping it implies:
  - loosing 1 byte in the rte_mbuf structure
  - having some dead code rte_mbuf.[ch]

This patch removes this feature. Thanks to it, it is now possible to simplify 
the rte_mbuf structure by merging the rte_pktmbuf structure in it. This is done 
in next commit.

Signed-off-by: Olivier Matz 
---
 app/test-pmd/cmdline.c   |   1 -
 app/test-pmd/testpmd.c   |   2 -
 app/test-pmd/txonly.c|   2 +-
 app/test/commands.c  |   1 -
 app/test/test_mbuf.c |  72 +
 examples/ipv4_multicast/main.c   |   2 +-
 lib/librte_mbuf/rte_mbuf.c   |  65 +++-
 lib/librte_mbuf/rte_mbuf.h   | 175 ++-
 lib/librte_pmd_e1000/em_rxtx.c   |   2 +-
 lib/librte_pmd_e1000/igb_rxtx.c  |   2 +-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c|   4 +-
 lib/librte_pmd_virtio/virtio_rxtx.c  |   2 +-
 lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c|   2 +-
 lib/librte_pmd_xenvirt/rte_eth_xenvirt.c |   2 +-
 14 files changed, 54 insertions(+), 280 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 
7becedc..e3d1849 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -5010,7 +5010,6 @@ dump_struct_sizes(void)  #define DUMP_SIZE(t) 
printf("sizeof(" #t ") = %u\n", (unsigned)sizeof(t));
DUMP_SIZE(struct rte_mbuf);
DUMP_SIZE(struct rte_pktmbuf);
-   DUMP_SIZE(struct rte_ctrlmbuf);
DUMP_SIZE(struct rte_mempool);
DUMP_SIZE(struct rte_ring);
 #undef DUMP_SIZE
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 
9c56914..76b3823 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -389,13 +389,11 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
mb_ctor_arg = (struct mbuf_ctor_arg *) opaque_arg;
mb = (struct rte_mbuf *) raw_mbuf;

-   mb->type = RTE_MBUF_PKT;
mb->pool = mp;
mb->buf_addr = (void *) ((char *)mb + mb_ctor_arg->seg_buf_offset);
mb->buf_physaddr = (uint64_t) (rte_mempool_virt2phy(mp, mb) +
mb_ctor_arg->seg_buf_offset);
mb->buf_len  = mb_ctor_arg->seg_buf_size;
-   mb->type = RTE_MBUF_PKT;
mb->ol_flags = 0;
mb->pkt.data = (char *) mb->buf_addr + RTE_PKTMBUF_HEADROOM;
mb->pkt.nb_segs  = 1;
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index 
1cf2574..1f066d0 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -93,7 +93,7 @@ tx_mbuf_alloc(struct rte_mempool *mp)
struct rte_mbuf *m;

m = __rte_mbuf_raw_alloc(mp);
-   __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
+   __rte_mbuf_sanity_check_raw(m, 0);
return (m);
 }

diff --git a/app/test/commands.c b/app/test/commands.c index b145036..c69544b 
100644
--- a/app/test/commands.c
+++ b/app/test/commands.c
@@ -262,7 +262,6 @@ dump_struct_sizes(void)  #define DUMP_SIZE(t) 
printf("sizeof(" #t ") = %u\n", (unsigned)sizeof(t));
DUMP_SIZE(struct rte_mbuf);
DUMP_SIZE(struct rte_pktmbuf);
-   DUMP_SIZE(struct rte_ctrlmbuf);
DUMP_SIZE(struct rte_mempool);
DUMP_SIZE(struct rte_ring);
 #undef DUMP_SIZE
diff --git a/app/test/test

[dpdk-dev] DPDK Latency Issue

2014-05-26 Thread Jun Han
Thanks a lot Jeff for your detailed explanation. I still have open question
left. I would be grateful if someone would share their insight on it.

I have performed experiments to vary both the MAX_BURST_SIZE (originally
set as 32) and BURST_TX_DRAIN_US (originally set as 100 usec) in l3fwd
main.c.

While I vary the MAX_BURST_SIZE (1, 8, 16, 32, 64, and 128) and fix
BURST_TX_DRAIN_US=100 usec, I see a low average latency when sending a
burst of packets less than or equal to the MAX_BURST_SIZE.
For example, when MAX_BURST_SIZE is 32, if I send a burst of 32 packets or
less, then I get around 10 usec of latency. When it goes over it, it starts
to get higher average latency, which makes total sense.

My main question is the following. When I start sending continuous packets
at a rate of 14.88 Mpps for 64B packets, it shows consistently receiving an
average latency of 150 usec, no matter what MAX_BURST_SIZE. My guess is
that the latency should be bounded by BURST_TX_DRAIN_US, which is fixed at
100 usec. Would you share your thought on this issue please?

Thanks,
Jun


On Thu, May 22, 2014 at 7:06 PM, Shaw, Jeffrey B
wrote:

> Hello,
>
> > I measured a roundtrip latency (using Spirent traffic generator) of
> sending 64B packets over a 10GbE to DPDK, and DPDK does nothing but simply
> forward back to the incoming port (l3fwd without any lookup code, i.e.,
> dstport = port_id).
> > However, to my surprise, the average latency was around 150 usec. (The
> packet drop rate was only 0.001%, i.e., 283 packets/sec dropped) Another
> test I did was to measure the latency due to sending only a single 64B
> packet, and the latency I measured is ranging anywhere from 40 usec to 100
> usec.
>
> 40-100usec seems very high.
> The l3fwd application does some internal buffering before transmitting the
> packets.  It buffers either 32 packets, or waits up to 100us (hash-defined
> as BURST_TX_DRAIN_US), whichever comes first.
> Try either removing this timeout, or sending a burst of 32 packets at
> time.  Or you could try with testpmd, which should have reasonably low
> latency out of the box.
>
> There is also a section in the Release Notes (8.6 How can I tune my
> network application to achieve lower latency?) which provides some pointers
> for getting lower latency if you are willing to give up top-rate throughput.
>
> Thanks,
> Jeff
>


[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

2014-05-26 Thread Neil Horman
On Sun, May 25, 2014 at 09:39:22PM +, Gilmore, Walter E wrote:
> Olivier you're making an assumption that customer application code running on 
> the Intel DPDK isn't using the rte_ctrlmbuf structure. 
> Remember there are more than 300 customers using the Intel DPDK and there is 
> no way you can predict that this is not used by them. 
> The purpose of this structure is to send commands, events or any other type 
> of information between user application tasks (normally from a manager task).
> It has been there since the beginning in the original design and it's up to 
> the user to define what is in the data field and how they wish to use it. 
> It's one thing to fix a bug but to remove a structure like this because you 
> don't see it used in the other parts is asking for trouble with customers. 
> 

Not to rub salt in this, but I'd like to point out here that this strikes me as
a case of wanting cake and eating it too.  This community seems adamant against
the notion of having a fixed API for the dpdk project, yet fractures the moment
anyone tries to change something that they, or someone they are working with,
might be using.

If you want to make sure that use cases outside the scope of the project itself
stay usable, stabilize the API, and mark it with a version.  If you do this,
then you can change the API, and mark it with a new version in the link stage,
and just focus on maintaining backward compatibility.

Neil

> -Original Message-
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Olivier Matz
> Sent: Friday, May 09, 2014 10:51 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf
> 
> The initial role of rte_ctrlmbuf is to carry generic messages (data pointer + 
> data length) but it's not used by the DPDK or its applications.
> Keeping it implies:
>   - losing 1 byte in the rte_mbuf structure
>   - having some dead code in rte_mbuf.[ch]
> 
> This patch removes this feature. Thanks to it, it is now possible to simplify 
> the rte_mbuf structure by merging the rte_pktmbuf structure in it. This is 
> done in next commit.
> 
> Signed-off-by: Olivier Matz 
> ---
>  app/test-pmd/cmdline.c   |   1 -
>  app/test-pmd/testpmd.c   |   2 -
>  app/test-pmd/txonly.c|   2 +-
>  app/test/commands.c  |   1 -
>  app/test/test_mbuf.c |  72 +
>  examples/ipv4_multicast/main.c   |   2 +-
>  lib/librte_mbuf/rte_mbuf.c   |  65 +++-
>  lib/librte_mbuf/rte_mbuf.h   | 175 
> ++-
>  lib/librte_pmd_e1000/em_rxtx.c   |   2 +-
>  lib/librte_pmd_e1000/igb_rxtx.c  |   2 +-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c|   4 +-
>  lib/librte_pmd_virtio/virtio_rxtx.c  |   2 +-
>  lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c|   2 +-
>  lib/librte_pmd_xenvirt/rte_eth_xenvirt.c |   2 +-
>  14 files changed, 54 insertions(+), 280 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 
> 7becedc..e3d1849 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -5010,7 +5010,6 @@ dump_struct_sizes(void)  #define DUMP_SIZE(t) 
> printf("sizeof(" #t ") = %u\n", (unsigned)sizeof(t));
>   DUMP_SIZE(struct rte_mbuf);
>   DUMP_SIZE(struct rte_pktmbuf);
> - DUMP_SIZE(struct rte_ctrlmbuf);
>   DUMP_SIZE(struct rte_mempool);
>   DUMP_SIZE(struct rte_ring);
>  #undef DUMP_SIZE
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 
> 9c56914..76b3823 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -389,13 +389,11 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
>   mb_ctor_arg = (struct mbuf_ctor_arg *) opaque_arg;
>   mb = (struct rte_mbuf *) raw_mbuf;
>  
> - mb->type = RTE_MBUF_PKT;
>   mb->pool = mp;
>   mb->buf_addr = (void *) ((char *)mb + mb_ctor_arg->seg_buf_offset);
>   mb->buf_physaddr = (uint64_t) (rte_mempool_virt2phy(mp, mb) +
>   mb_ctor_arg->seg_buf_offset);
>   mb->buf_len  = mb_ctor_arg->seg_buf_size;
> - mb->type = RTE_MBUF_PKT;
>   mb->ol_flags = 0;
>   mb->pkt.data = (char *) mb->buf_addr + RTE_PKTMBUF_HEADROOM;
>   mb->pkt.nb_segs  = 1;
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index 
> 1cf2574..1f066d0 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -93,7 +93,7 @@ tx_mbuf_alloc(struct rte_mempool *mp)
>   struct rte_mbuf *m;
>  
>   m = __rte_mbuf_raw_alloc(mp);
> - __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
> + __rte_mbuf_sanity_check_raw(m, 0);
>   return (m);
>  }
>  
> diff --git a/app/test/commands.c b/app/test/commands.c index b145036..c69544b 
> 100644
> --- a/app/test/commands.c
> +++ b/app/test/commands.c
> @@ -262,7 +262,6 @@ dump_struct_sizes(void)  #define DUMP_SIZE(t) 
> printf("sizeof(" #t ") = %u\n"

[dpdk-dev] [PATCH v2 0/3] Support setting TX rate for queue and VF

2014-05-26 Thread Neil Horman
On Mon, May 26, 2014 at 03:45:28PM +0800, Ouyang Changchun wrote:
> This patch v2 fixes some errors and warnings reported by checkpatch.pl.
>  
> This patch series also contain the 3 items:
> 1. Add API to support setting TX rate for a queue or a VF.
> 2. Implement the functionality of setting TX rate for queue or VF in IXGBE 
> PMD.
> 3. Add commands in testpmd to test the functionality of setting TX rate for 
> queue or VF.
> 
> Ouyang Changchun (3):
>   Add API to support set TX rate for a queue and VF.
>   Implement the functionality of setting TX rate for queue or VF in
> IXGBE PMD.
>   Add commands in testpmd to test the functionality of setting TX rate
> for queue or VF.
> 
>  app/test-pmd/cmdline.c  | 159 
> +++-
>  app/test-pmd/config.c   |  47 +++
>  app/test-pmd/testpmd.h  |   3 +
>  lib/librte_ether/rte_ethdev.c   |  71 
>  lib/librte_ether/rte_ethdev.h   |  51 
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 122 +++
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  13 ++-
>  7 files changed, 462 insertions(+), 4 deletions(-)
> 
This seems a bit backwards.  queue rate limiting is rather a generic function,
that doesn't really need to know any details about the hardware, save for its
maximum tx rate, but your implementation requires that it be re-implemented
for each PMD.  Why not just export max tx rates from the PMD and write a generic
queuing library to do rate limitation for any PMD?

Neil

> -- 
> 1.9.0
> 
> 


[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

2014-05-26 Thread Stephen Hemminger
On Fri,  9 May 2014 16:50:30 +0200
Olivier Matz  wrote:

> The initial role of rte_ctrlmbuf is to carry generic messages (data
> pointer + data length) but it's not used by the DPDK or its applications.
> Keeping it implies:
>   - losing 1 byte in the rte_mbuf structure
>   - having some dead code in rte_mbuf.[ch]
> 
> This patch removes this feature. Thanks to it, it is now possible to
> simplify the rte_mbuf structure by merging the rte_pktmbuf structure
> in it. This is done in next commit.
> 
> Signed-off-by: Olivier Matz 

The only win from this is to save the byte for the type field.
Yes bits here are precious.

Does external application mix control and data mbuf's in the same ring?
The stuff in the tree only uses type field for debug validation/sanity
checks.

Since it is only one bit, maybe you can find one bit to store that. 
Since buffer and pool addresses are going to be at least 32 bit aligned
maybe you can use the old GC trick of using the LSB as flag.