[dpdk-dev] [PATCH v2 1/3] ether: Add API to support setting TX rate for queue and VF
This patch adds an API to support setting the TX rate for a queue and a VF.

Signed-off-by: Ouyang Changchun
---
 lib/librte_ether/rte_ethdev.c | 71 +++
 lib/librte_ether/rte_ethdev.h | 51 +++
 2 files changed, 122 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index a5727dd..1ea61e1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1913,6 +1913,77 @@ rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id,
 						vf_mask, vlan_on);
 }

+int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx,
+			uint16_t tx_rate)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_link link;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("set queue rate limit: invalid port id=%d\n",
+				port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+	link = dev->data->dev_link;
+
+	if (queue_idx >= dev_info.max_tx_queues) {
+		PMD_DEBUG_TRACE("set queue rate limit: port %d: "
+				"invalid queue id=%d\n", port_id, queue_idx);
+		return -EINVAL;
+	}
+
+	if (tx_rate > link.link_speed) {
+		PMD_DEBUG_TRACE("set queue rate limit: invalid tx_rate=%d, "
+				"bigger than link speed=%d\n",
+				tx_rate, link.link_speed);
+		return -EINVAL;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP);
+	return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate);
+}
+
+int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate,
+			uint64_t q_msk)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_link link;
+
+	if (q_msk == 0)
+		return 0;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("set VF rate limit: invalid port id=%d\n",
+				port_id);
+		return -ENODEV;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	rte_eth_dev_info_get(port_id, &dev_info);
+	link = dev->data->dev_link;
+
+	if (vf >= dev_info.max_vfs) {
+		PMD_DEBUG_TRACE("set VF rate limit: port %d: "
+				"invalid vf id=%d\n", port_id, vf);
+		return -EINVAL;
+	}
+
+	if (tx_rate > link.link_speed) {
+		PMD_DEBUG_TRACE("set VF rate limit: invalid tx_rate=%d, "
+				"bigger than link speed=%d\n",
+				tx_rate, link.link_speed);
+		return -EINVAL;
+	}
+
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP);
+	return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk);
+}
+
 int
 rte_eth_mirror_rule_set(uint8_t port_id,
 			struct rte_eth_vmdq_mirror_conf *mirror_conf,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d5ea46b..445d40a 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1012,6 +1012,17 @@ typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev,
 				uint8_t vlan_on);
 /**< @internal Set VF VLAN pool filter */

+typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev,
+				uint16_t queue_idx,
+				uint16_t tx_rate);
+/**< @internal Set queue TX rate */
+
+typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev,
+				uint16_t vf,
+				uint16_t tx_rate,
+				uint64_t q_msk);
+/**< @internal Set VF TX rate */
+
 typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev,
 				struct rte_eth_vmdq_mirror_conf *mirror_conf,
 				uint8_t rule_id,
@@ -1119,6 +1130,8 @@ struct eth_dev_ops {
 	eth_set_vf_rx_t set_vf_rx;   /**< enable/disable a VF receive */
 	eth_set_vf_tx_t set_vf_tx;   /**< enable/disable a VF transmit */
 	eth_set_vf_vlan_filter_t set_vf_vlan_filter;  /**< Set VF VLAN filter */
+	eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit */
+	eth_set_vf_rate_limit_t set_vf_rate_limit;   /**< Set VF rate limit */

 	/** Add a signature filter. */
 	fdir_add_signature_filter_t fdir_add_signature_filter;
@@ -2561,6 +2574,44 @@ int rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id);

 /**
+ * Set the rate limitation for a queue
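A minimal caller-side sketch of the two new functions (port, queue and VF numbers are made-up examples; tx_rate is in Mbps, the same unit as link.link_speed):

	int ret;

	/* cap TX queue 0 of port 0 at 100 Mbps */
	ret = rte_eth_set_queue_rate_limit(0, 0, 100);
	if (ret < 0)
		printf("queue rate limit failed: %s\n", strerror(-ret));

	/* cap queues 0 and 1 of VF 2 on port 0 at 1000 Mbps */
	ret = rte_eth_set_vf_rate_limit(0, 2, 1000, 0x3 /* queue mask */);
	if (ret < 0)
		printf("VF rate limit failed: %s\n", strerror(-ret));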
[dpdk-dev] [PATCH v2 2/3] ixgbe: Implement the functionality of setting TX rate for queue or VF in IXGBE PMD
This patch implements the functionality of setting the TX rate for a queue or a VF in the IXGBE PMD.

Signed-off-by: Ouyang Changchun
---
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 122
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  13 +++-
 2 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
index c9b5fe4..643477a 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c
@@ -87,6 +87,8 @@
 #define IXGBE_LINK_UP_CHECK_TIMEOUT 1000 /* ms */
 #define IXGBE_VMDQ_NUM_UC_MAC 4096 /* Maximum nb. of UC MAC addr. */

+#define IXGBE_MMW_SIZE_DEFAULT		0x4
+#define IXGBE_MMW_SIZE_JUMBO_FRAME	0x14
+
 #define IXGBEVF_PMD_NAME "rte_ixgbevf_pmd" /* PMD name */

@@ -182,6 +184,10 @@ static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev,
 static int ixgbe_mirror_rule_reset(struct rte_eth_dev *dev,
 		uint8_t rule_id);

+static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
+		uint16_t queue_idx, uint16_t tx_rate);
+static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
+		uint16_t tx_rate, uint64_t q_msk);
+
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
  */
@@ -280,6 +286,8 @@ static struct eth_dev_ops ixgbe_eth_dev_ops = {
 	.set_vf_rx            = ixgbe_set_pool_rx,
 	.set_vf_tx            = ixgbe_set_pool_tx,
 	.set_vf_vlan_filter   = ixgbe_set_pool_vlan_filter,
+	.set_queue_rate_limit = ixgbe_set_queue_rate_limit,
+	.set_vf_rate_limit    = ixgbe_set_vf_rate_limit,
 	.fdir_add_signature_filter    = ixgbe_fdir_add_signature_filter,
 	.fdir_update_signature_filter = ixgbe_fdir_update_signature_filter,
 	.fdir_remove_signature_filter = ixgbe_fdir_remove_signature_filter,
@@ -1288,10 +1296,13 @@ ixgbe_dev_start(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
 		IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct ixgbe_vf_info *vfinfo =
+		*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private);
 	int err, link_up = 0, negotiate = 0;
 	uint32_t speed = 0;
 	int mask = 0;
 	int status;
+	uint16_t vf, idx;

 	PMD_INIT_FUNC_TRACE();

@@ -1408,6 +1419,16 @@ skip_link_setup:
 		goto error;
 	}

+	/* Restore VF rate limits */
+	if (vfinfo != NULL) {
+		for (vf = 0; vf < dev->pci_dev->max_vfs; vf++)
+			for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++)
+				if (vfinfo[vf].tx_rate[idx] != 0)
+					ixgbe_set_vf_rate_limit(dev, vf,
+						vfinfo[vf].tx_rate[idx],
+						1 << idx);
+	}
+
 	ixgbe_restore_statistics_mapping(dev);

 	return (0);
@@ -3062,6 +3083,107 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id)
 	return 0;
 }

+static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev,
+	uint16_t queue_idx, uint16_t tx_rate)
+{
+	struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	uint32_t rf_dec, rf_int;
+	uint32_t bcnrc_val;
+	uint16_t link_speed = dev->data->dev_link.link_speed;
+
+	if (queue_idx >= hw->mac.max_tx_queues)
+		return -EINVAL;
+
+	if (tx_rate != 0) {
+		/* Calculate the rate factor values to set */
+		rf_int = (uint32_t)link_speed / (uint32_t)tx_rate;
+		rf_dec = (uint32_t)link_speed % (uint32_t)tx_rate;
+		rf_dec = (rf_dec << IXGBE_RTTBCNRC_RF_INT_SHIFT) / tx_rate;
+
+		bcnrc_val = IXGBE_RTTBCNRC_RS_ENA;
+		bcnrc_val |= ((rf_int << IXGBE_RTTBCNRC_RF_INT_SHIFT) &
+				IXGBE_RTTBCNRC_RF_INT_MASK_M);
+		bcnrc_val |= (rf_dec & IXGBE_RTTBCNRC_RF_DEC_MASK);
+	} else {
+		bcnrc_val = 0;
+	}
+
+	/*
+	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
+	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported, otherwise
+	 * set as 0x4.
+	 */
+	if ((dev->data->dev_conf.rxmode.jumbo_frame == 1) &&
+		(dev->data->dev_conf.rxmode.max_rx_pkt_len >=
+			IXGBE_MAX_JUMBO_FRAME_SIZE))
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRM,
+			IXGBE_MMW_SIZE_JUMBO_FRAME);
+	else
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRM,
+			IXGBE_MMW_SIZE_DEFAULT);
+
+	/* Set RTTBCNRC of queue X */
+	IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, queue_idx);
+	IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf,
+	uint16_t tx_
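For reference, the rate factor written to RTTBCNRC above is a fixed-point divisor of the link speed. A worked example with made-up numbers, assuming IXGBE_RTTBCNRC_RF_INT_SHIFT is 14 as on the 82599:

	/* 10000 Mbps link, requested tx_rate = 3000 Mbps */
	rf_int = 10000 / 3000;                  /* = 3, integer part */
	rf_dec = ((10000 % 3000) << 14) / 3000; /* = 5461, fraction in 1/2^14 units */
	/* effective rate = 10000 / (3 + 5461/16384) ~= 3000 Mbps */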
[dpdk-dev] [PATCH v2 3/3] testpmd: Add commands to test the functionality of setting TX rate for queue or VF
This patch adds commands in testpmd to test the functionality of setting TX rate for queue or VF. Signed-off-by: Ouyang Changchun --- app/test-pmd/cmdline.c | 159 - app/test-pmd/config.c | 47 +++ app/test-pmd/testpmd.h | 3 + 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index b3824f9..83b2665 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -342,7 +342,14 @@ static void cmd_help_long_parsed(void *parsed_result, "BAM:accepts broadcast packets;" "MPE:accepts all multicast packets\n\n" "Enable/Disable a VF receive mode of a port\n\n" - + + "set port (port_id) queue (queue_id) rate (rate_num)\n" + "Set rate limit for a queue of a port\n\n" + + "set port (port_id) vf (vf_id) rate (rate_num) " + "queue_mask (queue_mask_value)\n" + "Set rate limit for queues in VF of a port\n\n" + "set port (port_id) mirror-rule (rule_id)" "(pool-mirror|vlan-mirror)\n" " (poolmask|vlanid[,vlanid]*) dst-pool (pool_id) (on|off)\n" @@ -4790,6 +4797,154 @@ cmdline_parse_inst_t cmd_vf_rxvlan_filter = { }, }; +/* *** SET RATE LIMIT FOR A QUEUE OF A PORT *** */ +struct cmd_queue_rate_limit_result { + cmdline_fixed_string_t set; + cmdline_fixed_string_t port; + uint8_t port_num; + cmdline_fixed_string_t queue; + uint8_t queue_num; + cmdline_fixed_string_t rate; + uint16_t rate_num; +}; + +static void cmd_queue_rate_limit_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_queue_rate_limit_result *res = parsed_result; + int ret = 0; + + if ((strcmp(res->set, "set") == 0) && (strcmp(res->port, "port") == 0) + && (strcmp(res->queue, "queue") == 0) + && (strcmp(res->rate, "rate") == 0)) + ret = set_queue_rate_limit(res->port_num, res->queue_num, + res->rate_num); + if (ret < 0) + printf("queue_rate_limit_cmd error: (%s)\n", strerror(-ret)); + +} + +cmdline_parse_token_string_t cmd_queue_rate_limit_set = + TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result, + set, "set"); +cmdline_parse_token_string_t cmd_queue_rate_limit_port = + TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result, + port, "port"); +cmdline_parse_token_num_t cmd_queue_rate_limit_portnum = + TOKEN_NUM_INITIALIZER(struct cmd_queue_rate_limit_result, + port_num, UINT8); +cmdline_parse_token_string_t cmd_queue_rate_limit_queue = + TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result, + queue, "queue"); +cmdline_parse_token_num_t cmd_queue_rate_limit_queuenum = + TOKEN_NUM_INITIALIZER(struct cmd_queue_rate_limit_result, + queue_num, UINT8); +cmdline_parse_token_string_t cmd_queue_rate_limit_rate = + TOKEN_STRING_INITIALIZER(struct cmd_queue_rate_limit_result, + rate, "rate"); +cmdline_parse_token_num_t cmd_queue_rate_limit_ratenum = + TOKEN_NUM_INITIALIZER(struct cmd_queue_rate_limit_result, + rate_num, UINT16); + +cmdline_parse_inst_t cmd_queue_rate_limit = { + .f = cmd_queue_rate_limit_parsed, + .data = (void *)0, + .help_str = "set port X queue Y rate Z:(X = port number," + "Y = queue number,Z = rate number)set rate limit for a queue on port X", + .tokens = { + (void *)&cmd_queue_rate_limit_set, + (void *)&cmd_queue_rate_limit_port, + (void *)&cmd_queue_rate_limit_portnum, + (void *)&cmd_queue_rate_limit_queue, + (void *)&cmd_queue_rate_limit_queuenum, + (void *)&cmd_queue_rate_limit_rate, + (void *)&cmd_queue_rate_limit_ratenum, + NULL, + }, +}; + + +/* *** SET RATE LIMIT FOR A VF OF A PORT *** */ +struct cmd_vf_rate_limit_result { + cmdline_fixed_string_t set; + 
cmdline_fixed_string_t port; + uint8_t port_num; + cmdline_fixed_string_t vf; + uint8_t vf_num; + cmdline_fixed_string_t rate; + uint16_t rate_num; + cmdline_fixed_string_t q_msk; + uint64_t q_msk_val; +}; + +static void cmd_vf_rate_limit_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_vf_rate_limit_result *res = parsed_resul
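For reference, once this series is applied the two new commands are invoked from the testpmd prompt like this (port, queue, VF and rate numbers are examples):

	testpmd> set port 0 queue 1 rate 100
	testpmd> set port 0 vf 2 rate 1000 queue_mask 0x3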
[dpdk-dev] [PATCH v2 0/3] Support setting TX rate for queue and VF
This v2 patch set fixes some errors and warnings reported by checkpatch.pl. The series contains 3 items:
1. Add an API to support setting the TX rate for a queue or a VF.
2. Implement the functionality of setting the TX rate for a queue or a VF in the IXGBE PMD.
3. Add commands in testpmd to test the functionality of setting the TX rate for a queue or a VF.

Ouyang Changchun (3):
  Add API to support setting TX rate for a queue and VF
  Implement the functionality of setting TX rate for queue or VF in
    IXGBE PMD
  Add commands in testpmd to test the functionality of setting TX rate
    for queue or VF

 app/test-pmd/cmdline.c              | 159 +++-
 app/test-pmd/config.c               |  47 +++
 app/test-pmd/testpmd.h              |   3 +
 lib/librte_ether/rte_ethdev.c       |  71
 lib/librte_ether/rte_ethdev.h       |  51
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 122 +++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  13 ++-
 7 files changed, 462 insertions(+), 4 deletions(-)

--
1.9.0
[dpdk-dev] [PATCH 0/5] add mtu and flow control handlers
This patchset introduces 3 new ethdev operations: flow control parameters retrieval, and MTU get/set operations.

--
David Marchand

David Marchand (1):
  ethdev: add autoneg parameter in flow ctrl accessors

Ivan Boule (2):
  ixgbe: add get/set_mtu to ixgbevf
  app/testpmd: allow to configure mtu

Samuel Gauthier (1):
  ethdev: add mtu accessors

Zijie Pan (1):
  ethdev: retrieve flow control configuration

 app/test-pmd/cmdline.c              |  54
 app/test-pmd/config.c               |  13 +++
 app/test-pmd/testpmd.h              |   2 +-
 lib/librte_ether/rte_ethdev.c       |  47 ++
 lib/librte_ether/rte_ethdev.h       |  62 -
 lib/librte_pmd_e1000/e1000_ethdev.h |   4 +
 lib/librte_pmd_e1000/em_ethdev.c    | 111 +++
 lib/librte_pmd_e1000/em_rxtx.c      |  11 +++
 lib/librte_pmd_e1000/igb_ethdev.c   | 111 +++
 lib/librte_pmd_e1000/igb_rxtx.c     |  10 +++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 165 ++-
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |   2 +
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   |  28 +-
 13 files changed, 614 insertions(+), 6 deletions(-)

--
1.7.10.4
[dpdk-dev] [PATCH 1/5] ethdev: retrieve flow control configuration
From: Zijie Pan

This patch adds a new function in the ethdev API to retrieve the current flow control configuration. This operation has been implemented for rte_em_pmd, rte_igb_pmd and rte_ixgbe_pmd.

Signed-off-by: Zijie Pan
Signed-off-by: David Marchand
---
 lib/librte_ether/rte_ethdev.c       | 16 ++
 lib/librte_ether/rte_ethdev.h       | 24 +--
 lib/librte_pmd_e1000/em_ethdev.c    | 44
 lib/librte_pmd_e1000/igb_ethdev.c   | 44
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 55 +--
 5 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index dabbdd2..31c18ef 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1456,6 +1456,22 @@ rte_eth_dev_fdir_set_masks(uint8_t port_id, struct rte_fdir_masks *fdir_mask)
 }

 int
+rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP);
+	memset(fc_conf, 0, sizeof(*fc_conf));
+	return (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf);
+}
+
+int
 rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf)
 {
 	struct rte_eth_dev *dev;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index d839b8c..04533e5 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -955,8 +955,12 @@ typedef int (*fdir_set_masks_t)(struct rte_eth_dev *dev,
 		struct rte_fdir_masks *fdir_masks);
 /**< @internal Setup flow director masks on an Ethernet device */

+typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
+		struct rte_eth_fc_conf *fc_conf);
+/**< @internal Get current flow control parameter on an Ethernet device */
+
 typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev,
-		struct rte_eth_fc_conf *fc_conf);
+		struct rte_eth_fc_conf *fc_conf);
 /**< @internal Setup flow control parameter on an Ethernet device */

 typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev,
@@ -1120,6 +1124,7 @@ struct eth_dev_ops {
 	eth_queue_release_t tx_queue_release; /**< Release TX queue. */
 	eth_dev_led_on_t dev_led_on;   /**< Turn on LED. */
 	eth_dev_led_off_t dev_led_off; /**< Turn off LED. */
+	flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */
 	flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */
 	priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. */
 	eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address */
@@ -2308,6 +2313,21 @@ int rte_eth_led_on(uint8_t port_id);
 int rte_eth_led_off(uint8_t port_id);

 /**
+ * Get current status of the Ethernet link flow control for Ethernet device
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param fc_conf
+ *   The pointer to the structure where to store the flow control parameters.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if hardware doesn't support flow control.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+int rte_eth_dev_flow_ctrl_get(uint8_t port_id,
+			struct rte_eth_fc_conf *fc_conf);
+
+/**
  * Configure the Ethernet link flow control for Ethernet device
  *
  * @param port_id
@@ -2322,7 +2342,7 @@ int rte_eth_led_off(uint8_t port_id);
  *   - (-EIO) if flow control setup failure
  */
 int rte_eth_dev_flow_ctrl_set(uint8_t port_id,
-			struct rte_eth_fc_conf *fc_conf);
+			struct rte_eth_fc_conf *fc_conf);

 /**
  * Configure the Ethernet priority flow control under DCB environment
diff --git a/lib/librte_pmd_e1000/em_ethdev.c b/lib/librte_pmd_e1000/em_ethdev.c
index 2f0e1a0..74dc7e5 100644
--- a/lib/librte_pmd_e1000/em_ethdev.c
+++ b/lib/librte_pmd_e1000/em_ethdev.c
@@ -77,6 +77,8 @@ static void eth_em_stats_get(struct rte_eth_dev *dev,
 static void eth_em_stats_reset(struct rte_eth_dev *dev);
 static void eth_em_infos_get(struct rte_eth_dev *dev,
 		struct rte_eth_dev_info *dev_info);
+static int eth_em_flow_ctrl_get(struct rte_eth_dev *dev,
+		struct rte_eth_fc_conf *fc_conf);
 static int eth_em_flow_ctrl_set(struct rte_eth_dev *dev,
 		struct rte_eth_fc_conf *fc_conf);
 static int eth_em_interrupt_setup(struct rte_eth_dev *dev);
@@ -153,6 +155,7 @@ static struct eth_dev_ops eth_em_ops = {
 	.tx_queue_release = eth_em_tx_queue_release,
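A short caller-side sketch of the new accessor (port number and the fields printed are illustrative):

	struct rte_eth_fc_conf fc_conf;
	int ret;

	ret = rte_eth_dev_flow_ctrl_get(0, &fc_conf);
	if (ret == 0)
		printf("mode=%d high_water=%u low_water=%u\n",
		       fc_conf.mode, fc_conf.high_water, fc_conf.low_water);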
[dpdk-dev] [PATCH 3/5] ethdev: add mtu accessors
From: Samuel Gauthier

This patch adds two new functions in the ethdev API to retrieve the current MTU and to change the MTU of a port. These operations have been implemented for rte_em_pmd, rte_igb_pmd and rte_ixgbe_pmd.

Signed-off-by: Samuel Gauthier
Signed-off-by: Ivan Boule
Signed-off-by: David Marchand
---
 lib/librte_ether/rte_ethdev.c       | 31 +
 lib/librte_ether/rte_ethdev.h       | 37
 lib/librte_pmd_e1000/e1000_ethdev.h |  4 +++
 lib/librte_pmd_e1000/em_ethdev.c    | 64 +++
 lib/librte_pmd_e1000/em_rxtx.c      | 11 ++
 lib/librte_pmd_e1000/igb_ethdev.c   | 64 +++
 lib/librte_pmd_e1000/igb_rxtx.c     | 10 ++
 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 62 +
 lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  2 ++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c   | 10 ++
 10 files changed, 295 insertions(+)

diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 31c18ef..ece2a68 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1058,6 +1058,37 @@ rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr)
 	ether_addr_copy(&dev->data->mac_addrs[0], mac_addr);
 }

+
+int
+rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_get, -ENOTSUP);
+	return (*dev->dev_ops->mtu_get)(dev, mtu);
+}
+
+int
+rte_eth_dev_set_mtu(uint8_t port_id, uint16_t *mtu)
+{
+	struct rte_eth_dev *dev;
+
+	if (port_id >= nb_ports) {
+		PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id);
+		return (-ENODEV);
+	}
+
+	dev = &rte_eth_devices[port_id];
+	FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP);
+	return (*dev->dev_ops->mtu_set)(dev, mtu);
+}
+
 int
 rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on)
 {
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 39351ea..177a6ec 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -890,6 +890,12 @@ typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev,
 typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset);
 /**< @internal Check DD bit of specific RX descriptor */

+typedef int (*mtu_get_t)(struct rte_eth_dev *dev, uint16_t *mtu);
+/**< @internal Get MTU. */
+
+typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t *mtu);
+/**< @internal Set MTU. */
+
 typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev,
 				uint16_t vlan_id,
 				int on);
@@ -1113,6 +1119,8 @@ struct eth_dev_ops {
 	eth_queue_stats_mapping_set_t queue_stats_mapping_set;
 	/**< Configure per queue stat counter mapping. */
 	eth_dev_infos_get_t dev_infos_get; /**< Get device info. */
+	mtu_get_t mtu_get; /**< Get MTU. */
+	mtu_set_t mtu_set; /**< Set MTU. */
 	vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */
 	vlan_tpid_set_t vlan_tpid_set; /**< Outer VLAN TPID Setup. */
 	vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */
@@ -1680,6 +1688,35 @@ extern void rte_eth_dev_info_get(uint8_t port_id,
 				struct rte_eth_dev_info *dev_info);

 /**
+ * Retrieve the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A pointer to a uint16_t where the retrieved MTU is to be stored.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ */
+extern int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu);
+
+/**
+ * Change the MTU of an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param mtu
+ *   A pointer to a uint16_t where the MTU to be applied is stored.
+ * @return
+ *   - (0) if successful.
+ *   - (-ENOTSUP) if operation is not supported.
+ *   - (-ENODEV) if *port_id* invalid.
+ *   - (-EINVAL) if *mtu* invalid.
+ */
+extern int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t *mtu);
+
+/**
  * Enable/Disable hardware filtering by an Ethernet device of received
  * VLAN packets tagged with a given VLAN Tag Identifier.
  *
diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h
index 8790601..5cbf436 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -138,6 +138,8 @@ uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 uint16_t eth_igb_recv_scattered_pkts(void *rxq,
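A caller-side sketch; note that in this version of the API both accessors take the MTU by pointer:

	uint16_t mtu;

	if (rte_eth_dev_get_mtu(0, &mtu) == 0)  /* port 0, illustrative */
		printf("current MTU: %d\n", mtu);

	mtu = 1500;
	if (rte_eth_dev_set_mtu(0, &mtu) != 0)
		printf("MTU change not supported or invalid\n");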
[dpdk-dev] [PATCH 2/5] ethdev: add autoneg parameter in flow ctrl accessors
Add autoneg field in flow control parameters. This makes it easier to understand why changing some parameters does not always have the expected result. Changing autoneg is not supported at the moment. Signed-off-by: David Marchand --- lib/librte_ether/rte_ethdev.h |1 + lib/librte_pmd_e1000/em_ethdev.c|3 +++ lib/librte_pmd_e1000/igb_ethdev.c |3 +++ lib/librte_pmd_ixgbe/ixgbe_ethdev.c |3 +++ 4 files changed, 10 insertions(+) diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 04533e5..39351ea 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -550,6 +550,7 @@ struct rte_eth_fc_conf { uint16_t send_xon;/**< Is XON frame need be sent */ enum rte_eth_fc_mode mode; /**< Link flow control mode */ uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */ + uint8_t autoneg; /**< Use Pause autoneg */ }; /** diff --git a/lib/librte_pmd_e1000/em_ethdev.c b/lib/librte_pmd_e1000/em_ethdev.c index 74dc7e5..c148cbc 100644 --- a/lib/librte_pmd_e1000/em_ethdev.c +++ b/lib/librte_pmd_e1000/em_ethdev.c @@ -1378,6 +1378,7 @@ eth_em_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) fc_conf->high_water = hw->fc.high_water; fc_conf->low_water = hw->fc.low_water; fc_conf->send_xon = hw->fc.send_xon; + fc_conf->autoneg = hw->mac.autoneg; /* * Return rx_pause and tx_pause status according to actual setting of @@ -1422,6 +1423,8 @@ eth_em_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) uint32_t rctl; hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (fc_conf->autoneg != hw->mac.autoneg) + return -ENOTSUP; rx_buf_size = em_get_rx_buffer_size(hw); PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x \n", rx_buf_size); diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c index 6dc82c2..e15fe5a 100644 --- a/lib/librte_pmd_e1000/igb_ethdev.c +++ b/lib/librte_pmd_e1000/igb_ethdev.c @@ -1818,6 +1818,7 @@ eth_igb_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) fc_conf->high_water = hw->fc.high_water; fc_conf->low_water = hw->fc.low_water; fc_conf->send_xon = hw->fc.send_xon; + fc_conf->autoneg = hw->mac.autoneg; /* * Return rx_pause and tx_pause status according to actual setting of @@ -1862,6 +1863,8 @@ eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) uint32_t rctl; hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (fc_conf->autoneg != hw->mac.autoneg) + return -ENOTSUP; rx_buf_size = igb_get_rx_buffer_size(hw); PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x \n", rx_buf_size); diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c index 4633654..c876c3e 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c @@ -2193,6 +2193,7 @@ ixgbe_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) fc_conf->high_water = hw->fc.high_water[0]; fc_conf->low_water = hw->fc.low_water[0]; fc_conf->send_xon = hw->fc.send_xon; + fc_conf->autoneg = !hw->fc.disable_fc_autoneg; /* * Return rx_pause status according to actual setting of @@ -2244,6 +2245,8 @@ ixgbe_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) PMD_INIT_FUNC_TRACE(); hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (fc_conf->autoneg != !hw->fc.disable_fc_autoneg) + return -ENOTSUP; rx_buf_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)); PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x \n", rx_buf_size); -- 1.7.10.4
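Since changing autoneg is rejected with -ENOTSUP by the PMDs above, a caller that only wants to change the mode should read the current configuration first and leave autoneg as reported; a sketch:

	struct rte_eth_fc_conf fc_conf;

	rte_eth_dev_flow_ctrl_get(0, &fc_conf);  /* port 0, illustrative */
	fc_conf.mode = RTE_FC_FULL;              /* change only the mode */
	/* fc_conf.autoneg keeps the value reported by the driver */
	rte_eth_dev_flow_ctrl_set(0, &fc_conf);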
[dpdk-dev] [PATCH 4/5] ixgbe: add get/set_mtu to ixgbevf
From: Ivan Boule The support of jumbo frames in the ixgbevf Poll Mode Driver of 10GbE 82599 VF functions consists in the following enhancements: - Implement the mtu_set function in the ixgbevf PMD, using the IXGBE_VF_SET_LPE request of the version 1.0 of the VF/PF mailbox API for this purpose. - Implement the mtu_get function in the ixgbevf PMD for the sake of coherency. - Add a detailed explanation on the VF/PF rx max frame len negotiation. - To deal with Jumbo frames, force the receive function to handle scattered packets. Signed-off-by: Ivan Boule Signed-off-by: David Marchand --- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 45 +++ lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 18 +- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c index b9db1f4..230b758 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c @@ -194,6 +194,9 @@ static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, uint32_t index, uint32_t pool); static void ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index); +static int ixgbevf_dev_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu); +static int ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t *mtu); + /* * Define VF Stats MACRO for Non "cleared on read" register */ @@ -334,6 +337,8 @@ static struct eth_dev_ops ixgbevf_eth_dev_ops = { .stats_reset = ixgbevf_dev_stats_reset, .dev_close= ixgbevf_dev_close, .dev_infos_get= ixgbe_dev_info_get, + .mtu_get = ixgbevf_dev_get_mtu, + .mtu_set = ixgbevf_dev_set_mtu, .vlan_filter_set = ixgbevf_vlan_filter_set, .vlan_strip_queue_set = ixgbevf_vlan_strip_queue_set, .vlan_offload_set = ixgbevf_vlan_offload_set, @@ -3319,6 +3324,46 @@ ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index) } } +static int +ixgbevf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t *mtu) +{ + struct ixgbe_hw *hw; + uint16_t max_frame = *mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* MTU < 68 is an error and causes problems on some kernels */ + if ((*mtu < 68) || (max_frame > ETHER_MAX_JUMBO_FRAME_LEN)) + return -EINVAL; + + /* +* When supported by the underlying PF driver, use the IXGBE_VF_SET_MTU +* request of the version 2.0 of the mailbox API. +* For now, use the IXGBE_VF_SET_LPE request of the version 1.0 +* of the mailbox API. +* This call to IXGBE_SET_LPE action won't work with ixgbe pf drivers +* prior to 3.11.33 which contains the following change: +* "ixgbe: Enable jumbo frames support w/ SR-IOV" +*/ + ixgbevf_rlpml_set_vf(hw, max_frame); + + /* update max frame size */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame; + return 0; +} + +static int +ixgbevf_dev_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) +{ + /* +* When available, use the IXGBE_VF_GET_MTU request +* of the version 2.0 of the mailbox API. 
+*/ + *mtu = (uint16_t) (dev->data->dev_conf.rxmode.max_rx_pkt_len - + (ETHER_HDR_LEN + ETHER_CRC_LEN)); + return 0; +} + static struct rte_driver rte_ixgbe_driver = { .type = PMD_PDEV, .init = rte_ixgbe_pmd_init, diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c index b05c1ba..c797616 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -3700,7 +3700,20 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - /* setup MTU */ + /* +* When the VF driver issues a IXGBE_VF_RESET request, the PF driver +* disables the VF receipt of packets if the PF MTU is > 1500. +* This is done to deal with 82599 limitations that imposes +* the PF and all VFs to share the same MTU. +* Then, the PF driver enables again the VF receipt of packet when +* the VF driver issues a IXGBE_VF_SET_LPE request. +* In the meantime, the VF device cannot be used, even if the VF driver +* and the Guest VM network stack are ready to accept packets with a +* size up to the PF MTU. +* As a work-around to this PF behaviour, force the call to +* ixgbevf_rlpml_set_vf even if jumbo frames are not used. This way, +* VF packets received can work in all cases. +*/ ixgbevf_rlpml_set_vf(hw, (uint16_t)dev->data->dev_conf.rxmode.max_rx_pkt_len); @@ -3783,6 +3796,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev) } } + /* Set RX function to d
[dpdk-dev] [PATCH 5/5] app/testpmd: allow to configure mtu
From: Ivan Boule Take avantage of the .set_mtu ethdev function and make it possible to configure MTU on devices using testpmd. Signed-off-by: Ivan Boule Signed-off-by: David Marchand --- app/test-pmd/cmdline.c | 54 app/test-pmd/config.c | 13 app/test-pmd/testpmd.h |2 +- 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 68bcd7b..517944b 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -516,6 +516,8 @@ static void cmd_help_long_parsed(void *parsed_result, "port config all (txfreet|txrst|rxfreet) (value)\n" "Set free threshold for rx/tx, or set" " tx rs bit threshold.\n\n" + "port config mtu X value\n" + "Set the MTU of port X to a given value\n\n" ); } @@ -1014,6 +1016,57 @@ cmdline_parse_inst_t cmd_config_max_pkt_len = { }, }; +/* *** configure port MTU *** */ +struct cmd_config_mtu_result { + cmdline_fixed_string_t port; + cmdline_fixed_string_t keyword; + cmdline_fixed_string_t mtu; + uint8_t port_id; + uint16_t value; +}; + +static void +cmd_config_mtu_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_config_mtu_result *res = parsed_result; + + if (res->value < ETHER_MIN_LEN) { + printf("mtu cannot be less than %d\n", ETHER_MIN_LEN); + return; + } + port_mtu_set(res->port_id, res->value); +} + +cmdline_parse_token_string_t cmd_config_mtu_port = + TOKEN_STRING_INITIALIZER(struct cmd_config_mtu_result, port, +"port"); +cmdline_parse_token_string_t cmd_config_mtu_keyword = + TOKEN_STRING_INITIALIZER(struct cmd_config_mtu_result, keyword, +"config"); +cmdline_parse_token_string_t cmd_config_mtu_mtu = + TOKEN_STRING_INITIALIZER(struct cmd_config_mtu_result, keyword, +"mtu"); +cmdline_parse_token_num_t cmd_config_mtu_port_id = + TOKEN_NUM_INITIALIZER(struct cmd_config_mtu_result, port_id, UINT8); +cmdline_parse_token_num_t cmd_config_mtu_value = + TOKEN_NUM_INITIALIZER(struct cmd_config_mtu_result, value, UINT16); + +cmdline_parse_inst_t cmd_config_mtu = { + .f = cmd_config_mtu_parsed, + .data = NULL, + .help_str = "port config mtu value", + .tokens = { + (void *)&cmd_config_mtu_port, + (void *)&cmd_config_mtu_keyword, + (void *)&cmd_config_mtu_mtu, + (void *)&cmd_config_mtu_port_id, + (void *)&cmd_config_mtu_value, + NULL, + }, +}; + /* *** configure rx mode *** */ struct cmd_config_rx_mode_flag { cmdline_fixed_string_t port; @@ -5366,6 +5419,7 @@ cmdline_parse_ctx_t main_ctx[] = { (cmdline_parse_inst_t *)&cmd_config_speed_all, (cmdline_parse_inst_t *)&cmd_config_speed_specific, (cmdline_parse_inst_t *)&cmd_config_rx_tx, + (cmdline_parse_inst_t *)&cmd_config_mtu, (cmdline_parse_inst_t *)&cmd_config_max_pkt_len, (cmdline_parse_inst_t *)&cmd_config_rx_mode_flag, (cmdline_parse_inst_t *)&cmd_config_rss, diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index d3934e5..88a808c 100644 --- a/app/test-pmd/config.c +++ b/app/test-pmd/config.c @@ -503,6 +503,19 @@ port_reg_set(portid_t port_id, uint32_t reg_off, uint32_t reg_v) display_port_reg_value(port_id, reg_off, reg_v); } +void +port_mtu_set(portid_t port_id, uint16_t mtu) +{ + int diag; + + if (port_id_is_invalid(port_id)) + return; + diag = rte_eth_dev_set_mtu(port_id, &mtu); + if (diag == 0) + return; + printf("Set MTU failed. diag=%d\n", diag); +} + /* * RX/TX ring descriptors display functions. 
*/ diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 0cf5a92..9b7dcbb 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -454,7 +454,7 @@ void fwd_config_setup(void); void set_def_fwd_config(void); int init_fwd_streams(void); - +void port_mtu_set(portid_t port_id, uint16_t mtu); void port_reg_bit_display(portid_t port_id, uint32_t reg_off, uint8_t bit_pos); void port_reg_bit_set(portid_t port_id, uint32_t reg_off, uint8_t bit_pos, uint8_t bit_v); -- 1.7.10.4
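An example session with the new command (the MTU value is illustrative; as the parser above shows, values below ETHER_MIN_LEN are refused):

	testpmd> port config mtu 0 9000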
[dpdk-dev] [PATCH v2 00/17] add TSO support
Hi Venky,

>> my testpmd iofwd test with the txqflags option disabling many mbuf
>> features is not representative of a real world application.

> [Venky] I did see your test reports. I also made the point that the
> tests we have are insufficient for testing the impact. If you look at
> data_ofs, it actually has an impact on two sides - the driver and the
> upper layer. We do not at this time have a test for the upper
> layer/accessor. Secondly, there is a whole class of apps (fast path
> route for example) that aren't in your purview that do not need
> txqflags. Calling it not representative of a real world application is
> incorrect.

I was talking about "iofwd with txqflags", not "txqflags". The iofwd mode means that the mbuf data is not accessed by the application, so I think we should not rely on it. I agree that "txqflags" could be useful in some situations, when the user does not need multi-segment. Anyway, the tests I've posted on the list do not show a real impact on this use case. Finally, as stated in the initial TSO cover letter [1], I've tested the upper-layer impact that you are talking about with the 6WINDGate stack and there is no visible regression.

> Secondly, your testpmd run baseline performance should be higher. At
> this point, it is about 40% off from the numbers we see on the baseline
> on the same CPU. If the baseline is incorrect, I cannot judge anything
> more on the performance. We need to get the baseline performance the
> same, and then compare impact.

I tested with the default testpmd configuration; I thought it was close enough to the best performance.

> [Venky] I exclude VLAN because it is something explicitly set by the Rx
> side of the driver. Having Rx access a second cache line will generate a
> performance impact (can be mitigated by a prefetch, but it will cost
> more instructions, and cannot be deterministically controlled). The rest
> of the structure is on the transmit side - which is going to be cache
> hot - at least in LLC anyway. There are cases where this will not be in
> LLC - and we have a few of those. Those however, we can mitigate.

If we add another cache line to the mbuf and the code accesses it (whether on the rx or tx side), it will at least increase the required memory bandwidth, and in the worst case will result in an additional cache miss. This is difficult to predict and depends on the use case; each solution would have its drawbacks in specific cases. I think the tests I've provided are representative enough to assume that there is no need to search for an alternative, knowing that the rework also cleans the mbuf structure and gives more room for later use.

> [Venky] I don't think reworking core data structures (especially
> regressing core data structures) is a good thing. We have kept this
> relatively stable over 5 releases, sometimes at the impact of
> performance, and churning data structures is not a good thing.

Sorry, but I don't think this is a good argument: we cannot say "it was stable during 5 releases so we cannot modify it". I would say that code modifications should be guided by technical reasons. Keeping the API stable could be a good technical reason, but as far as I understand, the DPDK is not at this stage today. The mbuf rework brings 9 additional bytes in the structure, which help TSO today, but may also help future features. For instance, having another checksum flag as proposed by Stephen and me [2].
Moreover, the patches make the rte_mbuf structure clearer: the current separation rte_mbuf / rte_pktmbuf is a bit strange; for instance, the ol_flags field, which is clearly related to a pktmbuf, is located in rte_mbuf.

Now, I'm ready to make some concessions and discuss an alternative solution. Thomas, you are the maintainer ;) what are your plans?

Regards,
Olivier

[1] http://dpdk.org/ml/archives/dev/2014-May/002322.html
[2] http://dpdk.org/ml/archives/dev/2014-May/002339.html
[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf
Hi Walt,

> The purpose of this structure is to send commands, events or any other type
> of information between user application tasks (normally from a manager
> task). It has been there since the beginning in the original design and
> it's up to the user to define what is in the data field and how they
> wish to use it. It's one thing to fix a bug but to remove a structure
> like this because you don't see it used in the other parts is asking for
> trouble with customers.

To me, there is nothing that we cannot do without this structure: depending on the use case, it could be replaced with the same functionality by:

- a packet mbuf; in this case the user pointer would be stored in the packet data, for instance. In the worst case, I would agree to add a flag telling that the mbuf carries control data.
- an application-private structure which would contain the pointer, the data length (if any), plus any other field that could be useful for the application. This structure can be allocated in a mempool.
- nothing! I mean: if the application only wants to carry a pointer, why would it need an additional structure to point to it? The application can just give the pointer to its private data without allocating a control mbuf for that.

To be honest, I don't see in which case it is useful to have this additional structure. This modification is motivated by a gain of bytes in the mbuf and a rationalization of the rte_mbuf structure. I can add documentation, for instance in the commit log, about how rte_ctrlmbuf could be replaced by something equivalent in different situations. Are you fine with this?

If we find use cases where rte_ctrlmbuf is required, another idea would be to keep this structure but make it independent of rte_mbuf.

Regards,
Olivier
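As an illustration of the first alternative, a sketch of carrying a user pointer in the data area of a regular packet mbuf (mp and my_private_data are hypothetical application names, NULL checks omitted):

	/* sender side: store the pointer in the mbuf data area */
	struct rte_mbuf *m = rte_pktmbuf_alloc(mp);
	void **slot = (void **)rte_pktmbuf_append(m, sizeof(void *));
	*slot = my_private_data;

	/* receiver side: read it back */
	void *data = *rte_pktmbuf_mtod(m, void **);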
[dpdk-dev] [PATCH v3] virtio: Support multiple queues feature in DPDK based virtio-net frontend.
This v3 patch continues fixing some errors and warnings reported by checkpatch.pl. This patch supports multiple queues feature in DPDK based virtio-net frontend. It firstly gets max queue number of virtio-net from virtio PCI configuration and then send command to negotiate the queue number with backend; When receiving and transmitting packets, it negotiates multiple virtio-net queues which serve RX/TX; To utilize this feature, the backend also need support multiple queues feature and enable it. Signed-off-by: Ouyang Changchun --- lib/librte_pmd_virtio/virtio_ethdev.c | 377 -- lib/librte_pmd_virtio/virtio_ethdev.h | 40 ++-- lib/librte_pmd_virtio/virtio_pci.h| 4 +- lib/librte_pmd_virtio/virtio_rxtx.c | 92 +++-- lib/librte_pmd_virtio/virtqueue.h | 61 -- 5 files changed, 458 insertions(+), 116 deletions(-) diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c index 49e236b..c2b4dfb 100644 --- a/lib/librte_pmd_virtio/virtio_ethdev.c +++ b/lib/librte_pmd_virtio/virtio_ethdev.c @@ -81,6 +81,12 @@ static void virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats * static void virtio_dev_stats_reset(struct rte_eth_dev *dev); static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); +static int virtio_dev_queue_stats_mapping_set( + __rte_unused struct rte_eth_dev *eth_dev, + __rte_unused uint16_t queue_id, + __rte_unused uint8_t stat_idx, + __rte_unused uint8_t is_rx); + /* * The set of PCI devices this driver supports */ @@ -92,6 +98,135 @@ static struct rte_pci_id pci_id_virtio_map[] = { { .vendor_id = 0, /* sentinel */ }, }; +static int +virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, + int *dlen, int pkt_num) +{ + uint32_t head = vq->vq_desc_head_idx, i; + int k, sum = 0; + virtio_net_ctrl_ack status = ~0; + struct virtio_pmd_ctrl result; + + ctrl->status = status; + + if (!vq->hw->cvq) { + PMD_INIT_LOG(ERR, "%s(): Control queue is " + "not supported by this device.\n", __func__); + return -1; + } + + PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, " + "vq->hw->cvq = %p vq = %p\n", + vq->vq_desc_head_idx, status, vq->hw->cvq, vq); + + if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) + return -1; + + memcpy(vq->virtio_net_hdr_mz->addr, ctrl, + sizeof(struct virtio_pmd_ctrl)); + + /* +* Format is enforced in qemu code: +* One TX packet for header; +* At least one TX packet per argument; +* One RX packet for ACK. 
+*/ + vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); + vq->vq_free_cnt--; + i = vq->vq_ring.desc[head].next; + + for (k = 0; k < pkt_num; k++) { + vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + + sizeof(struct virtio_net_ctrl_hdr) + + sizeof(ctrl->status) + sizeof(uint8_t)*sum; + vq->vq_ring.desc[i].len = dlen[k]; + sum += dlen[k]; + vq->vq_free_cnt--; + i = vq->vq_ring.desc[i].next; + } + + vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + + sizeof(struct virtio_net_ctrl_hdr); + vq->vq_ring.desc[i].len = sizeof(ctrl->status); + vq->vq_free_cnt--; + + vq->vq_desc_head_idx = vq->vq_ring.desc[i].next; + + vq_update_avail_ring(vq, head); + vq_update_avail_idx(vq); + + PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d\n", vq->vq_queue_index); + + virtqueue_notify(vq); + + while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) + usleep(100); + + while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) { + uint32_t idx, desc_idx, used_idx; + struct vring_used_elem *uep; + + rmb(); + + used_idx = (uint32_t)(vq->vq_used_cons_idx + & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + idx = (uint32_t) uep->id; + desc_idx = idx; + + while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) { + desc_idx = vq->vq_ring.desc[desc_idx].next; + vq->vq_free_cnt++; + } + + vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx; + vq->vq_desc_head_idx = idx; + + vq->vq_used_cons_idx++; + vq->vq_free_cnt++; +
[dpdk-dev] [PATCH v3] virtio: Support multiple queues feature in DPDK based virtio-net frontend.
2014-05-26 20:53, Ouyang Changchun:
> This v3 patch continues fixing some errors and warnings reported by
> checkpatch.pl.

Thank you for cleaning the code.

> This patch supports multiple queues feature in DPDK based virtio-net

Please do not mix cleanups and a new feature in the same patch. It makes it difficult to read the changes implied by the new feature.
--
Thomas
[dpdk-dev] [PATCH] atomic: clarify use of memory barriers
Hi Olivier,

>> So with the following fragment of code:
>> extern int *x;
>> extern __m128i a, *p;
>> L0:
>> _mm_stream_si128(p, a);
>> rte_compiler_barrier();
>> L1:
>> *x = 0;
>>
>> There is no guarantee that the store at L0 will always be finished
>> before the store at L1.

> This code fragment looks very similar to what is done in
> __rte_ring_sp_do_enqueue():
>
> [...]
>  ENQUEUE_PTRS(); /* I expect it is converted to an SSE store */
>  rte_compiler_barrier();
>  [...]
>  r->prod.tail = prod_next;
>
> So, according to your previous explanation, I understand that
> this code would require a write memory barrier in place of the
> compiler barrier. Am I wrong?

No, right now a compiler barrier is enough here. ENQUEUE_PTRS() doesn't use non-temporal stores (MOVNT*), so write order should be guaranteed. Though, if in the future we change ENQUEUE_PTRS() to use non-temporal stores, we'll have to use sfence (or mfence).

> Moreover, if I understand well, a real wmb() is needed only if
> an SSE store is issued. But the programmer may not control that,
> it's the job of the compiler.

'Normal' SIMD writes are not reordered, so it is OK for the compiler to use them if appropriate.

>> But now, there seems to be a confusion: everyone has to remember that
>> smp_mb() and smp_wmb() are 'real' fences, while smp_rmb() is not.
>> That's why my suggestion was to simply keep using compiler_barrier()
>> for all cases when we don't need a real fence.

> I'm not sure the programmer has to know which smp_*mb() is a real fence
> or not. He just expects that it generates the proper CPU instructions
> that guarantee the effectiveness of the memory barrier.

In most cases just a compiler barrier is enough, but there are a few exceptions. Always using fence instructions means introducing an unnecessary slowdown in cases where ordering is already guaranteed. Not using fences in cases where they are needed means introducing a race window and possible data corruption. That's why right now people can use either rte_compiler_barrier() or mb/rmb/wmb - whatever is appropriate for the particular case.

Konstantin
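To make the exception concrete, a sketch of the non-temporal case from the fragment above, where a compiler barrier alone is not enough (requires <emmintrin.h>):

	extern int *x;
	extern __m128i a, *p;

	_mm_stream_si128(p, a);  /* non-temporal store, weakly ordered */
	_mm_sfence();            /* needed: MOVNT* stores are not ordered
	                          * against the later store by the CPU */
	*x = 0;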
[dpdk-dev] [PATCH] atomic: clarify use of memory barriers
Hi Konstantin, On 05/26/2014 03:57 PM, Ananyev, Konstantin wrote: > In most cases just a compiler barrier is enough, but there are few exceptions. > Always using fence instructions - means introduce unnecessary slowdown for > cases, when order is guaranteed. > No using fences in cases, when they are needed - means introduce race window > and possible data corruption. > That's why right now people can use either rte_compiler_barrier() or > mb/rmb/wmb - whatever is appropriate for particular case. OK, so let's drop this patch for now. Thank you for reviewing and commenting. Regards, Olivier
[dpdk-dev] [PATCH] cmdline: finish at EOF
Hi Cristian,

On 05/23/2014 05:32 PM, Olivier MATZ wrote:
> On 05/23/2014 05:21 PM, Cristian Dumitrescu wrote:
>> Bug fix in cmdline library to allow return on EOF as opposed to an
>> infinite loop.
>>
>> Signed-off-by: Cristian Dumitrescu
>> ---
>> lib/librte_cmdline/cmdline.c | 2 +-
>> 1 files changed, 1 insertions(+), 1 deletions(-)
>> mode change 100644 => 100755 lib/librte_cmdline/cmdline.c
>
> Acked-by: Olivier Matz

By the way, I just noticed that this bug had already been fixed in my cmdline project on my personal website some time ago:
http://git.droids-corp.org/?p=libcmdline.git;a=commitdiff;h=52e1d5772d1f465a013e65b8ae6e43a027438ed6

Regards,
Olivier
[dpdk-dev] [PATCH] cmdline: finish at EOF
> > Bug fix in cmdline library to allow return on EOF as opposed to infinite > > loop. > > > > Signed-off-by: Cristian Dumitrescu > > Acked-by: Olivier Matz Applied for version 1.7.0 with title: cmdline: fix infinite loop after EOF Thanks -- Thomas
[dpdk-dev] [PATCH v2 00/17] add TSO support
Hi Olivier,

>> I don't see any big changes in the v2 of that patch.
>>
>> At least both things that I have concerns about stay unchanged in
>> the v2:
>>
>> 1) merge physaddr and buf_len in a bitfield - I still think we better
>> keep physaddr as a 64-bit field (PATCH 5).

> As nobody reacted to our discussion thread [1] with other arguments, I
> stayed on my initial position:
> - 2 bytes more in the mbuf structure is always good to have
> - no performance impact detected (see my test reports)
> - the 48 bits physical address limit will be reached in several years,
>   and at this time, maybe the cache lines will be 128 bytes? (solving
>   our initial rte_mbuf issue). So let's just deal with current or near
>   future hardware.

As I understand it right now, you don't need these 2 bytes to enable TSO support. On the other hand, if we occupy them with some fields now, then in the future we won't be able to expand our phys_addr without reworking the mbuf structure again and breaking backward compatibility.

No doubt there are a lot of extra things that need (or are wanted) to be put into the mbuf, and as new HW with extra offload capabilities comes out, that demand will only increase. That's why I think we won't be able to squeeze all future fields into 64B anyway. So it seems to me that we will have to expand the mbuf to 128B sooner or later, like it or not. So I still think we better keep phys_addr 64 bits long, at least for now.

>> 2) fix_tcp_phdr_cksum() is inside the ixgbe TX function, while I think it
>> should be moved out of it to the upper layer (PATCH 11).

> Thomas's answer on this question [2] was to do it that way, so I did not
> modify the v1 patches.

> Thomas Monjalon wrote:
> We know at least 2 checksum methods:
> - the standard one
> - the special one for ixgbe TSO
> In Linux ixgbe, the checksum is redone in the driver for the TSO case.

I don't think we should use the Linux ixgbe driver as the reference here.

> We want to compute the checksum in the application/stack in order to prevent
> the driver from modifying packet data, which could cause a cache miss.
> But the application cannot always know which checksum method to use, because
> it doesn't have to know which driver will process the packet.
> So we have to choose which checksum method can be done in the application
> without driver processing. It's not an easy choice.
> It seems simpler and reasonable to choose the standard pseudo-header checksum
> method, as it is done in Linux.
> Having a stable and generic API is something important we must target.

As I said: with the current DPDK implementation the upper-layer code would still be different for packets with TCP checksum (without segmentation) and for TCP segmentation: you need to set up different flags in the mbuf, and with TSO you need to set up the l4_len and mss fields inside the mbuf, while with just checksum you don't. So right now the upper-layer code has to know whether it is doing TSO or just checksum anyway. If that's the case, why can't it also compute the pseudo-checksum in 2 different ways?

Also, the patch introduces TSO support only for ixgbe. I suppose that TSO support for igb can be added in the same way. Let's talk about a generic API when we have to expand it to different devices.

Konstantin
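For readers following the thread, the 'standard' method under discussion is the plain pseudo-header checksum; a textbook IPv4 sketch (helper name hypothetical, host-byte-order inputs assumed):

	static uint16_t
	ipv4_phdr_cksum(uint32_t src, uint32_t dst, uint8_t proto, uint16_t l4_len)
	{
		uint32_t sum;

		sum  = (src & 0xffff) + (src >> 16);   /* source address words */
		sum += (dst & 0xffff) + (dst >> 16);   /* destination address words */
		sum += proto + l4_len;                 /* zero-padded proto, L4 length */
		sum  = (sum & 0xffff) + (sum >> 16);   /* fold carries twice */
		sum  = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)sum;                  /* non-inverted seed value */
	}

The ixgbe TSO variant differs in that the L4 length is left out of the sum (the hardware accounts for it per segment), which is why the two methods diverge.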
[dpdk-dev] [PATCH] timer bug fix
2014-05-23 16:52, Olivier MATZ: > > Bug: when a periodic timer's callback is running, if another > > timer is manipulated, the periodic timer is not reloaded. > > Solution: set the update flag only is the modified timer is > > in RUNNING state > > Acked-by: Olivier Matz Applied in version 1.7.0 with title: timer: fix reloading after changes Thanks -- Thomas
[dpdk-dev] [PATCH] timer bug fix.
> > Bug: When a timer is running
> >
> > - if rte_timer_stop is called, the pending decrement is skipped (it is
> >   decremented only if the timer is pending) and, due to the update flag,
> >   the future processing is skipped, so the timer is counted as pending
> >   while it is stopped.
> > - the same applies when rte_timer_reset is called, but then the pending
> >   statistic is additionally incremented, so the timer is counted as
> >   pending twice.
> >
> > Solution: decrement the pending statistic after returning from the
> > callback. If rte_timer_stop was called, it skipped decrementing the
> > pending statistic. If rte_timer_reset was called, the pending statistic
> > was incremented. If neither was called and the timer is periodic, the
> > pending statistic is incremented when it is reloaded.
> >
> > Signed-off-by: Vadim Suraev
>
> Acked-by: Olivier Matz

Applied for version 1.7.0 with title:
	timer: fix pending counter
Thanks
--
Thomas
[dpdk-dev] [PATCH] fix for 2 consecutive rte_eth_dev_start() can cause a SIGSEGV
2014-05-23 12:08, Konstantin Ananyev:
> 1) If igb_alloc_rx_queue_mbufs() fails to allocate an mbuf for an RX queue,
> it calls igb_rx_queue_release(rxq). That causes rxq to be silently freed,
> without updating dev->data->rx_queues[], so any further reference to it
> will trigger the SIGSEGV. Same thing in the em PMD too.
> To fix: igb_alloc_rx_queue_mbufs() should just return an error to the
> caller and let the upper layer deal with the problem. That's what the
> ixgbe PMD is doing right now.
> 2) In tx_queue_setup (for all 3 PMDs: ixgbe, igb, em) we call
> tx_queue_release(dev->data->tx_queues[queue_idx]) without setting
> dev->data->tx_queues[queue_idx] = NULL afterwards.
> 3) Prevent rte_eth_dev_start/stop from calling the underlying
> dev_start/dev_stop for an already started/stopped device.
> 4) Fix compiler warnings on PMD_DEBUG_TRACE() formats.

Please, only 1 fix per patch. This way, we'll hopefully have a nice title for each fix. It's simpler for the changelog.
Thanks
--
Thomas
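For item 2, the fix amounts to clearing the stale pointer right after the release; sketched here for igb (the same shape applies to ixgbe and em):

	if (dev->data->tx_queues[queue_idx] != NULL) {
		igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
		dev->data->tx_queues[queue_idx] = NULL;  /* avoid later use-after-free */
	}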
[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf
I am also using the rte_ctrlmbuf to send messages between cores in one of the Packet Framework sample apps that I am going to send as a patch tomorrow or later this week. Removing rte_ctrlmbuf would require additional rework to this (complex) sample app. It can be done, but it is additional work very close to the code freeze cycle.

Thanks,
Cristian

-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Gilmore, Walter E
Sent: Sunday, May 25, 2014 10:39 PM
To: Olivier Matz; dev at dpdk.org
Subject: Re: [dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

Olivier, you're making an assumption that customer application code running on the Intel DPDK isn't using the rte_ctrlmbuf structure. Remember there are more than 300 customers using the Intel DPDK and there is no way you can predict that this is not used by them. The purpose of this structure is to send commands, events or any other type of information between user application tasks (normally from a manager task). It has been there since the beginning in the original design and it's up to the user to define what is in the data field and how they wish to use it. It's one thing to fix a bug, but removing a structure like this because you don't see it used in other parts is asking for trouble with customers.

-Original Message-
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Olivier Matz
Sent: Friday, May 09, 2014 10:51 AM
To: dev at dpdk.org
Subject: [dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf

The initial role of rte_ctrlmbuf is to carry generic messages (data pointer + data length) but it's not used by the DPDK or its applications. Keeping it implies:
- losing 1 byte in the rte_mbuf structure
- having some dead code in rte_mbuf.[ch]

This patch removes this feature. Thanks to it, it is now possible to simplify the rte_mbuf structure by merging the rte_pktmbuf structure into it. This is done in the next commit.
Signed-off-by: Olivier Matz --- app/test-pmd/cmdline.c | 1 - app/test-pmd/testpmd.c | 2 - app/test-pmd/txonly.c| 2 +- app/test/commands.c | 1 - app/test/test_mbuf.c | 72 + examples/ipv4_multicast/main.c | 2 +- lib/librte_mbuf/rte_mbuf.c | 65 +++- lib/librte_mbuf/rte_mbuf.h | 175 ++- lib/librte_pmd_e1000/em_rxtx.c | 2 +- lib/librte_pmd_e1000/igb_rxtx.c | 2 +- lib/librte_pmd_ixgbe/ixgbe_rxtx.c| 4 +- lib/librte_pmd_virtio/virtio_rxtx.c | 2 +- lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c| 2 +- lib/librte_pmd_xenvirt/rte_eth_xenvirt.c | 2 +- 14 files changed, 54 insertions(+), 280 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 7becedc..e3d1849 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -5010,7 +5010,6 @@ dump_struct_sizes(void) #define DUMP_SIZE(t) printf("sizeof(" #t ") = %u\n", (unsigned)sizeof(t)); DUMP_SIZE(struct rte_mbuf); DUMP_SIZE(struct rte_pktmbuf); - DUMP_SIZE(struct rte_ctrlmbuf); DUMP_SIZE(struct rte_mempool); DUMP_SIZE(struct rte_ring); #undef DUMP_SIZE diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 9c56914..76b3823 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -389,13 +389,11 @@ testpmd_mbuf_ctor(struct rte_mempool *mp, mb_ctor_arg = (struct mbuf_ctor_arg *) opaque_arg; mb = (struct rte_mbuf *) raw_mbuf; - mb->type = RTE_MBUF_PKT; mb->pool = mp; mb->buf_addr = (void *) ((char *)mb + mb_ctor_arg->seg_buf_offset); mb->buf_physaddr = (uint64_t) (rte_mempool_virt2phy(mp, mb) + mb_ctor_arg->seg_buf_offset); mb->buf_len = mb_ctor_arg->seg_buf_size; - mb->type = RTE_MBUF_PKT; mb->ol_flags = 0; mb->pkt.data = (char *) mb->buf_addr + RTE_PKTMBUF_HEADROOM; mb->pkt.nb_segs = 1; diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index 1cf2574..1f066d0 100644 --- a/app/test-pmd/txonly.c +++ b/app/test-pmd/txonly.c @@ -93,7 +93,7 @@ tx_mbuf_alloc(struct rte_mempool *mp) struct rte_mbuf *m; m = __rte_mbuf_raw_alloc(mp); - __rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0); + __rte_mbuf_sanity_check_raw(m, 0); return (m); } diff --git a/app/test/commands.c b/app/test/commands.c index b145036..c69544b 100644 --- a/app/test/commands.c +++ b/app/test/commands.c @@ -262,7 +262,6 @@ dump_struct_sizes(void) #define DUMP_SIZE(t) printf("sizeof(" #t ") = %u\n", (unsigned)sizeof(t)); DUMP_SIZE(struct rte_mbuf); DUMP_SIZE(struct rte_pktmbuf); - DUMP_SIZE(struct rte_ctrlmbuf); DUMP_SIZE(struct rte_mempool); DUMP_SIZE(struct rte_ring); #undef DUMP_SIZE diff --git a/app/test/test
[dpdk-dev] DPDK Latency Issue
Thanks a lot Jeff for your detailed explanation. I still have an open question left, and I would be grateful if someone would share their insight on it.

I have performed experiments varying both MAX_BURST_SIZE (originally set to 32) and BURST_TX_DRAIN_US (originally set to 100 usec) in l3fwd main.c. While I vary MAX_BURST_SIZE (1, 8, 16, 32, 64, and 128) and fix BURST_TX_DRAIN_US at 100 usec, I see a low average latency when sending a burst of packets less than or equal to MAX_BURST_SIZE. For example, when MAX_BURST_SIZE is 32 and I send a burst of 32 packets or fewer, I get around 10 usec of latency. When it goes over that, the average latency starts to rise, which makes total sense.

My main question is the following: when I send continuous packets at a rate of 14.88 Mpps for 64B packets, I consistently measure an average latency of 150 usec, no matter what MAX_BURST_SIZE is. My guess is that the latency should be bounded by BURST_TX_DRAIN_US, which is fixed at 100 usec. Would you share your thoughts on this issue, please?

Thanks,
Jun

On Thu, May 22, 2014 at 7:06 PM, Shaw, Jeffrey B wrote:
> Hello,
>
> > I measured a roundtrip latency (using a Spirent traffic generator) of
> > sending 64B packets over a 10GbE to DPDK, and DPDK does nothing but simply
> > forward back to the incoming port (l3fwd without any lookup code, i.e.,
> > dstport = port_id).
> >
> > However, to my surprise, the average latency was around 150 usec. (The
> > packet drop rate was only 0.001%, i.e., 283 packets/sec dropped.) Another
> > test I did was to measure the latency of sending only a single 64B
> > packet, and the latency I measured ranged anywhere from 40 usec to 100
> > usec.
>
> 40-100 usec seems very high.
> The l3fwd application does some internal buffering before transmitting the
> packets. It buffers either 32 packets, or waits up to 100 us (#defined
> as BURST_TX_DRAIN_US), whichever comes first.
> Try either removing this timeout, or sending a burst of 32 packets at a
> time. Or you could try testpmd, which should have reasonably low
> latency out of the box.
>
> There is also a section in the Release Notes (8.6, "How can I tune my
> network application to achieve lower latency?") which provides some pointers
> for getting lower latency if you are willing to give up top-rate throughput.
>
> Thanks,
> Jeff
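To make the mechanism Jeff describes concrete, below is a minimal sketch of the buffer-or-drain loop, modeled on l3fwd's main loop. MAX_PKT_BURST and BURST_TX_DRAIN_US follow the names in l3fwd main.c; the surrounding function and the simplified drop handling are illustrative, not the actual l3fwd code.

#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100

static void
forward_loop(uint8_t port, uint16_t queue)
{
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
			US_PER_S * BURST_TX_DRAIN_US;
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t nb_held = 0;
	uint64_t prev_tsc = rte_rdtsc();

	for (;;) {
		uint64_t cur_tsc = rte_rdtsc();

		/* Held packets leave only when the buffer is full or the
		 * drain timer fires, so a sparse stream can pick up close
		 * to BURST_TX_DRAIN_US of extra latency per packet. */
		if (nb_held == MAX_PKT_BURST ||
				(nb_held > 0 && cur_tsc - prev_tsc > drain_tsc)) {
			uint16_t sent = rte_eth_tx_burst(port, queue,
					pkts, nb_held);
			while (sent < nb_held)	/* drop what the NIC refused */
				rte_pktmbuf_free(pkts[sent++]);
			nb_held = 0;
			prev_tsc = cur_tsc;
		}

		nb_held += rte_eth_rx_burst(port, queue,
				pkts + nb_held, MAX_PKT_BURST - nb_held);
	}
}

Note the timer only bounds how long a partially filled buffer waits; under a sustained 14.88 Mpps stream the buffer fills immediately, so end-to-end latency is dominated by NIC and software queueing rather than the drain timeout, which may explain measurements above 100 usec.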
[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf
On Sun, May 25, 2014 at 09:39:22PM +0000, Gilmore, Walter E wrote:
> Olivier, you're making an assumption that customer application code running on
> the Intel DPDK isn't using the rte_ctrlmbuf structure.
> Remember there are more than 300 customers using the Intel DPDK and there is
> no way you can predict that it isn't used by them.
> The purpose of this structure is to send commands, events or any other type
> of information between user application tasks (normally from a manager task).
> It has been there since the beginning in the original design, and it's up to
> the user to define what is in the data field and how they wish to use it.
> It's one thing to fix a bug, but to remove a structure like this because you
> don't see it used in the other parts of the code is asking for trouble with
> customers.
>
Not to rub salt in this, but I'd like to point out that this strikes me as a case of wanting to have your cake and eat it too. This community seems adamant against the notion of having a fixed API for the DPDK project, yet it fractures the moment anyone tries to change something that they, or someone they are working with, might be using.

If you want to make sure that use cases outside the scope of the project itself stay usable, stabilize the API and mark it with a version. If you do that, you can then change the API, mark it with a new version at the link stage, and just focus on maintaining backward compatibility (a sketch of link-stage versioning follows the quoted patch below).

Neil

> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Olivier Matz
> Sent: Friday, May 09, 2014 10:51 AM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf
>
> The initial role of rte_ctrlmbuf is to carry generic messages (data pointer +
> data length), but it is not used by the DPDK or its applications.
> Keeping it implies:
> - losing 1 byte in the rte_mbuf structure
> - having some dead code in rte_mbuf.[ch]
>
> This patch removes this feature. Thanks to it, it is now possible to simplify
> the rte_mbuf structure by merging the rte_pktmbuf structure into it. This is
> done in the next commit.
>
> Signed-off-by: Olivier Matz
> ---
>  app/test-pmd/cmdline.c                   |   1 -
>  app/test-pmd/testpmd.c                   |   2 -
>  app/test-pmd/txonly.c                    |   2 +-
>  app/test/commands.c                      |   1 -
>  app/test/test_mbuf.c                     |  72 +
>  examples/ipv4_multicast/main.c           |   2 +-
>  lib/librte_mbuf/rte_mbuf.c               |  65 +++-
>  lib/librte_mbuf/rte_mbuf.h               | 175 ++-
>  lib/librte_pmd_e1000/em_rxtx.c           |   2 +-
>  lib/librte_pmd_e1000/igb_rxtx.c          |   2 +-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c        |   4 +-
>  lib/librte_pmd_virtio/virtio_rxtx.c      |   2 +-
>  lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c    |   2 +-
>  lib/librte_pmd_xenvirt/rte_eth_xenvirt.c |   2 +-
>  14 files changed, 54 insertions(+), 280 deletions(-)
>
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 7becedc..e3d1849 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -5010,7 +5010,6 @@ dump_struct_sizes(void)
>  #define DUMP_SIZE(t) printf("sizeof(" #t ") = %u\n", (unsigned)sizeof(t));
>  	DUMP_SIZE(struct rte_mbuf);
>  	DUMP_SIZE(struct rte_pktmbuf);
> -	DUMP_SIZE(struct rte_ctrlmbuf);
>  	DUMP_SIZE(struct rte_mempool);
>  	DUMP_SIZE(struct rte_ring);
>  #undef DUMP_SIZE
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 9c56914..76b3823 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -389,13 +389,11 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
>  	mb_ctor_arg = (struct mbuf_ctor_arg *) opaque_arg;
>  	mb = (struct rte_mbuf *) raw_mbuf;
>  
> -	mb->type = RTE_MBUF_PKT;
>  	mb->pool = mp;
>  	mb->buf_addr = (void *) ((char *)mb + mb_ctor_arg->seg_buf_offset);
>  	mb->buf_physaddr = (uint64_t) (rte_mempool_virt2phy(mp, mb) +
>  			mb_ctor_arg->seg_buf_offset);
>  	mb->buf_len = mb_ctor_arg->seg_buf_size;
> -	mb->type = RTE_MBUF_PKT;
>  	mb->ol_flags = 0;
>  	mb->pkt.data = (char *) mb->buf_addr + RTE_PKTMBUF_HEADROOM;
>  	mb->pkt.nb_segs = 1;
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index 1cf2574..1f066d0 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -93,7 +93,7 @@ tx_mbuf_alloc(struct rte_mempool *mp)
>  	struct rte_mbuf *m;
>  
>  	m = __rte_mbuf_raw_alloc(mp);
> -	__rte_mbuf_sanity_check_raw(m, RTE_MBUF_PKT, 0);
> +	__rte_mbuf_sanity_check_raw(m, 0);
>  	return (m);
>  }
>
> diff --git a/app/test/commands.c b/app/test/commands.c
> index b145036..c69544b 100644
> --- a/app/test/commands.c
> +++ b/app/test/commands.c
> @@ -262,7 +262,6 @@ dump_struct_sizes(void)
>  #define DUMP_SIZE(t) printf("sizeof(" #t ") = %u\n"
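As a hedged illustration of the link-stage versioning Neil suggests (not something DPDK shipped at the time): GNU toolchains let a library keep an old entry point alive for already-linked binaries while new builds bind to a new one. rte_foo and the DPDK_1.6/DPDK_1.7 version nodes below are hypothetical names; the nodes must also be declared in a linker version script passed via --version-script.

#include <stdint.h>

/* Two implementations coexist in the same library. */
int rte_foo_v16(uint8_t port);          /* original behavior */
int rte_foo_v17(uint8_t port, int arg); /* extended behavior */

/* Bind each implementation to a versioned symbol name; "@@" marks
 * the default that newly linked applications resolve against, while
 * "@" keeps the old binding for existing binaries. */
__asm__(".symver rte_foo_v16, rte_foo@DPDK_1.6");
__asm__(".symver rte_foo_v17, rte_foo@@DPDK_1.7");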
[dpdk-dev] [PATCH v2 0/3] Support setting TX rate for queue and VF
On Mon, May 26, 2014 at 03:45:28PM +0800, Ouyang Changchun wrote:
> This patch v2 fixes some errors and warnings reported by checkpatch.pl.
>
> This patch series also contains the following 3 items:
> 1. Add API to support setting TX rate for a queue or a VF.
> 2. Implement the functionality of setting TX rate for queue or VF in IXGBE
> PMD.
> 3. Add commands in testpmd to test the functionality of setting TX rate for
> queue or VF.
>
> Ouyang Changchun (3):
>   Add API to support set TX rate for a queue and VF.
>   Implement the functionality of setting TX rate for queue or VF in
>     IXGBE PMD.
>   Add commands in testpmd to test the functionality of setting TX rate
>     for queue or VF.
>
>  app/test-pmd/cmdline.c              | 159 +++-
>  app/test-pmd/config.c               |  47 +++
>  app/test-pmd/testpmd.h              |   3 +
>  lib/librte_ether/rte_ethdev.c       |  71 
>  lib/librte_ether/rte_ethdev.h       |  51 
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 122 +++
>  lib/librte_pmd_ixgbe/ixgbe_ethdev.h |  13 ++-
>  7 files changed, 462 insertions(+), 4 deletions(-)
>
This seems a bit backwards. Queue rate limiting is a rather generic function that doesn't really need to know any details about the hardware, save for its maximum TX rate, but your implementation requires that it be re-implemented for each PMD. Why not just export the maximum TX rate from the PMD and write a generic queuing library to do rate limitation for any PMD?

Neil

> --
> 1.9.0
>
>
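For reference, here is a minimal usage sketch of the API this series adds, assuming the signatures from patch 1/3 (rates are in Mbps and checked against link speed; the wrapper function and chosen values are illustrative, error handling trimmed):

#include <rte_ethdev.h>

static int
cap_tx_rates(uint8_t port_id)
{
	int ret;

	/* Cap TX queue 0 on this port to 100 Mbps. */
	ret = rte_eth_set_queue_rate_limit(port_id, 0, 100);
	if (ret < 0)
		return ret;

	/* Cap VF 1 to 1000 Mbps, applied to its queues 0 and 1
	 * (bits 0 and 1 of the queue mask). */
	return rte_eth_set_vf_rate_limit(port_id, 1, 1000, 0x3);
}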
[dpdk-dev] [PATCH RFC 03/11] mbuf: remove rte_ctrlmbuf
On Fri, 9 May 2014 16:50:30 +0200, Olivier Matz wrote:
> The initial role of rte_ctrlmbuf is to carry generic messages (data
> pointer + data length), but it is not used by the DPDK or its applications.
> Keeping it implies:
> - losing 1 byte in the rte_mbuf structure
> - having some dead code in rte_mbuf.[ch]
>
> This patch removes this feature. Thanks to it, it is now possible to
> simplify the rte_mbuf structure by merging the rte_pktmbuf structure
> into it. This is done in the next commit.
>
> Signed-off-by: Olivier Matz

The only win from this is saving the byte used by the type field. Yes, bits here are precious. Do external applications mix control and data mbufs in the same ring? The code in the tree only uses the type field for debug validation/sanity checks. Since it is only one bit of information, maybe you can find a spare bit to store it. Buffer and pool addresses are going to be at least 32-bit aligned, so maybe you can use the old GC trick of using the LSB as a flag.
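A minimal sketch of the pointer-tagging trick suggested above, assuming the tagged addresses are at least 4-byte aligned; the helper names are hypothetical:

#include <assert.h>
#include <stdint.h>

/* Stash a one-bit "control" flag in the LSB of an aligned pointer. */
static inline void *
tag_ctrl(void *p, int is_ctrl)
{
	uintptr_t v = (uintptr_t)p;

	assert((v & 0x3) == 0);	/* alignment keeps the low bits free */
	return (void *)(v | (is_ctrl ? 1u : 0u));
}

static inline int
ptr_is_ctrl(void *p)
{
	return (int)((uintptr_t)p & 1u);
}

/* Mask the flag back out before dereferencing. */
static inline void *
untag(void *p)
{
	return (void *)((uintptr_t)p & ~(uintptr_t)1);
}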