Hi Wojchech, > -----Original Message----- > From: Liguzinski, WojciechX <wojciechx.liguzin...@intel.com> > Sent: Monday, October 11, 2021 8:56 AM > To: dev@dpdk.org; Singh, Jasvinder <jasvinder.si...@intel.com>; > Dumitrescu, Cristian <cristian.dumitre...@intel.com> > Cc: Ajmera, Megha <megha.ajm...@intel.com> > Subject: [PATCH v9 1/5] sched: add PIE based congestion management > > Implement PIE based congestion management based on rfc8033 > > Signed-off-by: Liguzinski, WojciechX <wojciechx.liguzin...@intel.com> > --- > drivers/net/softnic/rte_eth_softnic_tm.c | 6 +- > lib/sched/meson.build | 10 +- > lib/sched/rte_pie.c | 82 +++++ > lib/sched/rte_pie.h | 393 +++++++++++++++++++++++ > lib/sched/rte_sched.c | 228 +++++++++---- > lib/sched/rte_sched.h | 53 ++- > lib/sched/version.map | 3 + > 7 files changed, 685 insertions(+), 90 deletions(-) > create mode 100644 lib/sched/rte_pie.c > create mode 100644 lib/sched/rte_pie.h > > diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c > b/drivers/net/softnic/rte_eth_softnic_tm.c > index 90baba15ce..5b6c4e6d4b 100644 > --- a/drivers/net/softnic/rte_eth_softnic_tm.c > +++ b/drivers/net/softnic/rte_eth_softnic_tm.c > @@ -420,7 +420,7 @@ pmd_tm_node_type_get(struct rte_eth_dev *dev, > return 0; > } > > -#ifdef RTE_SCHED_RED > +#ifdef RTE_SCHED_AQM > #define WRED_SUPPORTED 1 > #else > #define WRED_SUPPORTED 0 > @@ -2306,7 +2306,7 @@ tm_tc_wred_profile_get(struct rte_eth_dev *dev, > uint32_t tc_id) > return NULL; > } > > -#ifdef RTE_SCHED_RED > +#ifdef RTE_SCHED_AQM > > static void > wred_profiles_set(struct rte_eth_dev *dev, uint32_t subport_id) > @@ -2321,7 +2321,7 @@ wred_profiles_set(struct rte_eth_dev *dev, > uint32_t subport_id) > for (tc_id = 0; tc_id < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; > tc_id++) > for (color = RTE_COLOR_GREEN; color < RTE_COLORS; > color++) { > struct rte_red_params *dst = > - &pp->red_params[tc_id][color]; > + &pp->wred_params[tc_id][color]; > struct tm_wred_profile *src_wp = > 
tm_tc_wred_profile_get(dev, tc_id); > struct rte_tm_red_params *src = > diff --git a/lib/sched/meson.build b/lib/sched/meson.build > index b24f7b8775..e7ae9bcf19 100644 > --- a/lib/sched/meson.build > +++ b/lib/sched/meson.build > @@ -1,11 +1,7 @@ > # SPDX-License-Identifier: BSD-3-Clause > # Copyright(c) 2017 Intel Corporation > > -sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c') > -headers = files( > - 'rte_approx.h', > - 'rte_red.h', > - 'rte_sched.h', > - 'rte_sched_common.h', > -) > +sources = files('rte_sched.c', 'rte_red.c', 'rte_approx.c', 'rte_pie.c') > +headers = files('rte_sched.h', 'rte_sched_common.h', > + 'rte_red.h', 'rte_approx.h', 'rte_pie.h') > deps += ['mbuf', 'meter'] > diff --git a/lib/sched/rte_pie.c b/lib/sched/rte_pie.c > new file mode 100644 > index 0000000000..2fcecb2db4 > --- /dev/null > +++ b/lib/sched/rte_pie.c > @@ -0,0 +1,82 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Intel Corporation > + */ > + > +#include <stdlib.h> > + > +#include "rte_pie.h" > +#include <rte_common.h> > +#include <rte_cycles.h> > +#include <rte_malloc.h> > + > +#ifdef __INTEL_COMPILER > +#pragma warning(disable:2259) /* conversion may lose significant bits */ > +#endif > + > +void > +rte_pie_rt_data_init(struct rte_pie *pie) > +{ > + if (pie == NULL) { > + /* Allocate memory to use the PIE data structure */ > + pie = rte_malloc(NULL, sizeof(struct rte_pie), 0); > + > + if (pie == NULL) > + RTE_LOG(ERR, SCHED, "%s: Memory allocation > fails\n", __func__); > + } > + > + pie->active = 0; > + pie->in_measurement = 0; > + pie->departed_bytes_count = 0; > + pie->start_measurement = 0; > + pie->last_measurement = 0; > + pie->qlen = 0; > + pie->avg_dq_time = 0; > + pie->burst_allowance = 0; > + pie->qdelay_old = 0; > + pie->drop_prob = 0; > + pie->accu_prob = 0; > +} > + > +int > +rte_pie_config_init(struct rte_pie_config *pie_cfg, > + const uint16_t qdelay_ref, > + const uint16_t dp_update_interval, > + const uint16_t max_burst, > + 
const uint16_t tailq_th) > +{ > + uint64_t tsc_hz = rte_get_tsc_hz(); > + > + if (pie_cfg == NULL) > + return -1; > + > + if (qdelay_ref <= 0) { > + RTE_LOG(ERR, SCHED, > + "%s: Incorrect value for qdelay_ref\n", __func__); > + return -EINVAL; > + } > + > + if (dp_update_interval <= 0) { > + RTE_LOG(ERR, SCHED, > + "%s: Incorrect value for dp_update_interval\n", > __func__); > + return -EINVAL; > + } > + > + if (max_burst <= 0) { > + RTE_LOG(ERR, SCHED, > + "%s: Incorrect value for max_burst\n", __func__); > + return -EINVAL; > + } > + > + if (tailq_th <= 0) { > + RTE_LOG(ERR, SCHED, > + "%s: Incorrect value for tailq_th\n", __func__); > + return -EINVAL; > + } > + > + pie_cfg->qdelay_ref = (tsc_hz * qdelay_ref) / 1000; > + pie_cfg->dp_update_interval = (tsc_hz * dp_update_interval) / > 1000; > + pie_cfg->max_burst = (tsc_hz * max_burst) / 1000; > + pie_cfg->tailq_th = tailq_th; > + > + return 0; > +} > diff --git a/lib/sched/rte_pie.h b/lib/sched/rte_pie.h > new file mode 100644 > index 0000000000..f83c95664f > --- /dev/null > +++ b/lib/sched/rte_pie.h > @@ -0,0 +1,393 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2020 Intel Corporation > + */ > + > +#ifndef __RTE_PIE_H_INCLUDED__ > +#define __RTE_PIE_H_INCLUDED__ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +/** > + * @file > + * RTE Proportional Integral controller Enhanced (PIE) > + * > + * > + ***/ > + > +#include <stdint.h> > + > +#include <rte_random.h> > +#include <rte_debug.h> > + > +#define RTE_DQ_THRESHOLD 16384 /**< Queue length threshold (2^14) > + * to start measurement cycle (bytes) > + */ > +#define RTE_DQ_WEIGHT 0.25 /**< Weight > (RTE_DQ_THRESHOLD/2^16) to compute dequeue rate */ > +#define RTE_ALPHA 0.125 /**< Weights in drop probability > calculations */ > +#define RTE_BETA 1.25 /**< Weights in drop probability > calculations > */ > +#define RTE_RAND_MAX ~0LLU /**< Max value of the random number > */ > + > + > +/** > + * PIE configuration parameters passed by 
user > + * > + */ > +struct rte_pie_params { > + uint16_t qdelay_ref; /**< Latency Target (milliseconds) */ > + uint16_t dp_update_interval; /**< Update interval for drop > probability (milliseconds) */ > + uint16_t max_burst; /**< Max Burst Allowance (milliseconds) > */ > + uint16_t tailq_th; /**< Tailq drop threshold (packet > counts) */ > +}; > + > +/** > + * PIE configuration parameters > + * > + */ > +struct rte_pie_config { > + uint64_t qdelay_ref; /**< Latency Target (in CPU cycles.) */ > + uint64_t dp_update_interval; /**< Update interval for drop > probability (in CPU cycles) */ > + uint64_t max_burst; /**< Max Burst Allowance (in CPU cycles.) > */ > + uint16_t tailq_th; /**< Tailq drop threshold (packet > counts) */ > +}; > + > +/** > + * RED run-time data > + */ > +struct rte_pie { > + uint16_t active; /**< Flag for activating/deactivating > pie */ > + uint16_t in_measurement; /**< Flag for activation of > measurement cycle */ > + uint32_t departed_bytes_count; /**< Number of bytes departed in > current measurement cycle */ > + uint64_t start_measurement; /**< Time to start to measurement > cycle (in cpu cycles) */ > + uint64_t last_measurement; /**< Time of last measurement (in > cpu cycles) */ > + uint64_t qlen; /**< Queue length (packets count) */ > + uint64_t qlen_bytes; /**< Queue length (bytes count) */ > + uint64_t avg_dq_time; /**< Time averaged dequeue rate (in > cpu cycles) */ > + uint32_t burst_allowance; /**< Current burst allowance (bytes) */ > + uint64_t qdelay_old; /**< Old queue delay (bytes) */ > + double drop_prob; /**< Current packet drop probability */ > + double accu_prob; /**< Accumulated packet drop probability > */ > +}; > + > +/** > + * @brief Initialises run-time data > + * > + * @param pie [in,out] data pointer to PIE runtime data > + */ > +void > +__rte_experimental > +rte_pie_rt_data_init(struct rte_pie *pie); > + > +/** > + * @brief Configures a single PIE configuration parameter structure. 
> + * > + * @param pie_cfg [in,out] config pointer to a PIE configuration parameter > structure > + * @param qdelay_ref [in] latency target(milliseconds) > + * @param dp_update_interval [in] update interval for drop probability > (milliseconds) > + * @param max_burst [in] maximum burst allowance (milliseconds) > + * @param tailq_th [in] tail drop threshold for the queue (number of > packets) > + * > + * @return Operation status > + * @retval 0 success > + * @retval !0 error > + */ > +int > +__rte_experimental > +rte_pie_config_init(struct rte_pie_config *pie_cfg, > + const uint16_t qdelay_ref, > + const uint16_t dp_update_interval, > + const uint16_t max_burst, > + const uint16_t tailq_th); > + > +/** > + * @brief Decides packet enqueue when queue is empty > + * > + * Note: packet is never dropped in this particular case. > + * > + * @param pie_cfg [in] config pointer to a PIE configuration parameter > structure > + * @param pie [in, out] data pointer to PIE runtime data > + * @param pkt_len [in] packet length in bytes > + * > + * @return Operation status > + * @retval 0 enqueue the packet > + * @retval !0 drop the packet > + */ > +static inline int > +__rte_experimental > +rte_pie_enqueue_empty(const struct rte_pie_config *pie_cfg, > + struct rte_pie *pie, > + uint32_t pkt_len) > +{ > + RTE_ASSERT(pkt_len != NULL); > + > + /* Update the PIE qlen parameter */ > + pie->qlen++; > + pie->qlen_bytes += pkt_len; > + > + /** > + * If the queue has been idle for a while, turn off PIE and Reset > counters > + */ > + if ((pie->active == 1) && > + (pie->qlen < (pie_cfg->tailq_th * 0.1))) { > + pie->active = 0; > + pie->in_measurement = 0; > + } > + > + return 0; > +} > + > +/** > + * @brief make a decision to drop or enqueue a packet based on probability > + * criteria > + * > + * @param pie_cfg [in] config pointer to a PIE configuration parameter > structure > + * @param pie [in, out] data pointer to PIE runtime data > + * @param time [in] current time (measured in cpu 
cycles) > + */ > +static inline void > +__rte_experimental > +_calc_drop_probability(const struct rte_pie_config *pie_cfg, > + struct rte_pie *pie, uint64_t time) > +{ > + uint64_t qdelay_ref = pie_cfg->qdelay_ref; > + > + /* Note: can be implemented using integer multiply. > + * DQ_THRESHOLD is power of 2 value. > + */ > + double current_qdelay = pie->qlen * (pie->avg_dq_time / > RTE_DQ_THRESHOLD); > + > + double p = RTE_ALPHA * (current_qdelay - qdelay_ref) + > + RTE_BETA * (current_qdelay - pie->qdelay_old); > + > + if (pie->drop_prob < 0.000001) > + p = p * 0.00048828125; /* (1/2048) = 0.00048828125 > */ > + else if (pie->drop_prob < 0.00001) > + p = p * 0.001953125; /* (1/512) = 0.001953125 */ > + else if (pie->drop_prob < 0.0001) > + p = p * 0.0078125; /* (1/128) = 0.0078125 */ > + else if (pie->drop_prob < 0.001) > + p = p * 0.03125; /* (1/32) = 0.03125 */ > + else if (pie->drop_prob < 0.01) > + p = p * 0.125; /* (1/8) = 0.125 */ > + else if (pie->drop_prob < 0.1) > + p = p * 0.5; /* (1/2) = 0.5 */ > + > + if (pie->drop_prob >= 0.1 && p > 0.02) > + p = 0.02; > + > + pie->drop_prob += p; > + > + double qdelay = qdelay_ref * 0.5; > + > + /* Exponentially decay drop prob when congestion goes away */ > + if (current_qdelay < qdelay && pie->qdelay_old < qdelay) > + pie->drop_prob *= 0.98; /* 1 - 1/64 is sufficient */ > + > + /* Bound drop probability */ > + if (pie->drop_prob < 0) > + pie->drop_prob = 0; > + if (pie->drop_prob > 1) > + pie->drop_prob = 1; > + > + pie->qdelay_old = current_qdelay; > + pie->last_measurement = time; > + > + uint64_t burst_allowance = pie->burst_allowance - pie_cfg- > >dp_update_interval; > + > + pie->burst_allowance = (burst_allowance > 0) ? 
burst_allowance : 0; > +} > + > +/** > + * @brief make a decision to drop or enqueue a packet based on probability > + * criteria > + * > + * @param pie_cfg [in] config pointer to a PIE configuration parameter > structure > + * @param pie [in, out] data pointer to PIE runtime data > + * > + * @return operation status > + * @retval 0 enqueue the packet > + * @retval 1 drop the packet > + */ > +static inline int > +__rte_experimental > +_rte_pie_drop(const struct rte_pie_config *pie_cfg, > + struct rte_pie *pie) > +{ > + uint64_t rand_value; > + double qdelay = pie_cfg->qdelay_ref * 0.5; > + > + /* PIE is active but the queue is not congested: return 0 */ > + if (((pie->qdelay_old < qdelay) && (pie->drop_prob < 0.2)) || > + (pie->qlen <= (pie_cfg->tailq_th * 0.1))) > + return 0; > + > + if (pie->drop_prob == 0) > + pie->accu_prob = 0; > + > + /* For practical reasons, drop probability can be further scaled > according > + * to packet size, but one needs to set a bound to avoid unnecessary > bias > + * Random drop > + */ > + pie->accu_prob += pie->drop_prob; > + > + if (pie->accu_prob < 0.85) > + return 0; > + > + if (pie->accu_prob >= 8.5) > + return 1; > + > + rand_value = rte_rand()/RTE_RAND_MAX; > + > + if ((double)rand_value < pie->drop_prob) { > + pie->accu_prob = 0; > + return 1; > + } > + > + /* No drop */ > + return 0; > +} > + > +/** > + * @brief Decides if new packet should be enqeued or dropped for non- > empty queue > + * > + * @param pie_cfg [in] config pointer to a PIE configuration parameter > structure > + * @param pie [in,out] data pointer to PIE runtime data > + * @param pkt_len [in] packet length in bytes > + * @param time [in] current time (measured in cpu cycles) > + * > + * @return Operation status > + * @retval 0 enqueue the packet > + * @retval 1 drop the packet based on max threshold criterion > + * @retval 2 drop the packet based on mark probability criterion > + */ > +static inline int > +__rte_experimental > +rte_pie_enqueue_nonempty(const 
struct rte_pie_config *pie_cfg, > + struct rte_pie *pie, > + uint32_t pkt_len, > + const uint64_t time) > +{ > + /* Check queue space against the tail drop threshold */ > + if (pie->qlen >= pie_cfg->tailq_th) { > + > + pie->accu_prob = 0; > + return 1; > + } > + > + if (pie->active) { > + /* Update drop probability after certain interval */ > + if ((time - pie->last_measurement) >= pie_cfg- > >dp_update_interval) > + _calc_drop_probability(pie_cfg, pie, time); > + > + /* Decide whether packet to be dropped or enqueued */ > + if (_rte_pie_drop(pie_cfg, pie) && pie->burst_allowance == > 0) > + return 2; > + } > + > + /* When queue occupancy is over a certain threshold, turn on PIE */ > + if ((pie->active == 0) && > + (pie->qlen >= (pie_cfg->tailq_th * 0.1))) { > + pie->active = 1; > + pie->qdelay_old = 0; > + pie->drop_prob = 0; > + pie->in_measurement = 1; > + pie->departed_bytes_count = 0; > + pie->avg_dq_time = 0; > + pie->last_measurement = time; > + pie->burst_allowance = pie_cfg->max_burst; > + pie->accu_prob = 0; > + pie->start_measurement = time; > + } > + > + /* when queue has been idle for a while, turn off PIE and Reset > counters */ > + if (pie->active == 1 && > + pie->qlen < (pie_cfg->tailq_th * 0.1)) { > + pie->active = 0; > + pie->in_measurement = 0; > + } > + > + /* Update PIE qlen parameter */ > + pie->qlen++; > + pie->qlen_bytes += pkt_len; > + > + /* No drop */ > + return 0; > +} > + > +/** > + * @brief Decides if new packet should be enqeued or dropped > + * Updates run time data and gives verdict whether to enqueue or drop the > packet. 
> + * > + * @param pie_cfg [in] config pointer to a PIE configuration parameter > structure > + * @param pie [in,out] data pointer to PIE runtime data > + * @param qlen [in] queue length > + * @param pkt_len [in] packet length in bytes > + * @param time [in] current time stamp (measured in cpu cycles) > + * > + * @return Operation status > + * @retval 0 enqueue the packet > + * @retval 1 drop the packet based on drop probility criteria > + */ > +static inline int > +__rte_experimental > +rte_pie_enqueue(const struct rte_pie_config *pie_cfg, > + struct rte_pie *pie, > + const unsigned int qlen, > + uint32_t pkt_len, > + const uint64_t time) > +{ > + RTE_ASSERT(pie_cfg != NULL); > + RTE_ASSERT(pie != NULL); > + > + if (qlen != 0) > + return rte_pie_enqueue_nonempty(pie_cfg, pie, pkt_len, > time); > + else > + return rte_pie_enqueue_empty(pie_cfg, pie, pkt_len); > +} > + > +/** > + * @brief PIE rate estimation method > + * Called on each packet departure. > + * > + * @param pie [in] data pointer to PIE runtime data > + * @param pkt_len [in] packet length in bytes > + * @param time [in] current time stamp in cpu cycles > + */ > +static inline void > +__rte_experimental > +rte_pie_dequeue(struct rte_pie *pie, > + uint32_t pkt_len, > + uint64_t time) > +{ > + /* Dequeue rate estimation */ > + if (pie->in_measurement) { > + pie->departed_bytes_count += pkt_len; > + > + /* Start a new measurement cycle when enough packets */ > + if (pie->departed_bytes_count >= RTE_DQ_THRESHOLD) { > + uint64_t dq_time = time - pie->start_measurement; > + > + if (pie->avg_dq_time == 0) > + pie->avg_dq_time = dq_time; > + else > + pie->avg_dq_time = dq_time * > RTE_DQ_WEIGHT + pie->avg_dq_time > + * (1 - RTE_DQ_WEIGHT); > + > + pie->in_measurement = 0; > + } > + } > + > + /* Start measurement cycle when enough data in the queue */ > + if ((pie->qlen_bytes >= RTE_DQ_THRESHOLD) && (pie- > >in_measurement == 0)) { > + pie->in_measurement = 1; > + pie->start_measurement = time; > + 
pie->departed_bytes_count = 0; > + } > +} > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* __RTE_PIE_H_INCLUDED__ */ > diff --git a/lib/sched/rte_sched.c b/lib/sched/rte_sched.c > index a858f61f95..320435ed91 100644 > --- a/lib/sched/rte_sched.c > +++ b/lib/sched/rte_sched.c > @@ -89,8 +89,12 @@ struct rte_sched_queue { > > struct rte_sched_queue_extra { > struct rte_sched_queue_stats stats; > -#ifdef RTE_SCHED_RED > - struct rte_red red; > +#ifdef RTE_SCHED_AQM > + RTE_STD_C11 > + union { > + struct rte_red red; > + struct rte_pie pie; > + }; > #endif > }; > > @@ -183,8 +187,13 @@ struct rte_sched_subport { > /* Pipe queues size */ > uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; > > -#ifdef RTE_SCHED_RED > - struct rte_red_config > red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; > + enum rte_sched_aqm_mode aqm; > +#ifdef RTE_SCHED_AQM > + RTE_STD_C11 > + union { > + struct rte_red_config > wred_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; > + struct rte_pie_config > pie_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; > + }; > #endif > > /* Scheduling loop detection */ > @@ -1078,6 +1087,91 @@ rte_sched_free_memory(struct rte_sched_port > *port, uint32_t n_subports) > rte_free(port); > } > > +#ifdef RTE_SCHED_AQM > + > +static int > +rte_sched_red_config(struct rte_sched_port *port, > + struct rte_sched_subport *s, > + struct rte_sched_subport_params *params, > + uint32_t n_subports) > +{ > + uint32_t i; > + > + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { > + > + uint32_t j; > + > + for (j = 0; j < RTE_COLORS; j++) { > + /* if min/max are both zero, then RED is disabled */ > + if ((params->wred_params[i][j].min_th | > + params->wred_params[i][j].max_th) == 0) { > + continue; > + } > + > + if (rte_red_config_init(&s->wred_config[i][j], > + params->wred_params[i][j].wq_log2, > + params->wred_params[i][j].min_th, > + params->wred_params[i][j].max_th, > + params->wred_params[i][j].maxp_inv) != 0) { > + 
rte_sched_free_memory(port, n_subports); > + > + RTE_LOG(NOTICE, SCHED, > + "%s: RED configuration init fails\n", > __func__); > + return -EINVAL; > + } > + } > + } > + s->aqm = RTE_SCHED_AQM_WRED; > + return 0; > +} > + > +static int > +rte_sched_pie_config(struct rte_sched_port *port, > + struct rte_sched_subport *s, > + struct rte_sched_subport_params *params, > + uint32_t n_subports) > +{ > + uint32_t i; > + > + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { > + if (params->pie_params[i].tailq_th > params->qsize[i]) { > + RTE_LOG(NOTICE, SCHED, > + "%s: PIE tailq threshold incorrect\n", __func__); > + return -EINVAL; > + } > + > + if (rte_pie_config_init(&s->pie_config[i], > + params->pie_params[i].qdelay_ref, > + params->pie_params[i].dp_update_interval, > + params->pie_params[i].max_burst, > + params->pie_params[i].tailq_th) != 0) { > + rte_sched_free_memory(port, n_subports); > + > + RTE_LOG(NOTICE, SCHED, > + "%s: PIE configuration init fails\n", __func__); > + return -EINVAL; > + } > + } > + s->aqm = RTE_SCHED_AQM_PIE; > + return 0; > +} > + > +static int > +rte_sched_aqm_config(struct rte_sched_port *port, > + struct rte_sched_subport *s, > + struct rte_sched_subport_params *params, > + uint32_t n_subports) > +{ > + if (params->aqm == RTE_SCHED_AQM_WRED) > + return rte_sched_red_config(port, s, params, n_subports); > + > + else if (params->aqm == RTE_SCHED_AQM_PIE) > + return rte_sched_pie_config(port, s, params, n_subports); > + > + return -EINVAL; > +} > +#endif > + > int > rte_sched_subport_config(struct rte_sched_port *port, > uint32_t subport_id, > @@ -1167,29 +1261,11 @@ rte_sched_subport_config(struct > rte_sched_port *port, > s->n_pipe_profiles = params->n_pipe_profiles; > s->n_max_pipe_profiles = params->n_max_pipe_profiles; > > -#ifdef RTE_SCHED_RED > - for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { > - uint32_t j; > - > - for (j = 0; j < RTE_COLORS; j++) { > - /* if min/max are both zero, then RED is disabled */ > - if 
((params->red_params[i][j].min_th | > - params->red_params[i][j].max_th) == 0) { > - continue; > - } > - > - if (rte_red_config_init(&s->red_config[i][j], > - params->red_params[i][j].wq_log2, > - params->red_params[i][j].min_th, > - params->red_params[i][j].max_th, > - params->red_params[i][j].maxp_inv) != 0) > { > - RTE_LOG(NOTICE, SCHED, > - "%s: RED configuration init fails\n", > - __func__); > - ret = -EINVAL; > - goto out; > - } > - } > +#ifdef RTE_SCHED_AQM > + status = rte_sched_aqm_config(port, s, params, > n_subports); > + if (status) { > + RTE_LOG(NOTICE, SCHED, "%s: AQM configuration > fails\n", __func__); > + return status; > } > #endif > > @@ -1718,29 +1794,20 @@ rte_sched_port_update_subport_stats(struct > rte_sched_port *port, > subport->stats.n_bytes_tc[tc_index] += pkt_len; > } > > -#ifdef RTE_SCHED_RED > -static inline void > -rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port > *port, > - struct rte_sched_subport *subport, > - uint32_t qindex, > - struct rte_mbuf *pkt, > - uint32_t red) > -#else > static inline void > rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port > *port, > struct rte_sched_subport *subport, > uint32_t qindex, > struct rte_mbuf *pkt, > - __rte_unused uint32_t red) > -#endif > + __rte_unused uint32_t drops) > { > uint32_t tc_index = rte_sched_port_pipe_tc(port, qindex); > uint32_t pkt_len = pkt->pkt_len; > > subport->stats.n_pkts_tc_dropped[tc_index] += 1; > subport->stats.n_bytes_tc_dropped[tc_index] += pkt_len; > -#ifdef RTE_SCHED_RED > - subport->stats.n_pkts_red_dropped[tc_index] += red; > +#ifdef RTE_SCHED_AQM > + subport->stats.n_pkts_aqm_dropped[tc_index] += drops; > #endif > } > > @@ -1756,58 +1823,61 @@ rte_sched_port_update_queue_stats(struct > rte_sched_subport *subport, > qe->stats.n_bytes += pkt_len; > } > > -#ifdef RTE_SCHED_RED > -static inline void > -rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport > *subport, > - uint32_t qindex, > - struct rte_mbuf *pkt, > - 
uint32_t red) > -#else > static inline void > rte_sched_port_update_queue_stats_on_drop(struct rte_sched_subport > *subport, > uint32_t qindex, > struct rte_mbuf *pkt, > - __rte_unused uint32_t red) > -#endif > + __rte_unused uint32_t drops) > { > struct rte_sched_queue_extra *qe = subport->queue_extra + > qindex; > uint32_t pkt_len = pkt->pkt_len; > > qe->stats.n_pkts_dropped += 1; > qe->stats.n_bytes_dropped += pkt_len; > -#ifdef RTE_SCHED_RED > - qe->stats.n_pkts_red_dropped += red; > +#ifdef RTE_SCHED_AQM > + qe->stats.n_pkts_aqm_dropped += drops; > #endif > } > > #endif /* RTE_SCHED_COLLECT_STATS */ > > -#ifdef RTE_SCHED_RED > +#ifdef RTE_SCHED_AQM > > static inline int > -rte_sched_port_red_drop(struct rte_sched_port *port, > +rte_sched_port_aqm_drop(struct rte_sched_port *port, > struct rte_sched_subport *subport, > struct rte_mbuf *pkt, > uint32_t qindex, > uint16_t qlen) > { > struct rte_sched_queue_extra *qe; > - struct rte_red_config *red_cfg; > - struct rte_red *red; > uint32_t tc_index; > - enum rte_color color; > > tc_index = rte_sched_port_pipe_tc(port, qindex); > - color = rte_sched_port_pkt_read_color(pkt); > - red_cfg = &subport->red_config[tc_index][color]; > + qe = subport->queue_extra + qindex; > > - if ((red_cfg->min_th | red_cfg->max_th) == 0) > - return 0; > + /* WRED */ > + if (subport->aqm == RTE_SCHED_AQM_WRED) { > + struct rte_red_config *red_cfg; > + struct rte_red *red; > + enum rte_color color; > > - qe = subport->queue_extra + qindex; > - red = &qe->red; > + color = rte_sched_port_pkt_read_color(pkt); > + red_cfg = &subport->wred_config[tc_index][color]; > + > + if ((red_cfg->min_th | red_cfg->max_th) == 0) > + return 0; > > - return rte_red_enqueue(red_cfg, red, qlen, port->time); > + red = &qe->red; > + > + return rte_red_enqueue(red_cfg, red, qlen, port->time); > + } > + > + /* PIE */ > + struct rte_pie_config *pie_cfg = &subport->pie_config[tc_index]; > + struct rte_pie *pie = &qe->pie; > + > + return rte_pie_enqueue(pie_cfg, 
pie, pkt->pkt_len, qlen, port- > >time_cpu_cycles); > } > > static inline void > @@ -1815,14 +1885,29 @@ > rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port > *port, > struct rte_sched_subport *subport, uint32_t qindex) > { > struct rte_sched_queue_extra *qe = subport->queue_extra + > qindex; > - struct rte_red *red = &qe->red; > + if (subport->aqm == RTE_SCHED_AQM_WRED) { > + struct rte_red *red = &qe->red; > + > + rte_red_mark_queue_empty(red, port->time); > + } > +} > + > +static inline void > +rte_sched_port_pie_dequeue(struct rte_sched_subport *subport, > +uint32_t qindex, uint32_t pkt_len, uint64_t time) { > + struct rte_sched_queue_extra *qe = subport->queue_extra + > qindex; > + struct rte_pie *pie = &qe->pie; > > - rte_red_mark_queue_empty(red, port->time); > + /* Update queue length */ > + pie->qlen -= 1; > + pie->qlen_bytes -= pkt_len; > + > + rte_pie_dequeue(pie, pkt_len, time); > } > > #else > > -static inline int rte_sched_port_red_drop(struct rte_sched_port *port > __rte_unused, > +static inline int rte_sched_port_aqm_drop(struct rte_sched_port *port > __rte_unused, > struct rte_sched_subport *subport __rte_unused, > struct rte_mbuf *pkt __rte_unused, > uint32_t qindex __rte_unused, > @@ -1833,7 +1918,7 @@ static inline int rte_sched_port_red_drop(struct > rte_sched_port *port __rte_unus > > #define rte_sched_port_set_queue_empty_timestamp(port, subport, > qindex) > > -#endif /* RTE_SCHED_RED */ > +#endif /* RTE_SCHED_AQM */ > > #ifdef RTE_SCHED_DEBUG > > @@ -1929,7 +2014,7 @@ rte_sched_port_enqueue_qwa(struct > rte_sched_port *port, > qlen = q->qw - q->qr; > > /* Drop the packet (and update drop stats) when queue is full */ > - if (unlikely(rte_sched_port_red_drop(port, subport, pkt, qindex, > qlen) || > + if (unlikely(rte_sched_port_aqm_drop(port, subport, pkt, qindex, > qlen) || > (qlen >= qsize))) { > rte_pktmbuf_free(pkt); > #ifdef RTE_SCHED_COLLECT_STATS > @@ -2402,6 +2487,7 @@ grinder_schedule(struct rte_sched_port *port, > { > 
struct rte_sched_grinder *grinder = subport->grinder + pos; > struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; > + uint32_t qindex = grinder->qindex[grinder->qpos]; > struct rte_mbuf *pkt = grinder->pkt; > uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; > uint32_t be_tc_active; > @@ -2421,15 +2507,19 @@ grinder_schedule(struct rte_sched_port *port, > (pkt_len * grinder->wrr_cost[grinder->qpos]) & > be_tc_active; > > if (queue->qr == queue->qw) { > - uint32_t qindex = grinder->qindex[grinder->qpos]; > - > rte_bitmap_clear(subport->bmp, qindex); > grinder->qmask &= ~(1 << grinder->qpos); > if (be_tc_active) > grinder->wrr_mask[grinder->qpos] = 0; > + > rte_sched_port_set_queue_empty_timestamp(port, > subport, qindex); > } > > +#ifdef RTE_SCHED_AQM > + if (subport->aqm == RTE_SCHED_AQM_PIE) > + rte_sched_port_pie_dequeue(subport, qindex, pkt_len, > port->time_cpu_cycles); > +#endif > + > /* Reset pipe loop detection */ > subport->pipe_loop = RTE_SCHED_PIPE_INVALID; > grinder->productive = 1; > diff --git a/lib/sched/rte_sched.h b/lib/sched/rte_sched.h > index c1a772b70c..a5fe6266cd 100644 > --- a/lib/sched/rte_sched.h > +++ b/lib/sched/rte_sched.h > @@ -61,9 +61,10 @@ extern "C" { > #include <rte_mbuf.h> > #include <rte_meter.h> > > -/** Random Early Detection (RED) */ > -#ifdef RTE_SCHED_RED > +/** Active Queue Management */ > +#ifdef RTE_SCHED_AQM > #include "rte_red.h" > +#include "rte_pie.h" > #endif > > /** Maximum number of queues per pipe. > @@ -110,6 +111,28 @@ extern "C" { > #define RTE_SCHED_FRAME_OVERHEAD_DEFAULT 24 > #endif > > +/** > + * Active Queue Management (AQM) mode > + * > + * This is used for controlling the admission of packets into a packet queue > or > + * group of packet queues on congestion. > + * > + * The *Random Early Detection (RED)* algorithm works by proactively > dropping > + * more and more input packets as the queue occupancy builds up. 
When > the queue > + * is full or almost full, RED effectively works as *tail drop*. The > *Weighted > + * RED* algorithm uses a separate set of RED thresholds for each packet > color. > + * > + * Similar to RED, Proportional Integral Controller Enhanced (PIE) randomly > + * drops a packet at the onset of the congestion and tries to control the > + * latency around the target value. The congestion detection, however, is > based > + * on the queueing latency instead of the queue length like RED. For more > + * information, refer RFC8033. > + */ > +enum rte_sched_aqm_mode { > + RTE_SCHED_AQM_WRED, /**< Weighted Random Early Detection > (WRED) */ > + RTE_SCHED_AQM_PIE, /**< Proportional Integral Controller > Enhanced (PIE) */ > +}; > + > /* > * Pipe configuration parameters. The period and credits_per_period > * parameters are measured in bytes, with one byte meaning the time > @@ -174,9 +197,17 @@ struct rte_sched_subport_params { > /** Max allowed profiles in the pipe profile table */ > uint32_t n_max_pipe_profiles; > > -#ifdef RTE_SCHED_RED > - /** RED parameters */ > - struct rte_red_params > red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; > +#ifdef RTE_SCHED_AQM > + /** Active Queue Management mode */ > + enum rte_sched_aqm_mode aqm; > + > + RTE_STD_C11 > + union { > + /** WRED parameters */ > + struct rte_red_params > wred_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][RTE_COLORS]; > + /** PIE parameters */ > + struct rte_pie_params > pie_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; > + }; > #endif > }; > > @@ -208,9 +239,9 @@ struct rte_sched_subport_stats { > /** Number of bytes dropped for each traffic class */ > uint64_t > n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; > > -#ifdef RTE_SCHED_RED > - /** Number of packets dropped by red */ > - uint64_t > n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; > +#ifdef RTE_SCHED_AQM > + /** Number of packets dropped by active queue management > scheme */ > + uint64_t > 
n_pkts_aqm_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; > #endif > }; > > @@ -222,9 +253,9 @@ struct rte_sched_queue_stats { > /** Packets dropped */ > uint64_t n_pkts_dropped; > > -#ifdef RTE_SCHED_RED > - /** Packets dropped by RED */ > - uint64_t n_pkts_red_dropped; > +#ifdef RTE_SCHED_AQM > + /** Packets dropped by active queue management scheme */ > + uint64_t n_pkts_aqm_dropped; > #endif > > /** Bytes successfully written */ > diff --git a/lib/sched/version.map b/lib/sched/version.map > index ace284b7de..3422821ac8 100644 > --- a/lib/sched/version.map > +++ b/lib/sched/version.map > @@ -30,4 +30,7 @@ EXPERIMENTAL { > rte_sched_subport_pipe_profile_add; > # added in 20.11 > rte_sched_port_subport_profile_add; > + > + rte_pie_rt_data_init; > + rte_pie_config_init; > }; > -- > 2.25.1
NACK I see that none of my previous comments from the V4 review got implemented; is there any reason to silently discard all of them? https://patches.dpdk.org/project/dpdk/patch/20210705080421.18736-2-wojciechx.liguzin...@intel.com/ I did not see any reply from you on my comments, so I assumed that you accepted and implemented most of them, but I see that none of them were picked up. Also, I don't see any revision history, just the version counter gets incremented, so reviewing a new version of your patch requires re-reading every line of code, which is time-consuming. Could you please add a revision history? Thanks, Cristian