From: Jasvinder Singh <jasvinder.si...@intel.com>

Update weighted round robin function for best-effort traffic class
queues of the scheduler to allow configuration flexibility for pipe traffic
classes and queues, and subport-level configuration of the pipe parameters.

Signed-off-by: Jasvinder Singh <jasvinder.si...@intel.com>
Signed-off-by: Abraham Tovar <abrahamx.to...@intel.com>
Signed-off-by: Lukasz Krakowiak <lukaszx.krakow...@intel.com>
---
 lib/librte_sched/rte_sched.c        | 135 +++++++++++++++++-----------
 lib/librte_sched/rte_sched_common.h |  41 +++++++++
 2 files changed, 125 insertions(+), 51 deletions(-)

diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index 07939c04f..a9b5f7bf8 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -2316,73 +2316,106 @@ grinder_next_pipe(struct rte_sched_subport *subport, 
uint32_t pos)
        return 1;
 }
 
-
 static inline void
-grinder_wrr_load(struct rte_sched_port *port, uint32_t pos)
+grinder_wrr_load(struct rte_sched_subport *subport, uint32_t pos)
 {
-       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_grinder *grinder = subport->grinder + pos;
        struct rte_sched_pipe *pipe = grinder->pipe;
        struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params;
-       uint32_t tc_index = grinder->tc_index;
-       uint32_t qmask = grinder->qmask;
-       uint32_t qindex;
-
-       qindex = tc_index * 4;
-
-       grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << 
RTE_SCHED_WRR_SHIFT;
-       grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << 
RTE_SCHED_WRR_SHIFT;
-       grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << 
RTE_SCHED_WRR_SHIFT;
-       grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << 
RTE_SCHED_WRR_SHIFT;
-
-       grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF;
-       grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF;
-       grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF;
-       grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF;
+       uint32_t qmask = grinder->be.qmask;
+       uint32_t qindex = grinder->be.qindex[0];
+       uint32_t i;
 
-       grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex];
-       grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1];
-       grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2];
-       grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3];
+       for (i = 0; i < pipe->n_be_queues; i++) {
+               grinder->be.wrr_tokens[i] =
+                       ((uint16_t) pipe->wrr_tokens[qindex + i]) << 
RTE_SCHED_WRR_SHIFT;
+               grinder->be.wrr_mask[i] = ((qmask >> i) & 0x1) * 0xFFFF;
+               grinder->be.wrr_cost[i] = pipe_params->wrr_cost[qindex + i];
+       }
 }
 
 static inline void
-grinder_wrr_store(struct rte_sched_port *port, uint32_t pos)
+grinder_wrr_store(struct rte_sched_subport *subport, uint32_t pos)
 {
-       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_grinder *grinder = subport->grinder + pos;
        struct rte_sched_pipe *pipe = grinder->pipe;
        uint32_t tc_index = grinder->tc_index;
-       uint32_t qindex;
-
-       qindex = tc_index * 4;
+       uint32_t i;
 
-       pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & 
grinder->wrr_mask[0])
-               >> RTE_SCHED_WRR_SHIFT;
-       pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & 
grinder->wrr_mask[1])
-               >> RTE_SCHED_WRR_SHIFT;
-       pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & 
grinder->wrr_mask[2])
-               >> RTE_SCHED_WRR_SHIFT;
-       pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & 
grinder->wrr_mask[3])
-               >> RTE_SCHED_WRR_SHIFT;
+       if (tc_index == RTE_SCHED_TRAFFIC_CLASS_BE)
+               for (i = 0; i < pipe->n_be_queues; i++)
+                       pipe->wrr_tokens[i] =
+                               (grinder->be.wrr_tokens[i] & 
grinder->be.wrr_mask[i]) >>
+                               RTE_SCHED_WRR_SHIFT;
 }
 
 static inline void
-grinder_wrr(struct rte_sched_port *port, uint32_t pos)
+grinder_wrr(struct rte_sched_subport *subport, uint32_t pos)
 {
-       struct rte_sched_grinder *grinder = port->grinder + pos;
+       struct rte_sched_grinder *grinder = subport->grinder + pos;
+       struct rte_sched_pipe *pipe = grinder->pipe;
+       uint32_t n_be_queues = pipe->n_be_queues;
        uint16_t wrr_tokens_min;
 
-       grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0];
-       grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1];
-       grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2];
-       grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3];
+       if (n_be_queues == 1) {
+               grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0];
+               grinder->be.qpos = 0;
+               wrr_tokens_min = grinder->be.wrr_tokens[0];
+               grinder->be.wrr_tokens[0] -= wrr_tokens_min;
+               return;
+       }
+
+       if (n_be_queues == 2) {
+               grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0];
+               grinder->be.wrr_tokens[1] |= ~grinder->be.wrr_mask[1];
+
+               grinder->be.qpos = rte_min_pos_2_u16(grinder->be.wrr_tokens);
+               wrr_tokens_min = grinder->be.wrr_tokens[grinder->be.qpos];
+
+               grinder->be.wrr_tokens[0] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[1] -= wrr_tokens_min;
+               return;
+       }
+
+       if (n_be_queues == 4) {
+               grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0];
+               grinder->be.wrr_tokens[1] |= ~grinder->be.wrr_mask[1];
+               grinder->be.wrr_tokens[2] |= ~grinder->be.wrr_mask[2];
+               grinder->be.wrr_tokens[3] |= ~grinder->be.wrr_mask[3];
+
+               grinder->be.qpos = rte_min_pos_4_u16(grinder->be.wrr_tokens);
+               wrr_tokens_min = grinder->be.wrr_tokens[grinder->be.qpos];
 
-       grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens);
-       wrr_tokens_min = grinder->wrr_tokens[grinder->qpos];
+               grinder->be.wrr_tokens[0] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[1] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[2] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[3] -= wrr_tokens_min;
+               return;
+       }
 
-       grinder->wrr_tokens[0] -= wrr_tokens_min;
-       grinder->wrr_tokens[1] -= wrr_tokens_min;
-       grinder->wrr_tokens[2] -= wrr_tokens_min;
-       grinder->wrr_tokens[3] -= wrr_tokens_min;
+       if (n_be_queues == 8) {
+               grinder->be.wrr_tokens[0] |= ~grinder->be.wrr_mask[0];
+               grinder->be.wrr_tokens[1] |= ~grinder->be.wrr_mask[1];
+               grinder->be.wrr_tokens[2] |= ~grinder->be.wrr_mask[2];
+               grinder->be.wrr_tokens[3] |= ~grinder->be.wrr_mask[3];
+               grinder->be.wrr_tokens[4] |= ~grinder->be.wrr_mask[4];
+               grinder->be.wrr_tokens[5] |= ~grinder->be.wrr_mask[5];
+               grinder->be.wrr_tokens[6] |= ~grinder->be.wrr_mask[6];
+               grinder->be.wrr_tokens[7] |= ~grinder->be.wrr_mask[7];
+
+               grinder->be.qpos = rte_min_pos_8_u16(grinder->be.wrr_tokens);
+               wrr_tokens_min = grinder->be.wrr_tokens[grinder->be.qpos];
+
+               grinder->be.wrr_tokens[0] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[1] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[2] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[3] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[4] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[5] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[6] -= wrr_tokens_min;
+               grinder->be.wrr_tokens[7] -= wrr_tokens_min;
+               return;
+       }
 }
 
 
@@ -2423,8 +2456,8 @@ grinder_prefetch_tc_queue_arrays(struct rte_sched_port 
*port, uint32_t pos)
                rte_prefetch0(grinder->be.qbase[i] + qr[i]);
        }
 
-       grinder_wrr_load(port, pos);
-       grinder_wrr(port, pos);
+       grinder_wrr_load(port->subport, pos);
+       grinder_wrr(port->subport, pos);
 }
 
 static inline void
@@ -2493,12 +2526,12 @@ grinder_handle(struct rte_sched_port *port, uint32_t 
pos)
 
                /* Look for next packet within the same TC */
                if (result && grinder->qmask) {
-                       grinder_wrr(port, pos);
+                       grinder_wrr(port->subport, pos);
                        grinder_prefetch_mbuf(port, pos);
 
                        return 1;
                }
-               grinder_wrr_store(port, pos);
+               grinder_wrr_store(port->subport, pos);
 
                /* Look for another active TC within same pipe */
                if (grinder_next_tc(port->subport, pos)) {
diff --git a/lib/librte_sched/rte_sched_common.h 
b/lib/librte_sched/rte_sched_common.h
index 8c191a9b8..bb3595f26 100644
--- a/lib/librte_sched/rte_sched_common.h
+++ b/lib/librte_sched/rte_sched_common.h
@@ -20,6 +20,18 @@ rte_sched_min_val_2_u32(uint32_t x, uint32_t y)
        return (x < y)? x : y;
 }
 
+/* Position of min(x[0], x[1]); ties pick 1. Meant to compile to CMOV */
+static inline uint32_t
+rte_min_pos_2_u16(uint16_t *x)
+{
+       uint32_t pos0 = 0;
+
+       if (x[1] <= x[0])
+               pos0 = 1;
+
+       return pos0;
+}
+
 #if 0
 static inline uint32_t
 rte_min_pos_4_u16(uint16_t *x)
@@ -50,6 +62,35 @@ rte_min_pos_4_u16(uint16_t *x)
 
 #endif
 
+/* Position of min of x[0..7]; ties pick last. Meant to compile to CMOV */
+static inline uint32_t
+rte_min_pos_8_u16(uint16_t *x)
+{
+       uint32_t pos0 = 0;
+       uint32_t pos1 = 2;
+       uint32_t pos2 = 4;
+       uint32_t pos3 = 6;
+
+       if (x[1] <= x[0])
+               pos0 = 1;
+       if (x[3] <= x[2])
+               pos1 = 3;
+       if (x[5] <= x[4])
+               pos2 = 5;
+       if (x[7] <= x[6])
+               pos3 = 7;
+
+       if (x[pos1] <= x[pos0])
+               pos0 = pos1;
+       if (x[pos3] <= x[pos2])
+               pos2 = pos3;
+
+       if (x[pos2] <= x[pos0])
+               pos0 = pos2;
+
+       return pos0;
+}
+
 /*
  * Compute the Greatest Common Divisor (GCD) of two numbers.
  * This implementation uses Euclid's algorithm:
-- 
2.20.1

Reply via email to