Signed-off-by: Danny Zhou <danny.zhou at intel.com>
---
 examples/l3fwd-power/main.c | 170 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 129 insertions(+), 41 deletions(-)

diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index f6b55b9..e6e4f55 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -75,12 +75,13 @@
 #include <rte_string_fns.h>
 #include <rte_timer.h>
 #include <rte_power.h>
+#include <rte_eal.h>

 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1

 #define MAX_PKT_BURST 32

-#define MIN_ZERO_POLL_COUNT 5
+#define MIN_ZERO_POLL_COUNT 10

 /* around 100ms at 2 Ghz */
 #define TIMER_RESOLUTION_CYCLES           200000000ULL
@@ -188,6 +189,9 @@ struct lcore_rx_queue {
 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
 #define MAX_RX_QUEUE_PER_PORT 128

+#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
+
+
 #define MAX_LCORE_PARAMS 1024
 struct lcore_params {
        uint8_t port_id;
@@ -214,7 +218,7 @@ static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /

 static struct rte_eth_conf port_conf = {
        .rxmode = {
-               .mq_mode        = ETH_MQ_RX_RSS,
+               .mq_mode = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
                .header_split   = 0, /**< Header Split disabled */
@@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf = {
        .rx_adv_conf = {
                .rss_conf = {
                        .rss_key = NULL,
-                       .rss_hf = ETH_RSS_IP,
+                       .rss_hf = ETH_RSS_UDP,
                },
        },
        .txmode = {
-               .mq_mode = ETH_DCB_NONE,
+               .mq_mode = ETH_MQ_TX_NONE,
+       },
+       .intr_conf = {
+               .rxq = 1, /**< rxq interrupt feature enabled */
        },
 };

@@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
        /* accumulate total execution time in us when callback is invoked */
        sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
                                        (float)SCALING_PERIOD;
-
        /**
         * check whether need to scale down frequency a step if it sleep a lot.
         */
-       if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
-               rte_power_freq_down(lcore_id);
+       if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
+               if (rte_power_freq_down)
+                       rte_power_freq_down(lcore_id);
+       }
        else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
-               stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
+               stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
                /**
                 * scale down a step if average packet per iteration less
                 * than expectation.
                 */
-               rte_power_freq_down(lcore_id);
+               if (rte_power_freq_down)
+                       rte_power_freq_down(lcore_id);
+       }

        /**
         * initialize another timer according to current frequency to ensure
@@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,

 }

-#define SLEEP_GEAR1_THRESHOLD            100
-#define SLEEP_GEAR2_THRESHOLD            1000
+#define MINIMUM_SLEEP_TIME         1
+#define SUSPEND_THRESHOLD          300

 static inline uint32_t
 power_idle_heuristic(uint32_t zero_rx_packet_count)
 {
-       /* If zero count is less than 100, use it as the sleep time in us */
-       if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
-               return zero_rx_packet_count;
-       /* If zero count is less than 1000, sleep time should be 100 us */
-       else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) &&
-                       (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD))
-               return SLEEP_GEAR1_THRESHOLD;
-       /* If zero count is greater than 1000, sleep time should be 1000 us */
-       else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD)
-               return SLEEP_GEAR2_THRESHOLD;
+       /* If zero count is less than SUSPEND_THRESHOLD, sleep 1 us (MINIMUM_SLEEP_TIME) */
+       if (zero_rx_packet_count < SUSPEND_THRESHOLD)
+               return MINIMUM_SLEEP_TIME;
+       /* Otherwise return SUSPEND_THRESHOLD, which tells the caller to suspend until an rx
+           interrupt arrives (original note: 100 us is the minimum latency switching from C3/C6 to C0)
+       */
+       else
+               return SUSPEND_THRESHOLD;

        return 0;
 }
@@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id,
        return FREQ_CURRENT;
 }

+/**
+ * force polling thread sleep until one-shot rx interrupt triggers
+ * @param port_id
+ *  Port id.
+ * @param queue_id
+ *  Rx queue id.
+ * @return
+ *  0 on success
+ */
+static int
+sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id)
+{
+       /* Enable one-shot rx interrupt */
+       rte_eth_dev_rx_queue_intr_enable(port_id, queue_id);
+
+       RTE_LOG(INFO, L3FWD_POWER,
+               "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n",
+               rte_lcore_id(), port_id, queue_id);
+       rte_eal_wait_rx_intr(port_id, queue_id);
+       RTE_LOG(INFO, L3FWD_POWER,
+               "lcore %u is waked up from rx interrupt on port%d,rxq%d\n",
+               rte_lcore_id(), port_id, queue_id);
+
+       /* Disable one-shot rx interrupt */
+       rte_eth_dev_rx_queue_intr_disable(port_id, queue_id);
+
+       return 0;
+}
+
 /* main processing loop */
 static int
 main_loop(__attribute__((unused)) void *dummy)
@@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy)
        struct lcore_conf *qconf;
        struct lcore_rx_queue *rx_queue;
        enum freq_scale_hint_t lcore_scaleup_hint;
-
        uint32_t lcore_rx_idle_count = 0;
        uint32_t lcore_idle_hint = 0;

@@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy)
                        prev_tsc_power = cur_tsc_power;
                }

+
+start_rx:
                /*
                 * Read packet from RX queues
                 */
@@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy)

                        nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
                                                                MAX_PKT_BURST);
+
                        stats[lcore_id].nb_rx_processed += nb_rx;
                        if (unlikely(nb_rx == 0)) {
                                /**
@@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy)
                                                rx_queue->freq_up_hint;
                        }

-                       if (lcore_scaleup_hint == FREQ_HIGHEST)
-                               rte_power_freq_max(lcore_id);
-                       else if (lcore_scaleup_hint == FREQ_HIGHER)
-                               rte_power_freq_up(lcore_id);
+                       if (lcore_scaleup_hint == FREQ_HIGHEST) {
+                               if (rte_power_freq_max)
+                                       rte_power_freq_max(lcore_id);
+                       } else if (lcore_scaleup_hint == FREQ_HIGHER) {
+                               if (rte_power_freq_up)
+                                       rte_power_freq_up(lcore_id);
+                       }
                } else {
                        /**
                         * All Rx queues empty in recent consecutive polls,
@@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy)
                                        lcore_idle_hint = rx_queue->idle_hint;
                        }

-                       if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD)
+                       if (lcore_idle_hint < SUSPEND_THRESHOLD)
                                /**
-                                * execute "pause" instruction to avoid context
-                                * switch for short sleep.
-                                */
+                               * execute "pause" instruction to avoid context
+                               * switch which generally takes hundreds of microseconds
+                               * for short sleep.
+                               */
                                rte_delay_us(lcore_idle_hint);
-                       else
-                               /* long sleep force runing thread to suspend */
-                               usleep(lcore_idle_hint);
-
+                       else {
+                               /* suspend until rx interrupt triggers */
+                               sleep_until_rx_interrupt(
+                                       qconf->rx_queue_list[0].port_id,
+                                       qconf->rx_queue_list[0].queue_id);
+                               /* start receiving packets immediately */
+                               goto start_rx;
+                       }
                        stats[lcore_id].sleep_time += lcore_idle_hint;
                }
        }
 }

+/**
+ * It will be called as the callback for specified port after a LSI interrupt
+ * has been fully handled. This callback needs to be implemented carefully as
+ * it will be called in the interrupt host thread which is different from the
+ * application main thread.
+ *
+ * @param port_id
+ *  Port id.
+ * @param type
+ *  event type.
+ * @param param
+ *  Pointer to(address of) the parameters.
+ *
+ * @return
+ *  void.
+ */
+
+/*
+static void
+rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param)
+{
+       uint64_t rx_queues = *((uint64_t *)param);
+
+       port_id = port_id + 1;
+       if(type == RTE_ETH_EVENT_INTR_RX)
+               port_id = rx_queues;
+}
+*/
+
 static int
 check_lcore_params(void)
 {
@@ -1270,7 +1346,7 @@ setup_hash(int socketid)
        char s[64];

        /* create ipv4 hash */
-       snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+       rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
        ipv4_l3fwd_hash_params.name = s;
        ipv4_l3fwd_hash_params.socket_id = socketid;
        ipv4_l3fwd_lookup_struct[socketid] =
@@ -1280,7 +1356,7 @@ setup_hash(int socketid)
                                "socket %d\n", socketid);

        /* create ipv6 hash */
-       snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+       rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
        ipv6_l3fwd_hash_params.name = s;
        ipv6_l3fwd_hash_params.socket_id = socketid;
        ipv6_l3fwd_lookup_struct[socketid] =
@@ -1476,6 +1552,7 @@ main(int argc, char **argv)
        unsigned lcore_id;
        uint64_t hz;
        uint32_t n_tx_queue, nb_lcores;
+       uint32_t dev_rxq_num, dev_txq_num;
        uint8_t portid, nb_rx_queue, queue, socketid;

        /* catch SIGINT and restore cpufreq governor to ondemand */
@@ -1525,10 +1602,18 @@ main(int argc, char **argv)
                printf("Initializing port %d ... ", portid );
                fflush(stdout);

+               rte_eth_dev_info_get(portid, &dev_info);
+               dev_rxq_num = dev_info.max_rx_queues;
+               dev_txq_num = dev_info.max_tx_queues;
+
                nb_rx_queue = get_port_n_rx_queues(portid);
+               if (nb_rx_queue > dev_rxq_num)
+                       rte_exit(EXIT_FAILURE, "Cannot configure not existed rxq: "
+                                       "port=%d\n", portid);
+
                n_tx_queue = nb_lcores;
-               if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
-                       n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+               if (n_tx_queue > dev_txq_num)
+                       n_tx_queue = dev_txq_num;
                printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
                        nb_rx_queue, (unsigned)n_tx_queue );
                ret = rte_eth_dev_configure(portid, nb_rx_queue,
@@ -1552,6 +1637,9 @@ main(int argc, char **argv)
                        if (rte_lcore_is_enabled(lcore_id) == 0)
                                continue;

+                       if (queueid >= dev_txq_num)
+                               continue;
+
                        if (numa_on)
                                socketid = \
                                (uint8_t)rte_lcore_to_socket_id(lcore_id);
@@ -1586,8 +1674,9 @@ main(int argc, char **argv)
                /* init power management library */
                ret = rte_power_init(lcore_id);
                if (ret)
-                       rte_exit(EXIT_FAILURE, "Power management library "
-                               "initialization failed on core%u\n", lcore_id);
+                       rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER,
+                               "Power management library initialization "
+                               "failed on core%u", lcore_id);

                /* init timer structures for each enabled lcore */
                rte_timer_init(&power_timers[lcore_id]);
@@ -1635,7 +1724,6 @@ main(int argc, char **argv)
                if (ret < 0)
                        rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
                                                "port=%d\n", ret, portid);
-
                /*
                 * If enabled, put device in promiscuous mode.
                 * This allows IO forwarding mode to forward packets
-- 
1.8.1.4

Reply via email to