> -----Original Message----- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Danny Zhou > Sent: Wednesday, January 28, 2015 2:51 AM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt > and polling/interrupt mode switch > > Signed-off-by: Danny Zhou <danny.zhou at intel.com> > --- > examples/l3fwd-power/main.c | 170 > +++++++++++++++++++++++++++++++++----------- > 1 file changed, 129 insertions(+), 41 deletions(-) > > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c > index f6b55b9..e6e4f55 100644 > --- a/examples/l3fwd-power/main.c > +++ b/examples/l3fwd-power/main.c > @@ -75,12 +75,13 @@ > #include <rte_string_fns.h> > #include <rte_timer.h> > #include <rte_power.h> > +#include <rte_eal.h> > > #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 > > #define MAX_PKT_BURST 32 > > -#define MIN_ZERO_POLL_COUNT 5 > +#define MIN_ZERO_POLL_COUNT 10 > > /* around 100ms at 2 Ghz */ > #define TIMER_RESOLUTION_CYCLES 200000000ULL > @@ -188,6 +189,9 @@ struct lcore_rx_queue { > #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS > #define MAX_RX_QUEUE_PER_PORT 128 > > +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 > + > + > #define MAX_LCORE_PARAMS 1024 > struct lcore_params { > uint8_t port_id; > @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params = > sizeof(lcore_params_array_default) / > > static struct rte_eth_conf port_conf = { > .rxmode = { > - .mq_mode = ETH_MQ_RX_RSS, > + .mq_mode = ETH_MQ_RX_RSS, > .max_rx_pkt_len = ETHER_MAX_LEN, > .split_hdr_size = 0, > .header_split = 0, /**< Header Split disabled */ > @@ -226,11 +230,14 @@ static struct rte_eth_conf port_conf = { > .rx_adv_conf = { > .rss_conf = { > .rss_key = NULL, > - .rss_hf = ETH_RSS_IP, > + .rss_hf = ETH_RSS_UDP, > }, > }, > .txmode = { > - .mq_mode = ETH_DCB_NONE, > + .mq_mode = ETH_MQ_TX_NONE, > + }, > + .intr_conf = { > + .rxq = 1, /**< rxq interrupt feature enabled */ > }, > }; > > @@ -402,19 +409,22 @@ 
power_timer_cb(__attribute__((unused)) struct > rte_timer *tim, > /* accumulate total execution time in us when callback is invoked */ > sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / > (float)SCALING_PERIOD; > - > /** > * check whether need to scale down frequency a step if it sleep a lot. > */ > - if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) > - rte_power_freq_down(lcore_id); > + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { > + if (rte_power_freq_down) > + rte_power_freq_down(lcore_id); > + } > else if ( (unsigned)(stats[lcore_id].nb_rx_processed / > - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) > + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { > /** > * scale down a step if average packet per iteration less > * than expectation. > */ > - rte_power_freq_down(lcore_id); > + if (rte_power_freq_down) > + rte_power_freq_down(lcore_id); > + } > > /** > * initialize another timer according to current frequency to ensure > @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t > portid, > > } > > -#define SLEEP_GEAR1_THRESHOLD 100 > -#define SLEEP_GEAR2_THRESHOLD 1000 > +#define MINIMUM_SLEEP_TIME 1 > +#define SUSPEND_THRESHOLD 300 > > static inline uint32_t > power_idle_heuristic(uint32_t zero_rx_packet_count) > { > - /* If zero count is less than 100, use it as the sleep time in us */ > - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD) > - return zero_rx_packet_count; > - /* If zero count is less than 1000, sleep time should be 100 us */ > - else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) && > - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD)) > - return SLEEP_GEAR1_THRESHOLD; > - /* If zero count is greater than 1000, sleep time should be 1000 us */ > - else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD) > - return SLEEP_GEAR2_THRESHOLD; > + /* If zero count is less than 100, sleep 1us */ > + if (zero_rx_packet_count < SUSPEND_THRESHOLD) > + return MINIMUM_SLEEP_TIME; > + /* If 
zero count is less than 1000, sleep 100 us which is the minimum > latency > + switching from C3/C6 to C0 > + */ > + else > + return SUSPEND_THRESHOLD; > > return 0; > } > @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id, > return FREQ_CURRENT; > } > > +/** > + * force polling thread sleep until one-shot rx interrupt triggers > + * @param port_id > + * Port id. > + * @param queue_id > + * Rx queue id. > + * @return > + * 0 on success > + */ > +static int > +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id) > +{ > + /* Enable one-shot rx interrupt */ > + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id); > + > + RTE_LOG(INFO, L3FWD_POWER, > + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n", > + rte_lcore_id(), port_id, queue_id); > + rte_eal_wait_rx_intr(port_id, queue_id); > + RTE_LOG(INFO, L3FWD_POWER, > + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n", > + rte_lcore_id(), port_id, queue_id); > + > + /* Disable one-shot rx interrupt */ > + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id); > + > + return 0; > +} > + > /* main processing loop */ > static int > main_loop(__attribute__((unused)) void *dummy) > @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy) > struct lcore_conf *qconf; > struct lcore_rx_queue *rx_queue; > enum freq_scale_hint_t lcore_scaleup_hint; > - > uint32_t lcore_rx_idle_count = 0; > uint32_t lcore_idle_hint = 0; > > @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy) > prev_tsc_power = cur_tsc_power; > } > > + > +start_rx: > /* > * Read packet from RX queues > */ > @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy) > > nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, > MAX_PKT_BURST); > + > stats[lcore_id].nb_rx_processed += nb_rx; > if (unlikely(nb_rx == 0)) { > /** > @@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy) > rx_queue->freq_up_hint; > } > > - if (lcore_scaleup_hint == FREQ_HIGHEST) > - 
rte_power_freq_max(lcore_id); > - else if (lcore_scaleup_hint == FREQ_HIGHER) > - rte_power_freq_up(lcore_id); > + if (lcore_scaleup_hint == FREQ_HIGHEST) { > + if (rte_power_freq_max) > + rte_power_freq_max(lcore_id); > + } else if (lcore_scaleup_hint == FREQ_HIGHER) { > + if (rte_power_freq_up) > + rte_power_freq_up(lcore_id); > + } > } else { > /** > * All Rx queues empty in recent consecutive polls, > @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy) > lcore_idle_hint = rx_queue->idle_hint; > } > > - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD) > + if (lcore_idle_hint < SUSPEND_THRESHOLD) > /** > - * execute "pause" instruction to avoid context > - * switch for short sleep. > - */ > + * execute "pause" instruction to avoid context > + * switch which generally take hundres of > microsecond > + * for short sleep. > + */ > rte_delay_us(lcore_idle_hint); > - else > - /* long sleep force runing thread to suspend */ > - usleep(lcore_idle_hint); > - > + else { > + /* suspend untill rx interrupt trigges */ > + sleep_until_rx_interrupt( > + qconf->rx_queue_list[0].port_id, > + qconf->rx_queue_list[0].queue_id); > + /* start receiving packets immediately */ > + goto start_rx; > + } > stats[lcore_id].sleep_time += lcore_idle_hint; > } > } > } > > +/** > + * It will be called as the callback for specified port after a LSI interrupt > + * has been fully handled. This callback needs to be implemented carefully as > + * it will be called in the interrupt host thread which is different from the > + * application main thread. > + * > + * @param port_id > + * Port id. > + * @param type > + * event type. > + * @param param > + * Pointer to(address of) the parameters. > + * > + * @return > + * void. 
> + */ > + > +/* > +static void > +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type, > void *param) > +{ > + uint64_t rx_queues = *((uint64_t *)param); > + > + port_id = port_id + 1; > + if(type == RTE_ETH_EVENT_INTR_RX) > + port_id = rx_queues; [LCM] What is this bunch of things for?
> +} > +*/ > + > static int > check_lcore_params(void) > { > @@ -1270,7 +1346,7 @@ setup_hash(int socketid) > char s[64]; > > /* create ipv4 hash */ > - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > ipv4_l3fwd_hash_params.name = s; > ipv4_l3fwd_hash_params.socket_id = socketid; > ipv4_l3fwd_lookup_struct[socketid] = > @@ -1280,7 +1356,7 @@ setup_hash(int socketid) > "socket %d\n", socketid); > > /* create ipv6 hash */ > - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > ipv6_l3fwd_hash_params.name = s; > ipv6_l3fwd_hash_params.socket_id = socketid; > ipv6_l3fwd_lookup_struct[socketid] = > @@ -1476,6 +1552,7 @@ main(int argc, char **argv) > unsigned lcore_id; > uint64_t hz; > uint32_t n_tx_queue, nb_lcores; > + uint32_t dev_rxq_num, dev_txq_num; > uint8_t portid, nb_rx_queue, queue, socketid; > > /* catch SIGINT and restore cpufreq governor to ondemand */ > @@ -1525,10 +1602,18 @@ main(int argc, char **argv) > printf("Initializing port %d ... ", portid ); > fflush(stdout); > > + rte_eth_dev_info_get(portid, &dev_info); > + dev_rxq_num = dev_info.max_rx_queues; > + dev_txq_num = dev_info.max_tx_queues; > + > nb_rx_queue = get_port_n_rx_queues(portid); > + if (nb_rx_queue > dev_rxq_num) > + rte_exit(EXIT_FAILURE, "Cannot configure not existed > rxq: " > + "port=%d\n", portid); > + > n_tx_queue = nb_lcores; > - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) > - n_tx_queue = MAX_TX_QUEUE_PER_PORT; > + if (n_tx_queue > dev_txq_num) > + n_tx_queue = dev_txq_num; > printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", > nb_rx_queue, (unsigned)n_tx_queue ); > ret = rte_eth_dev_configure(portid, nb_rx_queue, > @@ -1552,6 +1637,9 @@ main(int argc, char **argv) > if (rte_lcore_is_enabled(lcore_id) == 0) > continue; > > + if (queueid >= dev_txq_num) > + continue; > + > if (numa_on) > socketid = \ > (uint8_t)rte_lcore_to_socket_id(lcore_id); > @@ -1586,8 +1674,9 @@ main(int argc, char **argv) > /* init power management library */ > ret = rte_power_init(lcore_id); > if (ret) > - rte_exit(EXIT_FAILURE, "Power management library " > - "initialization failed on core%u\n", lcore_id); > + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER, > + "Power management library initialization " > + "failed on core%u", lcore_id); > > /* init timer structures for each enabled lcore */ > rte_timer_init(&power_timers[lcore_id]); > @@ -1635,7 +1724,6 @@ main(int argc, char **argv) > if (ret < 0) > rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " > "port=%d\n", ret, portid); > - > /* > * If enabled, put device in promiscuous mode. > * This allows IO forwarding mode to forward packets > -- > 1.8.1.4