> -----Original Message----- > From: Liang, Cunming > Sent: Thursday, January 29, 2015 2:34 AM > To: Zhou, Danny; dev at dpdk.org > Subject: RE: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx > interrupt and polling/interrupt mode switch > > > > > -----Original Message----- > > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Danny Zhou > > Sent: Wednesday, January 28, 2015 2:51 AM > > To: dev at dpdk.org > > Subject: [dpdk-dev] [PATCH v1 5/5] L3fwd-power: enable one-shot rx interrupt > > and polling/interrupt mode switch > > > > Signed-off-by: Danny Zhou <danny.zhou at intel.com> > > --- > > examples/l3fwd-power/main.c | 170 > > +++++++++++++++++++++++++++++++++----------- > > 1 file changed, 129 insertions(+), 41 deletions(-) > > > > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c > > index f6b55b9..e6e4f55 100644 > > --- a/examples/l3fwd-power/main.c > > +++ b/examples/l3fwd-power/main.c > > @@ -75,12 +75,13 @@ > > #include <rte_string_fns.h> > > #include <rte_timer.h> > > #include <rte_power.h> > > +#include <rte_eal.h> > > > > #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1 > > > > #define MAX_PKT_BURST 32 > > > > -#define MIN_ZERO_POLL_COUNT 5 > > +#define MIN_ZERO_POLL_COUNT 10 > > > > /* around 100ms at 2 Ghz */ > > #define TIMER_RESOLUTION_CYCLES 200000000ULL > > @@ -188,6 +189,9 @@ struct lcore_rx_queue { > > #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS > > #define MAX_RX_QUEUE_PER_PORT 128 > > > > +#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16 > > + > > + > > #define MAX_LCORE_PARAMS 1024 > > struct lcore_params { > > uint8_t port_id; > > @@ -214,7 +218,7 @@ static uint16_t nb_lcore_params = > > sizeof(lcore_params_array_default) / > > > > static struct rte_eth_conf port_conf = { > > .rxmode = { > > - .mq_mode = ETH_MQ_RX_RSS, > > + .mq_mode = ETH_MQ_RX_RSS, > > .max_rx_pkt_len = ETHER_MAX_LEN, > > .split_hdr_size = 0, > > .header_split = 0, /**< Header Split disabled */ > > @@ -226,11 +230,14 @@ static struct 
rte_eth_conf port_conf = { > > .rx_adv_conf = { > > .rss_conf = { > > .rss_key = NULL, > > - .rss_hf = ETH_RSS_IP, > > + .rss_hf = ETH_RSS_UDP, > > }, > > }, > > .txmode = { > > - .mq_mode = ETH_DCB_NONE, > > + .mq_mode = ETH_MQ_TX_NONE, > > + }, > > + .intr_conf = { > > + .rxq = 1, /**< rxq interrupt feature enabled */ > > }, > > }; > > > > @@ -402,19 +409,22 @@ power_timer_cb(__attribute__((unused)) struct > > rte_timer *tim, > > /* accumulate total execution time in us when callback is invoked */ > > sleep_time_ratio = (float)(stats[lcore_id].sleep_time) / > > (float)SCALING_PERIOD; > > - > > /** > > * check whether need to scale down frequency a step if it sleep a lot. > > */ > > - if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) > > - rte_power_freq_down(lcore_id); > > + if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) { > > + if (rte_power_freq_down) > > + rte_power_freq_down(lcore_id); > > + } > > else if ( (unsigned)(stats[lcore_id].nb_rx_processed / > > - stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) > > + stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) { > > /** > > * scale down a step if average packet per iteration less > > * than expectation. 
> > */ > > - rte_power_freq_down(lcore_id); > > + if (rte_power_freq_down) > > + rte_power_freq_down(lcore_id); > > + } > > > > /** > > * initialize another timer according to current frequency to ensure > > @@ -707,22 +717,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t > > portid, > > > > } > > > > -#define SLEEP_GEAR1_THRESHOLD 100 > > -#define SLEEP_GEAR2_THRESHOLD 1000 > > +#define MINIMUM_SLEEP_TIME 1 > > +#define SUSPEND_THRESHOLD 300 > > > > static inline uint32_t > > power_idle_heuristic(uint32_t zero_rx_packet_count) > > { > > - /* If zero count is less than 100, use it as the sleep time in us */ > > - if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD) > > - return zero_rx_packet_count; > > - /* If zero count is less than 1000, sleep time should be 100 us */ > > - else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) && > > - (zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD)) > > - return SLEEP_GEAR1_THRESHOLD; > > - /* If zero count is greater than 1000, sleep time should be 1000 us */ > > - else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD) > > - return SLEEP_GEAR2_THRESHOLD; > > + /* If zero count is less than 100, sleep 1us */ > > + if (zero_rx_packet_count < SUSPEND_THRESHOLD) > > + return MINIMUM_SLEEP_TIME; > > + /* If zero count is less than 1000, sleep 100 us which is the minimum > > latency > > + switching from C3/C6 to C0 > > + */ > > + else > > + return SUSPEND_THRESHOLD; > > > > return 0; > > } > > @@ -762,6 +770,35 @@ power_freq_scaleup_heuristic(unsigned lcore_id, > > return FREQ_CURRENT; > > } > > > > +/** > > + * force polling thread sleep until one-shot rx interrupt triggers > > + * @param port_id > > + * Port id. > > + * @param queue_id > > + * Rx queue id. 
> > + * @return > > + * 0 on success > > + */ > > +static int > > +sleep_until_rx_interrupt(uint8_t port_id, uint8_t queue_id) > > +{ > > + /* Enable one-shot rx interrupt */ > > + rte_eth_dev_rx_queue_intr_enable(port_id, queue_id); > > + > > + RTE_LOG(INFO, L3FWD_POWER, > > + "lcore %u sleeps until interrupt on port%d,rxq%d triggers\n", > > + rte_lcore_id(), port_id, queue_id); > > + rte_eal_wait_rx_intr(port_id, queue_id); > > + RTE_LOG(INFO, L3FWD_POWER, > > + "lcore %u is waked up from rx interrupt on port%d,rxq%d\n", > > + rte_lcore_id(), port_id, queue_id); > > + > > + /* Disable one-shot rx interrupt */ > > + rte_eth_dev_rx_queue_intr_disable(port_id, queue_id); > > + > > + return 0; > > +} > > + > > /* main processing loop */ > > static int > > main_loop(__attribute__((unused)) void *dummy) > > @@ -775,7 +812,6 @@ main_loop(__attribute__((unused)) void *dummy) > > struct lcore_conf *qconf; > > struct lcore_rx_queue *rx_queue; > > enum freq_scale_hint_t lcore_scaleup_hint; > > - > > uint32_t lcore_rx_idle_count = 0; > > uint32_t lcore_idle_hint = 0; > > > > @@ -835,6 +871,8 @@ main_loop(__attribute__((unused)) void *dummy) > > prev_tsc_power = cur_tsc_power; > > } > > > > + > > +start_rx: > > /* > > * Read packet from RX queues > > */ > > @@ -848,6 +886,7 @@ main_loop(__attribute__((unused)) void *dummy) > > > > nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, > > MAX_PKT_BURST); > > + > > stats[lcore_id].nb_rx_processed += nb_rx; > > if (unlikely(nb_rx == 0)) { > > /** > > @@ -910,10 +949,13 @@ main_loop(__attribute__((unused)) void *dummy) > > rx_queue->freq_up_hint; > > } > > > > - if (lcore_scaleup_hint == FREQ_HIGHEST) > > - rte_power_freq_max(lcore_id); > > - else if (lcore_scaleup_hint == FREQ_HIGHER) > > - rte_power_freq_up(lcore_id); > > + if (lcore_scaleup_hint == FREQ_HIGHEST) { > > + if (rte_power_freq_max) > > + rte_power_freq_max(lcore_id); > > + } else if (lcore_scaleup_hint == FREQ_HIGHER) { > > + if (rte_power_freq_up) > > + 
rte_power_freq_up(lcore_id); > > + } > > } else { > > /** > > * All Rx queues empty in recent consecutive polls, > > @@ -928,21 +970,55 @@ main_loop(__attribute__((unused)) void *dummy) > > lcore_idle_hint = rx_queue->idle_hint; > > } > > > > - if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD) > > + if (lcore_idle_hint < SUSPEND_THRESHOLD) > > /** > > - * execute "pause" instruction to avoid context > > - * switch for short sleep. > > - */ > > + * execute "pause" instruction to avoid context > > + * switch which generally take hundres of > > microsecond > > + * for short sleep. > > + */ > > rte_delay_us(lcore_idle_hint); > > - else > > - /* long sleep force runing thread to suspend */ > > - usleep(lcore_idle_hint); > > - > > + else { > > + /* suspend untill rx interrupt trigges */ > > + sleep_until_rx_interrupt( > > + qconf->rx_queue_list[0].port_id, > > + qconf->rx_queue_list[0].queue_id); > > + /* start receiving packets immediately */ > > + goto start_rx; > > + } > > stats[lcore_id].sleep_time += lcore_idle_hint; > > } > > } > > } > > > > +/** > > + * It will be called as the callback for specified port after a LSI > > interrupt > > + * has been fully handled. This callback needs to be implemented carefully > > as > > + * it will be called in the interrupt host thread which is different from > > the > > + * application main thread. > > + * > > + * @param port_id > > + * Port id. > > + * @param type > > + * event type. > > + * @param param > > + * Pointer to(address of) the parameters. > > + * > > + * @return > > + * void. > > + */ > > + > > +/* > > +static void > > +rx_interrupt_event_callback(uint8_t port_id, enum rte_eth_event_type type, > > void *param) > > +{ > > + uint64_t rx_queues = *((uint64_t *)param); > > + > > + port_id = port_id + 1; > > + if(type == RTE_ETH_EVENT_INTR_RX) > > + port_id = rx_queues; > [LCM] What is this bunch of code for?
This is debug-related code, which will be removed in the V2 patch. > > > +} > > +*/ > > + > > static int > > check_lcore_params(void) > > { > > @@ -1270,7 +1346,7 @@ setup_hash(int socketid) > > char s[64]; > > > > /* create ipv4 hash */ > > - snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > > + rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid); > > ipv4_l3fwd_hash_params.name = s; > > ipv4_l3fwd_hash_params.socket_id = socketid; > > ipv4_l3fwd_lookup_struct[socketid] = > > @@ -1280,7 +1356,7 @@ setup_hash(int socketid) > > "socket %d\n", socketid); > > > > /* create ipv6 hash */ > > - snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > > + rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid); > > ipv6_l3fwd_hash_params.name = s; > > ipv6_l3fwd_hash_params.socket_id = socketid; > > ipv6_l3fwd_lookup_struct[socketid] = > > @@ -1476,6 +1552,7 @@ main(int argc, char **argv) > > unsigned lcore_id; > > uint64_t hz; > > uint32_t n_tx_queue, nb_lcores; > > + uint32_t dev_rxq_num, dev_txq_num; > > uint8_t portid, nb_rx_queue, queue, socketid; > > > > /* catch SIGINT and restore cpufreq governor to ondemand */ > > @@ -1525,10 +1602,18 @@ main(int argc, char **argv) > > printf("Initializing port %d ... ", portid ); > > fflush(stdout); > > > > + rte_eth_dev_info_get(portid, &dev_info); > > + dev_rxq_num = dev_info.max_rx_queues; > > + dev_txq_num = dev_info.max_tx_queues; > > + > > nb_rx_queue = get_port_n_rx_queues(portid); > > + if (nb_rx_queue > dev_rxq_num) > > + rte_exit(EXIT_FAILURE, "Cannot configure not existed > > rxq: " > > + "port=%d\n", portid); > > + > > n_tx_queue = nb_lcores; > > - if (n_tx_queue > MAX_TX_QUEUE_PER_PORT) > > - n_tx_queue = MAX_TX_QUEUE_PER_PORT; > > + if (n_tx_queue > dev_txq_num) > > + n_tx_queue = dev_txq_num; > > printf("Creating queues: nb_rxq=%d nb_txq=%u... 
", > > nb_rx_queue, (unsigned)n_tx_queue ); > > ret = rte_eth_dev_configure(portid, nb_rx_queue, > > @@ -1552,6 +1637,9 @@ main(int argc, char **argv) > > if (rte_lcore_is_enabled(lcore_id) == 0) > > continue; > > > > + if (queueid >= dev_txq_num) > > + continue; > > + > > if (numa_on) > > socketid = \ > > (uint8_t)rte_lcore_to_socket_id(lcore_id); > > @@ -1586,8 +1674,9 @@ main(int argc, char **argv) > > /* init power management library */ > > ret = rte_power_init(lcore_id); > > if (ret) > > - rte_exit(EXIT_FAILURE, "Power management library " > > - "initialization failed on core%u\n", lcore_id); > > + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER, > > + "Power management library initialization " > > + "failed on core%u", lcore_id); > > > > /* init timer structures for each enabled lcore */ > > rte_timer_init(&power_timers[lcore_id]); > > @@ -1635,7 +1724,6 @@ main(int argc, char **argv) > > if (ret < 0) > > rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, " > > "port=%d\n", ret, portid); > > - > > /* > > * If enabled, put device in promiscuous mode. > > * This allows IO forwarding mode to forward packets > > -- > > 1.8.1.4