On Thu, May 28, 2020 at 10:13:51AM +0100, Anatoly Burakov wrote: > In addition to existing modes, add a mode which is very similar to > legacy mode, but does not do frequency scaling, and thus does not > depend on the power library. > > Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com> > --- > examples/l3fwd-power/main.c | 215 +++++++++++++++++++++++++++++++++--- > 1 file changed, 202 insertions(+), 13 deletions(-) > > diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c > index 5cee9d5387..4161e01974 100644 > --- a/examples/l3fwd-power/main.c > +++ b/examples/l3fwd-power/main.c > @@ -195,9 +195,11 @@ static int parse_ptype; /**< Parse packet type using rx > callback, and */ > /**< disabled by default */ > > enum appmode { > - APP_MODE_LEGACY = 0, > + APP_MODE_DEFAULT = 0, > + APP_MODE_LEGACY, > APP_MODE_EMPTY_POLL, > - APP_MODE_TELEMETRY > + APP_MODE_TELEMETRY, > + APP_MODE_INTERRUPT > }; > > enum appmode app_mode; > @@ -900,6 +902,170 @@ static int event_register(struct lcore_conf *qconf) > > return 0; > } > + > +/* main processing loop */ > +static int main_intr_loop(__rte_unused void *dummy) > +{ > + struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; > + unsigned int lcore_id; > + uint64_t prev_tsc, diff_tsc, cur_tsc; > + int i, j, nb_rx; > + uint8_t queueid; > + uint16_t portid; > + struct lcore_conf *qconf; > + struct lcore_rx_queue *rx_queue; > + uint32_t lcore_rx_idle_count = 0; > + uint32_t lcore_idle_hint = 0; > + int intr_en = 0; > + > + const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / > + US_PER_S * BURST_TX_DRAIN_US; > + > + prev_tsc = 0; > + > + lcore_id = rte_lcore_id(); > + qconf = &lcore_conf[lcore_id]; > + > + if (qconf->n_rx_queue == 0) { > + RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", > + lcore_id); > + return 0; > + } > + > + RTE_LOG(INFO, L3FWD_POWER, "entering main interrupt loop on lcore %u\n", > + lcore_id); > + > + for (i = 0; i < qconf->n_rx_queue; i++) { > + portid = 
qconf->rx_queue_list[i].port_id; > + queueid = qconf->rx_queue_list[i].queue_id; > + RTE_LOG(INFO, L3FWD_POWER, > + " -- lcoreid=%u portid=%u rxqueueid=%hhu\n", > + lcore_id, portid, queueid); > + } > + > + /* add into event wait list */ > + if (event_register(qconf) == 0) > + intr_en = 1; > + else > + RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't enable.\n"); > + > + while (!is_done()) { > + stats[lcore_id].nb_iteration_looped++; > + > + cur_tsc = rte_rdtsc(); > + > + /* > + * TX burst queue drain > + */ > + diff_tsc = cur_tsc - prev_tsc; > + if (unlikely(diff_tsc > drain_tsc)) { > + for (i = 0; i < qconf->n_tx_port; ++i) { > + portid = qconf->tx_port_id[i]; > + rte_eth_tx_buffer_flush(portid, > + qconf->tx_queue_id[portid], > + qconf->tx_buffer[portid]); > + } > + prev_tsc = cur_tsc; > + } > + > +start_rx: > + /* > + * Read packet from RX queues > + */ > + lcore_rx_idle_count = 0; > + for (i = 0; i < qconf->n_rx_queue; ++i) { > + rx_queue = &(qconf->rx_queue_list[i]); > + rx_queue->idle_hint = 0; > + portid = rx_queue->port_id; > + queueid = rx_queue->queue_id; > + > + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, > + MAX_PKT_BURST); > + > + stats[lcore_id].nb_rx_processed += nb_rx; > + if (unlikely(nb_rx == 0)) { > + /** > + * no packet received from rx queue, try to > + * sleep for a while forcing CPU enter deeper > + * C states. 
> + */ > + rx_queue->zero_rx_packet_count++; > + > + if (rx_queue->zero_rx_packet_count <= > + MIN_ZERO_POLL_COUNT) > + continue; > + > + rx_queue->idle_hint = power_idle_heuristic( > + rx_queue->zero_rx_packet_count); > + lcore_rx_idle_count++; > + } else { > + rx_queue->zero_rx_packet_count = 0; > + } > + > + /* Prefetch first packets */ > + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) { > + rte_prefetch0(rte_pktmbuf_mtod( > + pkts_burst[j], void *)); > + } > + > + /* Prefetch and forward already prefetched packets */ > + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) { > + rte_prefetch0(rte_pktmbuf_mtod( > + pkts_burst[j + PREFETCH_OFFSET], > + void *)); > + l3fwd_simple_forward( > + pkts_burst[j], portid, qconf); > + } > + > + /* Forward remaining prefetched packets */ > + for (; j < nb_rx; j++) { > + l3fwd_simple_forward( > + pkts_burst[j], portid, qconf); > + } > + } > + > + if (unlikely(lcore_rx_idle_count == qconf->n_rx_queue)) { > + /** > + * All Rx queues empty in recent consecutive polls, > + * sleep in a conservative manner, meaning sleep as > + * less as possible. > + */ > + for (i = 1, > + lcore_idle_hint = qconf->rx_queue_list[0].idle_hint; > + i < qconf->n_rx_queue; ++i) { > + rx_queue = &(qconf->rx_queue_list[i]); > + if (rx_queue->idle_hint < lcore_idle_hint) > + lcore_idle_hint = rx_queue->idle_hint; > + } > + > + if (lcore_idle_hint < SUSPEND_THRESHOLD) > + /** > + * execute "pause" instruction to avoid context > + * switch which generally take hundred of > + * microseconds for short sleep. 
> + */ > + rte_delay_us(lcore_idle_hint); > + else { > + /* suspend until rx interrupt triggers */ > + if (intr_en) { > + turn_on_off_intr(qconf, 1); > + sleep_until_rx_interrupt( > + qconf->n_rx_queue); > + turn_on_off_intr(qconf, 0); > + /** > + * start receiving packets immediately > + */ > + if (likely(!is_done())) > + goto start_rx; > + } > + } > + stats[lcore_id].sleep_time += lcore_idle_hint; > + } > + } > + > + return 0; > +} > + > /* main processing loop */ > static int > main_telemetry_loop(__rte_unused void *dummy) > @@ -1126,7 +1292,7 @@ main_empty_poll_loop(__rte_unused void *dummy) > } > /* main processing loop */ > static int > -main_loop(__rte_unused void *dummy) > +main_legacy_loop(__rte_unused void *dummy) > { > struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; > unsigned lcore_id; > @@ -1438,7 +1604,8 @@ print_usage(const char *prgname) > " --empty-poll: enable empty poll detection" > " follow (training_flag, high_threshold, med_threshold)\n" > " --telemetry: enable telemetry mode, to update" > - " empty polls, full polls, and core busyness to telemetry\n", > + " empty polls, full polls, and core busyness to telemetry\n" > + " --interrupt-only: enable interrupt-only mode\n", > prgname); > } > > @@ -1582,6 +1749,7 @@ parse_ep_config(const char *q_arg) > } > #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype" > #define CMD_LINE_OPT_TELEMETRY "telemetry" > +#define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" > > /* Parse the argument given in the command line of the application */ > static int > @@ -1601,6 +1769,7 @@ parse_args(int argc, char **argv) > {"empty-poll", 1, 0, 0}, > {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0}, > {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, > + {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, > {NULL, 0, 0, 0} > }; > > @@ -1674,8 +1843,8 @@ parse_args(int argc, char **argv) > > if (!strncmp(lgopts[option_index].name, > "empty-poll", 10)) { > - if (app_mode == APP_MODE_TELEMETRY) { > - printf(" empty-poll cannot be enabled > as telemetry mode is enabled\n"); 
> + if (app_mode != APP_MODE_DEFAULT) { > + printf(" empty-poll mode is mutually > exclusive with other modes\n"); > return -1; > } > app_mode = APP_MODE_EMPTY_POLL; > @@ -1692,14 +1861,25 @@ parse_args(int argc, char **argv) > if (!strncmp(lgopts[option_index].name, > CMD_LINE_OPT_TELEMETRY, > sizeof(CMD_LINE_OPT_TELEMETRY))) { > - if (app_mode == APP_MODE_EMPTY_POLL) { > - printf("telemetry mode cannot be > enabled as empty poll mode is enabled\n"); > + if (app_mode != APP_MODE_DEFAULT) { > + printf(" telemetry mode is mutually > exclusive with other modes\n"); > return -1; > } > app_mode = APP_MODE_TELEMETRY; > printf("telemetry mode is enabled\n"); > } > > + if (!strncmp(lgopts[option_index].name, > + CMD_LINE_OPT_INTERRUPT_ONLY, > + sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { > + if (app_mode != APP_MODE_DEFAULT) { > + printf(" interrupt-only mode is > mutually exclusive with other modes\n"); > + return -1; > + } > + app_mode = APP_MODE_INTERRUPT; > + printf("interrupt-only mode is enabled\n"); > + } > + > if (!strncmp(lgopts[option_index].name, > "enable-jumbo", 12)) { > struct option lenopts = > @@ -2253,7 +2433,12 @@ main(int argc, char **argv) > if (ret < 0) > rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); > > - if (app_mode != APP_MODE_TELEMETRY && init_power_library()) > + if (app_mode == APP_MODE_DEFAULT) > + app_mode = APP_MODE_LEGACY; > + > + /* only legacy and empty poll mode rely on power library */ > + if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && > + init_power_library()) > rte_exit(EXIT_FAILURE, "init_power_library failed\n"); > Hi,
Rather than just exiting here, can we have an else condition that automatically falls back to the "interrupt only" mode? Please correct me if I am missing something. Thanks, Harman > if (update_lcore_params() < 0) > @@ -2277,7 +2462,8 @@ main(int argc, char **argv) > RTE_ETH_FOREACH_DEV(portid) { > struct rte_eth_conf local_port_conf = port_conf; > /* not all app modes need interrupts */ > - bool need_intr = app_mode == APP_MODE_LEGACY; > + bool need_intr = app_mode == APP_MODE_LEGACY || > + app_mode == APP_MODE_INTERRUPT; > > /* skip ports that are not enabled */ > if ((enabled_port_mask & (1 << portid)) == 0) { > @@ -2526,12 +2712,12 @@ main(int argc, char **argv) > > /* launch per-lcore init on every lcore */ > if (app_mode == APP_MODE_LEGACY) { > - rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER); > + rte_eal_mp_remote_launch(main_legacy_loop, NULL, CALL_MASTER); > } else if (app_mode == APP_MODE_EMPTY_POLL) { > empty_poll_stop = false; > rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, > SKIP_MASTER); > - } else { > + } else if (app_mode == APP_MODE_TELEMETRY) { > unsigned int i; > > /* Init metrics library */ > @@ -2555,6 +2741,8 @@ main(int argc, char **argv) > "Returns global power stats. Parameters: None"); > rte_eal_mp_remote_launch(main_telemetry_loop, NULL, > SKIP_MASTER); > + } else if (app_mode == APP_MODE_INTERRUPT) { > + rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MASTER); > } > > if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY) > @@ -2577,7 +2765,8 @@ main(int argc, char **argv) > if (app_mode == APP_MODE_EMPTY_POLL) > rte_power_empty_poll_stat_free(); > > - if (app_mode != APP_MODE_TELEMETRY && deinit_power_library()) > + if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && > + deinit_power_library()) > rte_exit(EXIT_FAILURE, "deinit_power_library failed\n"); > > if (rte_eal_cleanup() < 0) > -- > 2.17.1