New appctl command to perform manual pinning of RX queues to desired cores.
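
As an illustration (the port name 'dpdk0' and the queue and core numbers
below are hypothetical and depend on the actual setup), a queue can be
pinned and the new configuration applied as follows:

    # Pin rx queue 0 of port 'dpdk0' to core 3.
    ovs-appctl dpif-netdev/pmd-rxq-set dpdk0 0 3

    # Reconfigure to apply the pinning.  The pmd thread on core 3
    # becomes 'isolated' and polls only pinned rx queues.
    ovs-appctl dpif-netdev/pmd-reconfigure

    # Check the resulting assignment.
    ovs-appctl dpif-netdev/pmd-rxq-show

    # Unpin the queue again.
    ovs-appctl dpif-netdev/pmd-rxq-set dpdk0 0 -1

With the queue pinned, the 'pmd-rxq-show' output for the isolated thread
should look roughly like this (format taken from pmd_info_show_rxq()):

    pmd thread numa_id 0 core_id 3:
    isolated : true
            port: dpdk0
                    queue-id: 0     pinned = true   core_id: 3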
Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
---
 INSTALL.DPDK.md            |  24 +++++-
 NEWS                       |   2 +
 lib/dpif-netdev.c          | 199 ++++++++++++++++++++++++++++++++++++---------
 vswitchd/ovs-vswitchd.8.in |   7 ++
 4 files changed, 192 insertions(+), 40 deletions(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index bb14bb5..6e727c7 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -337,7 +337,29 @@ Performance Tuning:
 
        `ovs-appctl dpif-netdev/pmd-rxq-show`
 
-  This can also be checked with:
+  To change the default rxq assignment to pmd threads, rxqs may be manually
+  pinned to desired cores using:
+
+       `ovs-appctl dpif-netdev/pmd-rxq-set [dp] <port> <rx_queue_id> <core_id>`
+
+  Reconfiguration is required to apply the new `pmd-rxq-set` settings:
+
+       `ovs-appctl dpif-netdev/pmd-reconfigure`
+
+  After that, the PMD thread on core `core_id` will become `isolated`.  This
+  means that this thread will poll only pinned RX queues.
+
+  WARNING: If there are no `non-isolated` PMD threads, `non-pinned` RX queues
+  will not be polled.  Also, if the provided `core_id` is non-negative but not
+  available (e.g. this `core_id` is not in `pmd-cpu-mask`), the RX queue will
+  not be polled by any pmd thread.
+
+  Isolation of PMD threads and pinning settings can also be checked using the
+  `ovs-appctl dpif-netdev/pmd-rxq-show` command.
+
+  To unpin an RX queue, use the same command with `core_id` equal to `-1`.
+
+  The affinity mask of a pmd thread can be checked with:
 
        ```
        top -H
diff --git a/NEWS b/NEWS
index 817cba1..8fedeb7 100644
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,8 @@ Post-v2.5.0
    - DPDK:
      * New option "n_rxq" for PMD interfaces.
        Old 'other_config:n-dpdk-rxqs' is no longer supported.
+     * New appctl command 'dpif-netdev/pmd-rxq-set' to perform manual
+       pinning of RX queues to desired cores.
      * New appctl command 'dpif-netdev/pmd-rxq-show' to check the port/rxq
        assignment.
      * New appctl command 'dpif-netdev/pmd-reconfigure' to force
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5ad6845..c1338f4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -250,6 +250,13 @@ enum pmd_cycles_counter_type {
 
 #define XPS_CYCLES 1000000000ULL
 
+/* Contained by struct dp_netdev_port's 'rxqs' member. */
+struct dp_netdev_rxq {
+    struct netdev_rxq *rxq;
+    unsigned core_id;       /* Core to which this queue is pinned. */
+    bool pinned;            /* True if this rxq is pinned to some core. */
+};
+
 /* A port in a netdev-based datapath. */
 struct dp_netdev_port {
     odp_port_t port_no;
@@ -257,7 +264,7 @@ struct dp_netdev_port {
     struct hmap_node node;      /* Node in dp_netdev's 'ports'. */
     struct netdev_saved_flags *sf;
     unsigned n_rxq;             /* Number of elements in 'rxq' */
-    struct netdev_rxq **rxq;
+    struct dp_netdev_rxq *rxqs;
     unsigned *txq_used;         /* Number of threads that uses each tx queue. */
     char *type;                 /* Port type as requested by user. */
 };
@@ -447,6 +454,7 @@ struct dp_netdev_pmd_thread {
     pthread_t thread;
     unsigned core_id;           /* CPU core id of this pmd thread. */
     int numa_id;                /* numa node id of this pmd thread. */
+    bool isolated;
 
     struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
     /* List of rx queues to poll. */
@@ -722,21 +730,35 @@ pmd_info_show_rxq(struct ds *reply, struct dp_netdev_pmd_thread *pmd)
         struct rxq_poll *poll;
         const char *prev_name = NULL;
 
-        ds_put_format(reply, "pmd thread numa_id %d core_id %u:\n",
-                      pmd->numa_id, pmd->core_id);
+        ds_put_format(reply,
+                      "pmd thread numa_id %d core_id %u:\nisolated : %s\n",
+                      pmd->numa_id, pmd->core_id, (pmd->isolated)
+                                                  ? "true" : "false");
"true" : "false"); ovs_mutex_lock(&pmd->port_mutex); LIST_FOR_EACH (poll, node, &pmd->poll_list) { const char *name = netdev_get_name(poll->port->netdev); + struct dp_netdev_rxq *rxq; + int rx_qid; if (!prev_name || strcmp(name, prev_name)) { if (prev_name) { ds_put_cstr(reply, "\n"); } - ds_put_format(reply, "\tport: %s\tqueue-id:", + ds_put_format(reply, "\tport: %s\n", netdev_get_name(poll->port->netdev)); } - ds_put_format(reply, " %d", netdev_rxq_get_queue_id(poll->rx)); + + rx_qid = netdev_rxq_get_queue_id(poll->rx); + rxq = &poll->port->rxqs[rx_qid]; + + ds_put_format(reply, "\t\tqueue-id: %d\tpinned = %s", + rx_qid, (rxq->pinned) ? "true" : "false"); + if (rxq->pinned) { + ds_put_format(reply, "\tcore_id: %u\n", rxq->core_id); + } else { + ds_put_cstr(reply, "\n"); + } prev_name = name; } ovs_mutex_unlock(&pmd->port_mutex); @@ -800,6 +822,74 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[], } static void +dpif_netdev_pmd_rxq_set(struct unixctl_conn *conn, int argc, + const char *argv[], void *aux OVS_UNUSED) +{ + struct ds reply = DS_EMPTY_INITIALIZER; + struct dp_netdev_port *port; + struct dp_netdev *dp = NULL; + int k = 0; + int core_id, rx_qid; + + if (argc < 4) { + unixctl_command_reply_error(conn, "Invalid argument"); + return; + } + + ovs_mutex_lock(&dp_netdev_mutex); + + if (argc == 5) { + dp = shash_find_data(&dp_netdevs, argv[1]); + k = 2; + } else if (shash_count(&dp_netdevs) == 1) { + /* There's only one datapath */ + dp = shash_first(&dp_netdevs)->data; + k = 1; + } + + if (!dp) { + unixctl_command_reply_error(conn, + "please specify an existing datapath"); + goto exit_dp; + } + + ovs_mutex_lock(&dp->port_mutex); + if (get_port_by_name(dp, argv[k], &port)) { + unixctl_command_reply_error(conn, "Unknown port"); + goto exit; + } + + if (!netdev_is_pmd(port->netdev)) { + unixctl_command_reply_error(conn, "Not a PMD port."); + goto exit; + } + + rx_qid = strtol(argv[k + 1], NULL, 10); + if (rx_qid < 0 || rx_qid >= port->n_rxq) { + unixctl_command_reply_error(conn, "Bad rx queue id."); + goto exit; + } + + core_id = strtol(argv[k + 2], NULL, 10); + if (core_id < 0) { + port->rxqs[rx_qid].pinned = false; + unixctl_command_reply(conn, "Unpinned."); + goto exit; + } + + port->rxqs[rx_qid].pinned = true; + port->rxqs[rx_qid].core_id = core_id; + + unixctl_command_reply(conn, ds_cstr(&reply)); + ds_destroy(&reply); + +exit: + ovs_mutex_unlock(&dp->port_mutex); +exit_dp: + ovs_mutex_unlock(&dp_netdev_mutex); +} + +static void dpif_netdev_pmd_reconfigure(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { @@ -852,6 +942,9 @@ dpif_netdev_init(void) unixctl_command_register("dpif-netdev/pmd-rxq-show", "[dp]", 0, 1, dpif_netdev_pmd_info, (void *)&poll_aux); + unixctl_command_register("dpif-netdev/pmd-rxq-set", + "[dp] port queue-id core-id", + 3, 4, dpif_netdev_pmd_rxq_set, NULL); unixctl_command_register("dpif-netdev/pmd-reconfigure", "[dp]", 0, 1, dpif_netdev_pmd_reconfigure, NULL); return 0; @@ -1208,17 +1301,18 @@ port_create(const char *devname, const char *open_type, const char *type, port->port_no = port_no; port->netdev = netdev; port->n_rxq = netdev_n_rxq(netdev); - port->rxq = xcalloc(port->n_rxq, sizeof *port->rxq); + port->rxqs = xcalloc(port->n_rxq, sizeof *port->rxqs); port->txq_used = xcalloc(netdev_n_txq(netdev), sizeof *port->txq_used); port->type = xstrdup(type); for (i = 0; i < port->n_rxq; i++) { - error = netdev_rxq_open(netdev, &port->rxq[i], i); + error = netdev_rxq_open(netdev, &port->rxqs[i].rxq, i); 
         if (error) {
             VLOG_ERR("%s: cannot receive packets on this network device (%s)",
                      devname, ovs_strerror(errno));
             goto out_rxq_close;
         }
+        port->rxqs[i].pinned = false;
         n_open_rxqs++;
     }
 
@@ -1234,11 +1328,11 @@ port_create(const char *devname, const char *open_type, const char *type,
 
 out_rxq_close:
     for (i = 0; i < n_open_rxqs; i++) {
-        netdev_rxq_close(port->rxq[i]);
+        netdev_rxq_close(port->rxqs[i].rxq);
     }
     free(port->type);
     free(port->txq_used);
-    free(port->rxq);
+    free(port->rxqs);
     free(port);
 
 out:
@@ -1375,11 +1469,11 @@ port_destroy(struct dp_netdev_port *port)
     netdev_restore_flags(port->sf);
 
     for (unsigned i = 0; i < port->n_rxq; i++) {
-        netdev_rxq_close(port->rxq[i]);
+        netdev_rxq_close(port->rxqs[i].rxq);
     }
 
     free(port->txq_used);
-    free(port->rxq);
+    free(port->rxqs);
     free(port->type);
     free(port);
 }
@@ -2622,8 +2716,8 @@ port_reconfigure(struct dp_netdev_port *port)
 
     /* Closes the existing 'rxq's. */
     for (i = 0; i < port->n_rxq; i++) {
-        netdev_rxq_close(port->rxq[i]);
-        port->rxq[i] = NULL;
+        netdev_rxq_close(port->rxqs[i].rxq);
+        port->rxqs[i].rxq = NULL;
     }
     port->n_rxq = 0;
 
@@ -2635,13 +2729,14 @@ port_reconfigure(struct dp_netdev_port *port)
         return err;
     }
     /* If the netdev_reconfigure() above succeeds, reopens the 'rxq's. */
-    port->rxq = xrealloc(port->rxq, sizeof *port->rxq * netdev_n_rxq(netdev));
+    port->rxqs = xrealloc(port->rxqs,
+                          sizeof *port->rxqs * netdev_n_rxq(netdev));
     /* Realloc 'used' counters for tx queues. */
     free(port->txq_used);
     port->txq_used = xcalloc(netdev_n_txq(netdev), sizeof *port->txq_used);
 
     for (i = 0; i < netdev_n_rxq(netdev); i++) {
-        err = netdev_rxq_open(netdev, &port->rxq[i], i);
+        err = netdev_rxq_open(netdev, &port->rxqs[i].rxq, i);
         if (err) {
             return err;
         }
@@ -2715,7 +2810,7 @@ dpif_netdev_run(struct dpif *dpif)
                 int i;
 
                 for (i = 0; i < port->n_rxq; i++) {
-                    dp_netdev_process_rxq_port(non_pmd, port, port->rxq[i]);
+                    dp_netdev_process_rxq_port(non_pmd, port, port->rxqs[i].rxq);
                 }
             }
         }
@@ -2754,7 +2849,7 @@ dpif_netdev_wait(struct dpif *dpif)
             int i;
 
             for (i = 0; i < port->n_rxq; i++) {
-                netdev_rxq_wait(port->rxq[i]);
+                netdev_rxq_wait(port->rxqs[i].rxq);
             }
         }
     }
@@ -3229,9 +3324,9 @@ dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
 }
 
 
-/* Returns PMD thread from this numa node with fewer rx queues to poll.
- * Returns NULL if there is no PMD threads on this numa node.
- * Can be called safely only by main thread. */
+/* Returns the non-isolated PMD thread on this numa node with the fewest
+ * rx queues to poll.  Returns NULL if there are no non-isolated PMD threads
+ * on this numa node.  Can be called safely only by main thread. */
 static struct dp_netdev_pmd_thread *
 dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id)
 {
@@ -3239,7 +3334,7 @@ dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id)
     struct dp_netdev_pmd_thread *pmd, *res = NULL;
 
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
-        if (pmd->numa_id == numa_id
+        if (!pmd->isolated && pmd->numa_id == numa_id
             && (min_cnt > pmd->poll_cnt || res == NULL)) {
             min_cnt = pmd->poll_cnt;
             res = pmd;
@@ -3281,14 +3376,16 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
     ovs_mutex_unlock(&pmd->port_mutex);
 }
 
-/* Distribute all rx queues of 'port' between PMD threads in 'dp'. The pmd
- * threads that need to be restarted are inserted in 'to_reload'. */
+/* Distributes all {pinned|non-pinned} rx queues of 'port' between PMD
+ * threads in 'dp'.  The pmd threads that need to be restarted are inserted
+ * in 'to_reload'.  PMD threads with pinned queues are marked as isolated. */
 static void
 dp_netdev_add_port_rx_to_pmds(struct dp_netdev *dp,
                               struct dp_netdev_port *port,
-                              struct hmapx *to_reload)
+                              struct hmapx *to_reload, bool pinned)
 {
     int numa_id = netdev_get_numa_id(port->netdev);
+    struct dp_netdev_pmd_thread *pmd;
     int i;
 
     if (!netdev_is_pmd(port->netdev)) {
         return;
@@ -3296,32 +3393,49 @@ dp_netdev_add_port_rx_to_pmds(struct dp_netdev *dp,
     }
 
     for (i = 0; i < port->n_rxq; i++) {
-        struct dp_netdev_pmd_thread *pmd;
-
-        pmd = dp_netdev_less_loaded_pmd_on_numa(dp, numa_id);
-        if (!pmd) {
-            VLOG_WARN("There's no pmd thread on numa node %d", numa_id);
-            break;
+        if (pinned) {
+            if (!port->rxqs[i].pinned) {
+                continue;
+            }
+            pmd = dp_netdev_get_pmd(dp, port->rxqs[i].core_id);
+            if (!pmd) {
+                VLOG_WARN("There is no PMD thread on core %d. "
+                          "Queue %d on port \'%s\' will not be polled.",
+                          port->rxqs[i].core_id, i,
+                          netdev_get_name(port->netdev));
+                continue;
+            }
+            pmd->isolated = true;
+            dp_netdev_pmd_unref(pmd);
+        } else {
+            if (port->rxqs[i].pinned) {
+                continue;
+            }
+            pmd = dp_netdev_less_loaded_pmd_on_numa(dp, numa_id);
+            if (!pmd) {
+                VLOG_WARN("There's no available pmd thread on numa node %d",
+                          numa_id);
+                break;
+            }
         }
-
         ovs_mutex_lock(&pmd->port_mutex);
-        dp_netdev_add_rxq_to_pmd(pmd, port, port->rxq[i]);
+        dp_netdev_add_rxq_to_pmd(pmd, port, port->rxqs[i].rxq);
         ovs_mutex_unlock(&pmd->port_mutex);
 
         hmapx_add(to_reload, pmd);
     }
 }
 
-/* Distributes all rx queues of 'port' between all PMD threads in 'dp' and
- * inserts 'port' in the PMD threads 'tx_ports'. The pmd threads that need to
- * be restarted are inserted in 'to_reload'. */
+/* Distributes all non-pinned rx queues of 'port' between all PMD threads
+ * in 'dp' and inserts 'port' in the PMD threads 'tx_ports'.  The pmd threads
+ * that need to be restarted are inserted in 'to_reload'. */
 static void
 dp_netdev_add_port_to_pmds__(struct dp_netdev *dp, struct dp_netdev_port *port,
                              struct hmapx *to_reload)
 {
     struct dp_netdev_pmd_thread *pmd;
 
-    dp_netdev_add_port_rx_to_pmds(dp, port, to_reload);
+    dp_netdev_add_port_rx_to_pmds(dp, port, to_reload, false);
 
     CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
         dp_netdev_add_port_tx_to_pmd(pmd, port);
@@ -3329,8 +3443,9 @@ dp_netdev_add_port_to_pmds__(struct dp_netdev *dp, struct dp_netdev_port *port,
     }
 }
 
-/* Distributes all rx queues of 'port' between all PMD threads in 'dp', inserts
- * 'port' in the PMD threads 'tx_ports' and reloads them, if needed. */
+/* Distributes all non-pinned rx queues of 'port' between all PMD threads
+ * in 'dp', inserts 'port' in the PMD threads 'tx_ports' and reloads them,
+ * if needed. */
 static void
 dp_netdev_add_port_to_pmds(struct dp_netdev *dp, struct dp_netdev_port *port)
 {
@@ -3415,7 +3530,13 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
             dp_netdev_set_pmds_on_numa(dp, numa_id);
         }
 
-        dp_netdev_add_port_rx_to_pmds(dp, port, &to_reload);
+        /* Distribute only pinned rx queues first to mark threads as isolated. */
+        dp_netdev_add_port_rx_to_pmds(dp, port, &to_reload, true);
+    }
+
+    /* Distribute remaining non-pinned rx queues to non-isolated PMD threads. */
+    HMAP_FOR_EACH (port, node, &dp->ports) {
+        dp_netdev_add_port_rx_to_pmds(dp, port, &to_reload, false);
     }
 
     HMAPX_FOR_EACH (node, &to_reload) {
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index b181918..9855f2a 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -259,6 +259,13 @@ measuring infrastructure.
 Resets to zero the per pmd thread performance numbers shown by the
 \fBdpif-netdev/pmd-stats-show\fR command.
 It will NOT reset datapath or bridge statistics, only the values shown by the above command.
+.IP "\fBdpif-netdev/pmd-rxq-set\fR [\fIdp\fR] \fIport\fR \fIqueue-id\fR \fIcore-id\fR"
+Performs manual pinning of the RX queue \fIqueue-id\fR of \fIport\fR to the
+desired core.  The PMD thread on \fIcore-id\fR becomes ``isolated'': it polls
+only pinned RX queues.  If there are no ``non-isolated'' PMD threads,
+non-pinned RX queues will not be polled.  If \fIcore-id\fR is non-negative
+but not in \fIpmd-cpu-mask\fR, the RX queue will not be polled by any pmd
+thread.  To unpin an RX queue, use the same command with \fIcore-id\fR = -1.
 .IP "\fBdpif-netdev/pmd-rxq-show\fR [\fIdp\fR]"
 For each pmd thread of the datapath \fIdp\fR shows list of queue-ids with
 port names, which this thread polls.
-- 
2.5.0