On 04 Sep 11:33, Ananyev, Konstantin wrote:
<snip> > > +struct rte_eth_dev *dev = &rte_eth_devices[port_id]; > > + > > +if (unlikely(nb_rx == 0)) { > > +dev->empty_poll_stats[qidx].num++; > Hi Konstantin, Agree, v4 will relocate the meta data to seperate structure. and without touch the rte_ethdev structure. > I believe there are two fundamental issues with that approach: > 1. You put metadata specific lib (power) callbacks into rte_eth_dev struct. > 2. These callbacks do access rte_eth_devices[] directly. > That doesn't look right to me - rte_eth_dev structure supposed to be treated > as internal one librt_ether and underlying drivers and should be accessed > directly > by outer code. > If these callbacks need some extra metadata, then it is responsibility > of power library to allocate/manage these metadata. > You can pass pointer to this metadata via last parameter for > rte_eth_add_rx_callback(). > > > +if (unlikely(dev->empty_poll_stats[qidx].num > > > + ETH_EMPTYPOLL_MAX)) { > > +volatile void *target_addr; > > +uint64_t expected, mask; > > +uint16_t ret; > > + > > +/* > > + * get address of next descriptor in the RX > > + * ring for this queue, as well as expected > > + * value and a mask. > > + */ > > +ret = (*dev->dev_ops->next_rx_desc) > > +(dev->data->rx_queues[qidx], > > + &target_addr, &expected, &mask); > > +if (ret == 0) > > +/* -1ULL is maximum value for TSC */ > > +rte_power_monitor(target_addr, > > + expected, mask, > > + 0, -1ULL); > > +} > > +} else > > +dev->empty_poll_stats[qidx].num = 0; > > + > > +return nb_rx; > > +} > > + > > +static uint16_t > > +rte_power_mgmt_pause(uint16_t port_id, uint16_t qidx, > > +struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx, > > +uint16_t max_pkts __rte_unused, void *_ __rte_unused) > > +{ > > +struct rte_eth_dev *dev = &rte_eth_devices[port_id]; > > + > > +int i; > > + > > +if (unlikely(nb_rx == 0)) { > > + > > +dev->empty_poll_stats[qidx].num++; > > + > > +if (unlikely(dev->empty_poll_stats[qidx].num > > > + ETH_EMPTYPOLL_MAX)) { > > + > > +for (i = 0; i < RTE_ETH_PAUSE_NUM; i++) > > +rte_pause(); > > + > > +} > > +} else > > +dev->empty_poll_stats[qidx].num = 0; > > + > > +return nb_rx; > > +} > > + > > +static uint16_t > > +rte_power_mgmt_scalefreq(uint16_t port_id, uint16_t qidx, > > +struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx, > > +uint16_t max_pkts __rte_unused, void *_ __rte_unused) > > +{ > > +struct rte_eth_dev *dev = &rte_eth_devices[port_id]; > > + > > +if (unlikely(nb_rx == 0)) { > > +dev->empty_poll_stats[qidx].num++; > > +if (unlikely(dev->empty_poll_stats[qidx].num > > > + ETH_EMPTYPOLL_MAX)) { > > + > > +/*scale down freq */ > > +rte_power_freq_min(rte_lcore_id()); > > + > > +} > > +} else { > > +dev->empty_poll_stats[qidx].num = 0; > > +/* scal up freq */ > > +rte_power_freq_max(rte_lcore_id()); > > +} > > + > > +return nb_rx; > > +} > > + > > +int > > +rte_power_pmd_mgmt_enable(unsigned int lcore_id, > > +uint16_t port_id, > > +enum rte_eth_dev_power_mgmt_cb_mode mode) > > +{ > > +struct rte_eth_dev *dev; > > + > > +RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); > > +dev = &rte_eth_devices[port_id]; > > + > > +if (dev->pwr_mgmt_state == RTE_ETH_DEV_POWER_MGMT_ENABLED) > > +return -EINVAL; > > +/* allocate memory for empty poll stats */ > > +dev->empty_poll_stats = rte_malloc_socket(NULL, > > + sizeof(struct rte_eth_ep_stat) > > + * RTE_MAX_QUEUES_PER_PORT, > > + 0, dev->data->numa_node); > > +if (dev->empty_poll_stats == NULL) > > +return -ENOMEM; > > + > > +switch (mode) { > > +case RTE_ETH_DEV_POWER_MGMT_CB_WAIT: > > +if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG)) > > +return -ENOTSUP; > > Here and in other places: in case of error return you don't' free your > empty_poll_stats. > > > +dev->cur_pwr_cb = rte_eth_add_rx_callback(port_id, 0, > > Why zero for queue number, why not to pass queue_id as a parameter for that > function? v4 will move to use queue_id instead of 0. v3 still assume only queue 0 is used. > > > +rte_power_mgmt_umwait, NULL); > > As I said above, instead of NULL - could be pointer to metadata struct. v4 will address this. > > > +break; > > +case RTE_ETH_DEV_POWER_MGMT_CB_SCALE: > > +/* init scale freq */ > > +if (rte_power_init(lcore_id)) > > +return -EINVAL; > > +dev->cur_pwr_cb = rte_eth_add_rx_callback(port_id, 0, > > +rte_power_mgmt_scalefreq, NULL); > > +break; > > +case RTE_ETH_DEV_POWER_MGMT_CB_PAUSE: > > +dev->cur_pwr_cb = rte_eth_add_rx_callback(port_id, 0, > > +rte_power_mgmt_pause, NULL); > > +break; > > +} > > + > > +dev->cb_mode = mode; > > +dev->pwr_mgmt_state = RTE_ETH_DEV_POWER_MGMT_ENABLED; > > +return 0; > > +} > > + > > +int > > +rte_power_pmd_mgmt_disable(unsigned int lcore_id, > > +uint16_t port_id) > > +{ > > +struct rte_eth_dev *dev; > > + > > +RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); > > +dev = &rte_eth_devices[port_id]; > > + > > +/*add flag check */ > > + > > +if (dev->pwr_mgmt_state == RTE_ETH_DEV_POWER_MGMT_DISABLED) > > +return -EINVAL; > > + > > +/* rte_free ignores NULL so safe to call without checks */ > > +rte_free(dev->empty_poll_stats); > > You can't free callback metadata before removing the callback itself. > In fact, with current rx callback code it is not safe to free it > even after (we discussed it offline). agree. > > > + > > +switch (dev->cb_mode) { > > +case RTE_ETH_DEV_POWER_MGMT_CB_WAIT: > > +case RTE_ETH_DEV_POWER_MGMT_CB_PAUSE: > > +rte_eth_remove_rx_callback(port_id, 0, > > + dev->cur_pwr_cb); > > +break; > > +case RTE_ETH_DEV_POWER_MGMT_CB_SCALE: > > +rte_power_freq_max(lcore_id); > > Stupid q: what makes you think that lcore frequency was max, > *before* you setup the callback? that is because the rte_power_init() has figured out the system max. the init code invocate rte_power_init() already. > > > +rte_eth_remove_rx_callback(port_id, 0, > > + dev->cur_pwr_cb); > > +if (rte_power_exit(lcore_id)) > > +return -EINVAL; > > +break; > > +} > > + > > +dev->pwr_mgmt_state = RTE_ETH_DEV_POWER_MGMT_DISABLED; > > +dev->cur_pwr_cb = NULL; > > +dev->cb_mode = 0; > > + > > +return 0; > > +} > > diff --git a/lib/librte_power/rte_power_version.map > > b/lib/librte_power/rte_power_version.map > > index 00ee5753e2..ade83cfd4f 100644 > > --- a/lib/librte_power/rte_power_version.map > > +++ b/lib/librte_power/rte_power_version.map > > @@ -34,4 +34,8 @@ EXPERIMENTAL { > > rte_power_guest_channel_receive_msg; > > rte_power_poll_stat_fetch; > > rte_power_poll_stat_update; > > +# added in 20.08 > > +rte_power_pmd_mgmt_disable; > > +rte_power_pmd_mgmt_enable; > > + > > }; > > -- > > 2.17.1 >