On 6/24/2018 11:56 AM, Zhang, Qi Z wrote: > Hi Stephen: > >> -----Original Message----- >> From: Stephen Hemminger [mailto:step...@networkplumber.org] >> Sent: Friday, June 22, 2018 11:44 PM >> To: Zhang, Qi Z <qi.z.zh...@intel.com> >> Cc: Xing, Beilei <beilei.x...@intel.com>; Wu, Jingjing >> <jingjing...@intel.com>; >> Yu, De <de...@intel.com>; dev@dpdk.org >> Subject: Re: [dpdk-dev] [PATCH v2] net/i40e: remove VF interrupt handler >> >> On Fri, 22 Jun 2018 08:44:14 +0800 >> Qi Zhang <qi.z.zh...@intel.com> wrote: >> >>> For i40evf, internal rx interrupt and adminq interrupt share the same >>> source, that cause a lot cpu cycles be wasted on interrupt handler on >>> rx path. This is complained by customers which require low latency >>> (when set I40E_ITR_INTERVAL to small value), but have to be suffered >>> by tremendous interrupts handling that eat significant CPU resources. >>> >>> The patch disable pci interrupt and remove the interrupt handler, >>> replace it with a low frequency (50ms) interrupt polling daemon which >>> is implemented by registering an alarm callback periodically, this save CPU >>> time significantly: On a typical x86 server with 2.1GHz CPU, with low >>> latency configure (32us) we saw CPU usage from top command reduced >>> from 20% to 0% on management core in testpmd). >>> >>> Also with the new method we can remove compile option: >>> I40E_ITR_INTERVAL which is used to balance between low latency and low >> CPU usage previously. >>> Now we don't need it since we can reach both at same time. 
>>> >>> Suggested-by: Jingjing Wu <jingjing...@intel.com> >>> Signed-off-by: Qi Zhang <qi.z.zh...@intel.com> >>> --- >>> >>> v2: >>> - update doc >>> >>> config/common_base | 2 -- >>> doc/guides/nics/i40e.rst | 5 ----- >>> drivers/net/i40e/i40e_ethdev.c | 3 +-- >>> drivers/net/i40e/i40e_ethdev.h | 22 +++++++++++----------- >>> drivers/net/i40e/i40e_ethdev_vf.c | 36 >>> ++++++++++++++---------------------- >>> 5 files changed, 26 insertions(+), 42 deletions(-) >>> >>> diff --git a/config/common_base b/config/common_base index >>> 6b0d1cbbb..9e21c6865 100644 >>> --- a/config/common_base >>> +++ b/config/common_base >>> @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y >>> CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n >>> CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64 >>> CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4 >>> -# interval up to 8160 us, aligned to 2 (or default value) >>> -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1 >>> >>> # >>> # Compile burst-oriented FM10K PMD >>> diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst index >>> 18549bf5a..3fc4ceac7 100644 >>> --- a/doc/guides/nics/i40e.rst >>> +++ b/doc/guides/nics/i40e.rst >>> @@ -96,11 +96,6 @@ Please note that enabling debugging options may >> affect system performance. >>> >>> Number of queues reserved for each VMDQ Pool. >>> >>> -- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``) >>> - >>> - Interrupt Throttling interval. 
>>> - >>> - >>> Runtime Config Options >>> ~~~~~~~~~~~~~~~~~~~~~~ >>> >>> diff --git a/drivers/net/i40e/i40e_ethdev.c >>> b/drivers/net/i40e/i40e_ethdev.c index 13c5d3296..c8f9566e0 100644 >>> --- a/drivers/net/i40e/i40e_ethdev.c >>> +++ b/drivers/net/i40e/i40e_ethdev.c >>> @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, >> uint16_t msix_vect, >>> /* Write first RX queue to Link list register as the head element */ >>> if (vsi->type != I40E_VSI_SRIOV) { >>> uint16_t interval = >>> - i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1, >>> - pf->support_multi_driver); >>> + i40e_calc_itr_interval(1, pf->support_multi_driver); >>> >>> if (msix_vect == I40E_MISC_VEC_ID) { >>> I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git >>> a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h >>> index 11c4c76bd..599993dac 100644 >>> --- a/drivers/net/i40e/i40e_ethdev.h >>> +++ b/drivers/net/i40e/i40e_ethdev.h >>> @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx { >>> #define I40E_ITR_INDEX_NONE 3 >>> #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */ >>> #define I40E_QUEUE_ITR_INTERVAL_MAX 8160 /* 8160 us */ >>> -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */ >>> +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */ >>> /* Special FW support this floating VEB feature */ #define >>> FLOATING_VEB_SUPPORTED_FW_MAJ 5 #define >> FLOATING_VEB_SUPPORTED_FW_MIN >>> 0 @@ -1328,17 +1328,17 @@ i40e_align_floor(int n) } >>> >>> static inline uint16_t >>> -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool >>> is_multi_drv) >>> +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv) >>> { >>> - if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) { >>> - if (is_multi_drv) { >>> - interval = I40E_QUEUE_ITR_INTERVAL_MAX; >>> - } else { >>> - if (is_pf) >>> - interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT; >>> - else >>> - interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT; >>> - } >>> + uint16_t interval = 0; >>> + >>> + if 
(is_multi_drv) { >>> + interval = I40E_QUEUE_ITR_INTERVAL_MAX; >>> + } else { >>> + if (is_pf) >>> + interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT; >>> + else >>> + interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT; >>> } >>> >>> /* Convert to hardware count, as writing each 1 represents 2 us */ >>> diff --git a/drivers/net/i40e/i40e_ethdev_vf.c >>> b/drivers/net/i40e/i40e_ethdev_vf.c >>> index 804e44530..ad5c069e8 100644 >>> --- a/drivers/net/i40e/i40e_ethdev_vf.c >>> +++ b/drivers/net/i40e/i40e_ethdev_vf.c >>> @@ -44,6 +44,8 @@ >>> #define I40EVF_BUSY_WAIT_COUNT 50 >>> #define MAX_RESET_WAIT_CNT 20 >>> >>> +#define I40EVF_ALARM_INTERVAL 50000 /* us */ >>> + >>> struct i40evf_arq_msg_info { >>> enum virtchnl_ops ops; >>> enum i40e_status_code result; >>> @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev) >>> struct i40e_hw *hw = >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); >>> struct i40e_vf *vf = >> I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); >>> uint16_t interval = >>> - i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0); >>> + i40e_calc_itr_interval(0, 0); >>> >>> vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); >>> vf->dev_data = dev->data; >>> @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) >>> * void >>> */ >>> static void >>> -i40evf_dev_interrupt_handler(void *param) >>> +i40evf_dev_alarm_handler(void *param) >>> { >>> struct rte_eth_dev *dev = (struct rte_eth_dev *)param; >>> struct i40e_hw *hw = >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); >>> @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param) >>> >>> done: >>> i40evf_enable_irq0(hw); >>> + rte_eal_alarm_set(I40EVF_ALARM_INTERVAL, >>> + i40evf_dev_alarm_handler, dev); >>> } >>> >>> static int >>> @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev) >>> return -1; >>> } >>> >>> - /* register callback func to eal lib */ >>> - rte_intr_callback_register(&pci_dev->intr_handle, >>> - i40evf_dev_interrupt_handler, 
(void *)eth_dev); >>> - >>> - /* enable uio intr after callback register */ >>> - rte_intr_enable(&pci_dev->intr_handle); >>> + rte_eal_alarm_set(I40EVF_ALARM_INTERVAL, >>> + i40evf_dev_alarm_handler, eth_dev); >>> >>> /* configure and enable device interrupt */ >>> i40evf_enable_irq0(hw); >>> @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct >> rte_eth_dev *dev, uint16_t queue_id) >>> struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; >>> struct i40e_hw *hw = >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); >>> uint16_t interval = >>> - i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0); >>> + i40e_calc_itr_interval(0, 0); >>> uint16_t msix_intr; >>> >>> msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6 @@ >>> i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t >>> queue_id) >>> >>> I40EVF_WRITE_FLUSH(hw); >>> >>> - rte_intr_enable(&pci_dev->intr_handle); >>> - >>> return 0; >>> } >>> >>> @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev) >>> * queue interrupt to other VFIO vectors. >>> * So clear uio/vfio intr/evevnfd first to avoid failure. 
>>> */ >>> - if (dev->data->dev_conf.intr_conf.rxq != 0) { >>> - rte_intr_disable(intr_handle); >>> + if (dev->data->dev_conf.intr_conf.rxq != 0) >>> rte_intr_enable(intr_handle); >>> - } >>> >>> i40evf_enable_queues_intr(dev); >>> >>> @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev) >>> >>> PMD_INIT_FUNC_TRACE(); >>> >>> + if (dev->data->dev_conf.intr_conf.rxq != 0) >>> + rte_intr_disable(intr_handle); >>> + >>> if (hw->adapter_stopped == 1) >>> return; >>> i40evf_stop_queues(dev); >>> @@ -2285,9 +2284,8 @@ static void >>> i40evf_dev_close(struct rte_eth_dev *dev) { >>> struct i40e_hw *hw = >> I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); >>> - struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); >>> - struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; >>> >>> + rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev); >>> i40evf_dev_stop(dev); >>> i40e_dev_free_queues(dev); >>> /* >>> @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev) >>> >>> i40evf_reset_vf(hw); >>> i40e_shutdown_adminq(hw); >>> - /* disable uio intr before callback unregister */ >>> - rte_intr_disable(intr_handle); >>> - >>> - /* unregister callback func from eal lib */ >>> - rte_intr_callback_unregister(intr_handle, >>> - i40evf_dev_interrupt_handler, dev); >>> i40evf_disable_irq0(hw); >>> } >>> >> >> Rather than adding a polling routine internally, why not change the driver to >> not support Link State or receive interrupts. Better yet, let the application >> decide. >> Keep the interrupt logic but only enable interrupts if application has >> requested >> LSC or recveive interrupt mode. > > The interrupt handler is not only for LSC (actually VF does not support LSC) > or rx interrupt mode, > it is used for PF to VF message through admin queue which is always required.
I guess the question is: is it possible to disable Rx interrupts? And if so, can the user enable/disable each interrupt source individually?