Hi, Nambiar

On 2017/10/17 0:03, Nambiar, Amritha wrote:
> On 10/16/2017 1:53 AM, Yunsheng Lin wrote:
>> Hi, Jeff
>>
>> On 2017/10/14 5:52, Jeff Kirsher wrote:
>>> From: Amritha Nambiar <amritha.namb...@intel.com>
>>>
>>> The i40e driver is modified to enable the new mqprio hardware
>>> offload mode and factor the TCs and queue configuration by
>>> creating channel VSIs. In this mode, the priority to traffic
>>> class mapping and the user specified queue ranges are used
>>> to configure the traffic classes by setting the mode option to
>>> 'channel'.
>>>
>>> Example:
>>>   map 0 0 0 0 1 2 2 3 queues 2@0 2@2 1@4 1@5\
>>>   hw 1 mode channel
>>>
>>> qdisc mqprio 8038: root  tc 4 map 0 0 0 0 1 2 2 3 0 0 0 0 0 0 0 0
>>>              queues:(0:1) (2:3) (4:4) (5:5)
>>>              mode:channel
>>>              shaper:dcb
>>>
>>> The HW channels created are removed and all the queue configuration
>>> is set to default when the qdisc is detached from the root of the
>>> device.
>>>
>>> This patch also disables setting up channels via ethtool (ethtool -L)
>>> when the TCs are configured using mqprio scheduler.
>>>
>>> The patch also limits setting ethtool Rx flow hash indirection
>>> (ethtool -X eth0 equal N) to max queues configured via mqprio.
>>> The Rx flow hash indirection input through ethtool should be
>>> validated so that it is within in the queue range configured via
>>> tc/mqprio. The bound checking is achieved by reporting the current
>>> rss size to the kernel when queues are configured via mqprio.
>>>
>>> Example:
>>>   map 0 0 0 1 0 2 3 0 queues 2@0 4@2 8@6 11@14\
>>>   hw 1 mode channel
>>>
>>> Cannot set RX flow hash configuration: Invalid argument
>>>
>>> Signed-off-by: Amritha Nambiar <amritha.namb...@intel.com>
>>> Tested-by: Andrew Bowers <andrewx.bow...@intel.com>
>>> Signed-off-by: Jeff Kirsher <jeffrey.t.kirs...@intel.com>
>>> ---
>>>  drivers/net/ethernet/intel/i40e/i40e.h         |   3 +
>>>  drivers/net/ethernet/intel/i40e/i40e_ethtool.c |   8 +-
>>>  drivers/net/ethernet/intel/i40e/i40e_main.c    | 457 
>>> +++++++++++++++++++------
>>>  3 files changed, 362 insertions(+), 106 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h 
>>> b/drivers/net/ethernet/intel/i40e/i40e.h
>>> index bde982541772..024c88474951 100644
>>> --- a/drivers/net/ethernet/intel/i40e/i40e.h
>>> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
>>> @@ -54,6 +54,7 @@
>>>  #include <linux/clocksource.h>
>>>  #include <linux/net_tstamp.h>
>>>  #include <linux/ptp_clock_kernel.h>
>>> +#include <net/pkt_cls.h>
>>>  #include "i40e_type.h"
>>>  #include "i40e_prototype.h"
>>>  #include "i40e_client.h"
>>> @@ -700,6 +701,7 @@ struct i40e_vsi {
>>>     enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
>>>     s16 vf_id;              /* Virtual function ID for SRIOV VSIs */
>>>  
>>> +   struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
>>>     struct i40e_tc_configuration tc_config;
>>>     struct i40e_aqc_vsi_properties_data info;
>>>  
>>> @@ -725,6 +727,7 @@ struct i40e_vsi {
>>>     u16 cnt_q_avail;        /* num of queues available for channel usage */
>>>     u16 orig_rss_size;
>>>     u16 current_rss_size;
>>> +   bool reconfig_rss;
>>>  
>>>     u16 next_base_queue;    /* next queue to be used for channel setup */
>>>  
>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c 
>>> b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
>>> index afd3ca8d9851..72d5f2cdf419 100644
>>> --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
>>> @@ -2652,7 +2652,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, 
>>> struct ethtool_rxnfc *cmd,
>>>  
>>>     switch (cmd->cmd) {
>>>     case ETHTOOL_GRXRINGS:
>>> -           cmd->data = vsi->num_queue_pairs;
>>> +           cmd->data = vsi->rss_size;
>>>             ret = 0;
>>>             break;
>>>     case ETHTOOL_GRXFH:
>>> @@ -3897,6 +3897,12 @@ static int i40e_set_channels(struct net_device *dev,
>>>     if (vsi->type != I40E_VSI_MAIN)
>>>             return -EINVAL;
>>>  
>>> +   /* We do not support setting channels via ethtool when TCs are
>>> +    * configured through mqprio
>>> +    */
>>> +   if (pf->flags & I40E_FLAG_TC_MQPRIO)
>>> +           return -EINVAL;
>>> +
>>>     /* verify they are not requesting separate vectors */
>>>     if (!count || ch->rx_count || ch->tx_count)
>>>             return -EINVAL;
>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
>>> b/drivers/net/ethernet/intel/i40e/i40e_main.c
>>> index e23105bee6d1..e803aa1552c6 100644
>>> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
>>> @@ -1588,6 +1588,170 @@ static int i40e_set_mac(struct net_device *netdev, 
>>> void *p)
>>>     return 0;
>>>  }
>>>  
>>> +/**
>>> + * i40e_config_rss_aq - Prepare for RSS using AQ commands
>>> + * @vsi: vsi structure
>>> + * @seed: RSS hash seed
>>> + **/
>>
>> [...]
>>
>>> + * i40e_vsi_set_default_tc_config - set default values for tc configuration
>>> + * @vsi: the VSI being configured
>>> + **/
>>> +static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
>>> +{
>>> +   u16 qcount;
>>> +   int i;
>>> +
>>> +   /* Only TC0 is enabled */
>>> +   vsi->tc_config.numtc = 1;
>>> +   vsi->tc_config.enabled_tc = 1;
>>> +   qcount = min_t(int, vsi->alloc_queue_pairs,
>>> +                  i40e_pf_get_max_q_per_tc(vsi->back));
>>> +   for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
>>> +           /* For the TC that is not enabled set the offset to to default
>>> +            * queue and allocate one queue for the given TC.
>>> +            */
>>> +           vsi->tc_config.tc_info[i].qoffset = 0;
>>> +           if (i == 0)
>>> +                   vsi->tc_config.tc_info[i].qcount = qcount;
>>> +           else
>>> +                   vsi->tc_config.tc_info[i].qcount = 1;
>>> +           vsi->tc_config.tc_info[i].netdev_tc = 0;
>>> +   }
>>> +}
>>> +
>>>  /**
>>>   * i40e_setup_tc - configure multiple traffic classes
>>>   * @netdev: net device to configure
>>> - * @tc: number of traffic classes to enable
>>> + * @type_data: tc offload data
>>>   **/
>>> -static int i40e_setup_tc(struct net_device *netdev, u8 tc)
>>> +static int i40e_setup_tc(struct net_device *netdev, void *type_data)
>>>  {
>>> +   struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
>>>     struct i40e_netdev_priv *np = netdev_priv(netdev);
>>>     struct i40e_vsi *vsi = np->vsi;
>>>     struct i40e_pf *pf = vsi->back;
>>> -   u8 enabled_tc = 0;
>>> +   u8 enabled_tc = 0, num_tc, hw;
>>> +   bool need_reset = false;
>>>     int ret = -EINVAL;
>>> +   u16 mode;
>>>     int i;
>>>  
>>> -   /* Check if DCB enabled to continue */
>>> -   if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
>>> -           netdev_info(netdev, "DCB is not enabled for adapter\n");
>>> -           goto exit;
>>> +   num_tc = mqprio_qopt->qopt.num_tc;
>>> +   hw = mqprio_qopt->qopt.hw;
>>> +   mode = mqprio_qopt->mode;
>>> +   if (!hw) {
>>
>> When stack call the ndo_setup_tc, then qopt.hw is always non-zero, Can you
>> tell me why you need to check for this?
> 
> This needs to be checked here because when the qdisc is detached from
> root of the device, i.e. 'tc qdisc del dev <interface>', the 'mqprio
> destroy' flow calls ndo_setup_tc with the offload values zeroed out for
> proper clean up.

I see, Thanks.

> 
> -Amritha
> 
>>
>> Thanks,
>> Yunsheng Lin
>>
>>> +           pf->flags &= ~I40E_FLAG_TC_MQPRIO;
>>> +           memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
>>> +           goto config_tc;
>>>     }
>>>  
>>>     /* Check if MFP enabled */
>>>     if (pf->flags & I40E_FLAG_MFP_ENABLED) {
>>> -           netdev_info(netdev, "Configuring TC not supported in MFP 
>>> mode\n");
>>> -           goto exit;
>>> +           netdev_info(netdev,
>>> +                       "Configuring TC not supported in MFP mode\n");
>>> +           return ret;
>>>     }
>>> +   switch (mode) {
>>> +   case TC_MQPRIO_MODE_DCB:
>>> +           pf->flags &= ~I40E_FLAG_TC_MQPRIO;
>>>  
>>> -   /* Check whether tc count is within enabled limit */
>>> -   if (tc > i40e_pf_get_num_tc(pf)) {
>>> -           netdev_info(netdev, "TC count greater than enabled on link for 
>>> adapter\n");
>>> -           goto exit;
>>> +           /* Check if DCB enabled to continue */
>>> +           if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
>>> +                   netdev_info(netdev,
>>> +                               "DCB is not enabled for adapter\n");
>>> +                   return ret;
>>> +           }
>>> +
>>> +           /* Check whether tc count is within enabled limit */
>>> +           if (num_tc > i40e_pf_get_num_tc(pf)) {
>>> +                   netdev_info(netdev,
>>> +                               "TC count greater than enabled on link for 
>>> adapter\n");
>>> +                   return ret;
>>> +           }
>>> +           break;
>>> +   case TC_MQPRIO_MODE_CHANNEL:
>>> +           if (pf->flags & I40E_FLAG_DCB_ENABLED) {
>>> +                   netdev_info(netdev,
>>> +                               "Full offload of TC Mqprio options is not 
>>> supported when DCB is enabled\n");
>>> +                   return ret;
>>> +           }
>>> +           if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
>>> +                   return ret;
>>> +           ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
>>> +           if (ret)
>>> +                   return ret;
>>> +           memcpy(&vsi->mqprio_qopt, mqprio_qopt,
>>> +                  sizeof(*mqprio_qopt));
>>> +           pf->flags |= I40E_FLAG_TC_MQPRIO;
>>> +           pf->flags &= ~I40E_FLAG_DCB_ENABLED;
>>> +           break;
>>> +   default:
>>> +           return -EINVAL;
>>>     }
>>>  
>>> +config_tc:
>>>     /* Generate TC map for number of tc requested */
>>> -   for (i = 0; i < tc; i++)
>>> +   for (i = 0; i < num_tc; i++)
>>>             enabled_tc |= BIT(i);
>>>  
>>>     /* Requesting same TC configuration as already enabled */
>>> -   if (enabled_tc == vsi->tc_config.enabled_tc)
>>> +   if (enabled_tc == vsi->tc_config.enabled_tc &&
>>> +       mode != TC_MQPRIO_MODE_CHANNEL)
>>>             return 0;
>>>  
>>>     /* Quiesce VSI queues */
>>>     i40e_quiesce_vsi(vsi);
>>>  
>>> +   if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
>>> +           i40e_remove_queue_channels(vsi);
>>> +
>>>     /* Configure VSI for enabled TCs */
>>>     ret = i40e_vsi_config_tc(vsi, enabled_tc);
>>>     if (ret) {
>>>             netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
>>>                         vsi->seid);
>>> +           need_reset = true;
>>>             goto exit;
>>>     }
>>>  
>>> @@ -6272,11 +6595,18 @@ static int i40e_setup_tc(struct net_device *netdev, 
>>> u8 tc)
>>>             if (ret) {
>>>                     netdev_info(netdev,
>>>                                 "Failed configuring queue channels\n");
>>> +                   need_reset = true;
>>>                     goto exit;
>>>             }
>>>     }
>>>  
>>>  exit:
>>> +   /* Reset the configuration data to defaults, only TC0 is enabled */
>>> +   if (need_reset) {
>>> +           i40e_vsi_set_default_tc_config(vsi);
>>> +           need_reset = false;
>>> +   }
>>> +
>>>     /* Unquiesce VSI */
>>>     i40e_unquiesce_vsi(vsi);
>>>     return ret;
>>> @@ -6285,14 +6615,10 @@ static int i40e_setup_tc(struct net_device *netdev, 
>>> u8 tc)
>>>  static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type 
>>> type,
>>>                        void *type_data)
>>>  {
>>> -   struct tc_mqprio_qopt *mqprio = type_data;
>>> -
>>>     if (type != TC_SETUP_MQPRIO)
>>>             return -EOPNOTSUPP;
>>>  
>>> -   mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
>>> -
>>> -   return i40e_setup_tc(netdev, mqprio->num_tc);
>>> +   return i40e_setup_tc(netdev, type_data);
>>>  }
>>>  
>>>  /**
>>> @@ -9153,45 +9479,6 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
>>>     return err;
>>>  }
>>>  
>>> -/**
>>> - * i40e_config_rss_aq - Prepare for RSS using AQ commands
>>> - * @vsi: vsi structure
>>> - * @seed: RSS hash seed
>>> - **/
>>> -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
>>> -                         u8 *lut, u16 lut_size)
>>> -{
>>> -   struct i40e_pf *pf = vsi->back;
>>> -   struct i40e_hw *hw = &pf->hw;
>>> -   int ret = 0;
>>> -
>>> -   if (seed) {
>>> -           struct i40e_aqc_get_set_rss_key_data *seed_dw =
>>> -                   (struct i40e_aqc_get_set_rss_key_data *)seed;
>>> -           ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
>>> -           if (ret) {
>>> -                   dev_info(&pf->pdev->dev,
>>> -                            "Cannot set RSS key, err %s aq_err %s\n",
>>> -                            i40e_stat_str(hw, ret),
>>> -                            i40e_aq_str(hw, hw->aq.asq_last_status));
>>> -                   return ret;
>>> -           }
>>> -   }
>>> -   if (lut) {
>>> -           bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
>>> -
>>> -           ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
>>> -           if (ret) {
>>> -                   dev_info(&pf->pdev->dev,
>>> -                            "Cannot set RSS lut, err %s aq_err %s\n",
>>> -                            i40e_stat_str(hw, ret),
>>> -                            i40e_aq_str(hw, hw->aq.asq_last_status));
>>> -                   return ret;
>>> -           }
>>> -   }
>>> -   return ret;
>>> -}
>>> -
>>>  /**
>>>   * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
>>>   * @vsi: Pointer to vsi structure
>>> @@ -9238,46 +9525,6 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, 
>>> const u8 *seed,
>>>     return ret;
>>>  }
>>>  
>>> -/**
>>> - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
>>> - * @vsi: VSI structure
>>> - **/
>>> -static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
>>> -{
>>> -   u8 seed[I40E_HKEY_ARRAY_SIZE];
>>> -   struct i40e_pf *pf = vsi->back;
>>> -   u8 *lut;
>>> -   int ret;
>>> -
>>> -   if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
>>> -           return 0;
>>> -
>>> -   if (!vsi->rss_size)
>>> -           vsi->rss_size = min_t(int, pf->alloc_rss_size,
>>> -                                 vsi->num_queue_pairs);
>>> -   if (!vsi->rss_size)
>>> -           return -EINVAL;
>>> -
>>> -   lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
>>> -   if (!lut)
>>> -           return -ENOMEM;
>>> -   /* Use the user configured hash keys and lookup table if there is one,
>>> -    * otherwise use default
>>> -    */
>>> -   if (vsi->rss_lut_user)
>>> -           memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
>>> -   else
>>> -           i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
>>> -   if (vsi->rss_hkey_user)
>>> -           memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
>>> -   else
>>> -           netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
>>> -   ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
>>> -   kfree(lut);
>>> -
>>> -   return ret;
>>> -}
>>> -
>>>  /**
>>>   * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
>>>   * @vsi: Pointer to vsi structure
>>>
>>
> 
> .
> 

Reply via email to