Re: [Intel-wired-lan] [iwl-next v1 03/14] ice: add basic devlink subfunctions support
On Thu, May 09, 2024 at 01:06:52PM +0200, Jiri Pirko wrote: > Tue, May 07, 2024 at 01:45:04PM CEST, michal.swiatkow...@linux.intel.com > wrote: > >From: Piotr Raczynski > > > >Implement devlink port handlers responsible for ethernet type devlink > >subfunctions. Create subfunction devlink port and setup all resources > >needed for a subfunction netdev to operate. Configure new VSI for each > >new subfunction, initialize and configure interrupts and Tx/Rx resources. > >Set correct MAC filters and create new netdev. > > > >For now, subfunction is limited to only one Tx/Rx queue pair. > > > >Only allocate new subfunction VSI with devlink port new command. > >This makes sure that real resources are configured only when a new > >subfunction gets activated. Allocate and free subfunction MSIX > > Interesting. You talk about actitation, yet you don't implement it here. > I will rephrase it. I meant that new VSI needs to be created even before any activation or configuration. > > > >interrupt vectors using new API calls with pci_msix_alloc_irq_at > >and pci_msix_free_irq. > > > >Support both automatic and manual subfunction numbers. If no subfunction > >number is provided, use xa_alloc to pick a number automatically. This > >will find the first free index and use that as the number. This reduces > >burden on users in the simple case where a specific number is not > >required. It may also be slightly faster to check that a number exists > >since xarray lookup should be faster than a linear scan of the dyn_ports > >xarray. 
> > > >Reviewed-by: Wojciech Drewek > >Co-developed-by: Jacob Keller > >Signed-off-by: Jacob Keller > >Signed-off-by: Piotr Raczynski > >Signed-off-by: Michal Swiatkowski > >--- > > .../net/ethernet/intel/ice/devlink/devlink.c | 3 + > > .../ethernet/intel/ice/devlink/devlink_port.c | 357 ++ > > .../ethernet/intel/ice/devlink/devlink_port.h | 33 ++ > > drivers/net/ethernet/intel/ice/ice.h | 4 + > > drivers/net/ethernet/intel/ice/ice_lib.c | 5 +- > > drivers/net/ethernet/intel/ice/ice_lib.h | 2 + > > drivers/net/ethernet/intel/ice/ice_main.c | 9 +- > > 7 files changed, 410 insertions(+), 3 deletions(-) > > > >diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c > >b/drivers/net/ethernet/intel/ice/devlink/devlink.c > >index 10073342e4f0..3fb3a7e828a4 100644 > >--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c > >+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c > >@@ -6,6 +6,7 @@ > > #include "ice.h" > > #include "ice_lib.h" > > #include "devlink.h" > >+#include "devlink_port.h" > > #include "ice_eswitch.h" > > #include "ice_fw_update.h" > > #include "ice_dcb_lib.h" > >@@ -1277,6 +1278,8 @@ static const struct devlink_ops ice_devlink_ops = { > > > > .rate_leaf_parent_set = ice_devlink_set_parent, > > .rate_node_parent_set = ice_devlink_set_parent, > >+ > >+.port_new = ice_devlink_port_new, > > }; > > > >+ [...] > >+/** > >+ * ice_devlink_port_fn_hw_addr_set - devlink handler for mac address set > >+ * @port: pointer to devlink port > >+ * @hw_addr: hw address to set > >+ * @hw_addr_len: hw address length > >+ * @extack: extack for reporting error messages > >+ * > >+ * Sets mac address for the port, verifies arguments and copies address > >+ * to the subfunction structure. > >+ * > >+ * Return: zero on success or an error code on failure. 
> >+ */ > >+static int > >+ice_devlink_port_fn_hw_addr_set(struct devlink_port *port, const u8 > >*hw_addr, > >+int hw_addr_len, > >+struct netlink_ext_ack *extack) > >+{ > >+struct ice_dynamic_port *dyn_port; > >+ > >+dyn_port = ice_devlink_port_to_dyn(port); > >+ > >+if (hw_addr_len != ETH_ALEN || !is_valid_ether_addr(hw_addr)) { > >+NL_SET_ERR_MSG_MOD(extack, "Invalid ethernet address"); > >+return -EADDRNOTAVAIL; > >+} > >+ > >+ether_addr_copy(dyn_port->hw_addr, hw_addr); > > How does this work? You copy the address to the internal structure, but > where you update the HW? > When the basic MAC filter is added in HW. > > >+ > >+return 0; > >+} [...] > > > >@@ -5383,6 +5389,7 @@ static void ice_remove(struct pci_dev *pdev) > > ice_remove_arfs(pf); > > > > devl_lock(priv_to_devlink(pf)); > >+ice_dealloc_all_dynamic_ports(pf); > > ice_deinit_devlink(pf); > > > > ice_unload(pf); > >@@ -6677,7 +6684,7 @@ static int ice_up_complete(struct ice_vsi *vsi) > > > > if (vsi->port_info && > > (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) && > >-vsi->netdev && vsi->type == ICE_VSI_PF) { > >+((vsi->netdev && vsi->type == ICE_VSI_PF))) { > > I think this is some leftover, remove. > Yeah, thanks, will remove it. > > > ice_print_link_msg(vsi, true); > > netif_tx_start_all_queues(vsi->netdev); > > netif_carrier_on(vsi->netdev); > >-- > >2.42.0 > > > >
Re: [Intel-wired-lan] [iwl-next v1 06/14] ice: base subfunction aux driver
On Thu, May 09, 2024 at 01:13:55PM +0200, Jiri Pirko wrote: > Tue, May 07, 2024 at 01:45:07PM CEST, michal.swiatkow...@linux.intel.com > wrote: > >From: Piotr Raczynski > > > >Implement subfunction driver. It is probed when subfunction port is > >activated. > > > >VSI is already created. During the probe VSI is being configured. > >MAC unicast and broadcast filter is added to allow traffic to pass. > > > >Store subfunction pointer in VSI struct. The same is done for VF > >pointer. Make union of subfunction and VF pointer as only one of them > >can be set with one VSI. > > > >Reviewed-by: Jiri Pirko > >Signed-off-by: Piotr Raczynski > >Signed-off-by: Michal Swiatkowski > > Perhaps it would make things clearer for reviewer to have all patches > related to sf auxdev/devlink/netdev at the end of the patchset, after > activation patch. Not sure why you want to mix it here. I need this code to use it in port representor implementation. You suggested in a previous review to move activation to the end [1]. [1] https://lore.kernel.org/netdev/Zhje0mQgQTMXwICb@nanopsycho/
Re: [Intel-wired-lan] [iwl-next v1 00/14] ice: support devlink subfunction
On Thu, May 09, 2024 at 01:18:29PM +0200, Jiri Pirko wrote: > Tue, May 07, 2024 at 01:45:01PM CEST, michal.swiatkow...@linux.intel.com > wrote: > >Hi, > > > >Currently ice driver does not allow creating more than one networking > >device per physical function. The only way to have more hardware backed > >netdev is to use SR-IOV. > > > >Following patchset adds support for devlink port API. For each new > >pcisf type port, driver allocates new VSI, configures all resources > >needed, including dynamically MSIX vectors, program rules and registers > >new netdev. > > > >This series supports only one Tx/Rx queue pair per subfunction. > > > >Example commands: > >devlink port add pci/:31:00.1 flavour pcisf pfnum 1 sfnum 1000 > >devlink port function set pci/:31:00.1/1 hw_addr 00:00:00:00:03:14 > >devlink port function set pci/:31:00.1/1 state active > >devlink port function del pci/:31:00.1/1 > > > >Make the port representor and eswitch code generic to support > >subfunction representor type. > > > >VSI configuration is slightly different between VF and SF. It needs to > >be reflected in the code. > > > >Most recent previous patchset (not containing port representor for SF > >support). [1] > > > >[1] > >https://lore.kernel.org/netdev/20240417142028.2171-1-michal.swiatkow...@linux.intel.com/ > > > > > I don't understand how come the patchset is v1, yet there are patches > that came through multiple iterations already. Changelog is missing > completely :/ > What is wrong here? There is a link to previous patchset with whole changelog and links to previous ones. I didn't add changelog here as it is new patchset (partially the same as from [1], because of that I added a link). I can add the changelog from [1] if you want, but for me it can be misleading. 
> > >Michal Swiatkowski (7): > > ice: treat subfunction VSI the same as PF VSI > > ice: create port representor for SF > > ice: don't set target VSI for subfunction > > ice: check if SF is ready in ethtool ops > > ice: netdevice ops for SF representor > > ice: support subfunction devlink Tx topology > > ice: basic support for VLAN in subfunctions > > > >Piotr Raczynski (7): > > ice: add new VSI type for subfunctions > > ice: export ice ndo_ops functions > > ice: add basic devlink subfunctions support > > ice: allocate devlink for subfunction > > ice: base subfunction aux driver > > ice: implement netdev for subfunction > > ice: allow to activate and deactivate subfunction > > > > drivers/net/ethernet/intel/ice/Makefile | 2 + > > .../net/ethernet/intel/ice/devlink/devlink.c | 48 ++ > > .../net/ethernet/intel/ice/devlink/devlink.h | 1 + > > .../ethernet/intel/ice/devlink/devlink_port.c | 516 ++ > > .../ethernet/intel/ice/devlink/devlink_port.h | 43 ++ > > drivers/net/ethernet/intel/ice/ice.h | 19 +- > > drivers/net/ethernet/intel/ice/ice_base.c | 5 +- > > drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 1 + > > drivers/net/ethernet/intel/ice/ice_eswitch.c | 85 ++- > > drivers/net/ethernet/intel/ice/ice_eswitch.h | 22 +- > > drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 +- > > drivers/net/ethernet/intel/ice/ice_lib.c | 52 +- > > drivers/net/ethernet/intel/ice/ice_lib.h | 3 + > > drivers/net/ethernet/intel/ice/ice_main.c | 66 ++- > > drivers/net/ethernet/intel/ice/ice_repr.c | 195 +-- > > drivers/net/ethernet/intel/ice/ice_repr.h | 22 +- > > drivers/net/ethernet/intel/ice/ice_sf_eth.c | 329 +++ > > drivers/net/ethernet/intel/ice/ice_sf_eth.h | 33 ++ > > .../ethernet/intel/ice/ice_sf_vsi_vlan_ops.c | 21 + > > .../ethernet/intel/ice/ice_sf_vsi_vlan_ops.h | 13 + > > drivers/net/ethernet/intel/ice/ice_sriov.c| 4 +- > > drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- > > drivers/net/ethernet/intel/ice/ice_type.h | 1 + > > drivers/net/ethernet/intel/ice/ice_vf_lib.c | 4 +- 
> > .../net/ethernet/intel/ice/ice_vsi_vlan_ops.c | 4 + > > drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- > > 26 files changed, 1362 insertions(+), 138 deletions(-) > > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_eth.c > > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_eth.h > > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.c > > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.h > > > >-- > >2.42.0 > > > >
Re: [Intel-wired-lan] [iwl-next v1 11/14] ice: netdevice ops for SF representor
On Thu, May 09, 2024 at 01:17:26PM +0200, Jiri Pirko wrote: > Subject does not have verb. Please add it. > > Otherwise, the patch looks ok. Thanks, will add sth like "implement". > > Reviewed-by: Jiri Pirko > > > > Tue, May 07, 2024 at 01:45:12PM CEST, michal.swiatkow...@linux.intel.com > wrote: > >Subfunction port representor needs the basic netdevice ops to work > >correctly. Create them. > > > >Reviewed-by: Wojciech Drewek > >Signed-off-by: Michal Swiatkowski > >--- > > drivers/net/ethernet/intel/ice/ice_repr.c | 57 +-- > > 1 file changed, 43 insertions(+), 14 deletions(-) > > > >diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c > >b/drivers/net/ethernet/intel/ice/ice_repr.c > >index 3cb3fc5f52ea..ec4f5b8b46e6 100644 > >--- a/drivers/net/ethernet/intel/ice/ice_repr.c > >+++ b/drivers/net/ethernet/intel/ice/ice_repr.c > >@@ -59,12 +59,13 @@ static void > > ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 > > *stats) > > { > > struct ice_netdev_priv *np = netdev_priv(netdev); > >+struct ice_repr *repr = np->repr; > > struct ice_eth_stats *eth_stats; > > struct ice_vsi *vsi; > > > >-if (ice_is_vf_disabled(np->repr->vf)) > >+if (repr->ops.ready(repr)) > > return; > >-vsi = np->repr->src_vsi; > >+vsi = repr->src_vsi; > > > > ice_update_vsi_stats(vsi); > > eth_stats = &vsi->eth_stats; > >@@ -93,7 +94,7 @@ struct ice_repr *ice_netdev_to_repr(const struct > >net_device *netdev) > > } > > > > /** > >- * ice_repr_open - Enable port representor's network interface > >+ * ice_repr_vf_open - Enable port representor's network interface > > * @netdev: network interface device structure > > * > > * The open entry point is called when a port representor's network > >@@ -102,7 +103,7 @@ struct ice_repr *ice_netdev_to_repr(const struct > >net_device *netdev) > > * > > * Returns 0 on success > > */ > >-static int ice_repr_open(struct net_device *netdev) > >+static int ice_repr_vf_open(struct net_device *netdev) > > { > > struct ice_repr *repr = 
ice_netdev_to_repr(netdev); > > struct ice_vf *vf; > >@@ -118,8 +119,16 @@ static int ice_repr_open(struct net_device *netdev) > > return 0; > > } > > > >+static int ice_repr_sf_open(struct net_device *netdev) > >+{ > >+netif_carrier_on(netdev); > >+netif_tx_start_all_queues(netdev); > >+ > >+return 0; > >+} > >+ > > /** > >- * ice_repr_stop - Disable port representor's network interface > >+ * ice_repr_vf_stop - Disable port representor's network interface > > * @netdev: network interface device structure > > * > > * The stop entry point is called when a port representor's network > >@@ -128,7 +137,7 @@ static int ice_repr_open(struct net_device *netdev) > > * > > * Returns 0 on success > > */ > >-static int ice_repr_stop(struct net_device *netdev) > >+static int ice_repr_vf_stop(struct net_device *netdev) > > { > > struct ice_repr *repr = ice_netdev_to_repr(netdev); > > struct ice_vf *vf; > >@@ -144,6 +153,14 @@ static int ice_repr_stop(struct net_device *netdev) > > return 0; > > } > > > >+static int ice_repr_sf_stop(struct net_device *netdev) > >+{ > >+netif_carrier_off(netdev); > >+netif_tx_stop_all_queues(netdev); > >+ > >+return 0; > >+} > >+ > > /** > > * ice_repr_sp_stats64 - get slow path stats for port representor > > * @dev: network interface device structure > >@@ -245,10 +262,20 @@ ice_repr_setup_tc(struct net_device *netdev, enum > >tc_setup_type type, > > } > > } > > > >-static const struct net_device_ops ice_repr_netdev_ops = { > >+static const struct net_device_ops ice_repr_vf_netdev_ops = { > >+.ndo_get_stats64 = ice_repr_get_stats64, > >+.ndo_open = ice_repr_vf_open, > >+.ndo_stop = ice_repr_vf_stop, > >+.ndo_start_xmit = ice_eswitch_port_start_xmit, > >+.ndo_setup_tc = ice_repr_setup_tc, > >+.ndo_has_offload_stats = ice_repr_ndo_has_offload_stats, > >+.ndo_get_offload_stats = ice_repr_ndo_get_offload_stats, > >+}; > >+ > >+static const struct net_device_ops ice_repr_sf_netdev_ops = { > > .ndo_get_stats64 = ice_repr_get_stats64, > >-.ndo_open = 
ice_repr_open, > >-.ndo_stop = ice_repr_stop, > >+.ndo_open = ice_repr_sf_open, > >+.ndo_stop = ice_repr_sf_stop, > > .ndo_start_xmit = ice_eswitch_port_start_xmit, > > .ndo_setup_tc = ice_repr_setup_tc, > > .ndo_has_offload_stats = ice_repr_ndo_has_offload_stats, > >@@ -261,18 +288,20 @@ static const struct net_device_ops ice_repr_netdev_ops > >= { > > */ > > bool ice_is_port_repr_netdev(const struct net_device *netdev) > > { > >-return netdev && (netdev->netdev_ops == &ice_repr_netdev_ops); > >+return netdev && (netdev->netdev_ops == &ice_repr_vf_netdev_ops || > >+ netdev->netdev_ops == &ice_repr_sf_netdev_ops); > > } > > > > /** > > * ice_repr_reg_netdev - register port representor netdev > > * @netdev: pointer to port representor netdev > >+
Re: [Intel-wired-lan] [iwl-next v1 08/14] ice: create port representor for SF
On Thu, May 09, 2024 at 01:16:05PM +0200, Jiri Pirko wrote: > Tue, May 07, 2024 at 01:45:09PM CEST, michal.swiatkow...@linux.intel.com > wrote: > >Store subfunction and VF pointer in port representor structure as a > >union. Add port representor type to distinguish between each of them. > > > >Keep the same flow of port representor creation, but instead of general > >attach function create helpers for VF and subfunction attach function. > > > >Type of port representor can be also known based on VSI type, but it > >is more clean to have it directly saved in port representor structure. > > > >Create port representor when subfunction port is created. > > > >Add devlink lock for whole VF port representor creation and destruction. > >It is done to be symmetric with what happens in case of SF port > >representor. SF port representor is always added or removed with devlink > >lock taken. Doing the same with VF port representor simplifies logic. > > > >Reviewed-by: Wojciech Drewek > >Signed-off-by: Michal Swiatkowski > >--- > > .../ethernet/intel/ice/devlink/devlink_port.c | 6 +- > > .../ethernet/intel/ice/devlink/devlink_port.h | 1 + > > drivers/net/ethernet/intel/ice/ice_eswitch.c | 85 +--- > > drivers/net/ethernet/intel/ice/ice_eswitch.h | 22 +++- > > drivers/net/ethernet/intel/ice/ice_repr.c | 124 +++--- > > drivers/net/ethernet/intel/ice/ice_repr.h | 21 ++- > > drivers/net/ethernet/intel/ice/ice_sriov.c| 4 +- > > drivers/net/ethernet/intel/ice/ice_vf_lib.c | 4 +- > > 8 files changed, 187 insertions(+), 80 deletions(-) > > This calls for a split to at least 2 patches. One patch to prepare and > one to add the SF support? Is 187 insertions and 80 deletions too many for one patch? Or the problem is with number of files changed? I don't see what here can be moved to preparation part as most changes depend on each other. Do you want me to have one patch with unused functions that are adding/removing SF repr and another with calling them? Thanks, Michal
Re: [Intel-wired-lan] [iwl-next v1 14/14] ice: allow to activate and deactivate subfunction
On Thu, May 09, 2024 at 01:36:13PM +0200, Jiri Pirko wrote: > Tue, May 07, 2024 at 01:45:15PM CEST, michal.swiatkow...@linux.intel.com > wrote: > >From: Piotr Raczynski > > > >Use previously implemented SF aux driver. It is probe during SF > >activation and remove after deactivation. > > > >Signed-off-by: Piotr Raczynski > >Signed-off-by: Michal Swiatkowski > >--- > > .../ethernet/intel/ice/devlink/devlink_port.c | 108 ++ > > .../ethernet/intel/ice/devlink/devlink_port.h | 6 + > > drivers/net/ethernet/intel/ice/ice_sf_eth.c | 107 + > > drivers/net/ethernet/intel/ice/ice_sf_eth.h | 3 + > > 4 files changed, 224 insertions(+) > > > >diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c > >b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c > >index e8929e91aff2..43ed05e5c883 100644 > >--- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c > >+++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c > >@@ -482,6 +482,42 @@ void ice_devlink_destroy_sf_dev_port(struct ice_sf_dev > >*sf_dev) > > devl_port_unregister(&sf_dev->priv->devlink_port); > > } > > > >+/** > >+ * ice_activate_dynamic_port - Activate a dynamic port > >+ * @dyn_port: dynamic port instance to activate > >+ * @extack: extack for reporting error messages > >+ * > >+ * Activate the dynamic port based on its flavour. > >+ * > >+ * Return: zero on success or an error code on failure. > >+ */ > >+static int > >+ice_activate_dynamic_port(struct ice_dynamic_port *dyn_port, > >+ struct netlink_ext_ack *extack) > >+{ > >+int err; > >+ > >+err = ice_sf_eth_activate(dyn_port, extack); > >+if (err) > >+return err; > >+ > >+dyn_port->active = true; > >+ > >+return 0; > >+} > >+ > >+/** > >+ * ice_deactivate_dynamic_port - Deactivate a dynamic port > >+ * @dyn_port: dynamic port instance to deactivate > >+ * > >+ * Undo activation of a dynamic port. 
> >+ */ > >+static void ice_deactivate_dynamic_port(struct ice_dynamic_port *dyn_port) > >+{ > >+ice_sf_eth_deactivate(dyn_port); > >+dyn_port->active = false; > >+} > >+ > > /** > > * ice_dealloc_dynamic_port - Deallocate and remove a dynamic port > > * @dyn_port: dynamic port instance to deallocate > >@@ -494,6 +530,9 @@ static void ice_dealloc_dynamic_port(struct > >ice_dynamic_port *dyn_port) > > struct devlink_port *devlink_port = &dyn_port->devlink_port; > > struct ice_pf *pf = dyn_port->pf; > > > >+if (dyn_port->active) > >+ice_deactivate_dynamic_port(dyn_port); > >+ > > xa_erase(&pf->sf_nums, devlink_port->attrs.pci_sf.sf); > > ice_eswitch_detach_sf(pf, dyn_port); > > ice_vsi_free(dyn_port->vsi); > >@@ -638,10 +677,79 @@ ice_devlink_port_fn_hw_addr_get(struct devlink_port > >*port, u8 *hw_addr, > > return 0; > > } > > > >+/** > >+ * ice_devlink_port_fn_state_set - devlink handler for port state set > >+ * @port: pointer to devlink port > >+ * @state: state to set > >+ * @extack: extack for reporting error messages > >+ * > >+ * Activates or deactivates the port. > >+ * > >+ * Return: zero on success or an error code on failure. 
> >+ */ > >+static int > >+ice_devlink_port_fn_state_set(struct devlink_port *port, > >+ enum devlink_port_fn_state state, > >+ struct netlink_ext_ack *extack) > >+{ > >+struct ice_dynamic_port *dyn_port; > >+ > >+dyn_port = ice_devlink_port_to_dyn(port); > >+ > >+switch (state) { > >+case DEVLINK_PORT_FN_STATE_ACTIVE: > >+if (!dyn_port->active) > >+return ice_activate_dynamic_port(dyn_port, extack); > >+break; > >+case DEVLINK_PORT_FN_STATE_INACTIVE: > >+if (dyn_port->active) > >+ice_deactivate_dynamic_port(dyn_port); > >+break; > >+} > >+ > >+return 0; > >+} > >+ > >+/** > >+ * ice_devlink_port_fn_state_get - devlink handler for port state get > >+ * @port: pointer to devlink port > >+ * @state: admin configured state of the port > >+ * @opstate: current port operational state > >+ * @extack: extack for reporting error messages > >+ * > >+ * Gets port state. > >+ * > >+ * Return: zero on success or an error code on failure. > >+ */ > >+static int > >+ice_devlink_port_fn_state_get(struct devlink_port *port, > >+ enum devlink_port_fn_state *state, > >+ enum devlink_port_fn_opstate *opstate, > >+ struct netlink_ext_ack *extack) > >+{ > >+struct ice_dynamic_port *dyn_port; > >+ > >+dyn_port = ice_devlink_port_to_dyn(port); > >+ > >+if (dyn_port->active) > >+*state = DEVLINK_PORT_FN_STATE_ACTIVE; > >+else > >+*state = DEVLINK_PORT_FN_STATE_INACTIVE; > >+ > >+if (dyn_port->attached) > >+*opstate = DEVLINK_PORT_FN_OPSTATE_ATTACHED; > >+else > >+*opstate = DEVLINK_PORT_FN_OPSTATE_DETACHED; > >+ > >+
Re: [Intel-wired-lan] [PATCH v2 2/2] e1000e: fix link fluctuations problem
On 09/05/2024 16:46, Andrew Lunn wrote: On Thu, May 09, 2024 at 12:13:27PM +0300, Ruinskiy, Dima wrote: On 08/05/2024 8:05, Sasha Neftin wrote: On 07/05/2024 15:31, Andrew Lunn wrote: On Fri, May 03, 2024 at 06:18:36PM +0800, Ricky Wu wrote: As described in https://bugzilla.kernel.org/show_bug.cgi?id=218642, Intel I219-LM reports link up -> link down -> link up after hot-plugging the Ethernet cable. Please could you quote some parts of 802.3 which state this is a problem. How is this breaking the standard. Andrew In I219-* parts used LSI PHY. This PHY is compliant with the 802.3 IEEE standard if I recall correctly. Auto-negotiation and link establishment are processed following the IEEE standard and could vary from platform to platform but do not violate the IEEE standard. En-Wei, My recommendation is not to accept these patches. If you think there is a HW/PHY problem - open a ticket on Intel PAE. Sasha I concur. I am wary of changing the behavior of some driver fundamentals, to satisfy a particular validation/certification flow, if there is no real functionality problem. It can open a big Pandora's box. Checking the Bugzilla report again, I am not sure we understand the issue fully: [ 143.141006] e1000e :00:1f.6 enp0s31f6: NIC Link is Up 1000 Mbps Half Duplex, Flow Control: None [ 143.144878] e1000e :00:1f.6 enp0s31f6: NIC Link is Down [ 146.838980] e1000e :00:1f.6 enp0s31f6: NIC Link is Up 1000 Mbps Full Duplex, Flow Control: None This looks like a very quick link "flap", followed by proper link establishment ~3.7 seconds later. These ~3.7 seconds are in line with what link auto-negotiation would take (auto-negotiation is the default mode for this driver). That actually seems slow. It is normally a little over 1 second. I forget the exact number. But is the PHY being polled once a second, rather than being interrupt driven? The first print (1000 Mbps Half Duplex) actually makes no sense - it cannot be real link status since 1000/Half is not a supported speed. 
It would be interesting to see what the link partner sees. What does it think the I219-LM is advertising? Is it advertising 1000BaseT_Half? i219 parts come with LSI PHY. 1000BASE-T half-duplex is not supported. 1000BASET half-duplex not advertised in IEEE 1000BASE-T Control Register 9. But why would auto-neg resolve to that if 1000BaseT_Full is available? So it seems to me that actually the first "link up" is an incorrect/incomplete/premature reading, not the "link down". Agreed. Root cause this, which looks like a real problem, rather than apply a band-aid for a test system. Andrew
Re: [Intel-wired-lan] [PATCH v2] e1000e: move force SMBUS near the end of enable_ulp function
On Wed, May 08, 2024 at 08:06:04PM +0800, Hui Wang wrote: > The commit 861e8086029e ("e1000e: move force SMBUS from enable ulp > function to avoid PHY loss issue") introduces a regression on > CH_MTP_I219_LM18 (PCIID: 0x8086550A). Without the referred commit, the > ethernet works well after suspend and resume, but after applying the > commit, the ethernet couldn't work anymore after the resume and the > dmesg shows that the NIC Link changes to 10Mbps (1000Mbps originally): > [ 43.305084] e1000e :00:1f.6 enp0s31f6: NIC Link is Up 10 Mbps Full > Duplex, Flow Control: Rx/Tx > > Without the commit, the force SMBUS code will not be executed if > "return 0" or "goto out" is executed in the enable_ulp(), and in my > case, the "goto out" is executed since FWSM_FW_VALID is set. But after > applying the commit, the force SMBUS code will be ran unconditionally. > > Here move the force SMBUS code back to enable_ulp() and put it > immediate ahead of hw->phy.ops.release(hw), this could allow the > longest settling time as possible for interface in this function and > doesn't change the original code logic. > > Fixes: 861e8086029e ("e1000e: move force SMBUS from enable ulp function to > avoid PHY loss issue") > Signed-off-by: Hui Wang > Acked-by: Vitaly Lifshits > Tested-by: Naama Meir > Signed-off-by: Tony Nguyen > --- > In the v2: > Change "this commit" to "the referred commit" in the commit header > Fix a potential infinite loop if ret_val is not zero Reviewed-by: Simon Horman
Re: [Intel-wired-lan] [iwl-next v1 03/14] ice: add basic devlink subfunctions support
Fri, May 10, 2024 at 09:13:51AM CEST, michal.swiatkow...@linux.intel.com wrote: >On Thu, May 09, 2024 at 01:06:52PM +0200, Jiri Pirko wrote: >> Tue, May 07, 2024 at 01:45:04PM CEST, michal.swiatkow...@linux.intel.com >> wrote: >> >From: Piotr Raczynski >> > >> >Implement devlink port handlers responsible for ethernet type devlink >> >subfunctions. Create subfunction devlink port and setup all resources >> >needed for a subfunction netdev to operate. Configure new VSI for each >> >new subfunction, initialize and configure interrupts and Tx/Rx resources. >> >Set correct MAC filters and create new netdev. >> > >> >For now, subfunction is limited to only one Tx/Rx queue pair. >> > >> >Only allocate new subfunction VSI with devlink port new command. >> >This makes sure that real resources are configured only when a new >> >subfunction gets activated. Allocate and free subfunction MSIX >> >> Interesting. You talk about actitation, yet you don't implement it here. >> > >I will rephrase it. I meant that new VSI needs to be created even before >any activation or configuration. > >> >> >> >interrupt vectors using new API calls with pci_msix_alloc_irq_at >> >and pci_msix_free_irq. >> > >> >Support both automatic and manual subfunction numbers. If no subfunction >> >number is provided, use xa_alloc to pick a number automatically. This >> >will find the first free index and use that as the number. This reduces >> >burden on users in the simple case where a specific number is not >> >required. It may also be slightly faster to check that a number exists >> >since xarray lookup should be faster than a linear scan of the dyn_ports >> >xarray. 
>> > >> >Reviewed-by: Wojciech Drewek >> >Co-developed-by: Jacob Keller >> >Signed-off-by: Jacob Keller >> >Signed-off-by: Piotr Raczynski >> >Signed-off-by: Michal Swiatkowski >> >--- >> > .../net/ethernet/intel/ice/devlink/devlink.c | 3 + >> > .../ethernet/intel/ice/devlink/devlink_port.c | 357 ++ >> > .../ethernet/intel/ice/devlink/devlink_port.h | 33 ++ >> > drivers/net/ethernet/intel/ice/ice.h | 4 + >> > drivers/net/ethernet/intel/ice/ice_lib.c | 5 +- >> > drivers/net/ethernet/intel/ice/ice_lib.h | 2 + >> > drivers/net/ethernet/intel/ice/ice_main.c | 9 +- >> > 7 files changed, 410 insertions(+), 3 deletions(-) >> > >> >diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c >> >b/drivers/net/ethernet/intel/ice/devlink/devlink.c >> >index 10073342e4f0..3fb3a7e828a4 100644 >> >--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c >> >+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c >> >@@ -6,6 +6,7 @@ >> > #include "ice.h" >> > #include "ice_lib.h" >> > #include "devlink.h" >> >+#include "devlink_port.h" >> > #include "ice_eswitch.h" >> > #include "ice_fw_update.h" >> > #include "ice_dcb_lib.h" >> >@@ -1277,6 +1278,8 @@ static const struct devlink_ops ice_devlink_ops = { >> > >> >.rate_leaf_parent_set = ice_devlink_set_parent, >> >.rate_node_parent_set = ice_devlink_set_parent, >> >+ >> >+ .port_new = ice_devlink_port_new, >> > }; >> > >> >+ > >[...] > >> >+/** >> >+ * ice_devlink_port_fn_hw_addr_set - devlink handler for mac address set >> >+ * @port: pointer to devlink port >> >+ * @hw_addr: hw address to set >> >+ * @hw_addr_len: hw address length >> >+ * @extack: extack for reporting error messages >> >+ * >> >+ * Sets mac address for the port, verifies arguments and copies address >> >+ * to the subfunction structure. >> >+ * >> >+ * Return: zero on success or an error code on failure. 
>> >+ */ >> >+static int >> >+ice_devlink_port_fn_hw_addr_set(struct devlink_port *port, const u8 >> >*hw_addr, >> >+ int hw_addr_len, >> >+ struct netlink_ext_ack *extack) >> >+{ >> >+ struct ice_dynamic_port *dyn_port; >> >+ >> >+ dyn_port = ice_devlink_port_to_dyn(port); >> >+ >> >+ if (hw_addr_len != ETH_ALEN || !is_valid_ether_addr(hw_addr)) { >> >+ NL_SET_ERR_MSG_MOD(extack, "Invalid ethernet address"); >> >+ return -EADDRNOTAVAIL; >> >+ } >> >+ >> >+ ether_addr_copy(dyn_port->hw_addr, hw_addr); >> >> How does this work? You copy the address to the internal structure, but >> where you update the HW? >> > >When the basic MAC filter is added in HW. When is that. My point is, user can all this function at any time, and when he calls it, he expect it's applied right away. In case it can't be for example applied on activated SF, you should block the request. > >> >> >+ >> >+ return 0; >> >+} > >[...] > >> > >> >@@ -5383,6 +5389,7 @@ static void ice_remove(struct pci_dev *pdev) >> >ice_remove_arfs(pf); >> > >> >devl_lock(priv_to_devlink(pf)); >> >+ ice_dealloc_all_dynamic_ports(pf); >> >ice_deinit_devlink(pf); >> > >> >ice_unload(pf); >> >@@ -6677,7 +6684,7 @@ static int ice_up_complete(struct ice_vsi *vsi) >> > >> >if (vsi->port_info && >> >(vsi->port_info->phy.link_info.link_i
Re: [Intel-wired-lan] [iwl-next v1 06/14] ice: base subfunction aux driver
Fri, May 10, 2024 at 09:20:51AM CEST, michal.swiatkow...@linux.intel.com wrote: >On Thu, May 09, 2024 at 01:13:55PM +0200, Jiri Pirko wrote: >> Tue, May 07, 2024 at 01:45:07PM CEST, michal.swiatkow...@linux.intel.com >> wrote: >> >From: Piotr Raczynski >> > >> >Implement subfunction driver. It is probe when subfunction port is >> >activated. >> > >> >VSI is already created. During the probe VSI is being configured. >> >MAC unicast and broadcast filter is added to allow traffic to pass. >> > >> >Store subfunction pointer in VSI struct. The same is done for VF >> >pointer. Make union of subfunction and VF pointer as only one of them >> >can be set with one VSI. >> > >> >Reviewed-by: Jiri Pirko >> >Signed-off-by: Piotr Raczynski >> >Signed-off-by: Michal Swiatkowski >> >> Perhaps it would make things clearer for reviewer to have all patches >> related to sf auxdev/devlink/netdev at the end of the patchset, after >> activation patch. Not sure why you want to mix it here. > >I need this code to use it in port representor implementation. You >suggested in previous review to move activation at the end [1]. Yeah, I just thought that sfdev patches could be separate. Nevermind then. > >[1] https://lore.kernel.org/netdev/Zhje0mQgQTMXwICb@nanopsycho/
Re: [Intel-wired-lan] [iwl-next v1 08/14] ice: create port representor for SF
Fri, May 10, 2024 at 09:31:15AM CEST, michal.swiatkow...@linux.intel.com wrote: >On Thu, May 09, 2024 at 01:16:05PM +0200, Jiri Pirko wrote: >> Tue, May 07, 2024 at 01:45:09PM CEST, michal.swiatkow...@linux.intel.com >> wrote: >> >Store subfunction and VF pointer in port representor structure as an >> >union. Add port representor type to distinguish between each of them. >> > >> >Keep the same flow of port representor creation, but instead of general >> >attach function create helpers for VF and subfunction attach function. >> > >> >Type of port representor can be also known based on VSI type, but it >> >is more clean to have it directly saved in port representor structure. >> > >> >Create port representor when subfunction port is created. >> > >> >Add devlink lock for whole VF port representor creation and destruction. >> >It is done to be symmetric with what happens in case of SF port >> >representor. SF port representor is always added or removed with devlink >> >lock taken. Doing the same with VF port representor simplify logic. >> > >> >Reviewed-by: Wojciech Drewek >> >Signed-off-by: Michal Swiatkowski >> >--- >> > .../ethernet/intel/ice/devlink/devlink_port.c | 6 +- >> > .../ethernet/intel/ice/devlink/devlink_port.h | 1 + >> > drivers/net/ethernet/intel/ice/ice_eswitch.c | 85 +--- >> > drivers/net/ethernet/intel/ice/ice_eswitch.h | 22 +++- >> > drivers/net/ethernet/intel/ice/ice_repr.c | 124 +++--- >> > drivers/net/ethernet/intel/ice/ice_repr.h | 21 ++- >> > drivers/net/ethernet/intel/ice/ice_sriov.c| 4 +- >> > drivers/net/ethernet/intel/ice/ice_vf_lib.c | 4 +- >> > 8 files changed, 187 insertions(+), 80 deletions(-) >> >> This calls for a split to at least 2 patches. One patch to prepare and >> one to add the SF support? > >Is 187 insertions and 80 deletions too many for one patch? Or the >problem is with number of files changed? The patch is hard to follow, that's the problem. 
> >I don't see what here can be moved to preparation part as most changes >depends on each other. Do you want me to have one patch with unused >functions that are adding/removing SF repr and another with calling >them? > >Thanks, >Michal
Re: [Intel-wired-lan] [iwl-next v1 14/14] ice: allow to activate and deactivate subfunction
Fri, May 10, 2024 at 09:33:54AM CEST, michal.swiatkow...@linux.intel.com wrote: >On Thu, May 09, 2024 at 01:36:13PM +0200, Jiri Pirko wrote: >> Tue, May 07, 2024 at 01:45:15PM CEST, michal.swiatkow...@linux.intel.com >> wrote: >> >From: Piotr Raczynski >> > >> >Use previously implemented SF aux driver. It is probe during SF >> >activation and remove after deactivation. >> > >> >Signed-off-by: Piotr Raczynski >> >Signed-off-by: Michal Swiatkowski >> >--- >> > .../ethernet/intel/ice/devlink/devlink_port.c | 108 ++ >> > .../ethernet/intel/ice/devlink/devlink_port.h | 6 + >> > drivers/net/ethernet/intel/ice/ice_sf_eth.c | 107 + >> > drivers/net/ethernet/intel/ice/ice_sf_eth.h | 3 + >> > 4 files changed, 224 insertions(+) >> > >> >diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c >> >b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c >> >index e8929e91aff2..43ed05e5c883 100644 >> >--- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c >> >+++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c >> >@@ -482,6 +482,42 @@ void ice_devlink_destroy_sf_dev_port(struct ice_sf_dev >> >*sf_dev) >> >devl_port_unregister(&sf_dev->priv->devlink_port); >> > } >> > >> >+/** >> >+ * ice_activate_dynamic_port - Activate a dynamic port >> >+ * @dyn_port: dynamic port instance to activate >> >+ * @extack: extack for reporting error messages >> >+ * >> >+ * Activate the dynamic port based on its flavour. >> >+ * >> >+ * Return: zero on success or an error code on failure. >> >+ */ >> >+static int >> >+ice_activate_dynamic_port(struct ice_dynamic_port *dyn_port, >> >+ struct netlink_ext_ack *extack) >> >+{ >> >+ int err; >> >+ >> >+ err = ice_sf_eth_activate(dyn_port, extack); >> >+ if (err) >> >+ return err; >> >+ >> >+ dyn_port->active = true; >> >+ >> >+ return 0; >> >+} >> >+ >> >+/** >> >+ * ice_deactivate_dynamic_port - Deactivate a dynamic port >> >+ * @dyn_port: dynamic port instance to deactivate >> >+ * >> >+ * Undo activation of a dynamic port. 
>> >+ */ >> >+static void ice_deactivate_dynamic_port(struct ice_dynamic_port *dyn_port) >> >+{ >> >+ ice_sf_eth_deactivate(dyn_port); >> >+ dyn_port->active = false; >> >+} >> >+ >> > /** >> > * ice_dealloc_dynamic_port - Deallocate and remove a dynamic port >> > * @dyn_port: dynamic port instance to deallocate >> >@@ -494,6 +530,9 @@ static void ice_dealloc_dynamic_port(struct >> >ice_dynamic_port *dyn_port) >> >struct devlink_port *devlink_port = &dyn_port->devlink_port; >> >struct ice_pf *pf = dyn_port->pf; >> > >> >+ if (dyn_port->active) >> >+ ice_deactivate_dynamic_port(dyn_port); >> >+ >> >xa_erase(&pf->sf_nums, devlink_port->attrs.pci_sf.sf); >> >ice_eswitch_detach_sf(pf, dyn_port); >> >ice_vsi_free(dyn_port->vsi); >> >@@ -638,10 +677,79 @@ ice_devlink_port_fn_hw_addr_get(struct devlink_port >> >*port, u8 *hw_addr, >> >return 0; >> > } >> > >> >+/** >> >+ * ice_devlink_port_fn_state_set - devlink handler for port state set >> >+ * @port: pointer to devlink port >> >+ * @state: state to set >> >+ * @extack: extack for reporting error messages >> >+ * >> >+ * Activates or deactivates the port. >> >+ * >> >+ * Return: zero on success or an error code on failure. 
>> >+ */ >> >+static int >> >+ice_devlink_port_fn_state_set(struct devlink_port *port, >> >+ enum devlink_port_fn_state state, >> >+ struct netlink_ext_ack *extack) >> >+{ >> >+ struct ice_dynamic_port *dyn_port; >> >+ >> >+ dyn_port = ice_devlink_port_to_dyn(port); >> >+ >> >+ switch (state) { >> >+ case DEVLINK_PORT_FN_STATE_ACTIVE: >> >+ if (!dyn_port->active) >> >+ return ice_activate_dynamic_port(dyn_port, extack); >> >+ break; >> >+ case DEVLINK_PORT_FN_STATE_INACTIVE: >> >+ if (dyn_port->active) >> >+ ice_deactivate_dynamic_port(dyn_port); >> >+ break; >> >+ } >> >+ >> >+ return 0; >> >+} >> >+ >> >+/** >> >+ * ice_devlink_port_fn_state_get - devlink handler for port state get >> >+ * @port: pointer to devlink port >> >+ * @state: admin configured state of the port >> >+ * @opstate: current port operational state >> >+ * @extack: extack for reporting error messages >> >+ * >> >+ * Gets port state. >> >+ * >> >+ * Return: zero on success or an error code on failure. >> >+ */ >> >+static int >> >+ice_devlink_port_fn_state_get(struct devlink_port *port, >> >+ enum devlink_port_fn_state *state, >> >+ enum devlink_port_fn_opstate *opstate, >> >+ struct netlink_ext_ack *extack) >> >+{ >> >+ struct ice_dynamic_port *dyn_port; >> >+ >> >+ dyn_port = ice_devlink_port_to_dyn(port); >> >+ >> >+ if (dyn_port->active) >> >+ *state = DEVLINK_PORT_FN_STATE_ACTIVE; >> >+ else >> >+ *state = DEVLINK_PORT_FN_STATE_INACTIVE; >> >+
Re: [Intel-wired-lan] [iwl-next v1 00/14] ice: support devlink subfunction
Fri, May 10, 2024 at 09:24:48AM CEST, michal.swiatkow...@linux.intel.com wrote: >On Thu, May 09, 2024 at 01:18:29PM +0200, Jiri Pirko wrote: >> Tue, May 07, 2024 at 01:45:01PM CEST, michal.swiatkow...@linux.intel.com >> wrote: >> >Hi, >> > >> >Currently ice driver does not allow creating more than one networking >> >device per physical function. The only way to have more hardware backed >> >netdev is to use SR-IOV. >> > >> >Following patchset adds support for devlink port API. For each new >> >pcisf type port, driver allocates new VSI, configures all resources >> >needed, including dynamically MSIX vectors, program rules and registers >> >new netdev. >> > >> >This series supports only one Tx/Rx queue pair per subfunction. >> > >> >Example commands: >> >devlink port add pci/:31:00.1 flavour pcisf pfnum 1 sfnum 1000 >> >devlink port function set pci/:31:00.1/1 hw_addr 00:00:00:00:03:14 >> >devlink port function set pci/:31:00.1/1 state active >> >devlink port function del pci/:31:00.1/1 >> > >> >Make the port representor and eswitch code generic to support >> >subfunction representor type. >> > >> >VSI configuration is slightly different between VF and SF. It needs to >> >be reflected in the code. >> > >> >Most recent previous patchset (not containing port representor for SF >> >support). [1] >> > >> >[1] >> >https://lore.kernel.org/netdev/20240417142028.2171-1-michal.swiatkow...@linux.intel.com/ >> > >> >> >> I don't understand howcome the patchset is v1, yet there are patches >> that came through multiple iterations alread. Changelog is missing >> completely :/ >> > >What is wrong here? There is a link to previous patchset with whole >changlog and links to previous ones. I didn't add changlog here as it is >new patchset (partialy the same as from [1], because of that I added a >link). I can add the changlog from [1] if you want, but for me it can be >missleading. It's always good to see what you changed if you send modified patches. That's all. 
> >> >> >Michal Swiatkowski (7): >> > ice: treat subfunction VSI the same as PF VSI >> > ice: create port representor for SF >> > ice: don't set target VSI for subfunction >> > ice: check if SF is ready in ethtool ops >> > ice: netdevice ops for SF representor >> > ice: support subfunction devlink Tx topology >> > ice: basic support for VLAN in subfunctions >> > >> >Piotr Raczynski (7): >> > ice: add new VSI type for subfunctions >> > ice: export ice ndo_ops functions >> > ice: add basic devlink subfunctions support >> > ice: allocate devlink for subfunction >> > ice: base subfunction aux driver >> > ice: implement netdev for subfunction >> > ice: allow to activate and deactivate subfunction >> > >> > drivers/net/ethernet/intel/ice/Makefile | 2 + >> > .../net/ethernet/intel/ice/devlink/devlink.c | 48 ++ >> > .../net/ethernet/intel/ice/devlink/devlink.h | 1 + >> > .../ethernet/intel/ice/devlink/devlink_port.c | 516 ++ >> > .../ethernet/intel/ice/devlink/devlink_port.h | 43 ++ >> > drivers/net/ethernet/intel/ice/ice.h | 19 +- >> > drivers/net/ethernet/intel/ice/ice_base.c | 5 +- >> > drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 1 + >> > drivers/net/ethernet/intel/ice/ice_eswitch.c | 85 ++- >> > drivers/net/ethernet/intel/ice/ice_eswitch.h | 22 +- >> > drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 +- >> > drivers/net/ethernet/intel/ice/ice_lib.c | 52 +- >> > drivers/net/ethernet/intel/ice/ice_lib.h | 3 + >> > drivers/net/ethernet/intel/ice/ice_main.c | 66 ++- >> > drivers/net/ethernet/intel/ice/ice_repr.c | 195 +-- >> > drivers/net/ethernet/intel/ice/ice_repr.h | 22 +- >> > drivers/net/ethernet/intel/ice/ice_sf_eth.c | 329 +++ >> > drivers/net/ethernet/intel/ice/ice_sf_eth.h | 33 ++ >> > .../ethernet/intel/ice/ice_sf_vsi_vlan_ops.c | 21 + >> > .../ethernet/intel/ice/ice_sf_vsi_vlan_ops.h | 13 + >> > drivers/net/ethernet/intel/ice/ice_sriov.c| 4 +- >> > drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- >> > drivers/net/ethernet/intel/ice/ice_type.h | 1 + >> > 
drivers/net/ethernet/intel/ice/ice_vf_lib.c | 4 +- >> > .../net/ethernet/intel/ice/ice_vsi_vlan_ops.c | 4 + >> > drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- >> > 26 files changed, 1362 insertions(+), 138 deletions(-) >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_eth.c >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_eth.h >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.c >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.h >> > >> >-- >> >2.42.0 >> > >> >
Re: [Intel-wired-lan] [iwl-next v1 03/14] ice: add basic devlink subfunctions support
On Fri, May 10, 2024 at 01:05:33PM +0200, Jiri Pirko wrote: > Fri, May 10, 2024 at 09:13:51AM CEST, michal.swiatkow...@linux.intel.com > wrote: > >On Thu, May 09, 2024 at 01:06:52PM +0200, Jiri Pirko wrote: > >> Tue, May 07, 2024 at 01:45:04PM CEST, michal.swiatkow...@linux.intel.com > >> wrote: > >> >From: Piotr Raczynski > >> > > >> >Implement devlink port handlers responsible for ethernet type devlink > >> >subfunctions. Create subfunction devlink port and setup all resources > >> >needed for a subfunction netdev to operate. Configure new VSI for each > >> >new subfunction, initialize and configure interrupts and Tx/Rx resources. > >> >Set correct MAC filters and create new netdev. > >> > > >> >For now, subfunction is limited to only one Tx/Rx queue pair. > >> > > >> >Only allocate new subfunction VSI with devlink port new command. > >> >This makes sure that real resources are configured only when a new > >> >subfunction gets activated. Allocate and free subfunction MSIX > >> > >> Interesting. You talk about actitation, yet you don't implement it here. > >> > > > >I will rephrase it. I meant that new VSI needs to be created even before > >any activation or configuration. > > > >> > >> > >> >interrupt vectors using new API calls with pci_msix_alloc_irq_at > >> >and pci_msix_free_irq. > >> > > >> >Support both automatic and manual subfunction numbers. If no subfunction > >> >number is provided, use xa_alloc to pick a number automatically. This > >> >will find the first free index and use that as the number. This reduces > >> >burden on users in the simple case where a specific number is not > >> >required. It may also be slightly faster to check that a number exists > >> >since xarray lookup should be faster than a linear scan of the dyn_ports > >> >xarray. 
> >> > > >> >Reviewed-by: Wojciech Drewek > >> >Co-developed-by: Jacob Keller > >> >Signed-off-by: Jacob Keller > >> >Signed-off-by: Piotr Raczynski > >> >Signed-off-by: Michal Swiatkowski > >> >--- > >> > .../net/ethernet/intel/ice/devlink/devlink.c | 3 + > >> > .../ethernet/intel/ice/devlink/devlink_port.c | 357 ++ > >> > .../ethernet/intel/ice/devlink/devlink_port.h | 33 ++ > >> > drivers/net/ethernet/intel/ice/ice.h | 4 + > >> > drivers/net/ethernet/intel/ice/ice_lib.c | 5 +- > >> > drivers/net/ethernet/intel/ice/ice_lib.h | 2 + > >> > drivers/net/ethernet/intel/ice/ice_main.c | 9 +- > >> > 7 files changed, 410 insertions(+), 3 deletions(-) > >> > > >> >diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c > >> >b/drivers/net/ethernet/intel/ice/devlink/devlink.c > >> >index 10073342e4f0..3fb3a7e828a4 100644 > >> >--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c > >> >+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c > >> >@@ -6,6 +6,7 @@ > >> > #include "ice.h" > >> > #include "ice_lib.h" > >> > #include "devlink.h" > >> >+#include "devlink_port.h" > >> > #include "ice_eswitch.h" > >> > #include "ice_fw_update.h" > >> > #include "ice_dcb_lib.h" > >> >@@ -1277,6 +1278,8 @@ static const struct devlink_ops ice_devlink_ops = { > >> > > >> > .rate_leaf_parent_set = ice_devlink_set_parent, > >> > .rate_node_parent_set = ice_devlink_set_parent, > >> >+ > >> >+ .port_new = ice_devlink_port_new, > >> > }; > >> > > >> >+ > > > >[...] > > > >> >+/** > >> >+ * ice_devlink_port_fn_hw_addr_set - devlink handler for mac address set > >> >+ * @port: pointer to devlink port > >> >+ * @hw_addr: hw address to set > >> >+ * @hw_addr_len: hw address length > >> >+ * @extack: extack for reporting error messages > >> >+ * > >> >+ * Sets mac address for the port, verifies arguments and copies address > >> >+ * to the subfunction structure. > >> >+ * > >> >+ * Return: zero on success or an error code on failure. 
> >> >+ */ > >> >+static int > >> >+ice_devlink_port_fn_hw_addr_set(struct devlink_port *port, const u8 > >> >*hw_addr, > >> >+ int hw_addr_len, > >> >+ struct netlink_ext_ack *extack) > >> >+{ > >> >+ struct ice_dynamic_port *dyn_port; > >> >+ > >> >+ dyn_port = ice_devlink_port_to_dyn(port); > >> >+ > >> >+ if (hw_addr_len != ETH_ALEN || !is_valid_ether_addr(hw_addr)) { > >> >+ NL_SET_ERR_MSG_MOD(extack, "Invalid ethernet address"); > >> >+ return -EADDRNOTAVAIL; > >> >+ } > >> >+ > >> >+ ether_addr_copy(dyn_port->hw_addr, hw_addr); > >> > >> How does this work? You copy the address to the internal structure, but > >> where you update the HW? > >> > > > >When the basic MAC filter is added in HW. > > When is that. My point is, user can all this function at any time, and > when he calls it, he expect it's applied right away. In case it can't be > for example applied on activated SF, you should block the request. > > Good point, I will block the request in this case. The filter is added during VSI configuration, in this case during SF activation. > > > >> > >> >+ > >> >+ return 0; > >> >+} > > > >[...] > > > >
Re: [Intel-wired-lan] [iwl-next v1 08/14] ice: create port representor for SF
On Fri, May 10, 2024 at 01:07:44PM +0200, Jiri Pirko wrote: > Fri, May 10, 2024 at 09:31:15AM CEST, michal.swiatkow...@linux.intel.com > wrote: > >On Thu, May 09, 2024 at 01:16:05PM +0200, Jiri Pirko wrote: > >> Tue, May 07, 2024 at 01:45:09PM CEST, michal.swiatkow...@linux.intel.com > >> wrote: > >> >Store subfunction and VF pointer in port representor structure as an > >> >union. Add port representor type to distinguish between each of them. > >> > > >> >Keep the same flow of port representor creation, but instead of general > >> >attach function create helpers for VF and subfunction attach function. > >> > > >> >Type of port representor can be also known based on VSI type, but it > >> >is more clean to have it directly saved in port representor structure. > >> > > >> >Create port representor when subfunction port is created. > >> > > >> >Add devlink lock for whole VF port representor creation and destruction. > >> >It is done to be symmetric with what happens in case of SF port > >> >representor. SF port representor is always added or removed with devlink > >> >lock taken. Doing the same with VF port representor simplify logic. > >> > > >> >Reviewed-by: Wojciech Drewek > >> >Signed-off-by: Michal Swiatkowski > >> >--- > >> > .../ethernet/intel/ice/devlink/devlink_port.c | 6 +- > >> > .../ethernet/intel/ice/devlink/devlink_port.h | 1 + > >> > drivers/net/ethernet/intel/ice/ice_eswitch.c | 85 +--- > >> > drivers/net/ethernet/intel/ice/ice_eswitch.h | 22 +++- > >> > drivers/net/ethernet/intel/ice/ice_repr.c | 124 +++--- > >> > drivers/net/ethernet/intel/ice/ice_repr.h | 21 ++- > >> > drivers/net/ethernet/intel/ice/ice_sriov.c| 4 +- > >> > drivers/net/ethernet/intel/ice/ice_vf_lib.c | 4 +- > >> > 8 files changed, 187 insertions(+), 80 deletions(-) > >> > >> This calls for a split to at least 2 patches. One patch to prepare and > >> one to add the SF support? > > > >Is 187 insertions and 80 deletions too many for one patch? 
Or the > >problem is with number of files changed? > > The patch is hard to follow, that's the problem. > Ok, I will do my best to make it easier to read in the next version. > > > > >I don't see what here can be moved to preparation part as most changes >depends on each other. Do you want me to have one patch with unused >functions that are adding/removing SF repr and another with calling >them? > > >Thanks, >Michal
Re: [Intel-wired-lan] [iwl-next v1 00/14] ice: support devlink subfunction
On Fri, May 10, 2024 at 01:09:20PM +0200, Jiri Pirko wrote: > Fri, May 10, 2024 at 09:24:48AM CEST, michal.swiatkow...@linux.intel.com > wrote: > >On Thu, May 09, 2024 at 01:18:29PM +0200, Jiri Pirko wrote: > >> Tue, May 07, 2024 at 01:45:01PM CEST, michal.swiatkow...@linux.intel.com > >> wrote: > >> >Hi, > >> > > >> >Currently ice driver does not allow creating more than one networking > >> >device per physical function. The only way to have more hardware backed > >> >netdev is to use SR-IOV. > >> > > >> >Following patchset adds support for devlink port API. For each new > >> >pcisf type port, driver allocates new VSI, configures all resources > >> >needed, including dynamically MSIX vectors, program rules and registers > >> >new netdev. > >> > > >> >This series supports only one Tx/Rx queue pair per subfunction. > >> > > >> >Example commands: > >> >devlink port add pci/:31:00.1 flavour pcisf pfnum 1 sfnum 1000 > >> >devlink port function set pci/:31:00.1/1 hw_addr 00:00:00:00:03:14 > >> >devlink port function set pci/:31:00.1/1 state active > >> >devlink port function del pci/:31:00.1/1 > >> > > >> >Make the port representor and eswitch code generic to support > >> >subfunction representor type. > >> > > >> >VSI configuration is slightly different between VF and SF. It needs to > >> >be reflected in the code. > >> > > >> >Most recent previous patchset (not containing port representor for SF > >> >support). [1] > >> > > >> >[1] > >> >https://lore.kernel.org/netdev/20240417142028.2171-1-michal.swiatkow...@linux.intel.com/ > >> > > >> > >> > >> I don't understand howcome the patchset is v1, yet there are patches > >> that came through multiple iterations alread. Changelog is missing > >> completely :/ > >> > > > >What is wrong here? There is a link to previous patchset with whole > >changlog and links to previous ones. I didn't add changlog here as it is > >new patchset (partialy the same as from [1], because of that I added a > >link). 
I can add the changlog from [1] if you want, but for me it can be > >missleading. > > It's always good to see what you changed if you send modified patches. > That's all. > I will paste previous changelog in next version so. > > > > >> > >> >Michal Swiatkowski (7): > >> > ice: treat subfunction VSI the same as PF VSI > >> > ice: create port representor for SF > >> > ice: don't set target VSI for subfunction > >> > ice: check if SF is ready in ethtool ops > >> > ice: netdevice ops for SF representor > >> > ice: support subfunction devlink Tx topology > >> > ice: basic support for VLAN in subfunctions > >> > > >> >Piotr Raczynski (7): > >> > ice: add new VSI type for subfunctions > >> > ice: export ice ndo_ops functions > >> > ice: add basic devlink subfunctions support > >> > ice: allocate devlink for subfunction > >> > ice: base subfunction aux driver > >> > ice: implement netdev for subfunction > >> > ice: allow to activate and deactivate subfunction > >> > > >> > drivers/net/ethernet/intel/ice/Makefile | 2 + > >> > .../net/ethernet/intel/ice/devlink/devlink.c | 48 ++ > >> > .../net/ethernet/intel/ice/devlink/devlink.h | 1 + > >> > .../ethernet/intel/ice/devlink/devlink_port.c | 516 ++ > >> > .../ethernet/intel/ice/devlink/devlink_port.h | 43 ++ > >> > drivers/net/ethernet/intel/ice/ice.h | 19 +- > >> > drivers/net/ethernet/intel/ice/ice_base.c | 5 +- > >> > drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 1 + > >> > drivers/net/ethernet/intel/ice/ice_eswitch.c | 85 ++- > >> > drivers/net/ethernet/intel/ice/ice_eswitch.h | 22 +- > >> > drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 +- > >> > drivers/net/ethernet/intel/ice/ice_lib.c | 52 +- > >> > drivers/net/ethernet/intel/ice/ice_lib.h | 3 + > >> > drivers/net/ethernet/intel/ice/ice_main.c | 66 ++- > >> > drivers/net/ethernet/intel/ice/ice_repr.c | 195 +-- > >> > drivers/net/ethernet/intel/ice/ice_repr.h | 22 +- > >> > drivers/net/ethernet/intel/ice/ice_sf_eth.c | 329 +++ > >> > 
drivers/net/ethernet/intel/ice/ice_sf_eth.h | 33 ++ > >> > .../ethernet/intel/ice/ice_sf_vsi_vlan_ops.c | 21 + > >> > .../ethernet/intel/ice/ice_sf_vsi_vlan_ops.h | 13 + > >> > drivers/net/ethernet/intel/ice/ice_sriov.c| 4 +- > >> > drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- > >> > drivers/net/ethernet/intel/ice/ice_type.h | 1 + > >> > drivers/net/ethernet/intel/ice/ice_vf_lib.c | 4 +- > >> > .../net/ethernet/intel/ice/ice_vsi_vlan_ops.c | 4 + > >> > drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- > >> > 26 files changed, 1362 insertions(+), 138 deletions(-) > >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_eth.c > >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_eth.h > >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.c > >> > create mode 100644 drivers/net/ethernet/intel/ice/ice_sf_vsi_vlan_ops.h > >> > > >> >-- >
Re: [Intel-wired-lan] [PATCH v2 2/2] e1000e: fix link fluctuations problem
> > It would be interesting to see what the link partner sees. What does > > it think the I219-LM is advertising? Is it advertising 1000BaseT_Half? > > i219 parts come with LSI PHY. 1000BASE-T half-duplex is not supported. > 1000BASET half-duplex not advertised in IEEE 1000BASE-T Control Register 9. That is the theory. But in practice? What does the link partner really see? I've come across systems which get advertisement wrong. However, in that case, I suspect it is the software above the PHY, not the PHY itself which was wrong. Andrew
[Intel-wired-lan] [tnguy-next-queue:10GbE] BUILD SUCCESS e7073830cc8b52ef3df7dd150e4dac7706e0e104
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue.git 10GbE branch HEAD: e7073830cc8b52ef3df7dd150e4dac7706e0e104 Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net elapsed time: 1141m configs tested: 130 configs skipped: 3 The following configs have been built successfully. More configs may be tested in the coming days. tested configs: alpha allnoconfig gcc alphaallyesconfig gcc alpha defconfig gcc arc allmodconfig gcc arc allnoconfig gcc arc allyesconfig gcc arc axs103_smp_defconfig gcc arc defconfig gcc arm allmodconfig gcc arm allnoconfig clang arm allyesconfig gcc arm defconfig clang armmvebu_v5_defconfig gcc arm omap1_defconfig gcc arm stm32_defconfig gcc arm wpcm450_defconfig gcc arm64allmodconfig clang arm64 allnoconfig gcc arm64 defconfig gcc csky allmodconfig gcc csky allnoconfig gcc csky allyesconfig gcc cskydefconfig gcc hexagon allmodconfig clang hexagon allnoconfig clang hexagon allyesconfig clang hexagon defconfig clang i386 allmodconfig gcc i386 allnoconfig gcc i386 allyesconfig gcc i386 buildonly-randconfig-001-20240510 clang i386 buildonly-randconfig-002-20240510 gcc i386 buildonly-randconfig-003-20240510 clang i386 buildonly-randconfig-004-20240510 clang i386 buildonly-randconfig-005-20240510 clang i386 buildonly-randconfig-006-20240510 gcc i386defconfig clang i386 randconfig-001-20240510 clang i386 randconfig-002-20240510 clang i386 randconfig-003-20240510 gcc i386 randconfig-004-20240510 gcc i386 randconfig-005-20240510 clang i386 randconfig-006-20240510 clang i386 randconfig-011-20240510 clang i386 randconfig-012-20240510 clang i386 randconfig-013-20240510 gcc i386 randconfig-014-20240510 clang i386 randconfig-015-20240510 gcc i386 randconfig-016-20240510 gcc loongarchallmodconfig gcc loongarch allnoconfig gcc loongarchallyesconfig gcc loongarch defconfig gcc m68k allmodconfig gcc m68k allnoconfig gcc m68k allyesconfig gcc m68kdefconfig gcc m68km5307c3_defconfig gcc microblaze allmodconfig gcc 
microblazeallnoconfig gcc microblaze allyesconfig gcc microblaze defconfig gcc mips allmodconfig gcc mips allnoconfig gcc mips allyesconfig gcc mips decstation_defconfig gcc nios2allmodconfig gcc nios2 allnoconfig gcc nios2allyesconfig gcc nios2 defconfig gcc openrisc allmodconfig gcc openrisc allnoconfig gcc openrisc allyesconfig gcc openriscdefconfig gcc parisc allmodconfig gcc pariscallnoconfig gcc parisc allyesconfig gcc parisc defconfig gcc parisc64defconfig gcc powerpc allmodconfig gcc powerpc allnoconfig gcc powerpc allyesconfig clang powerpc walnut_defconfig gcc riscvallmodconfig clang riscv allnoconfig gcc riscv
Re: [Intel-wired-lan] [PATCH v7 10/12] pps: generators: Add PPS Generator TIO Driver
On Thu, May 09, 2024 at 04:38:49AM +, D, Lakshmi Sowjanya wrote: > Will update as suggested. Just a side note: Since the series most likely missed v6.10, don't forget to bump dates and versions in ABI documentation in the next version. -- With Best Regards, Andy Shevchenko
[Intel-wired-lan] [PATCH iwl-next v2 0/3] ice:Support to dump PHY config, FEC
Implementation to dump PHY configuration and FEC statistics to facilitate link level debugging of customer issues. Implementation has two parts a. Serdes equalization # ethtool -d eth0 Output: Offset Values -- -- 0x: 00 00 00 00 03 00 00 00 05 00 00 00 01 08 00 40 0x0010: 01 00 00 40 00 00 39 3c 01 00 00 00 00 00 00 00 0x0020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 …… ….. 0x01f0: 01 00 00 00 ef be ad de 8f 00 00 00 00 00 00 00 0x0200: 00 00 00 00 ef be ad de 00 00 00 00 00 00 00 00 0x0210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0220: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0230: 00 00 00 00 00 00 00 00 00 00 00 00 fa ff 00 00 0x0240: 06 00 00 00 03 00 00 00 00 00 00 00 00 00 00 00 0x0250: 0f b0 0f b0 00 00 00 00 00 00 00 00 00 00 00 00 0x0260: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0270: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0290: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x02a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x02b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x02c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x02d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x02e0: 00 00 00 00 00 00 00 00 00 00 00 00 Current implementation appends 176 bytes i.e. 44 bytes * 4 serdes lane. For port with 2 serdes lane, first 88 bytes are valid values and remaining 88 bytes are filled with zero. Similarly for port with 1 serdes lane, first 44 bytes are valid and remaining 132 bytes are marked zero. Each set of serdes equalizer parameter (i.e. set of 44 bytes) follows below order a. rx_equalization_pre2 b. rx_equalization_pre1 c. rx_equalization_post1 d. rx_equalization_bflf e. rx_equalization_bfhf f. rx_equalization_drate g. tx_equalization_pre1 h. tx_equalization_pre3 i. tx_equalization_atten j. tx_equalization_post1 k. tx_equalization_pre2 Where each individual equalizer parameter is of 4 bytes. 
As ethtool prints values as individual bytes, for little endian machine these values will be in reverse byte order. b. FEC block counts # ethtool -I --show-fec eth0 Output: FEC parameters for eth0: Supported/Configured FEC encodings: Auto RS BaseR Active FEC encoding: RS Statistics: corrected_blocks: 0 uncorrectable_blocks: 0 This series do following: Patch 1 – Implementation to support user provided flag for side band queue command. Patch 2 – Currently driver does not have a way to derive serdes lane number, pcs quad , pcs port from port number.So we introduced a mechanism to derive above info. Ethtool interface extension to include FEC statistics counter. Patch 3 – Ethtool interface extension to include serdes equalizer output. v1 -> V2, Squashed 4 commit to 3 commit. Removed extra null check of arguments. Removed initialization of local variable that are not required. .../net/ethernet/intel/ice/ice_adminq_cmd.h | 51 ++ drivers/net/ethernet/intel/ice/ice_common.c | 99 +++- drivers/net/ethernet/intel/ice/ice_common.h | 28 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 449 +- drivers/net/ethernet/intel/ice/ice_ethtool.h | 29 ++ .../net/ethernet/intel/ice/ice_hw_autogen.h | 2 + drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 16 +- drivers/net/ethernet/intel/ice/ice_type.h | 8 + 8 files changed, 669 insertions(+), 13 deletions(-) -- 2.44.0
[Intel-wired-lan] [PATCH iwl-next v2 1/3] ice: Extend Sideband Queue command to support dynamic flag
Current driver implementation for Sideband Queue supports a fixed flag (ICE_AQ_FLAG_RD). To retrieve FEC statistics from firmware, Sideband Queue command is used with a different flag. Extend API for Sideband Queue command to use 'flag' as input argument. Reviewed-by: Anthony L Nguyen Reviewed-by: Jesse Brandeburg Signed-off-by: Anil Samal --- drivers/net/ethernet/intel/ice/ice_common.c | 5 +++-- drivers/net/ethernet/intel/ice/ice_common.h | 2 +- drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 16 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 5649b257e631..9a0a533613ff 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1473,8 +1473,9 @@ ice_sbq_send_cmd(struct ice_hw *hw, struct ice_sbq_cmd_desc *desc, * ice_sbq_rw_reg - Fill Sideband Queue command * @hw: pointer to the HW struct * @in: message info to be filled in descriptor + * @flag: flag to fill desc structure */ -int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in) +int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in, u16 flag) { struct ice_sbq_cmd_desc desc = {0}; struct ice_sbq_msg_req msg = {0}; @@ -1498,7 +1499,7 @@ int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in) */ msg_len -= sizeof(msg.data); - desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); + desc.flags = cpu_to_le16(flag); desc.opcode = cpu_to_le16(ice_sbq_opc_neigh_dev_req); desc.param0.cmd_len = cpu_to_le16(msg_len); status = ice_sbq_send_cmd(hw, &desc, &msg, msg_len, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index ffb22c7ce28b..42cda1bbbaab 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -201,7 +201,7 @@ int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); struct 
ice_q_ctx * ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle); -int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in); +int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in, u16 flag); int ice_aq_get_cgu_abilities(struct ice_hw *hw, struct ice_aqc_get_cgu_abilities *abilities); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 2b9423a173bb..e97b73d1b0f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -433,7 +433,7 @@ ice_read_phy_reg_e82x(struct ice_hw *hw, u8 port, u16 offset, u32 *val) ice_fill_phy_msg_e82x(&msg, port, offset); msg.opcode = ice_sbq_msg_rd; - err = ice_sbq_rw_reg(hw, &msg); + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", err); @@ -511,7 +511,7 @@ ice_write_phy_reg_e82x(struct ice_hw *hw, u8 port, u16 offset, u32 val) msg.opcode = ice_sbq_msg_wr; msg.data = val; - err = ice_sbq_rw_reg(hw, &msg); + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", err); @@ -667,7 +667,7 @@ ice_read_quad_reg_e82x(struct ice_hw *hw, u8 quad, u16 offset, u32 *val) msg.opcode = ice_sbq_msg_rd; - err = ice_sbq_rw_reg(hw, &msg); + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", err); @@ -702,7 +702,7 @@ ice_write_quad_reg_e82x(struct ice_hw *hw, u8 quad, u16 offset, u32 val) msg.opcode = ice_sbq_msg_wr; msg.data = val; - err = ice_sbq_rw_reg(hw, &msg); + err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n", err); @@ -840,7 +840,7 @@ ice_read_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 *val) cgu_msg.msg_addr_low = addr; cgu_msg.msg_addr_high = 0x0; - err = ice_sbq_rw_reg(hw, &cgu_msg); + 
err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD); if (err) { ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n", addr, err); @@ -873,7 +873,7 @@ ice_write_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 val) cgu_msg.msg_addr_high = 0x0; cgu_msg.data = val; - err = ice_sbq_rw_reg(hw, &cgu_msg); + err = ice_sbq_rw_reg(hw, &cgu_msg, ICE_AQ_FLAG_RD); if (err) { ice_deb
[Intel-wired-lan] [PATCH iwl-next v2 2/3] ice: Implement driver functionality to dump fec statistics
To debug link issues in the field, it is paramount to dump fec corrected/uncorrected block counts from firmware. Firmware requires PCS quad number and PCS port number to read FEC statistics. Current driver implementation does not maintain above physical properties of a port. Add new driver API to derive physical properties of an input port. These properties include PCS quad number, PCS port number, serdes lane count, primary serdes lane number. Extend ethtool option '--show-fec' to support fec statistics. The IEEE standard mandates two sets of counters: - 30.5.1.1.17 aFECCorrectedBlocks - 30.5.1.1.18 aFECUncorrectableBlocks Standard defines above statistics per lane but current implementation supports total FEC statistics per port i.e. sum of all lane per port. Find sample output below # ethtool -I --show-fec ens21f0np0 FEC parameters for ens21f0np0: Supported/Configured FEC encodings: Auto RS BaseR Active FEC encoding: RS Statistics: corrected_blocks: 0 uncorrectable_blocks: 0 Reviewed-by: Anthony L Nguyen Reviewed-by: Jesse Brandeburg Signed-off-by: Anil Samal --- drivers/net/ethernet/intel/ice/ice_common.c | 57 drivers/net/ethernet/intel/ice/ice_common.h | 24 ++ drivers/net/ethernet/intel/ice/ice_ethtool.c | 308 ++ drivers/net/ethernet/intel/ice/ice_ethtool.h | 10 + .../net/ethernet/intel/ice/ice_hw_autogen.h | 2 + drivers/net/ethernet/intel/ice/ice_type.h | 8 + 6 files changed, 409 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 9a0a533613ff..26cea47a9705 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3299,6 +3299,63 @@ int ice_update_link_info(struct ice_port_info *pi) return status; } +#define FEC_REG_PORT(port) { \ + FEC_CORR_LOW_REG_PORT##port,\ + FEC_CORR_HIGH_REG_PORT##port, \ + FEC_UNCORR_LOW_REG_PORT##port, \ + FEC_UNCORR_HIGH_REG_PORT##port, \ +} + +static const u32 fec_reg[][ICE_FEC_MAX] = { + FEC_REG_PORT(0), + 
FEC_REG_PORT(1), + FEC_REG_PORT(2), + FEC_REG_PORT(3) +}; + +/** + * ice_aq_get_fec_stats - reads fec stats from phy + * @hw: pointer to the HW struct + * @pcs_quad: represents pcsquad of user input serdes + * @pcs_port: represents the pcs port number part of above pcs quad + * @fec_type: represents FEC stats type + * @output: pointer to the caller-supplied buffer to return requested fec stats + * + * Return: non-zero status on error and 0 on success. + */ +int ice_aq_get_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, +enum ice_fec_stats_types fec_type, u32 *output) +{ + u16 flag = (ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF | ICE_AQ_FLAG_SI); + struct ice_sbq_msg_input msg = {}; + u32 receiver_id, reg_offset; + int err; + + if (pcs_port > 3) + return -EINVAL; + + reg_offset = fec_reg[pcs_port][fec_type]; + + if (pcs_quad == 0) + receiver_id = FEC_RECEIVER_ID_PCS0; + else if (pcs_quad == 1) + receiver_id = FEC_RECEIVER_ID_PCS1; + else + return -EINVAL; + + msg.msg_addr_low = lower_16_bits(reg_offset); + msg.msg_addr_high = receiver_id; + msg.opcode = ice_sbq_msg_rd; + msg.dest_dev = rmn_0; + + err = ice_sbq_rw_reg(hw, &msg, flag); + if (err) + return err; + + *output = msg.data; + return 0; +} + /** * ice_cache_phy_user_req * @pi: port information structure diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 42cda1bbbaab..6b888efce593 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -17,6 +17,27 @@ #define ICE_SQ_SEND_DELAY_TIME_MS 10 #define ICE_SQ_SEND_MAX_EXECUTE3 +#define FEC_REG_SHIFT 2 +#define FEC_RECV_ID_SHIFT 4 +#define FEC_CORR_LOW_REG_PORT0 (0x02 << FEC_REG_SHIFT) +#define FEC_CORR_HIGH_REG_PORT0 (0x03 << FEC_REG_SHIFT) +#define FEC_UNCORR_LOW_REG_PORT0 (0x04 << FEC_REG_SHIFT) +#define FEC_UNCORR_HIGH_REG_PORT0 (0x05 << FEC_REG_SHIFT) +#define FEC_CORR_LOW_REG_PORT1 (0x42 << FEC_REG_SHIFT) +#define FEC_CORR_HIGH_REG_PORT1 (0x43 << 
FEC_REG_SHIFT) +#define FEC_UNCORR_LOW_REG_PORT1 (0x44 << FEC_REG_SHIFT) +#define FEC_UNCORR_HIGH_REG_PORT1 (0x45 << FEC_REG_SHIFT) +#define FEC_CORR_LOW_REG_PORT2 (0x4A << FEC_REG_SHIFT) +#define FEC_CORR_HIGH_REG_PORT2 (0x4B << FEC_REG_SHIFT) +#define FEC_UNCORR_LOW_REG_PORT2 (0x4C << FEC_REG_SHIFT) +#define FEC_UNCORR_HIGH_REG_PORT2 (0x4D << FEC_REG_SHIFT) +#define FEC_CORR_LOW_REG_PORT3 (0x52 << FEC_REG_SHIFT) +#define FEC_CORR_HIGH_REG_PORT3 (0x53 << FEC_REG_SHIFT) +#define FEC_UNCORR_LOW_REG_PORT3 (0x54 << FEC_REG_SHIFT) +#define FEC_UNCORR_HIGH_REG_PORT3 (0x55 << FEC_REG_SHIFT) +#define FEC_RECEIVER_ID_PCS0 (0x33 << FEC_RECV_ID_
[Intel-wired-lan] [PATCH iwl-next v2 3/3] ice: Implement driver functionality to dump serdes equalizer values
To debug link issues in the field, serdes Tx/Rx equalizer values help to determine the health of serdes lane. Extend 'ethtool -d' option to dump serdes Tx/Rx equalizer. The following list of equalizer param is supported a. rx_equalization_pre2 b. rx_equalization_pre1 c. rx_equalization_post1 d. rx_equalization_bflf e. rx_equalization_bfhf f. rx_equalization_drate g. tx_equalization_pre1 h. tx_equalization_pre3 i. tx_equalization_atten j. tx_equalization_post1 k. tx_equalization_pre2 Reviewed-by: Anthony L Nguyen Reviewed-by: Jesse Brandeburg Signed-off-by: Anil Samal --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 51 +++ drivers/net/ethernet/intel/ice/ice_common.c | 37 + drivers/net/ethernet/intel/ice/ice_common.h | 2 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 141 +- drivers/net/ethernet/intel/ice/ice_ethtool.h | 19 +++ 5 files changed, 248 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index e76c388b9905..92d96c85d924 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1460,6 +1460,55 @@ struct ice_aqc_get_sensor_reading_resp { } data; }; +/* DNL call command (indirect 0x0682) + * Struct is used for both command and response + */ +struct ice_aqc_dnl_call_command { + u8 ctx; /* Used in command, reserved in response */ + u8 reserved; + __le16 activity_id; +#define ICE_AQC_ACT_ID_DNL 0x1129 + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_dnl_equa_param { + __le16 data_in; +#define ICE_AQC_RX_EQU_SHIFT 8 +#define ICE_AQC_RX_EQU_PRE2 (0x10 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_PRE1 (0x11 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_POST1 (0x12 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_BFLF (0x13 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_BFHF (0x14 << ICE_AQC_RX_EQU_SHIFT) +#define ICE_AQC_RX_EQU_DRATE (0x15 << ICE_AQC_RX_EQU_SHIFT) +#define 
ICE_AQC_TX_EQU_PRE1 0x0 +#define ICE_AQC_TX_EQU_PRE3 0x3 +#define ICE_AQC_TX_EQU_ATTEN 0x4 +#define ICE_AQC_TX_EQU_POST1 0x8 +#define ICE_AQC_TX_EQU_PRE2 0xC + __le16 op_code_serdes_sel; +#define ICE_AQC_OP_CODE_SHIFT 4 +#define ICE_AQC_OP_CODE_RX_EQU (0x9 << ICE_AQC_OP_CODE_SHIFT) +#define ICE_AQC_OP_CODE_TX_EQU (0x10 << ICE_AQC_OP_CODE_SHIFT) + __le32 reserved[3]; +}; + +struct ice_aqc_dnl_equa_respon { + /* Equalization value can be negative */ + int val; + __le32 reserved[3]; +}; + +/* DNL call command/response buffer (indirect 0x0682) */ +struct ice_aqc_dnl_call { + union { + struct ice_aqc_dnl_equa_param txrx_equa_reqs; + __le32 stores[4]; + struct ice_aqc_dnl_equa_respon txrx_equa_resp; + } sto; +}; + struct ice_aqc_link_topo_params { u8 lport_num; u8 lport_num_valid; @@ -2563,6 +2612,7 @@ struct ice_aq_desc { struct ice_aqc_get_link_status get_link_status; struct ice_aqc_event_lan_overflow lan_overflow; struct ice_aqc_get_link_topo get_link_topo; + struct ice_aqc_dnl_call_command dnl_call; struct ice_aqc_i2c read_write_i2c; struct ice_aqc_read_i2c_resp read_i2c_resp; struct ice_aqc_get_set_tx_topo get_set_tx_topo; @@ -2687,6 +2737,7 @@ enum ice_adminq_opc { ice_aqc_opc_set_phy_rec_clk_out = 0x0630, ice_aqc_opc_get_phy_rec_clk_out = 0x0631, ice_aqc_opc_get_sensor_reading = 0x0632, + ice_aqc_opc_dnl_call= 0x0682, ice_aqc_opc_get_link_topo = 0x06E0, ice_aqc_opc_read_i2c= 0x06E2, ice_aqc_opc_write_i2c = 0x06E3, diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 26cea47a9705..90af21114c87 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3299,6 +3299,43 @@ int ice_update_link_info(struct ice_port_info *pi) return status; } +/** + * ice_aq_get_phy_equalization - function to read serdes equaliser + * value from firmware using admin queue command. 
+ * @hw: pointer to the HW struct + * @data_in: represents the serdes equalization parameter requested + * @op_code: represents the serdes number and flag to represent tx or rx + * @serdes_num: represents the serdes number + * @output: pointer to the caller-supplied buffer to return serdes equaliser + * + * Return: non-zero status on error and 0 on success. + */ +int ice_aq_get_phy_equalization(struct ice_hw *hw, u16 data_in, u16 op_code, + u8 serdes_num, int *output) +{ + struct ice_aq
[Intel-wired-lan] [PATCH RFC iwl-next 00/12] idpf: XDP chapter I: convert Rx to libeth
Applies on top of "idpf: don't enable NAPI and interrupts prior to allocating Rx buffers" from Tony's tree. Sent as RFC as we're at the end of the development cycle and several kdocs are messed up. I'll fix them when sending non-RFC after the window opens. XDP for idpf is currently 5 chapters: * convert Rx to libeth (this); * convert Tx and stats to libeth; * generic XDP and XSk code changes; * actual XDP for idpf via libeth_xdp; * XSk for idpf (^). Part I does the following: * splits &idpf_queue into 4 (RQ, SQ, FQ, CQ) and puts them on a diet; * ensures optimal cacheline placement, strictly asserts CL sizes; * moves currently unused/dead singleq mode out of line; * reuses libeth's Rx ptype definitions and helpers; * uses libeth's Rx buffer management for both header and payload; * eliminates memcpy()s and coherent DMA uses on hotpath, uses napi_build_skb() instead of in-place short skb allocation. Most idpf patches, except for the queue split, remove more lines than they add. Expect far better memory utilization and +5-8% on Rx depending on the case (+17% on skb XDP_DROP :>). 
Alexander Lobakin (12): libeth: add cacheline / struct alignment helpers idpf: stop using macros for accessing queue descriptors idpf: split &idpf_queue into 4 strictly-typed queue structures idpf: avoid bloating &idpf_q_vector with big %NR_CPUS idpf: strictly assert cachelines of queue and queue vector structures idpf: merge singleq and splitq &net_device_ops idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ idpf: reuse libeth's definitions of parsed ptype structures idpf: remove legacy Page Pool Ethtool stats libeth: support different types of buffers for Rx idpf: convert header split mode to libeth + napi_build_skb() idpf: use libeth Rx buffer management for payload buffer drivers/net/ethernet/intel/Kconfig| 13 +- drivers/net/ethernet/intel/idpf/Kconfig | 25 + drivers/net/ethernet/intel/idpf/Makefile |3 +- drivers/net/ethernet/intel/idpf/idpf.h| 10 +- .../net/ethernet/intel/idpf/idpf_lan_txrx.h |2 + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 746 - include/net/libeth/cache.h| 64 + include/net/libeth/rx.h | 19 + .../net/ethernet/intel/idpf/idpf_ethtool.c| 152 +- drivers/net/ethernet/intel/idpf/idpf_lib.c| 88 +- drivers/net/ethernet/intel/idpf/idpf_main.c |1 + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 311 ++-- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 1405 + .../net/ethernet/intel/idpf/idpf_virtchnl.c | 175 +- drivers/net/ethernet/intel/libeth/rx.c| 122 +- 15 files changed, 1726 insertions(+), 1410 deletions(-) create mode 100644 drivers/net/ethernet/intel/idpf/Kconfig create mode 100644 include/net/libeth/cache.h -- 2.45.0
[Intel-wired-lan] [PATCH RFC iwl-next 01/12] libeth: add cacheline / struct alignment helpers
Following the latest netdev trend, i.e. effective and usage-based field cacheline placement, add helpers to group and then assert struct fields by cachelines. For 64-bit with 64-byte cachelines, the assertions are more strict as the size can then be easily predicted. For the rest, just make sure they don't cross the specified bound. Signed-off-by: Alexander Lobakin --- include/net/libeth/cache.h | 64 ++ 1 file changed, 64 insertions(+) create mode 100644 include/net/libeth/cache.h diff --git a/include/net/libeth/cache.h b/include/net/libeth/cache.h new file mode 100644 index ..3245a20b22d3 --- /dev/null +++ b/include/net/libeth/cache.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_CACHE_H +#define __LIBETH_CACHE_H + +#include + +/* __aligned_largest is architecture-dependent. Get the actual alignment */ +#define ___LIBETH_LARGEST_ALIGN \ + sizeof(struct { long __UNIQUE_ID(long_); } __aligned_largest) +#define __LIBETH_LARGEST_ALIGN\ + (___LIBETH_LARGEST_ALIGN > SMP_CACHE_BYTES ? \ +___LIBETH_LARGEST_ALIGN : SMP_CACHE_BYTES) +#define __LIBETH_LARGEST_ALIGNED(sz) \ + ALIGN(sz, __LIBETH_LARGEST_ALIGN) + +#define __libeth_cacheline_group_begin(grp) \ + __cacheline_group_begin(grp) __aligned(__LIBETH_LARGEST_ALIGN) +#define __libeth_cacheline_group_end(grp) \ + __cacheline_group_end(grp) __aligned(4) + +#define libeth_cacheline_group(grp, ...) 
\ + struct_group(grp, \ + __libeth_cacheline_group_begin(grp); \ + __VA_ARGS__\ + __libeth_cacheline_group_end(grp); \ + ) + +#if defined(CONFIG_64BIT) && L1_CACHE_BYTES == 64 +#define libeth_cacheline_group_assert(type, grp, sz) \ + static_assert(offsetof(type, __cacheline_group_end__##grp) - \ + offsetofend(type, __cacheline_group_begin__##grp) == \ + (sz)) +#define __libeth_cacheline_struct_assert(type, sz)\ + static_assert(sizeof(type) == (sz)) +#else /* !CONFIG_64BIT || L1_CACHE_BYTES != 64 */ +#define libeth_cacheline_group_assert(type, grp, sz) \ + static_assert(offsetof(type, __cacheline_group_end__##grp) - \ + offsetofend(type, __cacheline_group_begin__##grp) <= \ + (sz)) +#define __libeth_cacheline_struct_assert(type, sz)\ + static_assert(sizeof(type) <= (sz)) +#endif /* !CONFIG_64BIT || L1_CACHE_BYTES != 64 */ + +#define __libeth_cls1(sz1)\ + __LIBETH_LARGEST_ALIGNED(sz1) +#define __libeth_cls2(sz1, sz2) \ + (__LIBETH_LARGEST_ALIGNED(sz1) + __LIBETH_LARGEST_ALIGNED(sz2)) +#define __libeth_cls3(sz1, sz2, sz3) \ + (__LIBETH_LARGEST_ALIGNED(sz1) + __LIBETH_LARGEST_ALIGNED(sz2) + \ +__LIBETH_LARGEST_ALIGNED(sz3)) +#define __libeth_cls(...) \ + CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) +#define libeth_cacheline_struct_assert(type, ...) \ + __libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__)) + +#define libeth_cacheline_set_assert(type, ro, rw, c) \ + libeth_cacheline_group_assert(type, read_mostly, ro); \ + libeth_cacheline_group_assert(type, read_write, rw); \ + libeth_cacheline_group_assert(type, cold, c); \ + libeth_cacheline_struct_assert(type, ro, rw, c) + +#endif /* __LIBETH_CACHE_H */ -- 2.45.0
[Intel-wired-lan] [PATCH RFC iwl-next 02/12] idpf: stop using macros for accessing queue descriptors
In C, we have structures and unions. Casting `void *` via macros is not only error-prone, but also looks confusing and awful in general. In preparation for splitting the queue structs, replace it with a union and direct array dereferences. Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/idpf.h| 1 - .../net/ethernet/intel/idpf/idpf_lan_txrx.h | 2 + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 37 --- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 20 +- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 32 5 files changed, 43 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index e7a036538246..0b26dd9b8a51 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -20,7 +20,6 @@ struct idpf_vport_max_q; #include #include "virtchnl2.h" -#include "idpf_lan_txrx.h" #include "idpf_txrx.h" #include "idpf_controlq.h" diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h index a5752dcab888..8c7f8ef8f1a1 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h @@ -4,6 +4,8 @@ #ifndef _IDPF_LAN_TXRX_H_ #define _IDPF_LAN_TXRX_H_ +#include + enum idpf_rss_hash { IDPF_HASH_INVALID = 0, /* Values 1 - 28 are reserved for future use */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 551391e20464..8794ce018911 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -8,6 +8,7 @@ #include #include +#include "idpf_lan_txrx.h" #include "virtchnl2_lan_desc.h" #define IDPF_LARGE_MAX_Q 256 @@ -117,24 +118,6 @@ do { \ #define IDPF_RXD_EOF_SPLITQVIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M -#define IDPF_SINGLEQ_RX_BUF_DESC(rxq, i) \ - (&(((struct virtchnl2_singleq_rx_buf_desc 
*)((rxq)->desc_ring))[i])) -#define IDPF_SPLITQ_RX_BUF_DESC(rxq, i)\ - (&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i])) -#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) rxq)->ring))[i]) - -#define IDPF_BASE_TX_DESC(txq, i) \ - (&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i])) -#define IDPF_BASE_TX_CTX_DESC(txq, i) \ - (&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i])) -#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i)\ - (&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i])) - -#define IDPF_FLEX_TX_DESC(txq, i) \ - (&(((union idpf_tx_flex_desc *)((txq)->desc_ring))[i])) -#define IDPF_FLEX_TX_CTX_DESC(txq, i) \ - (&(((struct idpf_flex_tx_ctx_desc *)((txq)->desc_ring))[i])) - #define IDPF_DESC_UNUSED(txq) \ txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) @@ -317,8 +300,6 @@ struct idpf_rx_extracted { #define IDPF_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) -#define IDPF_RX_DESC(rxq, i) \ - (&(((union virtchnl2_rx_desc *)((rxq)->desc_ring))[i])) struct idpf_rx_buf { struct page *page; @@ -733,7 +714,21 @@ struct idpf_queue { struct idpf_q_vector *q_vector; unsigned int size; dma_addr_t dma; - void *desc_ring; + union { + union virtchnl2_rx_desc *rx; + + struct virtchnl2_singleq_rx_buf_desc *single_buf; + struct virtchnl2_splitq_rx_buf_desc *split_buf; + + struct idpf_base_tx_desc *base_tx; + struct idpf_base_tx_ctx_desc *base_ctx; + union idpf_tx_flex_desc *flex_tx; + struct idpf_flex_tx_ctx_desc *flex_ctx; + + struct idpf_splitq_tx_compl_desc *comp; + + void *desc_ring; + }; u16 tx_max_bufs; u8 tx_min_pkt_len; diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 27b93592c4ba..b17d88e15000 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -205,7 +205,7 @@ static void idpf_tx_singleq_map(struct 
idpf_queue *tx_q, data_len = skb->data_len; size = skb_headlen(skb); - tx_desc = IDPF_BASE_TX_DESC(tx_q, i); + tx_desc = &tx_q->base_tx[i]; dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); @@ -239,7 +239,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q, i++; if (i == tx_q->desc_count) { - tx_desc = IDP
[Intel-wired-lan] [PATCH RFC iwl-next 04/12] idpf: avoid bloating &idpf_q_vector with big %NR_CPUS
With CONFIG_MAXSMP, sizeof(cpumask_t) is 1 Kb. The queue vector structure has them embedded, which means 1 additional Kb of not really hotpath data. We have cpumask_var_t, which is either an embedded cpumask or a pointer for allocating it dynamically when it's big. Use it instead of plain cpumasks and put &idpf_q_vector on a good diet. Also remove redundant pointer to the interrupt name from the structure. request_irq() saves it and free_irq() returns it on deinit, so that you can free the memory. Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 7 +++ drivers/net/ethernet/intel/idpf/idpf_lib.c | 13 ++--- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 20 +--- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index fb645c6887b3..428b82b4de80 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -505,7 +505,6 @@ struct idpf_intr_reg { /** * struct idpf_q_vector * @vport: Vport back pointer - * @affinity_mask: CPU affinity mask * @napi: napi handler * @v_idx: Vector index * @intr_reg: See struct idpf_intr_reg @@ -526,11 +525,10 @@ struct idpf_intr_reg { * @num_bufq: Number of buffer queues * @bufq: Array of buffer queues to service * @total_events: Number of interrupts processed - * @name: Queue vector name + * @affinity_mask: CPU affinity mask */ struct idpf_q_vector { struct idpf_vport *vport; - cpumask_t affinity_mask; struct napi_struct napi; u16 v_idx; struct idpf_intr_reg intr_reg; @@ -556,7 +554,8 @@ struct idpf_q_vector { struct idpf_buf_queue **bufq; u16 total_events; - char *name; + + cpumask_var_t affinity_mask; }; struct idpf_rx_queue_stats { diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 3e8b24430dd8..a8be09a89943 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ 
b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -69,7 +69,7 @@ static void idpf_deinit_vector_stack(struct idpf_adapter *adapter) static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter) { clear_bit(IDPF_MB_INTR_MODE, adapter->flags); - free_irq(adapter->msix_entries[0].vector, adapter); + kfree(free_irq(adapter->msix_entries[0].vector, adapter)); queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0); } @@ -124,15 +124,14 @@ static void idpf_mb_irq_enable(struct idpf_adapter *adapter) */ static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter) { - struct idpf_q_vector *mb_vector = &adapter->mb_vector; int irq_num, mb_vidx = 0, err; + char *name; irq_num = adapter->msix_entries[mb_vidx].vector; - mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d", - dev_driver_string(&adapter->pdev->dev), - "Mailbox", mb_vidx); - err = request_irq(irq_num, adapter->irq_mb_handler, 0, - mb_vector->name, adapter); + name = kasprintf(GFP_KERNEL, "%s-%s-%d", +dev_driver_string(&adapter->pdev->dev), +"Mailbox", mb_vidx); + err = request_irq(irq_num, adapter->irq_mb_handler, 0, name, adapter); if (err) { dev_err(&adapter->pdev->dev, "IRQ request for mailbox failed, error: %d\n", err); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 211c403a4c98..500754795cc8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3613,6 +3613,8 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) q_vector->tx = NULL; kfree(q_vector->rx); q_vector->rx = NULL; + + free_cpumask_var(q_vector->affinity_mask); } /* Clean up the mapping of queues to vectors */ @@ -3661,7 +3663,7 @@ static void idpf_vport_intr_rel_irq(struct idpf_vport *vport) /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(irq_num, NULL); - free_irq(irq_num, q_vector); + kfree(free_irq(irq_num, q_vector)); } } @@ -3812,6 +3814,7 @@ static int idpf_vport_intr_req_irq(struct idpf_vport 
*vport, char *basename) for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; + char *name; vidx = vport->q_vector_idxs[vector]; irq_num = adapter->msix_entries[vidx].vector; @@ -3825,18 +3828,18 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue;
[Intel-wired-lan] [PATCH RFC iwl-next 05/12] idpf: strictly assert cachelines of queue and queue vector structures
Now that the queue and queue vector structures are separated and layed out optimally, group the fields as read-mostly, read-write, and cold cachelines and add size assertions to make sure new features won't push something out of its place and provoke perf regression. Despite looking innocent, this gives up to 2% of perf bump on Rx. Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 370 +++- 1 file changed, 205 insertions(+), 165 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 428b82b4de80..0192d33744ff 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -6,6 +6,7 @@ #include +#include #include #include #include @@ -528,35 +529,43 @@ struct idpf_intr_reg { * @affinity_mask: CPU affinity mask */ struct idpf_q_vector { - struct idpf_vport *vport; - struct napi_struct napi; - u16 v_idx; - struct idpf_intr_reg intr_reg; - - u16 num_txq; - u16 num_complq; - struct idpf_tx_queue **tx; - struct idpf_compl_queue **complq; - - struct dim tx_dim; - u16 tx_itr_value; - bool tx_intr_mode; - u32 tx_itr_idx; - - u16 num_rxq; - struct idpf_rx_queue **rx; - struct dim rx_dim; - u16 rx_itr_value; - bool rx_intr_mode; - u32 rx_itr_idx; - - u16 num_bufq; - struct idpf_buf_queue **bufq; - - u16 total_events; - - cpumask_var_t affinity_mask; + libeth_cacheline_group(read_mostly, + struct idpf_vport *vport; + + u16 num_rxq; + u16 num_txq; + u16 num_bufq; + u16 num_complq; + struct idpf_rx_queue **rx; + struct idpf_tx_queue **tx; + struct idpf_buf_queue **bufq; + struct idpf_compl_queue **complq; + + struct idpf_intr_reg intr_reg; + ); + libeth_cacheline_group(read_write, + struct napi_struct napi; + u16 total_events; + + struct dim tx_dim; + u16 tx_itr_value; + bool tx_intr_mode; + u32 tx_itr_idx; + + struct dim rx_dim; + u16 rx_itr_value; + bool rx_intr_mode; + u32 rx_itr_idx; + ); + libeth_cacheline_group(cold, + u16 v_idx; 
+ + cpumask_var_t affinity_mask; + ); }; +libeth_cacheline_set_assert(struct idpf_q_vector, 104, + 424 + 2 * sizeof(struct dim), + 8 + sizeof(cpumask_var_t)); struct idpf_rx_queue_stats { u64_stats_t packets; @@ -641,52 +650,59 @@ struct idpf_txq_stash { * @rx_max_pkt_size: RX max packet size */ struct idpf_rx_queue { - union { - union virtchnl2_rx_desc *rx; - struct virtchnl2_singleq_rx_buf_desc *single_buf; + libeth_cacheline_group(read_mostly, + union { + union virtchnl2_rx_desc *rx; + struct virtchnl2_singleq_rx_buf_desc *single_buf; - void *desc_ring; - }; - union { - struct { - struct idpf_bufq_set *bufq_sets; - struct napi_struct *napi; + void *desc_ring; }; - struct { - struct idpf_rx_buf *rx_buf; - struct page_pool *pp; + union { + struct { + struct idpf_bufq_set *bufq_sets; + struct napi_struct *napi; + }; + struct { + struct idpf_rx_buf *rx_buf; + struct page_pool *pp; + }; }; - }; - struct net_device *netdev; - void __iomem *tail; - - DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); - u16 idx; - u16 desc_count; - u16 next_to_use; - u16 next_to_clean; - u16 next_to_alloc; - - u32 rxdids; - - const struct idpf_rx_ptype_decoded *rx_ptype_lkup; - struct sk_buff *skb; - - struct u64_stats_sync stats_sync; - struct idpf_rx_queue_stats q_stats; - - /* Slowpath */ - u32 q_id; - u32 size; - dma_addr_t dma; - - struct idpf_q_vector *q_vector; - - u16 rx_buffer_low_watermark; - u16 rx_hbuf_size; - u16 rx_buf_size; - u16 rx_max_pkt_size; -} cacheline_aligned; + struct net_device *netdev; + void __iomem *tail; + + DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); + u16 idx; + u16 desc_count; + + u32 rxdids; + const struct idpf_rx_ptype_dec
[Intel-wired-lan] [PATCH RFC iwl-next 06/12] idpf: merge singleq and splitq &net_device_ops
It makes no sense to have a second &net_device_ops struct (800 bytes of rodata) with only one difference in .ndo_start_xmit, which can easily be just one `if`. This `if` is a drop in the ocean and you won't see any difference. Define unified idpf_xmit_start(). The preparation for sending is the same, just call either idpf_tx_splitq_frame() or idpf_tx_singleq_frame() depending on the active model to actually map and send the skb. Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 7 ++--- drivers/net/ethernet/intel/idpf/idpf_lib.c| 26 +++- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 31 ++- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 10 +++--- 4 files changed, 15 insertions(+), 59 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 0192d33744ff..015aba5abb3c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -1190,10 +1190,9 @@ bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, unsigned int count); int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); -netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb, -struct net_device *netdev); -netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb, - struct net_device *netdev); +netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_tx_queue *tx_q); +netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index a8be09a89943..fe91475c7b4c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -4,8 +4,7 @@ #include 
"idpf.h" #include "idpf_virtchnl.h" -static const struct net_device_ops idpf_netdev_ops_splitq; -static const struct net_device_ops idpf_netdev_ops_singleq; +static const struct net_device_ops idpf_netdev_ops; /** * idpf_init_vector_stack - Fill the MSIX vector stack with vector index @@ -764,10 +763,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) } /* assign netdev_ops */ - if (idpf_is_queue_model_split(vport->txq_model)) - netdev->netdev_ops = &idpf_netdev_ops_splitq; - else - netdev->netdev_ops = &idpf_netdev_ops_singleq; + netdev->netdev_ops = &idpf_netdev_ops; /* setup watchdog timeout value to be 5 second */ netdev->watchdog_timeo = 5 * HZ; @@ -2353,24 +2349,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem) mem->pa = 0; } -static const struct net_device_ops idpf_netdev_ops_splitq = { - .ndo_open = idpf_open, - .ndo_stop = idpf_stop, - .ndo_start_xmit = idpf_tx_splitq_start, - .ndo_features_check = idpf_features_check, - .ndo_set_rx_mode = idpf_set_rx_mode, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = idpf_set_mac, - .ndo_change_mtu = idpf_change_mtu, - .ndo_get_stats64 = idpf_get_stats64, - .ndo_set_features = idpf_set_features, - .ndo_tx_timeout = idpf_tx_timeout, -}; - -static const struct net_device_ops idpf_netdev_ops_singleq = { +static const struct net_device_ops idpf_netdev_ops = { .ndo_open = idpf_open, .ndo_stop = idpf_stop, - .ndo_start_xmit = idpf_tx_singleq_start, + .ndo_start_xmit = idpf_tx_start, .ndo_features_check = idpf_features_check, .ndo_set_rx_mode = idpf_set_rx_mode, .ndo_validate_addr = eth_validate_addr, diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index b51f7cd6db01..a3b60a2dfcaa 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -351,8 +351,8 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq, * * Returns 
NETDEV_TX_OK if sent, else an error code */ -static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, -struct idpf_tx_queue *tx_q) +netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, + struct idpf_tx_queue *tx_q) { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; @@ -408,33 +408,6 @@ static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, return idpf_tx_drop_skb(tx_q, skb); } -/** - * idpf_tx_singleq_start - Selects the right Tx
[Intel-wired-lan] [PATCH RFC iwl-next 07/12] idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ
Still, this dead code is supported by the driver and often times adds hotpath branches and redundant cacheline accesses.
as it increases the driver size and adds runtime checks on hotpath.
b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 4aa5ee781bd7..2bc1a5a0b50f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1306,7 +1306,7 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, static void idpf_rxq_set_descids(const struct idpf_vport *vport, struct idpf_rx_queue *q) { - if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) { + if (idpf_is_queue_model_split(vport->rxq_model)) { q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; } else { if (vport->base_rxd) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index 44602b87cd41..d1705fcb701a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -1256,12 +1256,12 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapt
[Intel-wired-lan] [PATCH RFC iwl-next 09/12] idpf: remove legacy Page Pool Ethtool stats
Page Pool Ethtool stats are deprecated since the Netlink Page Pool interface introduction. idpf receives big changes in Rx buffer management, including &page_pool layout, so keeping these deprecated stats does only harm, not speaking of that CONFIG_IDPF selects CONFIG_PAGE_POOL_STATS unconditionally, while the latter is often turned off for better performance. Remove all the references to PP stats from the Ethtool code. The stats are still available in their full via the generic Netlink interface. Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/Kconfig | 1 - .../net/ethernet/intel/idpf/idpf_ethtool.c| 29 +-- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/Kconfig b/drivers/net/ethernet/intel/idpf/Kconfig index bd96ab923b07..af6126eeb61e 100644 --- a/drivers/net/ethernet/intel/idpf/Kconfig +++ b/drivers/net/ethernet/intel/idpf/Kconfig @@ -7,7 +7,6 @@ config IDPF select DIMLIB select LIBETH select PAGE_POOL - select PAGE_POOL_STATS help This driver supports Intel(R) Infrastructure Data Path Function devices. 
diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 83447c3d026c..f66f866ec4fb 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -573,8 +573,6 @@ static void idpf_get_stat_strings(struct net_device *netdev, u8 *data) for (i = 0; i < vport_config->max_q.max_rxq; i++) idpf_add_qstat_strings(&data, idpf_gstrings_rx_queue_stats, "rx", i); - - page_pool_ethtool_stats_get_strings(data); } /** @@ -608,7 +606,6 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset) struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; u16 max_txq, max_rxq; - unsigned int size; if (sset != ETH_SS_STATS) return -EINVAL; @@ -627,11 +624,8 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset) max_txq = vport_config->max_q.max_txq; max_rxq = vport_config->max_q.max_rxq; - size = IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + + return IDPF_PORT_STATS_LEN + (IDPF_TX_QUEUE_STATS_LEN * max_txq) + (IDPF_RX_QUEUE_STATS_LEN * max_rxq); - size += page_pool_ethtool_stats_get_count(); - - return size; } /** @@ -884,7 +878,6 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, { struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; - struct page_pool_stats pp_stats = { }; struct idpf_vport *vport; unsigned int total = 0; unsigned int i, j; @@ -954,32 +947,12 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, idpf_add_empty_queue_stats(&data, qtype); else idpf_add_queue_stats(&data, rxq, qtype); - - /* In splitq mode, don't get page pool stats here since -* the pools are attached to the buffer queues -*/ - if (is_splitq) - continue; - - if (rxq) - page_pool_get_stats(rxq->pp, &pp_stats); - } - } - - for (i = 0; i < vport->num_rxq_grp; i++) { - for (j = 0; j < vport->num_bufqs_per_qgrp; j++) { - struct idpf_buf_queue *rxbufq = - 
&vport->rxq_grps[i].splitq.bufq_sets[j].bufq; - - page_pool_get_stats(rxbufq->pp, &pp_stats); } } for (; total < vport_config->max_q.max_rxq; total++) idpf_add_empty_queue_stats(&data, VIRTCHNL2_QUEUE_TYPE_RX); - page_pool_ethtool_stats_get(data, &pp_stats); - rcu_read_unlock(); idpf_vport_ctrl_unlock(netdev); -- 2.45.0
[Intel-wired-lan] [PATCH RFC iwl-next 08/12] idpf: reuse libeth's definitions of parsed ptype structures
idpf's in-kernel parsed ptype structure is almost identical to the one used in the previous Intel drivers, which means it can be converted to use libeth's definitions and even helpers. The only difference is that it doesn't use a constant table (libie), rather than one obtained from the device. Remove the driver counterpart and use libeth's helpers for hashes and checksums. This slightly optimizes skb fields processing due to faster checks. Also don't define big static array of ptypes in &idpf_vport -- allocate them dynamically. The pointer to it is anyway cached in &idpf_rx_queue. Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/Kconfig | 1 + drivers/net/ethernet/intel/idpf/idpf.h| 2 +- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 88 +--- drivers/net/ethernet/intel/idpf/idpf_lib.c| 3 + drivers/net/ethernet/intel/idpf/idpf_main.c | 1 + .../ethernet/intel/idpf/idpf_singleq_txrx.c | 113 +++- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 125 +++--- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 69 ++ 8 files changed, 151 insertions(+), 251 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/Kconfig b/drivers/net/ethernet/intel/idpf/Kconfig index bee83a40f218..bd96ab923b07 100644 --- a/drivers/net/ethernet/intel/idpf/Kconfig +++ b/drivers/net/ethernet/intel/idpf/Kconfig @@ -5,6 +5,7 @@ config IDPF tristate "Intel(R) Infrastructure Data Path Function Support" depends on PCI_MSI select DIMLIB + select LIBETH select PAGE_POOL select PAGE_POOL_STATS help diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 5d9529f5b41b..078340a01757 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -312,7 +312,7 @@ struct idpf_vport { u16 num_rxq_grp; struct idpf_rxq_group *rxq_grps; u32 rxq_model; - struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE]; + struct libeth_rx_pt *rx_ptype_lkup; struct idpf_adapter *adapter; struct net_device *netdev; diff --git 
#define IDPF_INVALID_PTYPE_ID 0xFFFF
0, - IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY2= 1, - IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY3= 2, - IDPF_RX_PTYPE_PAYLOAD_LAYER_PAY4= 3, -}; - enum idpf_tunnel_state { IDPF_PTYPE_TUNNEL_IP= BIT(0), IDPF_PTYPE_TUNNEL_IP_GRENAT = BIT(1), @@ -404,22 +338,9 @@ enum
[Intel-wired-lan] [PATCH RFC iwl-next 10/12] libeth: support different types of buffers for Rx
+#define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256))
whether header split is enabled * @buf_len: HW-writeable length per each buffer * @nid: ID of the closest NUMA node with memory */ @@ -63,6 +79,9 @@ struct libeth_fq { ); /* Cold fields */ + enum libeth_fqe_typetype:2; + boolhsplit:1; + u32 buf_len; int nid; }; diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c index 6221b88c34ac..4f7952a344c6 100644 --- a/drivers/net/ethernet/intel/libeth/rx.c +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -6,7 +6,7 @@ /* Rx buffer management */ /** - * libeth_rx_hw_len - get the actual buffer size to be passed to HW + * libeth_rx_hw_len_mtu - get the actual buffer size to be passed to HW * @pp: &page_pool_params of the netdev to calculate the size for * @max_len: maximum buffer size for a single descriptor * @@ -14,7 +14,7 @@ * MTU the @dev has, HW required alignment, minimum and maximum allowed values, * and system's page size. */ -static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len) +static u32 libeth_rx_hw_len_mtu(const struct page_pool_params *pp, u32 max_len) { u32 len; @@ -26,6 +26,106 @@ static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len) return len; } +/** + * libeth_rx_hw_len_truesize - get the short buffer size to be passed to HW + * @pp: &page_pool_params of the netdev to calculate the size for + * @max_len: maximum buffer size for a single descriptor + * @truesize: desired truesize for the buffers + * + * Return: HW-writeable length per one buffer to pass it to the HW ignoring the + * MTU and closest to the passed truesize. Can be used for "short" buffer + * queues to fragment pages more efficiently. 
+ */ +static u32 libeth_rx_hw_len_truesize(const struct page_pool_params *pp, +u32 max_len, u32 truesize) +{ + u32 min, len; + + min = SKB_HEAD_ALIGN(pp->offset + LIBETH_RX_BUF_STRIDE); + truesize = clamp(roundup_pow_of_two(truesize), roundup_pow_of_two(min), +PAGE_SIZE << LIBETH_RX_PAGE_ORDER); + + len = SKB_WITH_OVERHEAD(truesize - pp->offset); + len = ALIGN_DOWN(len, LIBETH_RX_BUF_STRIDE) ? : LIBETH_RX_BUF_STRIDE; + len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE), + pp->max_len); + + return len; +} + +static bool libeth_rx_page_pool_params(struct libeth_fq *fq, + struct page_pool_params *pp) +{ + pp->offset = LIBETH_SKB_HEADROOM; + /* HW-writeable / syncable length per one page */ + pp->max_len = LIBETH_RX_PAGE_LEN(pp->offset); + + /* HW-writeable length per buffer */ + switch (fq->type) { + case LIBETH_F
[Intel-wired-lan] [PATCH RFC iwl-next 11/12] idpf: convert header split mode to libeth + napi_build_skb()
When you enable header split, you expect you'll always have a separate header buffer, so that you could reserve headroom and tailroom only there and then use full buffers for the data.
* struct idpf_buf_queue - software structure representing a buffer queue
@desc_count: Number of descriptors * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean * @next_to_alloc: RX buffer to allocate at + * @hdr_truesize: truesize for buffer headers * @q_id: Queue id * @size: Length of descriptor ring in bytes * @dma: Physical address of ring @@ -767,16 +758,16 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64, struct idpf_buf_queue { libeth_cacheline_group(read_mostly, struct virtchnl2_splitq_rx_buf_desc *split_buf; - struct { - struct idpf_rx_buf *buf; - dma_addr_t hdr_buf_pa; - void *hdr_buf_va; - } rx_buf; + struct libeth_fqe *hdr_buf; + struct page_pool *hdr_pp; + struct idpf_rx_buf *buf; struct page_pool *pp; void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u32 desc_count; + + u32 hdr_truesize; ); libeth_cacheline_group(read_write, u32 next_to_use; @@ -795,7 +786,7 @@ struct idpf_buf_q
[Intel-wired-lan] [PATCH RFC iwl-next 12/12] idpf: use libeth Rx buffer management for payload buffer
The memory usage is now greatly reduced and 2 buffer queues start to make sense: on frames <= 1024, you'll recycle (and resync) a page only after 4 HW writes rather than two.
* struct idpf_rx_queue - software structure representing a receive queue
* struct idpf_buf_queue - software structure representing a buffer queue
[Intel-wired-lan] [PATCH RFC iwl-next 03/12] idpf: split &idpf_queue into 4 strictly-typed queue structures
Currently, sizeof(struct idpf_queue) is 32 Kb. This is due to the 12-bit hashtable declaration at the end of the queue. This HT is needed only for Tx queues when the flow scheduling mode is enabled. But &idpf_queue is unified for all of the queue types, provoking excessive memory usage. The unified structure in general makes the code less effective via suboptimal fields placement. You can't avoid that unless you make unions each 2 fields. Even then, different field alignment etc., doesn't allow you to optimize things to the limit. Split &idpf_queue into 4 structures corresponding to the queue types: RQ (Rx queue), SQ (Tx queue), FQ (buffer queue), and CQ (completion queue). Place only needed fields there and shortcuts handy for hotpath. Allocate the abovementioned hashtable dynamically and only when needed, keeping &idpf_tx_queue relatively short (192 bytes, same as Rx). This HT is used only for OOO completions, which aren't really hotpath anyway. Note that this change must be done atomically, otherwise it's really easy to get lost and miss something. 
Signed-off-by: Alexander Lobakin --- drivers/net/ethernet/intel/idpf/idpf.h| 3 +- drivers/net/ethernet/intel/idpf/idpf_txrx.h | 431 ++--- .../net/ethernet/intel/idpf/idpf_ethtool.c| 125 +-- drivers/net/ethernet/intel/idpf/idpf_lib.c| 46 +- .../ethernet/intel/idpf/idpf_singleq_txrx.c | 149 +-- drivers/net/ethernet/intel/idpf/idpf_txrx.c | 913 +++--- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 73 +- 7 files changed, 1019 insertions(+), 721 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 0b26dd9b8a51..f9e43d171f17 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -17,7 +17,6 @@ struct idpf_vport_max_q; #include #include #include -#include #include "virtchnl2.h" #include "idpf_txrx.h" @@ -301,7 +300,7 @@ struct idpf_vport { u16 num_txq_grp; struct idpf_txq_group *txq_grps; u32 txq_model; - struct idpf_queue **txqs; + struct idpf_tx_queue **txqs; bool crc_enable; u16 num_rxq; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 8794ce018911..fb645c6887b3 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -4,6 +4,8 @@ #ifndef _IDPF_TXRX_H_ #define _IDPF_TXRX_H_ +#include + #include #include #include @@ -84,7 +86,7 @@ do { \ if (unlikely(++(ntc) == (rxq)->desc_count)) { \ ntc = 0;\ - change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \ + idpf_queue_change(GEN_CHK, rxq);\ } \ } while (0) @@ -111,10 +113,9 @@ do { \ */ #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 -#define IDPF_RX_BI_BUFID_S 0 -#define IDPF_RX_BI_BUFID_M GENMASK(14, 0) -#define IDPF_RX_BI_GEN_S 15 -#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S) +#define IDPF_RX_BI_GEN_M BIT(16) +#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) + #define IDPF_RXD_EOF_SPLITQVIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M @@ -122,7 +123,7 @@ do { \ 
txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) -#define IDPF_TX_BUF_RSV_UNUSED(txq)((txq)->buf_stack.top) +#define IDPF_TX_BUF_RSV_UNUSED(txq)((txq)->stash->buf_stack.top) #define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ (txq)->desc_count >> 2) @@ -433,23 +434,37 @@ struct idpf_rx_ptype_decoded { * to 1 and knows that reading a gen bit of 1 in any * descriptor on the initial pass of the ring indicates a * writeback. It also flips on every ring wrap. - * @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit - * and RFLGQ_GEN is the SW bit. + * @__IDPF_Q_RFL_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW + * bit and Q_RFL_GEN is the SW bit. * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions * @__IDPF_Q_POLL_MODE: Enable poll mode + * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode + * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq) * @__IDPF_Q_FLAGS_NBITS: Must be last */ enum idpf_queue_flags_t { __IDPF_Q_GEN_CH
Re: [Intel-wired-lan] [PATCH RFC iwl-next 08/12] idpf: reuse libeth's definitions of parsed ptype structures
On Fri, May 10, 2024 at 8:30 AM Alexander Lobakin wrote: > > idpf's in-kernel parsed ptype structure is almost identical to the one > used in the previous Intel drivers, which means it can be converted to > use libeth's definitions and even helpers. The only difference is that > it doesn't use a constant table (unlike libie), but rather one obtained from > the device. > Remove the driver counterpart and use libeth's helpers for hashes and > checksums. This slightly optimizes skb fields processing due to faster > checks. Also don't define a big static array of ptypes in &idpf_vport -- > allocate them dynamically. The pointer to it is anyway cached in > &idpf_rx_queue. > > Signed-off-by: Alexander Lobakin > --- > drivers/net/ethernet/intel/idpf/Kconfig | 1 + > drivers/net/ethernet/intel/idpf/idpf.h| 2 +- > drivers/net/ethernet/intel/idpf/idpf_txrx.h | 88 +--- > drivers/net/ethernet/intel/idpf/idpf_lib.c| 3 + > drivers/net/ethernet/intel/idpf/idpf_main.c | 1 + > .../ethernet/intel/idpf/idpf_singleq_txrx.c | 113 +++- > drivers/net/ethernet/intel/idpf/idpf_txrx.c | 125 +++--- > .../net/ethernet/intel/idpf/idpf_virtchnl.c | 69 ++ > 8 files changed, 151 insertions(+), 251 deletions(-) > ... 
> * idpf_send_get_rx_ptype_msg - Send virtchnl for ptype info > * @vport: virtual port data structure > @@ -2526,7 +2541,7 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) > { > struct virtchnl2_get_ptype_info *get_ptype_info __free(kfree) = NULL; > struct virtchnl2_get_ptype_info *ptype_info __free(kfree) = NULL; > - struct idpf_rx_ptype_decoded *ptype_lkup = vport->rx_ptype_lkup; > + struct libeth_rx_pt *ptype_lkup __free(kfree) = NULL; > int max_ptype, ptypes_recvd = 0, ptype_offset; > struct idpf_adapter *adapter = vport->adapter; > struct idpf_vc_xn_params xn_params = {}; > @@ -2534,12 +2549,17 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport > *vport) > ssize_t reply_sz; > int i, j, k; > > + if (vport->rx_ptype_lkup) > + return 0; > + > if (idpf_is_queue_model_split(vport->rxq_model)) > max_ptype = IDPF_RX_MAX_PTYPE; > else > max_ptype = IDPF_RX_MAX_BASE_PTYPE; > > - memset(vport->rx_ptype_lkup, 0, sizeof(vport->rx_ptype_lkup)); > + ptype_lkup = kcalloc(max_ptype, sizeof(*ptype_lkup), GFP_KERNEL); > + if (!ptype_lkup) > + return -ENOMEM; > > get_ptype_info = kzalloc(sizeof(*get_ptype_info), GFP_KERNEL); > if (!get_ptype_info) > @@ -2604,9 +2624,6 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport *vport) > else > k = ptype->ptype_id_8; > > - if (ptype->proto_id_count) > - ptype_lkup[k].known = 1; > - > for (j = 0; j < ptype->proto_id_count; j++) { > id = le16_to_cpu(ptype->proto_id[j]); > switch (id) { > @@ -2614,18 +2631,18 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport > *vport) > if (pstate.tunnel_state == > IDPF_PTYPE_TUNNEL_IP) > { > ptype_lkup[k].tunnel_type = > - > IDPF_RX_PTYPE_TUNNEL_IP_GRENAT; > + LIBETH_RX_PT_TUNNEL_IP_GRENAT; > pstate.tunnel_state |= > IDPF_PTYPE_TUNNEL_IP_GRENAT; > } > break; > case VIRTCHNL2_PROTO_HDR_MAC: > ptype_lkup[k].outer_ip = > - IDPF_RX_PTYPE_OUTER_L2; > + LIBETH_RX_PT_OUTER_L2; > if (pstate.tunnel_state == > IDPF_TUN_IP_GRE) { > ptype_lkup[k].tunnel_type = > - > IDPF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC; > 
+ > LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC; > pstate.tunnel_state |= > > IDPF_PTYPE_TUNNEL_IP_GRENAT_MAC; > } > @@ -2652,23 +2669,23 @@ int idpf_send_get_rx_ptype_msg(struct idpf_vport > *vport) > break; > case VIRTCHNL2_PROTO_HDR_UDP: > ptype_lkup[k].inner_prot = > -