Hi, Shahaf > -----Original Message----- > From: Shahaf Shuler > Sent: Sunday, April 14, 2019 10:43 > To: Slava Ovsiienko <viachesl...@mellanox.com>; dev@dpdk.org > Subject: RE: [dpdk-dev] [PATCH 1/1] net/mlx5: add support for PF > representor > > Hi Slava, > > Friday, April 12, 2019 6:48 PM, Viacheslav Ovsiienko: > > Subject: [dpdk-dev] [PATCH 1/1] net/mlx5: add support for PF > > representor > > > > On BlueField platform we have the new entity - PF representor. > > This one represents the PCI PF attached to external host on the side of > ARM. > > The traffic sent by the external host to the NIC via PF will be seem > > by ARM on this PF representor. > > > > This patch extends port recognizing capability on the base of physical > > port name. The following naming formats are supported: > > > > - missing physical port name (no sysfs/netlink key) at all, > > this is old style (before kernel 5.0) format, master assumed > > - 1 (decimal digits) - old style (before kernel 5.0) format, > > exists only for representors, master does not have physical > > port name at all (see above) > > - p0 ("p" followed by decimal digits), new style (kernel version > > is 5.0 or higher, Mellanox OFED 4.6 or higher) name format > > for uplink representor, plays the role of master > > - pf0vf0 ("pf" followed by PF index concatenated with "vf" > > followed by VF index), new style (kernel version is 5.0 > > or higher, Mellanox OFED 4.6 or higher) name format for > > VF representor. If index of VF is "-1" it is a special case > > of host PF representor, this representor must be indexed in > > devargs as 65535, for example representor=[0-3,65535] will > > allow representors for VF0, VF1, VF2, VF3 and host PF. > > Note: do not specify representor=[0-65535] it causes devargs > > processing error, because number of ports (rte_eth_dev) is > > limited. > > > > The above is a bit complex to understand and in fact we have 2 modes: > 1. legacy - phys_port_name are numbers. Master doesn't have > phys_port_name 2. modern - phys_port_name are strings. > uplink representor is p%d > representors are (including PF representor) pf%dvf%d. the vf index for the PF > representor is 65535.
OK, I'll try to update the commit message to make it more clear. > > > Applications should distinguish representors and master devices > > exclusively by device flag RTE_ETH_DEV_REPRESENTOR and do not rely on > > switch port_id (mlx5 PMD deduces ones from representor_id) values > > returned by > > dev_infos_get() API. > > > > Please also reference the kernel commit which introduce the name change. OK. > > > Signed-off-by: Viacheslav Ovsiienko <viachesl...@mellanox.com> > > --- > > drivers/net/mlx5/mlx5.h | 11 ++++++- > > drivers/net/mlx5/mlx5_ethdev.c | 68 +++++++++++++++++++++++++++---- > > ----------- > > drivers/net/mlx5/mlx5_nl.c | 42 +++++++++++++++++--------- > > 3 files changed, 82 insertions(+), 39 deletions(-) > > > > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index > > 8eb1019..81c02ce 100644 > > --- a/drivers/net/mlx5/mlx5.h > > +++ b/drivers/net/mlx5/mlx5.h > > @@ -80,11 +80,20 @@ struct mlx5_mp_param { > > /** Key string for IPC. */ > > #define MLX5_MP_NAME "net_mlx5_mp" > > > > +/* Recognized Infiniband device physical port name types. */ enum > > +mlx5_phys_port_name_type { > > + MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN = 0, /* Unrecognized. > > */ > > + MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 > > */ > > + MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ > > + MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 > > */ }; > > + > > /** Switch information returned by mlx5_nl_switch_info(). */ struct > > mlx5_switch_info { > > uint32_t master:1; /**< Master device. */ > > uint32_t representor:1; /**< Representor device. */ > > - uint32_t port_name_new:1; /**< Rep. port name is in new format. > > */ > > + enum mlx5_phys_port_name_type name_type; /** < Port name > > type. */ > > + int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ > > int32_t port_name; /**< Representor port name. */ > > uint64_t switch_id; /**< Switch identifier. */ }; diff --git > > a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c > > index 3992918..371989f 100644 > > --- a/drivers/net/mlx5/mlx5_ethdev.c > > +++ b/drivers/net/mlx5/mlx5_ethdev.c > > @@ -1395,12 +1395,11 @@ int mlx5_fw_version_get(struct rte_eth_dev > > *dev, char *fw_ver, size_t fw_size) > > struct mlx5_switch_info data = { > > .master = 0, > > .representor = 0, > > - .port_name_new = 0, > > + .name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, > > .port_name = 0, > > .switch_id = 0, > > }; > > DIR *dir; > > - bool port_name_set = false; > > bool port_switch_id_set = false; > > bool device_dir = false; > > char c; > > @@ -1423,8 +1422,7 @@ int mlx5_fw_version_get(struct rte_eth_dev > *dev, > > char *fw_ver, size_t fw_size) > > ret = fscanf(file, "%s", port_name); > > fclose(file); > > if (ret == 1) > > - port_name_set = > > mlx5_translate_port_name(port_name, > > - &data); > > + mlx5_translate_port_name(port_name, &data); > > } > > file = fopen(phys_switch_id, "rb"); > > if (file == NULL) { > > @@ -1440,8 +1438,15 @@ int mlx5_fw_version_get(struct rte_eth_dev > > *dev, char *fw_ver, size_t fw_size) > > closedir(dir); > > device_dir = true; > > } > > - data.master = port_switch_id_set && (!port_name_set || > > device_dir); > > - data.representor = port_switch_id_set && port_name_set && > > !device_dir; > > + if (port_switch_id_set) { > > + data.master = > > + device_dir || > > + data.name_type == > > MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN || > > + data.name_type == > > MLX5_PHYS_PORT_NAME_TYPE_UPLINK; > > + data.representor = !device_dir && > > + (data.name_type == > > MLX5_PHYS_PORT_NAME_TYPE_LEGACY || > > + data.name_type == > > MLX5_PHYS_PORT_NAME_TYPE_PFVF); > > > Why we need to split the logic of the master/representor detection between > the mlx5_translate_port_name and the caller function? We have two stages: - gathering the port attributes (name, vf_num, switchdev_id, presence of device directory, etc.) in callbacks - analyzing the parameters on gathering completion. We need all the parameters to make a reliable master/representor recognition. Translate routine is called on gathering stage and just recognizes the name format, extracts useful values (pf/vf num) and stores the results in compact form. It is easier than storing the entire port name. > > The way I envision it is mlx5_tranlate_port_name receives the > phys_port_name and maybe more metadata and return the port This "metadata" may be not gathered yet at the moment of port name processing. > classification (master/representor) and the representor/pf number. > No need for data.master = some_logic(translate_port_name_info). > > Inside the translate function I would expect to have 2 smaller function: > 1. to handle the modern format (strings) 2. to handle the legacy format > (integers) Actually this patch is also some kind of preparation for coming subfuctions. If we have set of mutual exclusive formats the switch/case seems to be the most native way to treat these ones. I'd prefer to keep switch/case. Comments are clear and it is easy to understand where legacy/new format is treated. And it is easy to extend for coming subfunctions. I think we should isolate master/representor recognition logic into separate routine, "translate" routine is for parameter gathering stage, "recognize" for analyzing. > > > + } > > *info = data; > > assert(!(data.master && data.representor)); > > if (data.master && data.representor) { @@ -1459,10 +1464,11 @@ > int > > mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t > > fw_size) > > * @param[in] port_name_in > > * String representing the port name. > > * @param[out] port_info_out > > - * Port information, including port name as a number. > > + * Port information, including port name as a number and port name > > + * type if recognized > > * > > * @return > > - * true on success, false otherwise. > > + * true on success (if name format recognized), false otherwise. > > */ > > bool > > mlx5_translate_port_name(const char *port_name_in, @@ -1470,25 > > +1476,39 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char > > *fw_ver, size_t fw_size) { > > char pf_c1, pf_c2, vf_c1, vf_c2; > > char *end; > > - int32_t pf_num; > > - bool port_name_set = false; > > + int sc_items; > > > > /* > > * Check for port-name as a string of the form pf0vf0 > > - * (support kernel ver >= 5.0) > > + * (support kernel ver >= 5.0 or OFED ver >= 4.6). > > */ > > - port_name_set = (sscanf(port_name_in, "%c%c%d%c%c%d", > > &pf_c1, &pf_c2, > > - &pf_num, &vf_c1, &vf_c2, > > - &port_info_out->port_name) == 6); > > - if (port_name_set) { > > - port_info_out->port_name_new = 1; > > - } else { > > - /* Check for port-name as a number (support kernel ver < > > 5.0 */ > > - errno = 0; > > - port_info_out->port_name = strtol(port_name_in, &end, 0); > > - if (!errno && > > - (size_t)(end - port_name_in) == strlen(port_name_in)) > > - port_name_set = true; > > + sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", > > + &pf_c1, &pf_c2, &port_info_out->pf_num, > > + &vf_c1, &vf_c2, &port_info_out->port_name); > > + if (sc_items == 6 && > > + pf_c1 == 'p' && pf_c2 == 'f' && > > + vf_c1 == 'v' && vf_c2 == 'f') { > > + port_info_out->name_type = > > MLX5_PHYS_PORT_NAME_TYPE_PFVF; > > + return true; > > + } > > + /* > > + * Check for port-name as a string of the form p0 > > + * (support kernel ver >= 5.0, or OFED ver >= 4.6). > > + */ > > + sc_items = sscanf(port_name_in, "%c%d", > > + &pf_c1, &port_info_out->port_name); > > + if (sc_items == 2 && pf_c1 == 'p') { > > + port_info_out->name_type = > > MLX5_PHYS_PORT_NAME_TYPE_UPLINK; > > + return true; > > + } > > + /* Check for port-name as a number (support kernel ver < 5.0 */ > > + errno = 0; > > + port_info_out->port_name = strtol(port_name_in, &end, 0); > > + if (!errno && > > + (size_t)(end - port_name_in) == strlen(port_name_in)) { > > + port_info_out->name_type = > > MLX5_PHYS_PORT_NAME_TYPE_LEGACY; > > + return true; > > } > > - return port_name_set; > > + port_info_out->name_type = > > MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; > > + return false; > > } > > diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c > > index > > fd9226b..669de76 100644 > > --- a/drivers/net/mlx5/mlx5_nl.c > > +++ b/drivers/net/mlx5/mlx5_nl.c > > @@ -887,12 +887,11 @@ struct mlx5_nl_ifindex_data { > > struct mlx5_switch_info info = { > > .master = 0, > > .representor = 0, > > - .port_name_new = 0, > > + .name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, > > .port_name = 0, > > .switch_id = 0, > > }; > > size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); > > - bool port_name_set = false; > > bool switch_id_set = false; > > bool num_vf_set = false; > > > > @@ -910,9 +909,7 @@ struct mlx5_nl_ifindex_data { > > num_vf_set = true; > > break; > > case IFLA_PHYS_PORT_NAME: > > - port_name_set = > > - mlx5_translate_port_name((char *)payload, > > - &info); > > + mlx5_translate_port_name((char *)payload, &info); > > break; > > case IFLA_PHYS_SWITCH_ID: > > info.switch_id = 0; > > @@ -926,16 +923,33 @@ struct mlx5_nl_ifindex_data { > > off += RTA_ALIGN(ra->rta_len); > > } > > if (switch_id_set) { > > - if (info.port_name_new) { > > - /* New representors naming schema. */ > > - if (port_name_set) { > > - info.master = (info.port_name == -1); > > - info.representor = (info.port_name != -1); > > - } > > - } else { > > + /* We have some E-Switch configuration. */ > > + switch (info.name_type) { > > + case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: > > + /* > > + * Name is not recognized or not set, > > + * it can not be representor, check > > + * VF number to see if it is a master. > > + */ > > + info.master = num_vf_set; > > + break; > > + case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: > > /* Legacy representors naming schema. */ > > - info.master = (!port_name_set || num_vf_set); > > - info.representor = port_name_set && !num_vf_set; > > + info.representor = !num_vf_set; > > + break; > > + case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: > > + /* New uplink naming schema. */ > > + info.master = 1; > > + break; > > + case MLX5_PHYS_PORT_NAME_TYPE_PFVF: > > + /* New representors naming schema. */ > > + info.representor = 1; > > + break; > > + } > > + if (!info.master && !info.representor) { > > + DRV_LOG(INFO, > > + "unable to recognize master/representors" > > + " on the device in switch domain"); > > Same comment as above. Would like to avoid this switch case outside of the > translate function . Don't you mind if we provide separated analyzing routine ? > > > } > > } > > assert(!(info.master && info.representor)); > > -- > > 1.8.3.1 With best regards, Slava