On Mon, May 11, 2026 at 05:08:50PM +0200, Maxime Peim wrote:
> In eSwitch mode, the async (template) flow creation path automatically
> prepends implicit match items to scope flow rules to the correct
> representor port:
>   - Ingress: REPRESENTED_PORT item matching dev->data->port_id
>   - Egress: REG_C_0 TAG item matching the port's tx tag value
> 
> The sync path (flow_hw_list_create) was missing this logic, causing all
> flow rules created via the non-template API to match traffic from all
> ports rather than being scoped to the specific representor.
> 
> Add the same implicit item prepending to flow_hw_list_create, right
> after pattern validation and before any branching (sample/RSS/single/
> prefix), mirroring the behavior of flow_hw_pattern_template_create
> and flow_hw_get_rule_items. The ingress case prepends
> REPRESENTED_PORT with the current port_id; the egress case prepends
> MLX5_RTE_FLOW_ITEM_TYPE_TAG with REG_C_0 value/mask (skipped when
> user provides an explicit SQ item).
> 
> Also fix a pre-existing bug where 'return split' on metadata split
> failure returned a negative int cast to uintptr_t, which callers
> would treat as a valid flow handle instead of an error.
> 
> Fixes: e38776c36c8a ("net/mlx5: introduce HWS for non-template flow API")
> Fixes: 821a6a5cc495 ("net/mlx5: add metadata split for compatibility")
> Signed-off-by: Maxime Peim <[email protected]>
> ---
> v3:
>   - Factor the implicit-item prepend logic out of
>     flow_hw_pattern_template_create() into a new helper
>     flow_hw_adjust_pattern() and reuse it from flow_hw_list_create(),
>     instead of duplicating the prepend logic inline in the sync path.
>   - Zero-initialize item_flags in both callers. The validator is
>     read-modify-write on item_flags (reads MLX5_FLOW_LAYER_TUNNEL on
>     the first iteration), so leaving it uninitialized was UB.
>   - Call __flow_hw_pattern_validate() with nt_flow=true from the sync
>     path (was effectively nt_flow=false via the wrapper), restoring the
>     previous behavior that skips GENEVE_OPT TLV parser validation on
>     the non-template path.
>   - Document flow_hw_adjust_pattern(): the dual role of the nt_flow
>     parameter (template spec-left-zero vs. sync spec-filled + validator
>     flag), the three-way return, and the caller's ownership of
>     *copied_items across every exit path.
>   - Clarify the "omitting implicit REG_C_0 match" debug log now that
>     the helper runs on both the template and sync paths.
>   - Add Fixes: tags for the two original commits.
> 
> v4:
>   - Fix items in case splitted metadata are not needed.
> 
>  drivers/net/mlx5/mlx5_flow_hw.c | 194 ++++++++++++++++++++++----------
>  1 file changed, 132 insertions(+), 62 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
> index bca5b2769e..6b3fcb43a7 100644
> --- a/drivers/net/mlx5/mlx5_flow_hw.c
> +++ b/drivers/net/mlx5/mlx5_flow_hw.c
> @@ -9255,33 +9255,40 @@ pattern_template_validate(struct rte_eth_dev *dev,
>       return -ret;
>  }
>  
> -/**
> - * Create flow item template.
> +/*
> + * Validate the user-supplied items and, in eSwitch mode, prepend the 
> implicit
> + * scoping item so the rule/template is bound to the current representor 
> port:
> + *   - ingress -> RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT (dev->data->port_id)
> + *   - egress  -> MLX5_RTE_FLOW_ITEM_TYPE_TAG on REG_C_0 (tx vport tag),
> + *                skipped when the user already supplied an SQ item.
>   *
> - * @param[in] dev
> - *   Pointer to the rte_eth_dev structure.
> - * @param[in] attr
> - *   Pointer to the item template attributes.
> - * @param[in] items
> - *   The template item pattern.
> - * @param[out] error
> - *   Pointer to error structure.
> + * @param nt_flow
> + *   Selects between the two call paths that share this helper:
> + *     false -> pattern template creation (async API). The prepended item's
> + *              spec is left zeroed so mlx5dr matches any value; the live
> + *              port_id / tx-tag value is substituted later by
> + *              flow_hw_get_rule_items() at rule-create time.
> + *     true  -> sync (non-template) flow creation. The prepended item's spec
> + *              is filled immediately with the live values, and the flag is
> + *              forwarded to __flow_hw_pattern_validate() so that validation
> + *              paths gated on nt_flow (e.g. GENEVE_OPT TLV parser creation)
> + *              take the non-template branch.
>   *
> - * @return
> - *  Item template pointer on success, NULL otherwise and rte_errno is set.
> + * Return / ownership:
> + *   - NULL on validation or allocation failure (error populated).
> + *   - `items` unchanged when no prepending is required; *copied_items == 
> NULL.
> + *   - A newly-allocated array otherwise; also stored in *copied_items. The
> + *     caller must mlx5_free(*copied_items) on every path (it is safe to call
> + *     with NULL). Do not free the returned pointer directly.
>   */
> -static struct rte_flow_pattern_template *
> -flow_hw_pattern_template_create(struct rte_eth_dev *dev,
> -                          const struct rte_flow_pattern_template_attr *attr,
> -                          const struct rte_flow_item items[],
> -                          bool external,
> -                          struct rte_flow_error *error)
> +static const struct rte_flow_item *
> +flow_hw_adjust_pattern(struct rte_eth_dev *dev, const struct 
> rte_flow_pattern_template_attr *attr,
> +                    bool nt_flow, const struct rte_flow_item *items, 
> uint64_t *item_flags,
> +                    uint64_t *nb_items, struct rte_flow_item **copied_items,
> +                    struct rte_flow_error *error)

It seems that there's an issue with dangling pointer in 
flow_hw_adjust_pattern():

- flow_hw_adjust_pattern() defines TAG/REPRESENTED_PORT item spec/mask on
  its local stack frame,
- If prepending is needed, then flow_hw_prepend_item() will copy only
  the rte_flow_item struct. spec/mask is not copied.
- After flow_hw_adjust_pattern() finishes,
  "copied_items" will have an item with spec/mask pointing to invalid stack 
data.

This wasn't previously a problem in flow_hw_pattern_template_create(),
because items' spec/mask were only needed during the duration of that
function.

Could you please fix the above? It'll require extending amount of memory
allocated by flow_hw_prepend_item(), ideally everything should be done
in single allocation, so freeing copied_items would still require
single call to mlx5_free().

ASAN, with detect_stack_use_after_return enabled, report is below.

        # repro steps

        meson setup build -Dbuildtype=debug -Db_sanitize=address 
-Denable_drivers=bus/auxiliary,common/mlx5,net/mlx5 -Denable_apps=test-pmd
        meson compile -C build -j $(nproc --ignore 1)

        env ASAN_OPTIONS=detect_stack_use_after_return=1 
./build/app/dpdk-testpmd -a 08:00.0,dv_flow_en=2,representor=vf0-1 -- 
--flow-isolate-all -i

        testpmd> flow create 0 group 0 priority 0 ingress pattern end actions 
jump group 1 / end

        # ASAN report

        =================================================================
        ==59105==ERROR: AddressSanitizer: stack-use-after-return on address 
0x71e744675730 at pc 0x5d70bed42c6d bp 0x7fff52519b40 sp 0x7fff52519b30
        READ of size 2 at 0x71e744675730 thread T0
            #0 0x5d70bed42c6c in flow_dv_translate_item_represented_port 
../drivers/net/mlx5/mlx5_flow_dv.c:11059
            #1 0x5d70bed5c9b1 in flow_dv_translate_items 
../drivers/net/mlx5/mlx5_flow_dv.c:14301
            #2 0x5d70bed60620 in flow_dv_translate_items_nta 
../drivers/net/mlx5/mlx5_flow_dv.c:14646
            #3 0x5d70bed60fb5 in mlx5_flow_dv_translate_items_hws_impl 
../drivers/net/mlx5/mlx5_flow_dv.c:14714
            #4 0x5d70c1288ea2 in mlx5_flow_hw_create_flow 
../drivers/net/mlx5/mlx5_flow_hw.c:14134
            #5 0x5d70c128e45c in flow_hw_list_create 
../drivers/net/mlx5/mlx5_flow_hw.c:14394
            #6 0x5d70beca71b9 in mlx5_flow_list_create 
../drivers/net/mlx5/mlx5_flow.c:8068
            #7 0x5d70beca6e33 in mlx5_flow_create 
../drivers/net/mlx5/mlx5_flow.c:8044
            #8 0x5d70be95670e in rte_flow_create ../lib/ethdev/rte_flow.c:426
            #9 0x5d70bdcaf19a in port_flow_create ../app/test-pmd/config.c:3869
            #10 0x5d70bdc3983d in cmd_flow_parsed 
../app/test-pmd/cmdline_flow.c:13564
            #11 0x5d70bdc3a376 in cmd_flow_cb 
../app/test-pmd/cmdline_flow.c:13634
            #12 0x5d70be8ad00a in __cmdline_parse 
../lib/cmdline/cmdline_parse.c:296
            #13 0x5d70be8ad0b8 in cmdline_parse 
../lib/cmdline/cmdline_parse.c:305
            #14 0x5d70be8a87ef in cmdline_valid_buffer 
../lib/cmdline/cmdline.c:25
            #15 0x5d70be8b405e in rdline_char_in 
../lib/cmdline/cmdline_rdline.c:470
            #16 0x5d70be8a9097 in cmdline_in ../lib/cmdline/cmdline.c:154
            #17 0x5d70be8a932f in cmdline_interact ../lib/cmdline/cmdline.c:202
            #18 0x5d70bdc12af6 in prompt ../app/test-pmd/cmdline.c:14586
            #19 0x5d70bdd5d636 in main ../app/test-pmd/testpmd.c:4792
            #20 0x71e746c2a1c9 in __libc_start_call_main 
../sysdeps/nptl/libc_start_call_main.h:58
            #21 0x71e746c2a28a in __libc_start_main_impl ../csu/libc-start.c:360
            #22 0x5d70bdbc0944 in _start 
(/auto/swgwork9/dsosnowski/code/nvidia/dpdk/build/app/dpdk-testpmd+0x358944) 
(BuildId: 04ff6d26a7ff0a65637cb32f087bae007e00c965)

        Address 0x71e744675730 is located in stack of thread T0 at offset 48 in 
frame
            #0 0x5d70c115c601 in flow_hw_adjust_pattern 
../drivers/net/mlx5/mlx5_flow_hw.c:9289

          This frame has 5 object(s):
            [48, 50) 'port_spec' (line 9291) <== Memory access at offset 48 is 
inside this variable
            [64, 72) 'tag_v' (line 9296)
            [96, 104) 'tag_m' (line 9300)
            [128, 160) 'port' (line 9292)
            [192, 224) 'tag' (line 9304)
        HINT: this may be a false positive if your program uses some custom 
stack unwind mechanism, swapcontext or vfork
              (longjmp and C++ exceptions *are* supported)
        SUMMARY: AddressSanitizer: stack-use-after-return 
../drivers/net/mlx5/mlx5_flow_dv.c:11059 in 
flow_dv_translate_item_represented_port
        Shadow bytes around the buggy address:
          0x71e744675480: f5 f5 f5 f5 00 00 00 00 00 00 00 00 00 00 00 00
          0x71e744675500: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
          0x71e744675580: f5 f5 f5 f5 f5 f5 f5 f5 00 00 00 00 00 00 00 00
          0x71e744675600: f1 f1 f1 f1 f1 f1 02 f2 00 f2 f2 f2 00 f2 f2 f2
          0x71e744675680: 00 f2 f2 f2 00 f3 f3 f3 00 00 00 00 00 00 00 00
        =>0x71e744675700: f5 f5 f5 f5 f5 f5[f5]f5 f5 f5 f5 f5 f5 f5 f5 f5
          0x71e744675780: f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5 f5
          0x71e744675800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
          0x71e744675880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
          0x71e744675900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
          0x71e744675980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        Shadow byte legend (one shadow byte represents 8 application bytes):
          Addressable:           00
          Partially addressable: 01 02 03 04 05 06 07
          Heap left redzone:       fa
          Freed heap region:       fd
          Stack left redzone:      f1
          Stack mid redzone:       f2
          Stack right redzone:     f3
          Stack after return:      f5
          Stack use after scope:   f8
          Global redzone:          f9
          Global init order:       f6
          Poisoned by user:        f7
          Container overflow:      fc
          Array cookie:            ac
          Intra object redzone:    bb
          ASan internal:           fe
          Left alloca redzone:     ca
          Right alloca redzone:    cb
        ==59105==ABORTING

>  {
>       struct mlx5_priv *priv = dev->data->dev_private;
> -     struct rte_flow_pattern_template *it;
> -     struct rte_flow_item *copied_items = NULL;
> -     const struct rte_flow_item *tmpl_items;
> -     uint64_t orig_item_nb, item_flags = 0;
> +     struct rte_flow_item_ethdev port_spec = {.port_id = dev->data->port_id};
>       struct rte_flow_item port = {
>               .type = RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
>               .mask = &rte_flow_item_ethdev_mask,
> @@ -9298,39 +9305,89 @@ flow_hw_pattern_template_create(struct rte_eth_dev 
> *dev,
>               .type = (enum rte_flow_item_type)MLX5_RTE_FLOW_ITEM_TYPE_TAG,
>               .spec = &tag_v,
>               .mask = &tag_m,
> -             .last = NULL
> +             .last = NULL,
>       };
> -     int it_items_size;
> -     unsigned int i = 0;
>       int rc;
>  
> +     if (!copied_items || !item_flags || !nb_items)
> +             return NULL;
> +
> +     if (nt_flow) {
> +             port.spec = &port_spec;
> +             tag_v.data = flow_hw_tx_tag_regc_value(dev);
> +     }
> +
> +     /*
> +      * item_flags must be zero-initialized: __flow_hw_pattern_validate()
> +      * OR-accumulates bits into it and reads it (MLX5_FLOW_LAYER_TUNNEL)
> +      * on the first iteration.
> +      */
> +     *item_flags = 0;
> +
>       /* Validate application items only */
> -     rc = flow_hw_pattern_validate(dev, attr, items, &item_flags, error);
> +     rc = __flow_hw_pattern_validate(dev, attr, items, item_flags, nt_flow, 
> error);
>       if (rc < 0)
>               return NULL;
> -     orig_item_nb = rc;
> -     if (priv->sh->config.dv_esw_en &&
> -         attr->ingress && !attr->egress && !attr->transfer) {
> -             copied_items = flow_hw_prepend_item(items, orig_item_nb, &port, 
> error);
> -             if (!copied_items)
> +     *nb_items = rc;
> +
> +     if (priv->sh->config.dv_esw_en && attr->ingress && !attr->egress && 
> !attr->transfer) {
> +             *copied_items = flow_hw_prepend_item(items, *nb_items, &port, 
> error);
> +             if (!*copied_items)
>                       return NULL;
> -             tmpl_items = copied_items;
> -     } else if (priv->sh->config.dv_esw_en &&
> -                !attr->ingress && attr->egress && !attr->transfer) {
> -             if (item_flags & MLX5_FLOW_ITEM_SQ) {
> -                     DRV_LOG(DEBUG, "Port %u omitting implicit REG_C_0 match 
> for egress "
> -                                    "pattern template", dev->data->port_id);
> -                     tmpl_items = items;
> -                     goto setup_pattern_template;
> +             return *copied_items;
> +     } else if (priv->sh->config.dv_esw_en && !attr->ingress && attr->egress 
> &&
> +                !attr->transfer) {
> +             if (*item_flags & MLX5_FLOW_ITEM_SQ) {
> +                     DRV_LOG(DEBUG,
> +                             "Port %u: explicit SQ item present, omitting 
> implicit "
> +                             "REG_C_0 match for egress pattern",
> +                             dev->data->port_id);
> +                     return items;
>               }
> -             copied_items = flow_hw_prepend_item(items, orig_item_nb, &tag, 
> error);
> -             if (!copied_items)
> +             *copied_items = flow_hw_prepend_item(items, *nb_items, &tag, 
> error);
> +             if (!*copied_items)
>                       return NULL;
> -             tmpl_items = copied_items;
> -     } else {
> -             tmpl_items = items;
> +             return *copied_items;
>       }
> -setup_pattern_template:
> +     return items;
> +}
> +
> +/**
> + * Create flow item template.
> + *
> + * @param[in] dev
> + *   Pointer to the rte_eth_dev structure.
> + * @param[in] attr
> + *   Pointer to the item template attributes.
> + * @param[in] items
> + *   The template item pattern.
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *  Item template pointer on success, NULL otherwise and rte_errno is set.
> + */
> +static struct rte_flow_pattern_template *
> +flow_hw_pattern_template_create(struct rte_eth_dev *dev,
> +                          const struct rte_flow_pattern_template_attr *attr,
> +                          const struct rte_flow_item items[],
> +                          bool external,
> +                          struct rte_flow_error *error)
> +{
> +     struct mlx5_priv *priv = dev->data->dev_private;
> +     struct rte_flow_pattern_template *it;
> +     struct rte_flow_item *copied_items = NULL;
> +     const struct rte_flow_item *tmpl_items;
> +     int it_items_size;
> +     uint64_t orig_item_nb, item_flags;
> +     unsigned int i = 0;
> +     int rc;
> +
> +     tmpl_items = flow_hw_adjust_pattern(dev, attr, false, items, 
> &orig_item_nb, &item_flags,
> +                                         &copied_items, error);

This is something I missed on v3, sorry about that.
flow_hw_adjust_pattern() takes:

- "item_flags" as 5th argument,
- "nb_items" as 6th argument.

But both call sites of flow_hw_adjust_pattern() pass them
in reverse order.
Please rearrange the call sites.

> +     if (!tmpl_items)
> +             return NULL;
> +
>       it = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*it), 0, SOCKET_ID_ANY);
>       if (!it) {
>               rte_flow_error_set(error, ENOMEM,
> @@ -14272,7 +14329,6 @@ static uintptr_t flow_hw_list_create(struct 
> rte_eth_dev *dev,
>       struct rte_flow_hw *prfx_flow = NULL;
>       const struct rte_flow_action *qrss = NULL;
>       const struct rte_flow_action *mark = NULL;
> -     uint64_t item_flags = 0;
>       uint64_t action_flags = mlx5_flow_hw_action_flags_get(actions, &qrss, 
> &mark,
>                                                        &encap_idx, 
> &actions_n, error);
>       struct mlx5_flow_hw_split_resource resource = {
> @@ -14289,20 +14345,27 @@ static uintptr_t flow_hw_list_create(struct 
> rte_eth_dev *dev,
>               .egress = attr->egress,
>               .transfer = attr->transfer,
>       };
> -
> -     /* Validate application items only */
> -     ret = __flow_hw_pattern_validate(dev, &pattern_template_attr, items,
> -                                             &item_flags, true, error);
> -     if (ret < 0)
> -             return 0;
> +     struct rte_flow_item *copied_items = NULL;
> +     const struct rte_flow_item *prepend_items;
> +     uint64_t orig_item_nb, item_flags;
>  
>       RTE_SET_USED(encap_idx);
>       if (!error)
>               error = &shadow_error;
> +
> +     prepend_items = flow_hw_adjust_pattern(dev, &pattern_template_attr, 
> true, items,
> +                                            &orig_item_nb, &item_flags, 
> &copied_items, error);
> +     if (!prepend_items)
> +             return 0;
> +
>       split = mlx5_flow_nta_split_metadata(dev, attr, actions, qrss, 
> action_flags,
>                                            actions_n, external, &resource, 
> error);
> -     if (split < 0)
> -             return split;
> +     if (split < 0) {
> +             mlx5_free(copied_items);
> +             return 0;
> +     } else if (!split) {
> +             resource.suffix.items = prepend_items;
> +     }
>  
>       /* Update the metadata copy table - MLX5_FLOW_MREG_CP_TABLE_GROUP */
>       if (((attr->ingress && attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP) ||
> @@ -14313,23 +14376,26 @@ static uintptr_t flow_hw_list_create(struct 
> rte_eth_dev *dev,
>                       goto free;
>       }
>       if (action_flags & MLX5_FLOW_ACTION_SAMPLE) {
> -             flow = mlx5_nta_sample_flow_list_create(dev, type, attr, items, 
> actions,
> +             flow = mlx5_nta_sample_flow_list_create(dev, type, attr, 
> prepend_items, actions,
>                                                       item_flags, 
> action_flags, error);
> -             if (flow != NULL)
> +             if (flow != NULL) {
> +                     mlx5_free(copied_items);
>                       return (uintptr_t)flow;
> +             }
>               goto free;
>       }
>       if (action_flags & MLX5_FLOW_ACTION_RSS) {
>               const struct rte_flow_action_rss
>                       *rss_conf = mlx5_flow_nta_locate_rss(dev, actions, 
> error);
> -             flow = mlx5_flow_nta_handle_rss(dev, attr, items, actions, 
> rss_conf,
> -                                             item_flags, action_flags, 
> external,
> -                                             type, error);
> +             flow = mlx5_flow_nta_handle_rss(dev, attr, prepend_items, 
> actions, rss_conf,
> +                                             item_flags, action_flags, 
> external, type, error);
>               if (flow) {
>                       flow->nt2hws->rix_mreg_copy = cpy_idx;
>                       cpy_idx = 0;
> -                     if (!split)
> +                     if (!split) {
> +                             mlx5_free(copied_items);
>                               return (uintptr_t)flow;
> +                     }
>                       goto prefix_flow;
>               }
>               goto free;
> @@ -14343,12 +14409,14 @@ static uintptr_t flow_hw_list_create(struct 
> rte_eth_dev *dev,
>       if (flow) {
>               flow->nt2hws->rix_mreg_copy = cpy_idx;
>               cpy_idx = 0;
> -             if (!split)
> +             if (!split) {
> +                     mlx5_free(copied_items);
>                       return (uintptr_t)flow;
> +             }
>               /* Fall Through to prefix flow creation. */
>       }
>  prefix_flow:
> -     ret = mlx5_flow_hw_create_flow(dev, type, attr, items, 
> resource.prefix.actions,
> +     ret = mlx5_flow_hw_create_flow(dev, type, attr, prepend_items, 
> resource.prefix.actions,
>                                      item_flags, action_flags, external, 
> &prfx_flow, error);
>       if (ret)
>               goto free;
> @@ -14357,6 +14425,7 @@ static uintptr_t flow_hw_list_create(struct 
> rte_eth_dev *dev,
>               flow->nt2hws->chaned_flow = 1;
>               SLIST_INSERT_AFTER(prfx_flow, flow, nt2hws->next);
>               mlx5_flow_nta_split_resource_free(dev, &resource);
> +             mlx5_free(copied_items);
>               return (uintptr_t)prfx_flow;
>       }
>  free:
> @@ -14368,6 +14437,7 @@ static uintptr_t flow_hw_list_create(struct 
> rte_eth_dev *dev,
>               mlx5_flow_nta_del_copy_action(dev, cpy_idx);
>       if (split > 0)
>               mlx5_flow_nta_split_resource_free(dev, &resource);
> +     mlx5_free(copied_items);
>       return 0;
>  }
>  

Reply via email to