> Subject: [dpdk-dev] [PATCH] acl: fix build issue with some arm64 compiler > > From: Jerin Jacob <jer...@marvell.com> > > Some compilers reporting the following error, though the existing code > doesn't have any uninitialized variable case. > Just to make compiler happy, initialize the int32x4_t variable one shot in C > language. > > ../lib/librte_acl/acl_run_neon.h: In function 'search_neon_4' > ../lib/librte_acl/acl_run_neon.h:230:12: error: 'input' may be used > uninitialized in this function [-Werror=maybe-uninitialized] > int32x4_t input; > > Fixes: 34fa6c27c156 ("acl: add NEON optimization for ARMv8") > Cc: sta...@dpdk.org > > Signed-off-by: Jerin Jacob <jer...@marvell.com> > --- > lib/librte_acl/acl_run_neon.h | 29 ++++++++++++----------------- > 1 file changed, 12 insertions(+), 17 deletions(-) > > diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h > index 01b9766d8..dc9e9efe9 100644 > --- a/lib/librte_acl/acl_run_neon.h > +++ b/lib/librte_acl/acl_run_neon.h > @@ -165,7 +165,6 @@ search_neon_8(const struct rte_acl_ctx *ctx, const > uint8_t **data, > uint64_t index_array[8]; > struct completion cmplt[8]; > struct parms parms[8]; > - int32x4_t input0, input1; > > acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, > total_packets, categories, ctx->trans_table); @@ -181,17 > +180,14 @@ search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t > **data, > > while (flows.started > 0) { > /* Gather 4 bytes of input data for each stream. 
*/ > - input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), > input0, 0); > - input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 4), > input1, 0); > - > - input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), > input0, 1); > - input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 5), > input1, 1); > - > - input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), > input0, 2); > - input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 6), > input1, 2); > - > - input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), > input0, 3); > - input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 7), > input1, 3); > + int32x4_t input0 = {GET_NEXT_4BYTES(parms, 0), > + GET_NEXT_4BYTES(parms, 1), > + GET_NEXT_4BYTES(parms, 2), > + GET_NEXT_4BYTES(parms, 3)}; > + int32x4_t input1 = {GET_NEXT_4BYTES(parms, 4), > + GET_NEXT_4BYTES(parms, 5), > + GET_NEXT_4BYTES(parms, 6), > + GET_NEXT_4BYTES(parms, 7)}; > This mixes the use of NEON intrinsics with GCC vector extensions. Section 12.2.6 of the ACLE (Arm C Language Extensions) specifically recommends against mixing the two methods. IMO, Aaron's suggestion of using a temp vector would be a good way to fix this.
> /* Process the 4 bytes of input on each stream. */ > > @@ -227,7 +223,6 @@ search_neon_4(const struct rte_acl_ctx *ctx, const > uint8_t **data, > uint64_t index_array[4]; > struct completion cmplt[4]; > struct parms parms[4]; > - int32x4_t input; > > acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, > total_packets, categories, ctx->trans_table); @@ -242,10 > +237,10 @@ search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t > **data, > > while (flows.started > 0) { > /* Gather 4 bytes of input data for each stream. */ > - input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, > 0); > - input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input, > 1); > - input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input, > 2); > - input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input, > 3); > + int32x4_t input = {GET_NEXT_4BYTES(parms, 0), > + GET_NEXT_4BYTES(parms, 1), > + GET_NEXT_4BYTES(parms, 2), > + GET_NEXT_4BYTES(parms, 3)}; > > /* Process the 4 bytes of input on each stream. */ > input = transition4(input, flows.trans, index_array); > -- > 2.21.0