To reproduce the problem:
$ cat rule1
@0.0.0.0/0 0.0.0.0/0 0 : 65535 0 : 65535 0x00/0x00
$ cat trace1
0xc80a0001      0x640a0001      11   101    6
$ testacl -n 1 -c 0x1 -- --rulesf=rule1 --tracef=trace1

Note that rte_acl_classify_scalar() still works correctly for that case.
The problem is that in some cases we don't check for matches
after acl_start_next_trie().

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Tested-by: Waterman Cao <waterman.cao@intel.com>
---
 lib/librte_acl/acl_run.c |   27 ++++++++++++++++++++++-----
 1 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/lib/librte_acl/acl_run.c b/lib/librte_acl/acl_run.c
index d08d7ea..79e6e76 100644
--- a/lib/librte_acl/acl_run.c
+++ b/lib/librte_acl/acl_run.c
@@ -401,8 +401,9 @@ acl_match_check_x2(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
        xmm_t temp;

        temp = MM_AND(match_mask, *indicies);
-       if (!MM_TESTZ(temp, temp)) {
+       while (!MM_TESTZ(temp, temp)) {
                acl_process_matches(indicies, slot, ctx, parms, flows);
+               temp = MM_AND(match_mask, *indicies);
        }
 }

@@ -419,12 +420,17 @@ acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms,
        /* put low 32 bits of each transition into one register */
        temp = (xmm_t)MM_SHUFFLEPS((__m128)*indicies1, (__m128)*indicies2,
                0x88);
-
        /* test for match node */
        temp = MM_AND(match_mask, temp);
-       if (!MM_TESTZ(temp, temp)) {
+
+       while (!MM_TESTZ(temp, temp)) {
                acl_process_matches(indicies1, slot, ctx, parms, flows);
                acl_process_matches(indicies2, slot + 2, ctx, parms, flows);
+
+               temp = (xmm_t)MM_SHUFFLEPS((__m128)*indicies1,
+                                       (__m128)*indicies2,
+                                       0x88);
+               temp = MM_AND(match_mask, temp);
        }
 }

@@ -599,6 +605,12 @@ search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
        indicies3 = MM_LOADU((xmm_t *) &index_array[4]);
        indicies4 = MM_LOADU((xmm_t *) &index_array[6]);

+        /* Check for any matches. */
+       acl_match_check_x4(0, ctx, parms, &flows,
+               &indicies1, &indicies2, mm_match_mask.m);
+       acl_match_check_x4(4, ctx, parms, &flows,
+               &indicies3, &indicies4, mm_match_mask.m);
+
        while (flows.started > 0) {

                /* Gather 4 bytes of input data for each stream. */
@@ -659,7 +671,6 @@ search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
                        flows.trans, &indicies3, &indicies4);

                 /* Check for any matches. */
-
                acl_match_check_x4(0, ctx, parms, &flows,
                        &indicies1, &indicies2, mm_match_mask.m);
                acl_match_check_x4(4, ctx, parms, &flows,
@@ -692,6 +703,10 @@ search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
        indicies1 = MM_LOADU((xmm_t *) &index_array[0]);
        indicies2 = MM_LOADU((xmm_t *) &index_array[2]);

+       /* Check for any matches. */
+       acl_match_check_x4(0, ctx, parms, &flows,
+               &indicies1, &indicies2, mm_match_mask.m);
+
        while (flows.started > 0) {

                /* Gather 4 bytes of input data for each stream. */
@@ -722,7 +737,6 @@ search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
                        flows.trans, &indicies1, &indicies2);

                /* Check for any matches. */
-
                acl_match_check_x4(0, ctx, parms, &flows,
                        &indicies1, &indicies2, mm_match_mask.m);
        }
@@ -776,6 +790,9 @@ search_sse_2(const struct rte_acl_ctx *ctx, const uint8_t **data,

        indicies = MM_LOADU((xmm_t *) &index_array[0]);

+       /* Check for any matches. */
+       acl_match_check_x2(0, ctx, parms, &flows, &indicies, mm_match_mask64.m);
+
        while (flows.started > 0) {

                /* Gather 4 bytes of input data for each stream. */
-- 
1.7.7.6

Reply via email to