Various types of hash tables presented under the Packet Framework toolbox.

Hash table types:
1. Extendible bucket (ext): when bucket is full, bucket is extended with more 
keys
2. Least Recently Used (LRU): when bucket is full, the LRU entry is discarded
3. Pre-computed key signature: RX core extracts the key n-tuple from the 
packet, computes the key signature and saves the key and key signature within 
the packet meta-data; flow classification core performs the actual lookup (the 
bucket search stage) after reading the key and key signature from packet 
meta-data
4. Signature computed on-the-fly (do-sig version): the same CPU core extracts 
the key n-tuple from pkt, computes key signature and performs the table lookup
5. Configurable key size or optimized for single key size (8-byte, 16-byte and 
32-byte key sizes)

Please checkout the Intel DPDK documentation for more details on these hash 
tables.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu at intel.com>
---
 lib/librte_table/rte_lru.h              |  198 +++++
 lib/librte_table/rte_table_hash.h       |  349 ++++++++
 lib/librte_table/rte_table_hash_ext.c   | 1070 ++++++++++++++++++++++++
 lib/librte_table/rte_table_hash_key16.c | 1086 ++++++++++++++++++++++++
 lib/librte_table/rte_table_hash_key32.c | 1100 +++++++++++++++++++++++++
 lib/librte_table/rte_table_hash_key8.c  | 1372 +++++++++++++++++++++++++++++++
 lib/librte_table/rte_table_hash_lru.c   | 1021 +++++++++++++++++++++++
 7 files changed, 6196 insertions(+), 0 deletions(-)
 create mode 100644 lib/librte_table/rte_lru.h
 create mode 100644 lib/librte_table/rte_table_hash.h
 create mode 100644 lib/librte_table/rte_table_hash_ext.c
 create mode 100644 lib/librte_table/rte_table_hash_key16.c
 create mode 100644 lib/librte_table/rte_table_hash_key32.c
 create mode 100644 lib/librte_table/rte_table_hash_key8.c
 create mode 100644 lib/librte_table/rte_table_hash_lru.c

diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h
new file mode 100644
index 0000000..a164745
--- /dev/null
+++ b/lib/librte_table/rte_lru.h
@@ -0,0 +1,198 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_LRU_H__
+#define __INCLUDE_RTE_LRU_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#ifdef __INTEL_COMPILER /* ICC: report 0 so that GCC_VERSION > 40306 is false */
+#define GCC_VERSION (0)
+#else
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#endif /* __INTEL_COMPILER */
+
+#ifndef RTE_TABLE_HASH_LRU_STRATEGY /* pick a default unless the build set one */
+#ifdef __SSE4_2__
+#define RTE_TABLE_HASH_LRU_STRATEGY                        2
+#else /* if no SSE, use simple scalar version */
+#define RTE_TABLE_HASH_LRU_STRATEGY                        1
+#endif /* __SSE4_2__ */
+#endif /* RTE_TABLE_HASH_LRU_STRATEGY */
+
+#ifndef RTE_ARCH_X86_64 /* other targets always get the scalar strategy 1 */
+#undef RTE_TABLE_HASH_LRU_STRATEGY
+#define RTE_TABLE_HASH_LRU_STRATEGY                        1
+#endif /* RTE_ARCH_X86_64 */
+
+#if (RTE_TABLE_HASH_LRU_STRATEGY < 0) || (RTE_TABLE_HASH_LRU_STRATEGY > 3)
+#error Invalid value for RTE_TABLE_HASH_LRU_STRATEGY
+#endif
+
+#if RTE_TABLE_HASH_LRU_STRATEGY == 0
+
+/* Strategy 0: no LRU state; init/update are no-ops that never evaluate args. */
+#define lru_init(bucket) do { (void) sizeof(bucket); } while (0)
+#define lru_pos(bucket) ((bucket)->lru_list & 0xFFFFLLU)
+#define lru_update(bucket, mru_val)                                     \
+       do { (void) sizeof(bucket); (void) sizeof(mru_val); } while (0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 1
+
+/* Strategy 1 (scalar): lru_list packs four 16-bit key positions, with the */
+/* LRU position in bits 15..0 and the MRU position in bits 63..48. */
+#define lru_init(bucket)                                                \
+       do { (bucket)->lru_list = 0x0000000100020003LLU; } while (0)
+
+#define lru_pos(bucket) ((bucket)->lru_list & 0xFFFFLLU)
+
+#define lru_update(bucket, mru_val)                                     \
+do {                                                                    \
+       /* Inputs are latched into suffixed locals so that an argument */ \
+       /* named e.g. "pos" or "x" cannot be captured by the macro. */   \
+       const uint64_t lru_mru_ = (uint64_t) (mru_val);                  \
+       const uint64_t lru_x_ = (bucket)->lru_list;                      \
+       unsigned lru_pos_ = 4; /* 4: mru_val is not in the list */       \
+                                                                        \
+       /* Later tests override earlier ones: the lowest match wins. */  \
+       if ((lru_x_ >> 48) == lru_mru_)                                  \
+               lru_pos_ = 3;                                            \
+       if (((lru_x_ >> 32) & 0xFFFFLLU) == lru_mru_)                    \
+               lru_pos_ = 2;                                            \
+       if (((lru_x_ >> 16) & 0xFFFFLLU) == lru_mru_)                    \
+               lru_pos_ = 1;                                            \
+       if ((lru_x_ & 0xFFFFLLU) == lru_mru_)                            \
+               lru_pos_ = 0;                                            \
+                                                                        \
+       /* Shift only on a hit: a 64-bit shift by 64 is undefined, and */ \
+       /* on a miss the list must stay untouched anyway. */             \
+       if (lru_pos_ != 4) {                                             \
+               const unsigned lru_sh_ = lru_pos_ << 4;                  \
+               const uint64_t lru_mask_ = (~0LLU) << lru_sh_;           \
+               (bucket)->lru_list = (lru_x_ & ~lru_mask_) |             \
+                       ((lru_x_ >> 16) & lru_mask_) |                   \
+                       ((lru_x_ << (48 - lru_sh_)) & (0xFFFFLLU << 48)); \
+       }                                                                \
+} while(0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 2
+
+#if GCC_VERSION > 40306
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+/* Strategy 2 (SSE): lru_list packs four 16-bit key positions, with the LRU */
+/* position in bits 15..0 and the MRU position in bits 63..48. */
+#define lru_init(bucket)                                                \
+       do { (bucket)->lru_list = 0x0000000100020003LLU; } while (0)
+#define lru_pos(bucket) ((bucket)->lru_list & 0xFFFFLLU)
+
+#define lru_update(bucket, mru_val)                                     \
+do {                                                                    \
+    /* byte-shuffle controls, one pair per matched slot; pair 4 = no match */ \
+    static const uint64_t lru_masks_[10] = {                            \
+        /* Shuffle order */  /* Make Zero (see _mm_shuffle_epi8 manual) */ \
+        0x0100070605040302, 0x8080808080808080,                         \
+        0x0302070605040100, 0x8080808080808080,                         \
+        0x0504070603020100, 0x8080808080808080,                         \
+        0x0706050403020100, 0x8080808080808080,                         \
+        0x0706050403020100, 0x8080808080808080};                        \
+    /* replicate mru_val into all four 16-bit lanes */                  \
+    const uint64_t lru_m16_ = (mru_val);                                \
+    const uint64_t lru_m32_ = lru_m16_ | (lru_m16_ << 16);              \
+    const uint64_t lru_old_ = (bucket)->lru_list;                       \
+    /* XOR zeroes the lane equal to mru_val; upper lanes 4..7 stay zero */ \
+    const uint64_t lru_xor_ = lru_old_ ^ ((lru_m32_ << 32) | lru_m32_); \
+    const __m128i lru_c_ = _mm_cvtsi64_si128(lru_xor_);                 \
+    const __m128i lru_b_ = _mm_cvtsi64_si128(lru_old_);                 \
+    /* minpos yields the first zero lane: 0..3 on a hit, 4 on a miss */ \
+    const __m128i lru_d_ = _mm_minpos_epu16(lru_c_);                    \
+    const unsigned lru_pos_ = _mm_extract_epi16(lru_d_, 1);             \
+    /* move the hit lane to the MRU end; loadu has no alignment demand */ \
+    const __m128i lru_k_ = _mm_shuffle_epi8(lru_b_,                     \
+        _mm_loadu_si128((const __m128i *) &lru_masks_[2 * lru_pos_]));  \
+    (bucket)->lru_list = _mm_extract_epi64(lru_k_, 0);                  \
+} while(0)
+
+#elif RTE_TABLE_HASH_LRU_STRATEGY == 3
+
+#if GCC_VERSION > 40306
+#include <x86intrin.h>
+#else
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+/* Strategy 3 (SSE): four 16-bit counters in lru_list; smallest counter = LRU. */
+#define lru_init(bucket) do { (bucket)->lru_list = ~0LLU; } while(0)
+static inline int
+f_lru_pos(uint64_t lru_list)
+{
+    __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list); /* lanes 4..7 = 0xFFFF */
+    __m128i min = _mm_minpos_epu16(lst);
+    return _mm_extract_epi16(min, 1); /* word 1 = index of the minimum lane */
+}
+#define lru_pos(bucket) f_lru_pos((bucket)->lru_list)
+
+#define lru_update(bucket, mru_val)                                     \
+do {                                                                    \
+    /* 0..3: age all counters by 1, max out the hit one; 4: no change */ \
+    const uint64_t lru_or_[] = { 0xFFFFLLU, 0xFFFFLLU << 16,            \
+        0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU };                       \
+    const uint64_t lru_dec_[] = { 0x1000100010001LLU, 0 };              \
+    __m128i lru_v_ = _mm_cvtsi64_si128((bucket)->lru_list);             \
+    const __m128i lru_vdec_ = _mm_cvtsi64_si128(lru_dec_[(mru_val) >> 2]); \
+    lru_v_ = _mm_subs_epu16(lru_v_, lru_vdec_);                         \
+    (bucket)->lru_list = _mm_extract_epi64(lru_v_, 0) | lru_or_[(mru_val)]; \
+} while(0)
+
+#else
+
+#error "Incorrect value for RTE_TABLE_HASH_LRU_STRATEGY"
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_hash.h 
b/lib/librte_table/rte_table_hash.h
new file mode 100644
index 0000000..b5bb8ad
--- /dev/null
+++ b/lib/librte_table/rte_table_hash.h
@@ -0,0 +1,349 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+ 
+#ifndef __INCLUDE_RTE_TABLE_HASH_H__
+#define __INCLUDE_RTE_TABLE_HASH_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Table Hash
+ *
+ * These tables use the exact match criterion to uniquely associate data to
+ * lookup keys.
+ *
+ * Use-cases: Flow classification table, Address Resolution Protocol (ARP) 
table
+ *
+ * Hash table types:
+ * 1. Entry add strategy on bucket full:
+ *     a. Least Recently Used (LRU): One of the existing keys in the bucket is
+ *        deleted and the new key is added in its place. The number of keys in
+ *        each bucket never grows bigger than 4. The logic to pick the key to
+ *        be dropped from the bucket is LRU. The hash table lookup operation
+ *        maintains the order in which the keys in the same bucket are hit, so
+ *        every time a key is hit, it becomes the new Most Recently Used (MRU)
+ *        key, i.e. the most unlikely candidate for drop. When a key is added
+ *        to the bucket, it also becomes the new MRU key. When a key needs to
+ *        be picked and dropped, the most likely candidate for drop, i.e. the
+ *        current LRU key, is always picked. The LRU logic requires maintaining
+ *        specific data structures per each bucket.
+ *     b. Extendible bucket (ext): The bucket is extended with space for 4 more
+ *        keys. This is done by allocating additional memory at table init 
time,
+ *        which is used to create a pool of free keys (the size of this pool is
+ *        configurable and always a multiple of 4). On key add operation, the
+ *        allocation of a group of 4 keys only happens successfully within the
+ *        limit of free keys, otherwise the key add operation fails. On key
+ *        delete operation, a group of 4 keys is freed back to the pool of free
+ *        keys when the key to be deleted is the only key that was used within
+ *        its group of 4 keys at that time. On key lookup operation, if the
+ *        current bucket is in extended state and a match is not found in the
+ *        first group of 4 keys, the search continues beyond the first group of
+ *        4 keys, potentially until all keys in this bucket are examined. The
+ *        extendible bucket logic requires maintaining specific data structures
+ *        per table and per each bucket.
+ * 2. Key signature computation:
+ *     a. Pre-computed key signature: The key lookup operation is split between
+ *        two CPU cores. The first CPU core (typically the CPU core that 
performs
+ *        packet RX) extracts the key from the input packet, computes the key
+ *        signature and saves both the key and the key signature in the packet
+ *        buffer as packet meta-data. The second CPU core reads both the key 
and
+ *        the key signature from the packet meta-data and performs the bucket
+ *        search step of the key lookup operation.
+ *     b. Key signature computed on lookup (do-sig): The same CPU core reads
+ *        the key from the packet meta-data, uses it to compute the key 
signature
+ *        and also performs the bucket search step of the key lookup operation.
+ * 3. Key size:
+ *     a. Configurable key size
+ *     b. Single key size (8-byte, 16-byte or 32-byte key size)
+ *
+ ***/
+#include <stdint.h>
+
+#include "rte_table.h"
+
+/** Hash function: takes a key, the key size and a seed; returns a 64-bit value */
+typedef uint64_t (*rte_table_hash_op_hash)(
+       void *key, 
+       uint32_t key_size, 
+       uint64_t seed);
+
+/** 
+ * Hash tables with configurable key size
+ *
+ */
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_ext_params {
+       /** Key size (number of bytes). Must be a non-zero power of 2. */
+       uint32_t key_size;
+
+       /** Maximum number of keys. Must be a non-zero power of 2. */
+       uint32_t n_keys;
+
+       /** Number of hash table buckets (non-zero). Each bucket stores up to 4 keys. */
+       uint32_t n_buckets;
+
+       /** Number of hash table bucket extensions. Each bucket extension has space
+           for 4 keys and each bucket can have 0, 1 or multiple extensions. */
+       uint32_t n_buckets_ext;
+
+       /** Hash function. Must not be NULL. */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed value for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located; must be a multiple of 4. Valid for pre-computed key signature
+           tables, ignored for do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset (multiple of 8) within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** Extendible bucket hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_ext_ops;
+
+/** Extendible bucket hash table operations for key signature computed on
+    lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_ext_dosig_ops;
+
+/** LRU hash table parameters */
+struct rte_table_hash_lru_params {
+       /** Key size (number of bytes) */
+       uint32_t key_size;
+
+       /** Maximum number of keys */
+       uint32_t n_keys;
+
+       /** Number of hash table buckets. Each bucket stores up to 4 keys. */
+       uint32_t n_buckets;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed value for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_lru_ops;
+
+/** LRU hash table operations for key signature computed on lookup ("do-sig") 
*/
+extern struct rte_table_ops rte_table_hash_lru_dosig_ops;
+
+/** 
+ * 8-byte key hash tables
+ *
+ */
+/** LRU hash table parameters */
+struct rte_table_hash_key8_lru_params {
+       /** Maximum number of entries (and keys) in the table */
+       uint32_t n_entries;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key8_lru_ops;
+
+/** LRU hash table operations for key signature computed on lookup ("do-sig") 
*/
+extern struct rte_table_ops rte_table_hash_key8_lru_dosig_ops;
+
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_key8_ext_params {
+       /** Maximum number of entries (and keys) in the table */
+       uint32_t n_entries;
+
+       /** Number of entries (and keys) for hash table bucket extensions. Each
+           bucket is extended in increments of 4 keys. */
+       uint32_t n_entries_ext;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** Extendible bucket hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key8_ext_ops;
+
+/** Extendible bucket hash table operations for key signature computed on
+    lookup ("do-sig") */
+extern struct rte_table_ops rte_table_hash_key8_ext_dosig_ops;
+
+/**
+ * 16-byte key hash tables
+ *
+ */
+/** LRU hash table parameters */
+struct rte_table_hash_key16_lru_params {
+       /** Maximum number of entries (and keys) in the table */
+       uint32_t n_entries;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key16_lru_ops;
+
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_key16_ext_params {
+       /** Maximum number of entries (and keys) in the table */
+       uint32_t n_entries;
+
+       /** Number of entries (and keys) for hash table bucket extensions. Each
+           bucket is extended in increments of 4 keys. */
+       uint32_t n_entries_ext;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** Extendible bucket operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key16_ext_ops;
+
+/**
+ * 32-byte key hash tables
+ *
+ */
+/** LRU hash table parameters */
+struct rte_table_hash_key32_lru_params {
+       /** Maximum number of entries (and keys) in the table */
+       uint32_t n_entries;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** LRU hash table operations for pre-computed key signature */
+extern struct rte_table_ops rte_table_hash_key32_lru_ops;
+
+/** Extendible bucket hash table parameters */
+struct rte_table_hash_key32_ext_params {
+       /** Maximum number of entries (and keys) in the table */
+       uint32_t n_entries;
+
+       /** Number of entries (and keys) for hash table bucket extensions. Each
+           bucket is extended in increments of 4 keys. */
+       uint32_t n_entries_ext;
+
+       /** Hash function */
+       rte_table_hash_op_hash f_hash;
+
+       /** Seed for the hash function */
+       uint64_t seed;
+
+       /** Byte offset within packet meta-data where the 4-byte key signature is
+           located. Valid for pre-computed key signature tables, ignored for
+           do-sig tables. */
+       uint32_t signature_offset;
+
+       /** Byte offset within packet meta-data where the key is located */
+       uint32_t key_offset;
+};
+
+/** Extendible bucket hash table operations */
+extern struct rte_table_ops rte_table_hash_key32_ext_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_table/rte_table_hash_ext.c 
b/lib/librte_table/rte_table_hash_ext.c
new file mode 100644
index 0000000..9ad5e88
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_ext.c
@@ -0,0 +1,1070 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+
+#define KEYS_PER_BUCKET    4
+
+struct bucket { /* create() requires sizeof(struct bucket) == CACHE_LINE_SIZE / 2 */
+       union {
+               uintptr_t next; /* tagged link: bit 0 = valid flag (see BUCKET_NEXT*) */
+               uint64_t lru_list; /* overlays next (same union storage) */
+       };
+       uint16_t sig[KEYS_PER_BUCKET]; /* one entry per key slot */
+       uint32_t key_pos[KEYS_PER_BUCKET]; /* one entry per key slot */
+};
+
+/* Link helpers: bit 0 of bucket->next is a "valid" tag, the rest is the pointer. */
+#define BUCKET_NEXT(bucket)                                             \
+       ((void *) ((bucket)->next & ~((uintptr_t) 1)))
+
+#define BUCKET_NEXT_VALID(bucket)                                       \
+       ((bucket)->next & (uintptr_t) 1)
+
+#define BUCKET_NEXT_SET(bucket, bucket_next)                            \
+       ((bucket)->next = ((uintptr_t) ((void *) (bucket_next))) | (uintptr_t) 1)
+/* Statement-like setters: no ';' inside the macro, the call site supplies it. */
+#define BUCKET_NEXT_SET_NULL(bucket)                                    \
+       do { (bucket)->next = 0; } while (0)
+#define BUCKET_NEXT_COPY(bucket, bucket2)                               \
+       do { (bucket)->next = (bucket2)->next; } while (0)
+
+struct grinder { /* rte_table_hash keeps RTE_PORT_IN_BURST_SIZE_MAX of these */
+       struct bucket *bkt; /* NOTE(review): presumably the bucket being searched - confirm */
+       uint64_t sig; /* NOTE(review): presumably the signature being looked up - confirm */
+       uint64_t match;
+       uint32_t key_index;
+};
+
+struct rte_table_hash {
+       /* Input parameters (copied from the create() arguments) */
+       uint32_t key_size;
+       uint32_t entry_size;
+       uint32_t n_keys;
+       uint32_t n_buckets;
+       uint32_t n_buckets_ext;
+       rte_table_hash_op_hash f_hash;
+       uint64_t seed;
+       uint32_t signature_offset;
+       uint32_t key_offset;
+
+       /* Internal */
+       uint64_t bucket_mask; /* n_buckets - 1 */
+       uint32_t key_size_shl; /* number of trailing zero bits of key_size */
+       uint32_t data_size_shl; /* NOTE(review): create() derives this from key_size, not entry_size - confirm */
+       uint32_t key_stack_tos;
+       uint32_t bkt_ext_stack_tos;
+
+       /* Grinder */
+       struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
+
+       /* Tables: create() points buckets .. bkt_ext_stack into memory[] */
+       struct bucket *buckets;
+       struct bucket *buckets_ext;
+       uint8_t *key_mem;
+       uint8_t *data_mem;
+       uint32_t *key_stack;
+       uint32_t *bkt_ext_stack;
+
+       /* Table memory. NOTE(review): create() indexes memory[] with offsets that include the header size - check allocation */
+       uint8_t memory[0] __rte_cache_aligned;
+};
+
+/* Validate create() parameters: 0 if usable, -EINVAL (after logging) if not. */
+static int
+check_params_create(struct rte_table_hash_ext_params * params)
+{
+       /* key_size: non-zero power of 2 */
+       if ((params->key_size == 0) ||
+           (!rte_is_power_of_2(params->key_size))) {
+               RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_keys: non-zero power of 2 */
+       if ((params->n_keys == 0) ||
+           (!rte_is_power_of_2(params->n_keys))) {
+               RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_buckets: non-zero power of 2, because n_buckets - 1 is used as a mask */
+       /* NOTE(review): the minimum below is 1 for any n_keys >= 4; intent unclear */
+       if ((params->n_buckets == 0) ||
+           (!rte_is_power_of_2(params->n_buckets)) ||
+           (params->n_buckets <
+               (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys)) {
+               RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* f_hash: mandatory */
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* signature offset: multiple of 4 bytes */
+       if ((params->signature_offset & 0x3) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: signature_offset invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* key offset: multiple of 8 bytes */
+       if ((params->key_offset & 0x7) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: key_offset invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create an extendible bucket hash table.
+ *
+ * A single cache line aligned memory area is requested on socket_id through
+ * rte_zmalloc_socket(). After the table meta-data it holds, in this order and
+ * each rounded up to a cache line multiple: the buckets, the bucket
+ * extensions, the key memory, the stack of free key indexes, the stack of
+ * free bucket extension indexes and the entry data memory.
+ *
+ * params: pointer to a struct rte_table_hash_ext_params
+ * socket_id: socket handed to rte_zmalloc_socket()
+ * entry_size: size in bytes of one table entry, must be a power of 2
+ *
+ * Returns the new table, or NULL when a parameter is rejected or the memory
+ * allocation fails.
+ */
+static void *
+rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size)
+{
+       struct rte_table_hash_ext_params *p =
+               (struct rte_table_hash_ext_params *) params;
+       struct rte_table_hash *t;
+       uint32_t total_size, table_meta_sz, bucket_sz, bucket_ext_sz, key_sz;
+       uint32_t key_stack_sz, bkt_ext_stack_sz, data_sz;
+       uint32_t table_meta_offset, bucket_offset, bucket_ext_offset;
+       uint32_t key_offset, key_stack_offset, bkt_ext_stack_offset;
+       uint32_t data_offset;
+       uint32_t i;
+
+       /*
+        * Check input parameters. The two sizeof() terms guard the memory
+        * layout assumed below (meta-data is a whole number of cache lines,
+        * one bucket is half a cache line).
+        */
+       if ((check_params_create(p) != 0) ||
+           (!rte_is_power_of_2(entry_size)) ||
+           ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+           (sizeof(struct bucket) != (CACHE_LINE_SIZE / 2))) {
+               return NULL;
+       }
+
+       /* Memory allocation */
+       table_meta_sz = CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
+       bucket_sz = CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
+       bucket_ext_sz =
+               CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(struct bucket));
+       key_sz = CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
+       key_stack_sz = CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
+       bkt_ext_stack_sz =
+               CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(uint32_t));
+       data_sz = CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
+       total_size = table_meta_sz + bucket_sz + bucket_ext_sz + key_sz +
+               key_stack_sz + bkt_ext_stack_sz + data_sz;
+
+       t = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (t == NULL) {
+               RTE_LOG(ERR, TABLE,
+                       "%s: Cannot allocate %u bytes for hash table\n",
+                       __func__, total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+               "%s (%u-byte key): Hash table memory footprint is %u bytes\n",
+               __func__, p->key_size, total_size);
+
+       /* Memory initialization */
+       t->key_size = p->key_size;
+       t->entry_size = entry_size;
+       t->n_keys = p->n_keys;
+       t->n_buckets = p->n_buckets;
+       t->n_buckets_ext = p->n_buckets_ext;
+       t->f_hash = p->f_hash;
+       t->seed = p->seed;
+       t->signature_offset = p->signature_offset;
+       t->key_offset = p->key_offset;
+
+       /*
+        * Internal. The two shift amounts turn a key index into a byte
+        * offset. Data memory holds entry_size bytes per key, so its shift
+        * is derived from entry_size and not from the key size.
+        */
+       t->bucket_mask = t->n_buckets - 1;
+       t->key_size_shl = __builtin_ctzl(p->key_size);
+       t->data_size_shl = __builtin_ctzl(entry_size);
+
+       /* Tables */
+       table_meta_offset = 0;
+       bucket_offset = table_meta_offset + table_meta_sz;
+       bucket_ext_offset = bucket_offset + bucket_sz;
+       key_offset = bucket_ext_offset + bucket_ext_sz;
+       key_stack_offset = key_offset + key_sz;
+       bkt_ext_stack_offset = key_stack_offset + key_stack_sz;
+       data_offset = bkt_ext_stack_offset + bkt_ext_stack_sz;
+
+       t->buckets = (struct bucket *) &t->memory[bucket_offset];
+       t->buckets_ext = (struct bucket *) &t->memory[bucket_ext_offset];
+       t->key_mem = &t->memory[key_offset];
+       t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
+       t->bkt_ext_stack = (uint32_t *) &t->memory[bkt_ext_stack_offset];
+       t->data_mem = &t->memory[data_offset];
+
+       /* Key stack: every key is free, index 0 ends up on top */
+       for (i = 0; i < t->n_keys; i++) {
+               t->key_stack[i] = t->n_keys - 1 - i;
+       }
+       t->key_stack_tos = t->n_keys;
+
+       /* Bucket ext stack: every extension is free, index 0 on top */
+       for (i = 0; i < t->n_buckets_ext; i++) {
+               t->bkt_ext_stack[i] = t->n_buckets_ext - 1 - i;
+       }
+       t->bkt_ext_stack_tos = t->n_buckets_ext;
+
+       return t;
+}
+
+/*
+ * Release a hash table with rte_free().
+ *
+ * table: table handle (presumably one returned by
+ *        rte_table_hash_ext_create() - confirm against callers)
+ *
+ * Returns 0 on success, -EINVAL when table is NULL.
+ */
+static int
+rte_table_hash_ext_free(void *table)
+{
+       /* Reject a missing handle instead of silently ignoring it */
+       if (table == NULL)
+               return -EINVAL;
+
+       rte_free(table);
+
+       return 0;
+}
+
+/*
+ * Add a key to the table, or update the entry of a key already stored.
+ *
+ * table: hash table handle
+ * key: key to add, key_size bytes as configured for the table
+ * entry: entry data to store, entry_size bytes as configured for the table
+ * key_found: set to 1 when the key was already present (its entry data is
+ *            overwritten), 0 when the key is newly installed
+ * entry_ptr: set to the location of the entry data inside the table
+ *
+ * Returns 0 on success, -ENOSPC when no free key is left or when every slot
+ * of the bucket chain is taken and no bucket extension is left. Neither
+ * output is written on failure.
+ */
+static int
+rte_table_hash_ext_entry_add(void *table, void *key, void *entry,
+       int *key_found, void **entry_ptr)
+{
+       struct rte_table_hash *ht = (struct rte_table_hash *) table;
+       struct bucket *head, *cur, *tail;
+       uint64_t hash, key_sig;
+       uint32_t head_index, ext_index, slot, pos;
+       uint8_t *key_slot, *data_slot;
+
+       /*
+        * Low hash bits select the bucket, the bits above bit 16 form the
+        * signature. Bit 0 is forced to 1 so a valid signature is never 0,
+        * the value that marks a free slot.
+        * NOTE(review): the signature is stored through a uint16_t cast but
+        * compared at 64-bit width - this presumably relies on f_hash
+        * results fitting in 32 bits; confirm.
+        */
+       hash = ht->f_hash(key, ht->key_size, ht->seed);
+       head_index = hash & ht->bucket_mask;
+       head = &ht->buckets[head_index];
+       key_sig = (hash >> 16) | 1LLU;
+
+       /* Pass 1: the key is already stored somewhere in the chain */
+       for (cur = head; cur != NULL; cur = BUCKET_NEXT(cur)) {
+               for (slot = 0; slot < KEYS_PER_BUCKET; slot++) {
+                       if ((uint64_t) cur->sig[slot] != key_sig)
+                               continue;
+
+                       pos = cur->key_pos[slot];
+                       key_slot = &ht->key_mem[pos << ht->key_size_shl];
+                       if (memcmp(key, key_slot, ht->key_size) != 0)
+                               continue;
+
+                       /* Same key: overwrite its entry data in place */
+                       data_slot = &ht->data_mem[pos << ht->data_size_shl];
+                       memcpy(data_slot, entry, ht->entry_size);
+                       *key_found = 1;
+                       *entry_ptr = (void *) data_slot;
+                       return 0;
+               }
+       }
+
+       /* Pass 2: take the first free slot of the chain */
+       tail = NULL;
+       for (cur = head; cur != NULL; cur = BUCKET_NEXT(cur)) {
+               for (slot = 0; slot < KEYS_PER_BUCKET; slot++) {
+                       if (cur->sig[slot] != 0)
+                               continue;
+
+                       /* A free slot is useless without a free key */
+                       if (ht->key_stack_tos == 0)
+                               return -ENOSPC;
+                       pos = ht->key_stack[--ht->key_stack_tos];
+
+                       key_slot = &ht->key_mem[pos << ht->key_size_shl];
+                       data_slot = &ht->data_mem[pos << ht->data_size_shl];
+
+                       cur->sig[slot] = (uint16_t) key_sig;
+                       cur->key_pos[slot] = pos;
+                       memcpy(key_slot, key, ht->key_size);
+                       memcpy(data_slot, entry, ht->entry_size);
+
+                       *key_found = 0;
+                       *entry_ptr = (void *) data_slot;
+                       return 0;
+               }
+
+               /* Remember the last bucket visited: the chain's tail */
+               tail = cur;
+       }
+
+       /* Chain is full: it can only grow with a free extension and key */
+       if ((ht->bkt_ext_stack_tos > 0) && (ht->key_stack_tos > 0)) {
+               /* Take a bucket extension and hook it behind the tail */
+               ext_index = ht->bkt_ext_stack[--ht->bkt_ext_stack_tos];
+               cur = &ht->buckets_ext[ext_index];
+               BUCKET_NEXT_SET(tail, cur);
+               BUCKET_NEXT_SET_NULL(cur);
+
+               /* Take a key and install it in slot 0 of the new bucket */
+               pos = ht->key_stack[--ht->key_stack_tos];
+               key_slot = &ht->key_mem[pos << ht->key_size_shl];
+               data_slot = &ht->data_mem[pos << ht->data_size_shl];
+
+               cur->sig[0] = (uint16_t) key_sig;
+               cur->key_pos[0] = pos;
+               memcpy(key_slot, key, ht->key_size);
+               memcpy(data_slot, entry, ht->entry_size);
+
+               *key_found = 0;
+               *entry_ptr = (void *) data_slot;
+               return 0;
+       }
+
+       return -ENOSPC;
+}
+
+/*
+ * Delete a key from the table.
+ *
+ * table: hash table handle
+ * key: key to remove, key_size bytes as configured for the table
+ * key_found: set to 1 when the key was present and has been removed, 0 when
+ *            the key is not in the table
+ * entry: when not NULL and the key is found, receives a copy of the entry
+ *        data (entry_size bytes as configured for the table)
+ *
+ * Returns 0 in both cases.
+ */
+static int
+rte_table_hash_ext_entry_delete(void *table, void *key, int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct bucket *bkt0, *bkt, *bkt_prev;
+       uint64_t sig;
+       uint32_t bkt_index, i;
+
+       sig = t->f_hash(key, t->key_size, t->seed);
+       bkt_index = sig & t->bucket_mask;
+       bkt0 = &t->buckets[bkt_index];
+       sig = (sig >> 16) | 1LLU;
+
+       /* Key is present in the bucket */
+       for (bkt_prev = NULL, bkt = bkt0; bkt != NULL;
+               bkt_prev = bkt, bkt = BUCKET_NEXT(bkt)) {
+               for (i = 0; i < KEYS_PER_BUCKET; i++) {
+                       uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+                       uint32_t bkt_key_index = bkt->key_pos[i];
+                       uint8_t *bkt_key =
+                               &t->key_mem[bkt_key_index << t->key_size_shl];
+                       uint8_t *data;
+
+                       if ((sig != bkt_sig) ||
+                           (memcmp(key, bkt_key, t->key_size) != 0)) {
+                               continue;
+                       }
+
+                       data = &t->data_mem[bkt_key_index << t->data_size_shl];
+
+                       /* Uninstall key from bucket */
+                       bkt->sig[i] = 0;
+                       *key_found = 1;
+                       if (entry) {
+                               memcpy(entry, data, t->entry_size);
+                       }
+
+                       /* Free key */
+                       t->key_stack[t->key_stack_tos++] = bkt_key_index;
+
+                       /*
+                        * Check if bucket is unused. bkt_prev is NULL only
+                        * for the first bucket of the chain, which is
+                        * therefore never released.
+                        */
+                       if ((bkt_prev != NULL) &&
+                           (bkt->sig[0] == 0) && (bkt->sig[1] == 0) &&
+                           (bkt->sig[2] == 0) && (bkt->sig[3] == 0)) {
+                               /*
+                                * Unchain bucket. This has to happen before
+                                * the bucket is cleared: BUCKET_NEXT_COPY
+                                * needs the link that is still stored in bkt,
+                                * otherwise the buckets chained after bkt
+                                * would be dropped from the chain.
+                                */
+                               BUCKET_NEXT_COPY(bkt_prev, bkt);
+
+                               /* Clear bucket */
+                               memset(bkt, 0, sizeof(struct bucket));
+
+                               /* Free bucket back to buckets ext */
+                               bkt_index = bkt - t->buckets_ext;
+                               t->bkt_ext_stack[t->bkt_ext_stack_tos++] =
+                                       bkt_index;
+                       }
+
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Look up a burst of packets one at a time, without software pipelining.
+ *
+ * table: hash table handle
+ * pkts: packet array; only positions whose bit is set in pkts_mask are read
+ * pkts_mask: bit i set means pkts[i] takes part in the lookup
+ * lookup_hit_mask: output, bit i set means the key of pkts[i] was found
+ * entries: output, entries[i] points to the entry data of pkts[i]; written
+ *          for lookup hits only
+ * dosig: non-zero to compute the signature from the key with the table's
+ *        hash function, zero to read a 32-bit pre-computed signature from
+ *        the packet meta-data at the table's signature offset
+ *
+ * Always returns 0.
+ */
+static int rte_table_hash_ext_lookup_unoptimized(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries,
+       int dosig)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       uint64_t pkts_mask_out = 0;
+
+       while (pkts_mask != 0) {
+               struct bucket *bkt0, *bkt;
+               struct rte_mbuf *pkt;
+               uint8_t *key;
+               uint64_t pkt_mask, sig;
+               uint32_t pkt_index, bkt_index, i;
+
+               /* Take the lowest pending packet out of the input mask */
+               pkt_index = __builtin_ctzll(pkts_mask);
+               pkt_mask = 1LLU << pkt_index;
+               pkts_mask &= ~pkt_mask;
+
+               pkt = pkts[pkt_index];
+               key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
+               if (dosig) {
+                       sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
+               } else {
+                       sig = RTE_MBUF_METADATA_UINT32(pkt,
+                               t->signature_offset);
+               }
+
+               bkt_index = sig & t->bucket_mask;
+               bkt0 = &t->buckets[bkt_index];
+               sig = (sig >> 16) | 1LLU;
+
+               /* Key is present in the bucket */
+               for (bkt = bkt0; bkt != NULL; bkt = BUCKET_NEXT(bkt)) {
+                       for (i = 0; i < KEYS_PER_BUCKET; i++) {
+                               uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+                               uint32_t bkt_key_index = bkt->key_pos[i];
+                               uint8_t *bkt_key = &t->key_mem[bkt_key_index <<
+                                       t->key_size_shl];
+
+                               if ((sig == bkt_sig) && (memcmp(key, bkt_key,
+                                       t->key_size) == 0)) {
+                                       uint8_t *data = &t->data_mem[
+                                               bkt_key_index <<
+                                               t->data_size_shl];
+
+                                       pkts_mask_out |= pkt_mask;
+                                       entries[pkt_index] = (void *) data;
+                                       break;
+                               }
+                       }
+
+                       /*
+                        * The slot loop was left early, so the key has been
+                        * found: a key is stored at most once per chain,
+                        * there is no point in walking the remaining buckets.
+                        */
+                       if (i < KEYS_PER_BUCKET) {
+                               break;
+                       }
+               }
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+}
+
+/***
+ * Lookup tables that decode the result of comparing one signature against
+ * the four signatures of a bucket (see lookup_cmp_sig).
+ *
+ * mask = match bitmask (bit i set: signature i of the bucket matches)
+ * match = at least one match
+ * match_many = more than one match
+ * match_pos = position of first match
+ * 
+ * ----------------------------------------
+ * mask      match   match_many   match_pos
+ * ----------------------------------------
+ * 0000      0       0            00
+ * 0001      1       0            00
+ * 0010      1       0            01
+ * 0011      1       1            00
+ * ----------------------------------------
+ * 0100      1       0            10
+ * 0101      1       1            00
+ * 0110      1       1            01
+ * 0111      1       1            00
+ * ----------------------------------------
+ * 1000      1       0            11
+ * 1001      1       1            00
+ * 1010      1       1            01
+ * 1011      1       1            00
+ * ----------------------------------------
+ * 1100      1       1            10
+ * 1101      1       1            00
+ * 1110      1       1            01
+ * 1111      1       1            00
+ * ----------------------------------------
+ *
+ * Each constant below packs one column of the table, read from the bottom
+ * row (mask 1111, most significant) up to the top row (mask 0000, least
+ * significant): one bit per row for match and match_many, two bits per row
+ * for match_pos. The value for a given mask is therefore bit "mask" of
+ * LUT_MATCH / LUT_MATCH_MANY and the 2-bit field at bit 2 * mask of
+ * LUT_MATCH_POS.
+ *
+ * match = 1111_1111_1111_1110
+ * match_many = 1111_1110_1110_1000
+ * match_pos = 0001_0010_0001_0011__0001_0010_0001_0000
+ *
+ * match = 0xFFFELLU
+ * match_many = 0xFEE8LLU
+ * match_pos = 0x12131210LLU
+ *
+ ***/
+
+#define LUT_MATCH                                          0xFFFELLU
+#define LUT_MATCH_MANY                                     0xFEE8LLU
+#define LUT_MATCH_POS                                      0x12131210LLU
+
+#define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)         
\
+{ /*
+   * Compare the signature mbuf_sig against the four signatures stored in
+   * bucket and report the outcome through the last three arguments, which
+   * must be assignable:
+   *   match      - 1 when at least one signature of the bucket is equal
+   *   match_many - 1 when more than one signature of the bucket is equal
+   *   match_pos  - index of the first equal signature, 0 when there is none
+   * A 4-bit mask is built with bit i set when bucket->sig[i] equals mbuf_sig
+   * (their XOR is zero); the LUT_* constants then decode that mask.
+   * The arguments are pasted without parentheses: pass plain identifiers.
+   */                                                                          
\
+       uint64_t bucket_sig[4], mask[4], mask_all;                              
   \
+                                                                               
\
+       bucket_sig[0] = bucket->sig[0];                                         
   \
+       bucket_sig[1] = bucket->sig[1];                                         
   \
+       bucket_sig[2] = bucket->sig[2];                                         
   \
+       bucket_sig[3] = bucket->sig[3];                                         
   \
+                                                                               
\
+       bucket_sig[0] ^= mbuf_sig; /* zero when the two signatures are equal */ 
   \
+       bucket_sig[1] ^= mbuf_sig;                                              
   \
+       bucket_sig[2] ^= mbuf_sig;                                              
   \
+       bucket_sig[3] ^= mbuf_sig;                                              
   \
+                                                                               
\
+       mask[0] = 0;                                                            
   \
+       mask[1] = 0;                                                            
   \
+       mask[2] = 0;                                                            
   \
+       mask[3] = 0;                                                            
   \
+                                                                               
\
+       if (bucket_sig[0] == 0) mask[0] = 1;                                    
   \
+       if (bucket_sig[1] == 0) mask[1] = 2;                                    
   \
+       if (bucket_sig[2] == 0) mask[2] = 4;                                    
   \
+       if (bucket_sig[3] == 0) mask[3] = 8;                                    
   \
+                                                                               
\
+       mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]);                   
   \
+                                                                               
\
+       match = (LUT_MATCH >> mask_all) & 1;                                    
   \
+       match_many = (LUT_MATCH_MANY >> mask_all) & 1;                          
   \
+       match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3; /* 2 bits/entry */  
   \
+}
+
+#define lookup_cmp_key(mbuf, key, match_key, f)                                
\
+{ /*
+   * Compare the lookup key found in the meta-data of mbuf, at offset
+   * f->key_offset, with the stored key that key points to. match_key (must
+   * be assignable) is set to 1 when the f->key_size bytes are equal and to 0
+   * otherwise.
+   * Key sizes of 8, 16, 32 and 64 bytes are compared as 64-bit words: the
+   * words are XORed pairwise and the results ORed together, so the keys are
+   * equal when the final value is zero. Any other size goes through memcmp().
+   * NOTE(review): the word accesses presumably rely on both keys being
+   * 8-byte aligned - confirm.
+   * NOTE(review): the locals are named "xor" and "or", which are macros once
+   * <iso646.h> is included and operator tokens in C++.
+   */                                                                          
\
+       uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);  
   \
+       uint64_t *bkt_key = (uint64_t *) key;                                   
   \
+                                                                               
   \
+       switch (f->key_size)                                                    
   \
+       {                                                                       
   \
+               case 8:                                                         
       \
+               {                                                               
       \
+                       uint64_t xor = pkt_key[0] ^ bkt_key[0];                 
           \
+                       match_key = 0;                                          
           \
+                       if (xor == 0) match_key = 1;                            
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
       \
+               case 16:                                                        
       \
+               {                                                               
       \
+                       uint64_t xor[2], or;                                    
           \
+                                                                               
\
+                       xor[0] = pkt_key[0] ^ bkt_key[0];                       
           \
+                       xor[1] = pkt_key[1] ^ bkt_key[1];                       
           \
+                       or = xor[0] | xor[1];                                   
           \
+                       match_key = 0;                                          
           \
+                       if (or == 0) match_key = 1;                             
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
   \
+               case 32:                                                        
       \
+               {                                                               
       \
+                       uint64_t xor[4], or;                                    
           \
+                                                                               
\
+                       xor[0] = pkt_key[0] ^ bkt_key[0];                       
           \
+                       xor[1] = pkt_key[1] ^ bkt_key[1];                       
           \
+                       xor[2] = pkt_key[2] ^ bkt_key[2];                       
           \
+                       xor[3] = pkt_key[3] ^ bkt_key[3];                       
           \
+                       or = xor[0] | xor[1] | xor[2] | xor[3];                 
           \
+                       match_key = 0;                                          
           \
+                       if (or == 0) match_key = 1;                             
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
\
+               case 64:                                                        
       \
+               {                                                               
       \
+                       uint64_t xor[8], or;                                    
           \
+                                                                               
\
+                       xor[0] = pkt_key[0] ^ bkt_key[0];                       
           \
+                       xor[1] = pkt_key[1] ^ bkt_key[1];                       
           \
+                       xor[2] = pkt_key[2] ^ bkt_key[2];                       
           \
+                       xor[3] = pkt_key[3] ^ bkt_key[3];                       
           \
+                       xor[4] = pkt_key[4] ^ bkt_key[4];                       
           \
+                       xor[5] = pkt_key[5] ^ bkt_key[5];                       
           \
+                       xor[6] = pkt_key[6] ^ bkt_key[6];                       
           \
+                       xor[7] = pkt_key[7] ^ bkt_key[7];                       
           \
+                       or = xor[0] | xor[1] | xor[2] | xor[3] | xor[4] | 
xor[5] | xor[6] | xor[7]; \
+                       match_key = 0;                                          
           \
+                       if (or == 0) match_key = 1;                             
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
   \
+               default: /* any other key size: plain byte comparison */        
       \
+                       match_key = 0;                                          
           \
+                       if (memcmp(pkt_key, bkt_key, f->key_size) == 0) 
{match_key = 1;}   \
+       }                                                                       
   \
+}
+
+#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)        
\
+{ /*
+   * Pipeline stage 0: take the two lowest set bits out of pkts_mask, store
+   * their bit positions in pkt00_index and pkt01_index, and prefetch the
+   * address RTE_MBUF_METADATA_UINT8_PTR(mbuf, 0) of the two corresponding
+   * packets. pkts_mask, pkt00_index and pkt01_index are written and must be
+   * assignable; t and g are not used by this stage.
+   * pkts_mask must have at least two bits set: with a single bit the second
+   * __builtin_ctzll() would be called with 0, whose result is undefined.
+   */                                                                          
\
+       uint64_t pkt00_mask, pkt01_mask;                                        
   \
+       struct rte_mbuf *mbuf00, *mbuf01;                                       
   \
+                                                                               
   \
+       pkt00_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt00_mask = 1LLU << pkt00_index;                                       
   \
+       pkts_mask &= ~pkt00_mask;                                               
   \
+       mbuf00 = pkts[pkt00_index];                                             
   \
+                                                                               
   \
+       pkt01_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt01_mask = 1LLU << pkt01_index;                                       
   \
+       pkts_mask &= ~pkt01_mask;                                               
   \
+       mbuf01 = pkts[pkt01_index];                                             
   \
+                                                                               
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));                  
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));                  
   \
+}
+
+/*
+ * Pipeline stage 0, variant that also works when a single packet is left.
+ *
+ * Takes the lowest set bit out of pkts_mask into pkt00_index, then the next
+ * one into pkt01_index. When no second bit is left, pkt01_index is set to
+ * pkt00_index, so the same packet is carried through both lanes of the
+ * pipeline. The address RTE_MBUF_METADATA_UINT8_PTR(mbuf, 0) of both packets
+ * is prefetched.
+ *
+ * pkts_mask must not be 0 on entry. pkts_mask, pkt00_index and pkt01_index
+ * are written and must be assignable; t and g are not used by this stage.
+ */
+#define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \
+       pkt01_index)                                                    \
+{                                                                      \
+       uint64_t pkt00_mask, pkt01_mask;                                \
+       struct rte_mbuf *mbuf00, *mbuf01;                               \
+                                                                       \
+       pkt00_index = __builtin_ctzll(pkts_mask);                       \
+       pkt00_mask = 1LLU << pkt00_index;                               \
+       pkts_mask &= ~pkt00_mask;                                       \
+       mbuf00 = pkts[pkt00_index];                                     \
+                                                                       \
+       /* __builtin_ctzll(0) is undefined: only call it with a bit left */ \
+       if (pkts_mask == 0) {                                           \
+               pkt01_index = pkt00_index;                              \
+       } else {                                                        \
+               pkt01_index = __builtin_ctzll(pkts_mask);               \
+       }                                                               \
+       pkt01_mask = 1LLU << pkt01_index;                               \
+       pkts_mask &= ~pkt01_mask;                                       \
+       mbuf01 = pkts[pkt01_index];                                     \
+                                                                       \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));          \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));          \
+}
+
+#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)                   
\
+{ /*
+   * Pipeline stage 1, pre-computed signature version. For each of the two
+   * packets pkts[pkt10_index] and pkts[pkt11_index]:
+   *   - read the 32-bit signature from the packet meta-data at offset
+   *     t->signature_offset,
+   *   - select the bucket t->buckets[signature & t->bucket_mask] and
+   *     prefetch it,
+   *   - save the full signature and the bucket pointer in the packet's
+   *     grinder, g[packet index].
+   */                                                                          
\
+       struct grinder *g10, *g11;                                              
   \
+       uint64_t sig10, sig11, bkt10_index, bkt11_index;                        
   \
+       struct rte_mbuf *mbuf10, *mbuf11;                                       
   \
+       struct bucket *bkt10, *bkt11, *buckets = t->buckets;                    
   \
+       uint64_t bucket_mask = t->bucket_mask;                                  
   \
+       uint32_t signature_offset = t->signature_offset;                        
   \
+                                                                               
   \
+       mbuf10 = pkts[pkt10_index];                                             
   \
+       sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);  
   \
+       bkt10_index = sig10 & bucket_mask;                                      
   \
+       bkt10 = &buckets[bkt10_index];                                          
   \
+                                                                               
   \
+       mbuf11 = pkts[pkt11_index];                                             
   \
+       sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);  
   \
+       bkt11_index = sig11 & bucket_mask;                                      
   \
+       bkt11 = &buckets[bkt11_index];                                          
   \
+                                                                               
   \
+       rte_prefetch0(bkt10);                                                   
   \
+       rte_prefetch0(bkt11);                                                   
   \
+                                                                               
   \
+       g10 = &g[pkt10_index];                                                  
   \
+       g10->sig = sig10;                                                       
   \
+       g10->bkt = bkt10;                                                       
   \
+                                                                               
   \
+       g11 = &g[pkt11_index];                                                  
   \
+       g11->sig = sig11;                                                       
   \
+       g11->bkt = bkt11;                                                       
   \
+}
+
+#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)             
\
+{ /*
+   * Pipeline stage 1, signature computed on the fly. For each of the two
+   * packets pkts[pkt10_index] and pkts[pkt11_index]:
+   *   - locate the key in the packet meta-data at offset t->key_offset,
+   *   - compute the signature with t->f_hash(key, t->key_size, t->seed),
+   *   - select the bucket t->buckets[signature & t->bucket_mask] and
+   *     prefetch it,
+   *   - save the full signature and the bucket pointer in the packet's
+   *     grinder, g[packet index].
+   */                                                                          
\
+       struct grinder *g10, *g11;                                              
   \
+       uint64_t sig10, sig11, bkt10_index, bkt11_index;                        
   \
+       struct rte_mbuf *mbuf10, *mbuf11;                                       
   \
+       struct bucket *bkt10, *bkt11, *buckets = t->buckets;                    
   \
+       uint8_t *key10, *key11;                                                 
   \
+       uint64_t bucket_mask = t->bucket_mask;                                  
   \
+       rte_table_hash_op_hash f_hash = t->f_hash;                              
   \
+       uint64_t seed = t->seed;                                                
   \
+       uint32_t key_size = t->key_size;                                        
   \
+       uint32_t key_offset = t->key_offset;                                    
   \
+                                                                               
   \
+       mbuf10 = pkts[pkt10_index];                                             
   \
+       key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);                
   \
+       sig10 = (uint64_t) f_hash(key10, key_size, seed);                       
   \
+       bkt10_index = sig10 & bucket_mask;                                      
   \
+       bkt10 = &buckets[bkt10_index];                                          
   \
+                                                                               
   \
+       mbuf11 = pkts[pkt11_index];                                             
   \
+       key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);                
   \
+       sig11 = (uint64_t) f_hash(key11, key_size, seed);                       
   \
+       bkt11_index = sig11 & bucket_mask;                                      
   \
+       bkt11 = &buckets[bkt11_index];                                          
   \
+                                                                               
   \
+       rte_prefetch0(bkt10);                                                   
   \
+       rte_prefetch0(bkt11);                                                   
   \
+                                                                               
   \
+       g10 = &g[pkt10_index];                                                  
   \
+       g10->sig = sig10;                                                       
   \
+       g10->bkt = bkt10;                                                       
   \
+                                                                               
   \
+       g11 = &g[pkt11_index];                                                  
   \
+       g11->sig = sig11;                                                       
   \
+       g11->bkt = bkt11;                                                       
   \
+}
+
+#define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)   
\
+{ /*
+   * Pipeline stage 2: signature comparison. For each of the two packets,
+   * using the signature and bucket saved in its grinder g[packet index]:
+   *   - reduce the signature to the form the add operation stores in the
+   *     buckets, (sig >> 16) | 1,
+   *   - compare it with the four signatures of the bucket (lookup_cmp_sig),
+   *   - prefetch the key at the first matching position; without a match
+   *     the position is 0, so the key of slot 0 is prefetched,
+   *   - save in the grinder the match flag, already shifted to the packet's
+   *     bit position, and the index of that key.
+   * The packet's bit is ORed into pkts_mask_match_many when more than one
+   * signature matched or when BUCKET_NEXT_VALID() is non-zero for the bucket
+   * (presumably: the bucket has a chained extension - confirm); this stage
+   * only looks at one key of the first bucket, so it cannot settle such
+   * packets.
+   */                                                                          
\
+       struct grinder *g20, *g21;                                              
   \
+       uint64_t sig20, sig21;                                                  
   \
+       struct bucket *bkt20, *bkt21;                                           
   \
+       uint8_t *key20, *key21, *key_mem = t->key_mem;                          
   \
+       uint64_t match20, match21, match_many20, match_many21, match_pos20, 
match_pos21; \
+       uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;      
   \
+                                                                               
   \
+       g20 = &g[pkt20_index];                                                  
   \
+       sig20 = g20->sig;                                                       
   \
+       bkt20 = g20->bkt;                                                       
   \
+       sig20 = (sig20 >> 16) | 1LLU;                                           
   \
+       lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);       
   \
+       match20 <<= pkt20_index;                                                
   \
+       match_many20 |= BUCKET_NEXT_VALID(bkt20);                               
   \
+       match_many20 <<= pkt20_index;                                           
   \
+       key20_index = bkt20->key_pos[match_pos20];                              
   \
+       key20 = &key_mem[key20_index << key_size_shl];                          
   \
+                                                                               
   \
+       g21 = &g[pkt21_index];                                                  
   \
+       sig21 = g21->sig;                                                       
   \
+       bkt21 = g21->bkt;                                                       
   \
+       sig21 = (sig21 >> 16) | 1LLU;                                           
   \
+       lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);       
   \
+       match21 <<= pkt21_index;                                                
   \
+       match_many21 |= BUCKET_NEXT_VALID(bkt21);                               
   \
+       match_many21 <<= pkt21_index;                                           
   \
+       key21_index = bkt21->key_pos[match_pos21];                              
   \
+       key21 = &key_mem[key21_index << key_size_shl];                          
   \
+                                                                               
   \
+       rte_prefetch0(key20);                                                   
   \
+       rte_prefetch0(key21);                                                   
   \
+                                                                               
   \
+       pkts_mask_match_many |= match_many20 | match_many21;                    
   \
+                                                                               
   \
+       g20->match = match20;                                                   
   \
+       g20->key_index = key20_index;                                           
   \
+                                                                               
   \
+       g21->match = match21;                                                   
   \
+       g21->key_index = key21_index;                                           
   \
+}
+
+/*
+ * Pipeline stage 3 (key match), executed for two packets at a time.
+ *
+ * For each of the two packet indexes the macro reads the match flag and key
+ * index saved in the grinder by stage 2, compares the packet against the
+ * stored key with lookup_cmp_key(), shifts the comparison result to the
+ * packet's bit position and ANDs it with the stage 2 match flag. The data
+ * pointer for the key index is written to entries[pkt_index]
+ * unconditionally (also when the key does not match) and prefetched. The
+ * combined key-match bits of both packets are OR'ed into pkts_mask_out.
+ *
+ * NOTE(review): lookup_cmp_key() is defined outside this excerpt; confirm
+ * there which packet bytes it compares.
+ */
+#define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries) \
+{                                                                              
\
+       struct grinder *g30, *g31;                                              
   \
+       struct rte_mbuf *mbuf30, *mbuf31;                                       
   \
+       uint8_t *key30, *key31, *key_mem = t->key_mem;                          
   \
+       uint8_t *data30, *data31, *data_mem = t->data_mem;                      
   \
+       uint64_t match30, match31, match_key30, match_key31, match_keys;        
   \
+       uint32_t key30_index, key31_index;                                      
   \
+       uint32_t key_size_shl = t->key_size_shl;                                
   \
+       uint32_t data_size_shl = t->data_size_shl;                              
   \
+                                                                               
   \
+       mbuf30 = pkts[pkt30_index];                                             
   \
+       g30 = &g[pkt30_index];                                                  
   \
+       match30 = g30->match;                                                   
   \
+       key30_index = g30->key_index;                                           
   \
+       key30 = &key_mem[key30_index << key_size_shl];                          
   \
+       lookup_cmp_key(mbuf30, key30, match_key30, t);                          
   \
+       match_key30 <<= pkt30_index;                                            
   \
+       match_key30 &= match30;                                                 
   \
+       data30 = &data_mem[key30_index << data_size_shl];                       
   \
+       entries[pkt30_index] = data30;                                          
   \
+                                                                               
\
+       mbuf31 = pkts[pkt31_index];                                             
   \
+       g31 = &g[pkt31_index];                                                  
   \
+       match31 = g31->match;                                                   
   \
+       key31_index = g31->key_index;                                           
   \
+       key31 = &key_mem[key31_index << key_size_shl];                          
   \
+       lookup_cmp_key(mbuf31, key31, match_key31, t);                          
   \
+       match_key31 <<= pkt31_index;                                            
   \
+       match_key31 &= match31;                                                 
   \
+       data31 = &data_mem[key31_index << data_size_shl];                       
   \
+       entries[pkt31_index] = data31;                                          
   \
+                                                                               
   \
+       rte_prefetch0(data30);                                                  
   \
+       rte_prefetch0(data31);                                                  
   \
+                                                                               
   \
+       match_keys = match_key30 | match_key31;                                 
   \
+       pkts_mask_out |= match_keys;                                            
   \
+}
+
+/* The lookup function implements a 4-stage pipeline, with each stage
+ * processing two different packets. The purpose of the pipelined
+ * implementation is to hide the latency of prefetching the data structures
+ * and to loosen the data dependency between instructions.
+ *
+ *   p00  _______   p10  _______   p20  _______   p30  _______
+ * ----->|       |----->|       |----->|       |----->|       |----->
+ *       |   0   |      |   1   |      |   2   |      |   3   |
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ *   p01            p11            p21            p31
+ *
+ * The naming convention is:
+ *    pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1
+ *
+ ***/
+/*
+ * Bulk lookup for the extendible bucket hash table. This variant runs
+ * lookup2_stage1 in the pipeline and passes 0 as the last argument of
+ * rte_table_hash_ext_lookup_unoptimized().
+ *
+ * table: table handle, used as struct rte_table_hash
+ * pkts: packet array, indexed by the bit position of the packet in pkts_mask
+ * pkts_mask: bit mask of the packets to look up
+ * lookup_hit_mask: output, bit mask of the packets whose key was found
+ * entries: output, per-packet pointer to the table data. Pipeline stage 3
+ *    stores a pointer for every packet it processes, so only the positions
+ *    flagged in lookup_hit_mask are meaningful.
+ *
+ * Returns 0, or the value returned by the unoptimized lookup when that
+ * function is used.
+ */
+static int rte_table_hash_ext_lookup(
+       void *table, 
+       struct rte_mbuf **pkts, 
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct grinder *g = t->grinders;
+       uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index, 
pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+       uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+       int status = 0;
+
+       /* Cannot run the pipeline with less than 7 packets */
+       if (__builtin_popcountll(pkts_mask) < 7) {
+               return rte_table_hash_ext_lookup_unoptimized(table, pkts, 
pkts_mask, lookup_hit_mask, entries, 0);
+       }
+       
+       /*
+        * Pipeline fill: the stages are started one after the other until
+        * stages 0, 1 and 2 each hold a pair of packets.
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline feed */
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+       
+       /* Pipeline feed */
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+       /*
+        * Pipeline run 
+        *
+        */
+       /*
+        * Runs until pkts_mask is zero. NOTE(review): the stage 0 macros are
+        * defined outside this excerpt and are presumably what consumes the
+        * bits of pkts_mask - confirm there.
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed */
+               pkt30_index = pkt20_index;
+               pkt31_index = pkt21_index;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, 
pkt00_index, pkt01_index);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2(t, g, pkt20_index, pkt21_index, 
pkts_mask_match_many);
+
+               /* Pipeline stage 3 */
+               lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, 
pkts_mask_out, entries);
+       }
+       
+       /*
+        * Pipeline drain: no new packets enter stage 0; the pairs still in
+        * flight are pushed through the remaining stages.
+        */
+       /* Pipeline feed */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+       
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+
+       /* Pipeline feed */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+       
+       /* Pipeline feed */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+
+       /* Slow path */
+       /*
+        * Packets flagged by stage 2 that the pipeline did not resolve as a
+        * hit are looked up again with the unoptimized function; its hits are
+        * merged into the result mask.
+        */
+       pkts_mask_match_many &= ~pkts_mask_out;
+       if (pkts_mask_match_many) {
+               uint64_t pkts_mask_out_slow = 0;
+
+               status = rte_table_hash_ext_lookup_unoptimized(table, pkts, 
pkts_mask_match_many, &pkts_mask_out_slow, entries, 0);
+               pkts_mask_out |= pkts_mask_out_slow;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return status;
+}
+
+/*
+ * Bulk lookup for the extendible bucket hash table, "dosig" variant. It has
+ * the same structure as rte_table_hash_ext_lookup(), but runs
+ * lookup2_stage1_dosig in the pipeline and passes 1 as the last argument of
+ * rte_table_hash_ext_lookup_unoptimized().
+ *
+ * table: table handle, used as struct rte_table_hash
+ * pkts: packet array, indexed by the bit position of the packet in pkts_mask
+ * pkts_mask: bit mask of the packets to look up
+ * lookup_hit_mask: output, bit mask of the packets whose key was found
+ * entries: output, per-packet pointer to the table data. Pipeline stage 3
+ *    stores a pointer for every packet it processes, so only the positions
+ *    flagged in lookup_hit_mask are meaningful.
+ *
+ * Returns 0, or the value returned by the unoptimized lookup when that
+ * function is used.
+ */
+static int rte_table_hash_ext_lookup_dosig(
+       void *table, 
+       struct rte_mbuf **pkts, 
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct grinder *g = t->grinders;
+       uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index, 
pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+       uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+       int status = 0;
+
+       /* Cannot run the pipeline with less than 7 packets */
+       if (__builtin_popcountll(pkts_mask) < 7) {
+               return rte_table_hash_ext_lookup_unoptimized(table, pkts, 
pkts_mask, lookup_hit_mask, entries, 1);
+       }
+       
+       /*
+        * Pipeline fill: the stages are started one after the other until
+        * stages 0, 1 and 2 each hold a pair of packets.
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline feed */
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+       
+       /* Pipeline feed */
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+       /*
+        * Pipeline run 
+        *
+        */
+       /*
+        * Runs until pkts_mask is zero. NOTE(review): the stage 0 macros are
+        * defined outside this excerpt and are presumably what consumes the
+        * bits of pkts_mask - confirm there.
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed */
+               pkt30_index = pkt20_index;
+               pkt31_index = pkt21_index;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, 
pkt00_index, pkt01_index);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2(t, g, pkt20_index, pkt21_index, 
pkts_mask_match_many);
+
+               /* Pipeline stage 3 */
+               lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, 
pkts_mask_out, entries);
+       }
+       
+       /*
+        * Pipeline drain: no new packets enter stage 0; the pairs still in
+        * flight are pushed through the remaining stages.
+        */
+       /* Pipeline feed */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+       
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+
+       /* Pipeline feed */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+       
+       /* Pipeline feed */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+
+       /* Slow path */
+       /*
+        * Packets flagged by stage 2 that the pipeline did not resolve as a
+        * hit are looked up again with the unoptimized function; its hits are
+        * merged into the result mask.
+        */
+       pkts_mask_match_many &= ~pkts_mask_out;
+       if (pkts_mask_match_many) {
+               uint64_t pkts_mask_out_slow = 0;
+
+               status = rte_table_hash_ext_lookup_unoptimized(table, pkts, 
pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
+               pkts_mask_out |= pkts_mask_out_slow;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return status;
+}
+
+/*
+ * Operations of the extendible bucket hash table whose lookup runs the
+ * lookup2_stage1 pipeline stage (rte_table_hash_ext_lookup).
+ */
+struct rte_table_ops rte_table_hash_ext_ops = {
+       .f_create   = rte_table_hash_ext_create,
+       .f_free     = rte_table_hash_ext_free,
+       .f_add      = rte_table_hash_ext_entry_add,
+       .f_delete   = rte_table_hash_ext_entry_delete,
+       .f_lookup   = rte_table_hash_ext_lookup,
+};
+
+/*
+ * Operations of the extendible bucket hash table whose lookup runs the
+ * lookup2_stage1_dosig pipeline stage (rte_table_hash_ext_lookup_dosig).
+ * All other operations are shared with rte_table_hash_ext_ops.
+ */
+struct rte_table_ops rte_table_hash_ext_dosig_ops = {
+       .f_create   = rte_table_hash_ext_create,
+       .f_free     = rte_table_hash_ext_free,
+       .f_add      = rte_table_hash_ext_entry_add,
+       .f_delete   = rte_table_hash_ext_entry_delete,
+       .f_lookup   = rte_table_hash_ext_lookup_dosig,
+};
diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c
new file mode 100644
index 0000000..8075f20
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_key16.c
@@ -0,0 +1,1086 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+/*
+ * Key size, in bytes, of this table flavour.
+ * NOTE(review): the create functions in this file set key_size to the literal
+ * 16 rather than to this macro.
+ */
+#define RTE_TABLE_HASH_KEY_SIZE                            16
+
+/*
+ * Bit OR'ed into every signature before it is stored or compared, so that an
+ * occupied slot never holds a zero signature; a zero signature marks a free
+ * slot.
+ */
+#define RTE_BUCKET_ENTRY_VALID                             0x1LLU
+
+/*
+ * Bucket with 4 slots for 16-byte keys. The create functions require
+ * sizeof(struct rte_bucket_4_16) to be a multiple of CACHE_LINE_SIZE.
+ *
+ * signature[i]: signature of slot i with RTE_BUCKET_ENTRY_VALID set, or 0
+ *    when the slot is free. NOTE(review): the add/delete code only uses
+ *    slots 0..3; the purpose of the fifth element is not visible here.
+ * lru_list: not accessed directly in this file's visible code; presumably
+ *    the state used by the lru_init/lru_pos/lru_update helpers - confirm in
+ *    rte_lru.h.
+ * next, next_valid: link to the next bucket of the chain, set when an
+ *    extendible bucket table attaches an extension bucket.
+ * key[i]: the 16-byte key of slot i, stored as two 64-bit words.
+ * data: the entries of the 4 slots; slot i starts at byte i * entry_size.
+ */
+struct rte_bucket_4_16 {
+       /* Cache line 0 */
+       uint64_t signature[4 + 1];
+       uint64_t lru_list;
+       struct rte_bucket_4_16 *next;
+       uint64_t next_valid;
+
+       /* Cache line 1 */
+       uint64_t key[4][2];
+
+       /* Cache line 2 */
+       uint8_t data[0];
+};
+
+/*
+ * Hash table descriptor, allocated together with the table memory. The
+ * create functions require sizeof(struct rte_table_hash) to be a multiple of
+ * CACHE_LINE_SIZE.
+ */
+struct rte_table_hash {
+       /* Input parameters */
+       /* Number of regular buckets; a power of two, used as index mask */
+       uint32_t n_buckets;
+       uint32_t n_entries_per_bucket;
+       /* Key size in bytes, passed to f_hash and used for key compare/copy */
+       uint32_t key_size;
+       /* Size in bytes of the data stored with each key */
+       uint32_t entry_size;
+       /* Size in bytes of one bucket including its entries */
+       uint32_t bucket_size;
+       /* Copied from the creation parameters; not used in the add/delete code */
+       uint32_t signature_offset;
+       uint32_t key_offset;
+       /* Hash function and seed used to compute the key signature */
+       rte_table_hash_op_hash f_hash;
+       uint64_t seed;
+
+       /* Extendible buckets */
+       /* Number of extension buckets placed after the regular buckets */
+       uint32_t n_buckets_ext;
+       /* Number of free extension bucket indexes currently on the stack */
+       uint32_t stack_pos;
+       /* Stack of free extension bucket indexes, located after all buckets */
+       uint32_t *stack;
+
+       /* Lookup table */
+       uint8_t memory[0] __rte_cache_aligned;
+};
+
+/*
+ * Validate the creation parameters of the LRU hash table with 16-byte keys.
+ *
+ * The parameters are rejected when the pointer is NULL, when n_entries is
+ * zero, when signature_offset is not a multiple of 4, when key_offset is not
+ * a multiple of 8 or when no hash function is provided. Each rejection is
+ * logged.
+ *
+ * Returns 0 when the parameters are valid, -EINVAL otherwise.
+ */
+static int
+check_params_create_lru(struct rte_table_hash_key16_lru_params *params)
+{
+       /* params: comes straight from the caller's void pointer */
+       if (params == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: params is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_entries */
+       if (params->n_entries == 0) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* signature offset */
+       if ((params->signature_offset & 0x3) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid signature_offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* key offset */
+       if ((params->key_offset & 0x7) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid key_offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* f_hash */
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: f_hash function pointer is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create an LRU hash table with 16-byte keys and 4 entries per bucket.
+ *
+ * params: pointer to a struct rte_table_hash_key16_lru_params
+ * socket_id: passed to rte_zmalloc_socket() for the table memory
+ * entry_size: size in bytes of the data stored with each key
+ *
+ * The number of buckets is n_entries / 4 rounded up, then rounded up to a
+ * power of two. The bucket and table sizes are computed with 64-bit
+ * arithmetic: the rest of this file addresses the table memory with 32-bit
+ * byte offsets, so a table that does not fit in that range is rejected
+ * instead of being allocated with a wrapped (too small) size.
+ *
+ * Returns the table handle, or NULL when the parameters are invalid, the
+ * table is too large or the allocation fails.
+ */
+static void *
+rte_table_hash_create_key16_lru(void *params,
+               int socket_id,
+               uint32_t entry_size)
+{
+       struct rte_table_hash_key16_lru_params *p =
+                       (struct rte_table_hash_key16_lru_params *) params;
+       struct rte_table_hash *f;
+       uint64_t bucket_size, total_size;
+       uint32_t n_buckets, n_entries_per_bucket, key_size, i;
+
+       /* Check input parameters */
+       if ((check_params_create_lru(p) != 0) ||
+               ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+               ((sizeof(struct rte_bucket_4_16) % CACHE_LINE_SIZE) != 0)) {
+               return NULL;
+       }
+       n_entries_per_bucket = 4;
+       key_size = 16;
+
+       /* Memory footprint; the 64-bit sum cannot wrap for large n_entries */
+       n_buckets = rte_align32pow2((uint32_t)
+                       (((uint64_t) p->n_entries + n_entries_per_bucket - 1) /
+                       n_entries_per_bucket));
+       bucket_size = ((sizeof(struct rte_bucket_4_16) +
+                       (uint64_t) n_entries_per_bucket * entry_size +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) *
+                       CACHE_LINE_SIZE;
+       total_size = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
+
+       /* bucket_size is tested first: total_size is only valid if it fits */
+       if ((bucket_size > UINT32_MAX) || (total_size > UINT32_MAX)) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit offset range\n",
+                               __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       f = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (f == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: Cannot allocate %u bytes for hash table\n",
+                               __func__, (uint32_t) total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+                       "%s: Hash table memory footprint is %u bytes\n",
+                       __func__, (uint32_t) total_size);
+
+       /* Memory initialization */
+       f->n_buckets = n_buckets;
+       f->n_entries_per_bucket = n_entries_per_bucket;
+       f->key_size = key_size;
+       f->entry_size = entry_size;
+       f->bucket_size = (uint32_t) bucket_size;
+       f->signature_offset = p->signature_offset;
+       f->key_offset = p->key_offset;
+       f->f_hash = p->f_hash;
+       f->seed = p->seed;
+
+       for (i = 0; i < n_buckets; i++) {
+               struct rte_bucket_4_16 *bucket;
+
+               bucket = (struct rte_bucket_4_16 *)
+                               &f->memory[i * f->bucket_size];
+               lru_init(bucket);
+       }
+
+       return f;
+}
+
+/*
+ * Release an LRU hash table with 16-byte keys.
+ *
+ * Returns 0 on success; a NULL table is logged and reported as -EINVAL.
+ */
+static int
+rte_table_hash_free_key16_lru(void *table)
+{
+       /* Valid handle: the descriptor and the table memory are one block */
+       if (table != NULL) {
+               rte_free((struct rte_table_hash *) table);
+               return 0;
+       }
+
+       RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+       return -EINVAL;
+}
+
+/*
+ * Add a key to the LRU hash table, or update the entry of a key that is
+ * already stored.
+ *
+ * table: table handle
+ * key: pointer to the key (key_size bytes are hashed, compared and copied)
+ * entry: data to store with the key (entry_size bytes are copied)
+ * key_found: output, 1 when the key was already present, 0 otherwise
+ * entry_ptr: output, address of the entry inside the table
+ *
+ * Slot selection, in order of priority: the slot already holding the key,
+ * the first free slot of the bucket, the slot returned by lru_pos().
+ * The chosen slot is passed to lru_update(). Always returns 0.
+ */
+static int
+rte_table_hash_entry_add_key16_lru(
+       void *table,
+       void *key,
+       void *entry,
+       int *key_found,
+       void **entry_ptr)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bucket;
+       uint8_t *slot_data;
+       uint64_t signature, pos;
+       uint32_t bucket_index, i, first_free = 4;
+
+       signature = f->f_hash(key, f->key_size, f->seed);
+       bucket_index = signature & (f->n_buckets - 1);
+       bucket = (struct rte_bucket_4_16 *)
+                       &f->memory[bucket_index * f->bucket_size];
+       signature |= RTE_BUCKET_ENTRY_VALID;
+
+       /* Single scan: look for the key and remember the first free slot */
+       for (i = 0; i < 4; i++) {
+               if (bucket->signature[i] == 0) {
+                       if (first_free == 4)
+                               first_free = i;
+                       continue;
+               }
+
+               if ((bucket->signature[i] != signature) ||
+                               (memcmp(key, bucket->key[i], f->key_size) != 0))
+                       continue;
+
+               /* Key is present in the bucket: overwrite its entry */
+               slot_data = &bucket->data[i * f->entry_size];
+               memcpy(slot_data, entry, f->entry_size);
+               lru_update(bucket, i);
+               *key_found = 1;
+               *entry_ptr = (void *) slot_data;
+               return 0;
+       }
+
+       /* Key is not present in the bucket: use the first free slot */
+       if (first_free < 4) {
+               i = first_free;
+               slot_data = &bucket->data[i * f->entry_size];
+
+               bucket->signature[i] = signature;
+               memcpy(bucket->key[i], key, f->key_size);
+               memcpy(slot_data, entry, f->entry_size);
+               lru_update(bucket, i);
+               *key_found = 0;
+               *entry_ptr = (void *) slot_data;
+               return 0;
+       }
+
+       /* Bucket full: replace LRU entry */
+       pos = lru_pos(bucket);
+       bucket->signature[pos] = signature;
+       memcpy(bucket->key[pos], key, f->key_size);
+       memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
+       lru_update(bucket, pos);
+       *key_found = 0;
+       *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
+
+       return 0;
+}
+
+/*
+ * Delete a key from the LRU hash table.
+ *
+ * table: table handle
+ * key: pointer to the key (key_size bytes are hashed and compared)
+ * key_found: output, 1 when the key was stored in the table, 0 otherwise
+ * entry: when not NULL, receives a copy of the deleted entry
+ *
+ * The slot is released by clearing its signature; key and data bytes are
+ * left in place. Always returns 0.
+ */
+static int
+rte_table_hash_entry_delete_key16_lru(
+       void *table,
+       void *key,
+       int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bkt;
+       uint64_t sig;
+       uint32_t bkt_idx, slot;
+
+       sig = f->f_hash(key, f->key_size, f->seed);
+       bkt_idx = sig & (f->n_buckets - 1);
+       bkt = (struct rte_bucket_4_16 *) &f->memory[bkt_idx * f->bucket_size];
+       sig |= RTE_BUCKET_ENTRY_VALID;
+
+       for (slot = 0; slot < 4; slot++) {
+               /* Cheap signature test first, full key compare second */
+               if (bkt->signature[slot] != sig)
+                       continue;
+               if (memcmp(key, bkt->key[slot], f->key_size) != 0)
+                       continue;
+
+               /* Key is present in the bucket */
+               bkt->signature[slot] = 0;
+               *key_found = 1;
+               if (entry != NULL)
+                       memcpy(entry, &bkt->data[slot * f->entry_size],
+                                       f->entry_size);
+               return 0;
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Validate the creation parameters of the extendible bucket hash table with
+ * 16-byte keys.
+ *
+ * The parameters are rejected when the pointer is NULL, when n_entries or
+ * n_entries_ext is zero, when signature_offset is not a multiple of 4, when
+ * key_offset is not a multiple of 8 or when no hash function is provided.
+ * Each rejection is logged.
+ *
+ * Returns 0 when the parameters are valid, -EINVAL otherwise.
+ */
+static int
+check_params_create_ext(struct rte_table_hash_key16_ext_params *params)
+{
+       /* params: comes straight from the caller's void pointer */
+       if (params == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: params is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_entries */
+       if (params->n_entries == 0) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_entries_ext */
+       if (params->n_entries_ext == 0) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* signature offset */
+       if ((params->signature_offset & 0x3) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* key offset */
+       if ((params->key_offset & 0x7) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* f_hash */
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: f_hash function pointer is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create an extendible bucket hash table with 16-byte keys and 4 entries
+ * per bucket.
+ *
+ * params: pointer to a struct rte_table_hash_key16_ext_params
+ * socket_id: passed to rte_zmalloc_socket() for the table memory
+ * entry_size: size in bytes of the data stored with each key
+ *
+ * Memory layout after the descriptor: n_buckets regular buckets,
+ * n_buckets_ext extension buckets, then the stack of free extension bucket
+ * indexes (initially holding every index).
+ *
+ * All sizes are computed with 64-bit arithmetic: the rest of this file
+ * addresses the table memory with 32-bit byte offsets, so a table that does
+ * not fit in that range is rejected instead of being allocated with a
+ * wrapped (too small) size.
+ *
+ * Returns the table handle, or NULL when the parameters are invalid, the
+ * table is too large or the allocation fails.
+ */
+static void *
+rte_table_hash_create_key16_ext(void *params,
+               int socket_id,
+               uint32_t entry_size)
+{
+       struct rte_table_hash_key16_ext_params *p =
+                       (struct rte_table_hash_key16_ext_params *) params;
+       struct rte_table_hash *f;
+       uint64_t bucket_size, stack_size, total_size;
+       uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size, i;
+
+       /* Check input parameters */
+       if ((check_params_create_ext(p) != 0) ||
+               ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+               ((sizeof(struct rte_bucket_4_16) % CACHE_LINE_SIZE) != 0)) {
+               return NULL;
+       }
+       n_entries_per_bucket = 4;
+       key_size = 16;
+
+       /* Memory footprint; the 64-bit sums cannot wrap for large counts */
+       n_buckets = rte_align32pow2((uint32_t)
+                       (((uint64_t) p->n_entries + n_entries_per_bucket - 1) /
+                       n_entries_per_bucket));
+       n_buckets_ext = (uint32_t)
+                       (((uint64_t) p->n_entries_ext + n_entries_per_bucket - 1) /
+                       n_entries_per_bucket);
+       bucket_size = ((sizeof(struct rte_bucket_4_16) +
+                       (uint64_t) n_entries_per_bucket * entry_size +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) *
+                       CACHE_LINE_SIZE;
+       stack_size = (((uint64_t) n_buckets_ext * sizeof(uint32_t) +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) *
+                       CACHE_LINE_SIZE;
+       total_size = sizeof(struct rte_table_hash) +
+                       ((uint64_t) n_buckets + n_buckets_ext) * bucket_size +
+                       stack_size;
+
+       /* bucket_size is tested first: total_size is only valid if it fits */
+       if ((bucket_size > UINT32_MAX) || (total_size > UINT32_MAX)) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit offset range\n",
+                               __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       f = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (f == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: Cannot allocate %u bytes for hash table\n",
+                               __func__, (uint32_t) total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+                       "%s: Hash table memory footprint is %u bytes\n",
+                       __func__, (uint32_t) total_size);
+
+       /* Memory initialization */
+       f->n_buckets = n_buckets;
+       f->n_entries_per_bucket = n_entries_per_bucket;
+       f->key_size = key_size;
+       f->entry_size = entry_size;
+       f->bucket_size = (uint32_t) bucket_size;
+       f->signature_offset = p->signature_offset;
+       f->key_offset = p->key_offset;
+       f->f_hash = p->f_hash;
+       f->seed = p->seed;
+
+       /* The stack of free extension buckets starts full */
+       f->n_buckets_ext = n_buckets_ext;
+       f->stack_pos = n_buckets_ext;
+       f->stack = (uint32_t *)
+                       &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+
+       for (i = 0; i < n_buckets_ext; i++) {
+               f->stack[i] = i;
+       }
+
+       return f;
+}
+
+/*
+ * Release an extendible bucket hash table with 16-byte keys.
+ *
+ * Returns 0 on success; a NULL table is logged and reported as -EINVAL.
+ */
+static int
+rte_table_hash_free_key16_ext(void *table)
+{
+       /* Valid handle: the descriptor and the table memory are one block */
+       if (table != NULL) {
+               rte_free((struct rte_table_hash *) table);
+               return 0;
+       }
+
+       RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+       return -EINVAL;
+}
+
+/*
+ * Add a key to the extendible bucket hash table, or update the entry of a
+ * key that is already stored.
+ *
+ * table: table handle
+ * key: pointer to the key (key_size bytes are hashed, compared and copied)
+ * entry: data to store with the key (entry_size bytes are copied)
+ * key_found: output, 1 when the key was already present, 0 otherwise
+ * entry_ptr: output, address of the entry inside the table
+ *
+ * Slot selection, in order of priority: the slot already holding the key
+ * anywhere in the bucket chain, the first free slot in chain order, slot 0
+ * of a new extension bucket taken from the stack of free buckets and
+ * linked at the end of the chain.
+ *
+ * Returns 0 on success, -ENOSPC when the chain is full and no extension
+ * bucket is available.
+ */
+static int
+rte_table_hash_entry_add_key16_ext(
+       void *table,
+       void *key,
+       void *entry,
+       int *key_found,
+       void **entry_ptr)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *head, *cur, *tail = NULL, *free_bkt = NULL;
+       uint8_t *dst;
+       uint64_t sig;
+       uint32_t idx, slot, free_slot = 0;
+
+       sig = f->f_hash(key, f->key_size, f->seed);
+       idx = sig & (f->n_buckets - 1);
+       head = (struct rte_bucket_4_16 *) &f->memory[idx * f->bucket_size];
+       sig |= RTE_BUCKET_ENTRY_VALID;
+
+       /*
+        * Single walk over the chain: look for the key, remember the first
+        * free slot and keep track of the last bucket of the chain.
+        */
+       for (cur = head; cur != NULL; tail = cur, cur = cur->next) {
+               for (slot = 0; slot < 4; slot++) {
+                       if (cur->signature[slot] == 0) {
+                               if (free_bkt == NULL) {
+                                       free_bkt = cur;
+                                       free_slot = slot;
+                               }
+                               continue;
+                       }
+
+                       if ((cur->signature[slot] != sig) ||
+                                       (memcmp(key, cur->key[slot],
+                                       f->key_size) != 0))
+                               continue;
+
+                       /* Key is present in the bucket: overwrite its entry */
+                       dst = &cur->data[slot * f->entry_size];
+                       memcpy(dst, entry, f->entry_size);
+                       *key_found = 1;
+                       *entry_ptr = (void *) dst;
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket: use the first free slot */
+       if (free_bkt != NULL) {
+               dst = &free_bkt->data[free_slot * f->entry_size];
+
+               free_bkt->signature[free_slot] = sig;
+               memcpy(free_bkt->key[free_slot], key, f->key_size);
+               memcpy(dst, entry, f->entry_size);
+               *key_found = 0;
+               *entry_ptr = (void *) dst;
+               return 0;
+       }
+
+       /* Bucket full and no extension bucket left */
+       if (f->stack_pos == 0)
+               return -ENOSPC;
+
+       /* Bucket full: extend bucket */
+       idx = f->stack[--f->stack_pos];
+       cur = (struct rte_bucket_4_16 *)
+                       &f->memory[(f->n_buckets + idx) * f->bucket_size];
+       tail->next = cur;
+       tail->next_valid = 1;
+
+       cur->signature[0] = sig;
+       memcpy(cur->key[0], key, f->key_size);
+       memcpy(&cur->data[0], entry, f->entry_size);
+       *key_found = 0;
+       *entry_ptr = (void *) &cur->data[0];
+       return 0;
+}
+
+/*
+ * Delete a key from a 16-byte key extendible bucket hash table.
+ *
+ * The key is hashed with f->f_hash to select the head bucket, then the head
+ * bucket and its chain of extension buckets are searched for a slot whose
+ * signature and key both match.
+ *
+ * table:     hash table handle (struct rte_table_hash *)
+ * key:       key to delete, f->key_size bytes
+ * key_found: set to 1 when the key was present and removed, 0 otherwise
+ * entry:     when not NULL and the key is found, receives a copy of the
+ *            f->entry_size bytes of data stored with the key
+ *
+ * When the deletion leaves an extension bucket (i.e. not the head bucket)
+ * without any valid slot, the bucket is unlinked from the chain, its header
+ * is cleared and its index is pushed back on the free bucket stack.
+ *
+ * Returns 0 in all cases.
+ */
+static int
+rte_table_hash_entry_delete_key16_ext(
+       void *table,
+       void *key,
+       int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev;
+       uint64_t signature;
+       uint32_t bucket_index, i;
+
+       signature = f->f_hash(key, f->key_size, f->seed);
+       bucket_index = signature & (f->n_buckets - 1);
+       bucket0 = (struct rte_bucket_4_16 *)
+                       &f->memory[bucket_index * f->bucket_size];
+       signature |= RTE_BUCKET_ENTRY_VALID;
+
+       /* Key is present in the bucket */
+       for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+                       bucket_prev = bucket, bucket = bucket->next) {
+               for (i = 0; i < 4; i ++) {
+                       uint64_t bucket_signature = bucket->signature[i];
+                       uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+                       uint8_t *bucket_data;
+
+                       if ((bucket_signature != signature) ||
+                                       (memcmp(key, bucket_key, f->key_size) != 0)) {
+                               continue;
+                       }
+
+                       bucket_data = &bucket->data[i * f->entry_size];
+
+                       /* Invalidate the slot and hand the data back */
+                       bucket->signature[i] = 0;
+                       *key_found = 1;
+                       if (entry) {
+                               memcpy(entry, bucket_data, f->entry_size);
+                       }
+
+                       /* Release the extension bucket once it is empty */
+                       if ((bucket->signature[0] == 0) &&
+                                       (bucket->signature[1] == 0) &&
+                                       (bucket->signature[2] == 0) &&
+                                       (bucket->signature[3] == 0) &&
+                                       (bucket_prev != NULL)) {
+                               bucket_prev->next = bucket->next;
+                               bucket_prev->next_valid = bucket->next_valid;
+
+                               memset(bucket, 0, sizeof(struct rte_bucket_4_16));
+
+                               /*
+                                * Buckets are laid out in f->memory with a
+                                * stride of f->bucket_size bytes, so the index
+                                * is derived from the byte offset rather than
+                                * from struct pointer arithmetic (which would
+                                * stride by sizeof(struct rte_bucket_4_16)).
+                                */
+                               bucket_index = (uint32_t)
+                                       ((((uint8_t *) bucket) -
+                                       ((uint8_t *) f->memory)) /
+                                       f->bucket_size) - f->n_buckets;
+                               f->stack[f->stack_pos ++] = bucket_index;
+                       }
+
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Compare a 16-byte key (two 64-bit words at key_in) against the four key
+ * slots of a bucket.
+ *
+ * For each slot, the two key words are XOR-ed with the input key and OR-ed
+ * together with the inverted bit 0 of the slot's signature, so a slot can
+ * only compare equal when bit 0 of its signature is set.
+ *
+ * On exit, pos is the index (0 .. 3) of the matching slot, or 4 when no slot
+ * matches. If several slots match, the highest index wins, because the later
+ * assignments overwrite the earlier ones.
+ */
+#define lookup_key16_cmp(key_in, bucket, pos)                                  
\
+{                                                                              
\
+       uint64_t xor[4][2], or[4], signature[4];                                
   \
+                                                                               
   \
+       signature[0] = (~bucket->signature[0]) & 1;                             
   \
+       signature[1] = (~bucket->signature[1]) & 1;                             
   \
+       signature[2] = (~bucket->signature[2]) & 1;                             
   \
+       signature[3] = (~bucket->signature[3]) & 1;                             
   \
+                                                                               
\
+       xor[0][0] = key_in[0] ^  bucket->key[0][0];                             
   \
+       xor[0][1] = key_in[1] ^  bucket->key[0][1];                             
   \
+                                                                               
   \
+       xor[1][0] = key_in[0] ^  bucket->key[1][0];                             
   \
+       xor[1][1] = key_in[1] ^  bucket->key[1][1];                             
   \
+                                                                               
   \
+       xor[2][0] = key_in[0] ^  bucket->key[2][0];                             
   \
+       xor[2][1] = key_in[1] ^  bucket->key[2][1];                             
   \
+                                                                               
   \
+       xor[3][0] = key_in[0] ^  bucket->key[3][0];                             
   \
+       xor[3][1] = key_in[1] ^  bucket->key[3][1];                             
   \
+                                                                               
   \
+       or[0] = xor[0][0] | xor[0][1] | signature[0];                           
   \
+       or[1] = xor[1][0] | xor[1][1] | signature[1];                           
   \
+       or[2] = xor[2][0] | xor[2][1] | signature[2];                           
   \
+       or[3] = xor[3][0] | xor[3][1] | signature[3];                           
   \
+                                                                               
   \
+       pos = 4;                                                                
   \
+       if (or[0] == 0) {pos = 0;}                                              
   \
+       if (or[1] == 0) {pos = 1;}                                              
   \
+       if (or[2] == 0) {pos = 2;}                                              
   \
+       if (or[3] == 0) {pos = 3;}                                              
   \
+}
+
+/*
+ * Single-packet lookup, stage 0: pick the next packet.
+ *
+ * Sets pkt0_index to the position of the lowest set bit of pkts_mask, clears
+ * that bit from pkts_mask, loads mbuf0 = pkts[pkt0_index] and prefetches the
+ * address returned by RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0).
+ *
+ * pkts_mask must be non-zero on entry: the result of __builtin_ctzll(0) is
+ * undefined.
+ */
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask)                     
\
+{                                                                              
\
+       uint64_t pkt_mask;                                                      
   \
+                                                                               
   \
+       pkt0_index = __builtin_ctzll(pkts_mask);                                
   \
+       pkt_mask = 1LLU << pkt0_index;                                          
   \
+       pkts_mask &= ~pkt_mask;                                                 
   \
+                                                                               
   \
+       mbuf0 = pkts[pkt0_index];                                               
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0));                   
   \
+}
+
+/*
+ * Single-packet lookup, stage 1: locate and prefetch the head bucket.
+ *
+ * Reads the 32-bit key signature stored in the meta-data of mbuf1 at
+ * f->signature_offset, masks it with (f->n_buckets - 1) to obtain the bucket
+ * index, points bucket1 at that bucket inside f->memory and prefetches the
+ * bucket address and the address CACHE_LINE_SIZE bytes after it.
+ *
+ * NOTE(review): the mask only selects a valid bucket when f->n_buckets is a
+ * power of two - confirm against the table create function.
+ */
+#define lookup1_stage1(mbuf1, bucket1, f)                                      
\
+{                                                                              
\
+       uint64_t signature;                                                     
   \
+       uint32_t bucket_index;                                                  
   \
+                                                                               
   \
+       signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);       
   \
+       bucket_index = signature & (f->n_buckets - 1);                          
   \
+       bucket1 = (struct rte_bucket_4_16 *)                                    
   \
+                       &f->memory[bucket_index * f->bucket_size];              
           \
+       rte_prefetch0(bucket1);                                                 
   \
+       rte_prefetch0((void *)(((uintptr_t) bucket1) + CACHE_LINE_SIZE));       
   \
+}
+
+/*
+ * Single-packet lookup, stage 2 (LRU table): search the bucket.
+ *
+ * Reads the key from the meta-data of mbuf2 at f->key_offset and compares it
+ * against the four slots of bucket2 with lookup_key16_cmp(). Bit pkt2_index
+ * of pkts_mask_out is set when bit 0 of the signature at the resulting
+ * position is set. entries[pkt2_index] is always written with the address of
+ * the data slot at that position (also when the lookup missed), the address
+ * is prefetched, and lru_update() is called for that position.
+ *
+ * NOTE(review): on a miss the position is 4, so signature[4] and the data
+ * area past the fourth entry are addressed - presumably the bucket struct
+ * reserves a fifth, always-zero signature; confirm against the struct
+ * definition.
+ */
+#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2,                         
\
+               pkts_mask_out, entries, f)                                      
       \
+{                                                                              
\
+       void *a;                                                                
   \
+       uint64_t pkt_mask;                                                      
   \
+       uint64_t *key;                                                          
   \
+       uint32_t pos;                                                           
   \
+                                                                               
   \
+       key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);               
   \
+                                                                               
   \
+       lookup_key16_cmp(key, bucket2, pos);                                    
   \
+                                                                               
   \
+       pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;              
   \
+       pkts_mask_out |= pkt_mask;                                              
   \
+                                                                               
   \
+       a = (void *) &bucket2->data[pos * f->entry_size];                       
   \
+       rte_prefetch0(a);                                                       
   \
+       entries[pkt2_index] = a;                                                
   \
+       lru_update(bucket2, pos);                                               
   \
+}
+
+/*
+ * Single-packet lookup, stage 2 (extendible bucket table): search the head
+ * bucket.
+ *
+ * Reads the key from the meta-data of mbuf2 at f->key_offset and compares it
+ * against the four slots of bucket2 with lookup_key16_cmp(). Bit pkt2_index
+ * of pkts_mask_out is set on a hit, and entries[pkt2_index] is always
+ * written with the (prefetched) address of the data slot at the resulting
+ * position.
+ *
+ * Bit pkt2_index of buckets_mask is set when the packet missed and
+ * bucket2->next_valid is set, i.e. the search has to continue in the next
+ * bucket of the chain. bucket2->next and the key pointer are saved in
+ * buckets[pkt2_index] and keys[pkt2_index] for that continuation.
+ */
+#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, 
\
+               buckets_mask, buckets, keys, f)                                 
       \
+{                                                                              
\
+       struct rte_bucket_4_16 *bucket_next;                                    
   \
+       void *a;                                                                
   \
+       uint64_t pkt_mask, bucket_mask;                                         
   \
+       uint64_t *key;                                                          
   \
+       uint32_t pos;                                                           
   \
+                                                                               
   \
+       key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);               
   \
+                                                                               
   \
+       lookup_key16_cmp(key, bucket2, pos);                                    
   \
+                                                                               
   \
+       pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;              
   \
+       pkts_mask_out |= pkt_mask;                                              
   \
+                                                                               
   \
+       a = (void *) &bucket2->data[pos * f->entry_size];                       
   \
+       rte_prefetch0(a);                                                       
   \
+       entries[pkt2_index] = a;                                                
   \
+                                                                               
   \
+       bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);        
   \
+       buckets_mask |= bucket_mask;                                            
   \
+       bucket_next = bucket2->next;                                            
   \
+       buckets[pkt2_index] = bucket_next;                                      
   \
+       keys[pkt2_index] = key;                                                 
   \
+}
+
+/*
+ * Continue the lookup of one packet in the next bucket of its chain.
+ *
+ * Takes the bucket and key saved in buckets[pkt_index] and keys[pkt_index],
+ * compares the key against the four slots of that bucket, sets bit pkt_index
+ * of pkts_mask_out on a hit and always writes entries[pkt_index] with the
+ * (prefetched) address of the data slot at the resulting position.
+ *
+ * Bit pkt_index of buckets_mask is set when the packet missed again and the
+ * bucket's next_valid flag is set. The next bucket pointer is prefetched
+ * (together with the address CACHE_LINE_SIZE bytes after it) and stored back
+ * in buckets[pkt_index], so the macro can be applied again on the next pass.
+ */
+#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,       
\
+               buckets_mask, f)                                                
       \
+{                                                                              
\
+       struct rte_bucket_4_16 *bucket, *bucket_next;                           
   \
+       void *a;                                                                
   \
+       uint64_t pkt_mask, bucket_mask;                                         
   \
+       uint64_t *key;                                                          
   \
+       uint32_t pos;                                                           
   \
+                                                                               
   \
+       bucket = buckets[pkt_index];                                            
   \
+       key = keys[pkt_index];                                                  
   \
+                                                                               
   \
+       lookup_key16_cmp(key, bucket, pos);                                     
   \
+                                                                               
   \
+       pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;                
   \
+       pkts_mask_out |= pkt_mask;                                              
   \
+                                                                               
   \
+       a = (void *) &bucket->data[pos * f->entry_size];                        
   \
+       rte_prefetch0(a);                                                       
   \
+       entries[pkt_index] = a;                                                 
   \
+                                                                               
   \
+       bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);          
   \
+       buckets_mask |= bucket_mask;                                            
   \
+       bucket_next = bucket->next;                                             
   \
+       rte_prefetch0(bucket_next);                                             
   \
+       rte_prefetch0((void *)(((uintptr_t) bucket_next) + CACHE_LINE_SIZE));   
   \
+       buckets[pkt_index] = bucket_next;                                       
   \
+       keys[pkt_index] = key;                                                  
   \
+}
+
+/*
+ * Two-packet lookup, stage 0: pick the next two packets.
+ *
+ * Twice in a row: takes the position of the lowest set bit of pkts_mask as
+ * the packet index, clears that bit from pkts_mask, loads the mbuf from pkts
+ * and prefetches the address returned by RTE_MBUF_METADATA_UINT8_PTR(mbuf, 0).
+ *
+ * pkts_mask must have at least two bits set on entry: the result of
+ * __builtin_ctzll(0) is undefined.
+ */
+#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,               
\
+               pkts, pkts_mask)                                                
       \
+{                                                                              
\
+       uint64_t pkt00_mask, pkt01_mask;                                        
   \
+                                                                               
   \
+       pkt00_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt00_mask = 1LLU << pkt00_index;                                       
   \
+       pkts_mask &= ~pkt00_mask;                                               
   \
+                                                                               
   \
+       mbuf00 = pkts[pkt00_index];                                             
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));                  
   \
+                                                                               
   \
+       pkt01_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt01_mask = 1LLU << pkt01_index;                                       
   \
+       pkts_mask &= ~pkt01_mask;                                               
   \
+                                                                               
   \
+       mbuf01 = pkts[pkt01_index];                                             
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));                  
   \
+}
+
+/*
+ * Two-packet lookup, stage 0, tolerating a single remaining packet.
+ *
+ * Same as lookup2_stage0(), except that when pkts_mask is empty after the
+ * first packet has been removed, the second slot reuses the index (and
+ * therefore the mbuf) of the first packet, so that packet goes through the
+ * pipeline twice.
+ *
+ * pkts_mask must be non-zero on entry. The __builtin_ctzll() result computed
+ * for the second slot is discarded when pkts_mask is zero at that point.
+ */
+#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,              
\
+               mbuf00, mbuf01, pkts, pkts_mask)                                
       \
+{                                                                              
\
+       uint64_t pkt00_mask, pkt01_mask;                                        
   \
+                                                                               
   \
+       pkt00_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt00_mask = 1LLU << pkt00_index;                                       
   \
+       pkts_mask &= ~pkt00_mask;                                               
   \
+                                                                               
   \
+       mbuf00 = pkts[pkt00_index];                                             
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));                  
   \
+                                                                               
   \
+       pkt01_index = __builtin_ctzll(pkts_mask);                               
   \
+       if (pkts_mask == 0) {                                                   
   \
+               pkt01_index = pkt00_index;                                      
       \
+       }                                                                       
   \
+       pkt01_mask = 1LLU << pkt01_index;                                       
   \
+       pkts_mask &= ~pkt01_mask;                                               
   \
+                                                                               
   \
+       mbuf01 = pkts[pkt01_index];                                             
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));                  
   \
+}
+
+/*
+ * Two-packet lookup, stage 1: locate and prefetch both head buckets.
+ *
+ * For each of the two mbufs: reads the 32-bit key signature stored in the
+ * meta-data at f->signature_offset, masks it with (f->n_buckets - 1) to
+ * obtain the bucket index, points the bucket variable at that bucket inside
+ * f->memory and prefetches the bucket address and the address
+ * CACHE_LINE_SIZE bytes after it.
+ */
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f)                  
\
+{                                                                              
\
+       uint64_t signature10, signature11;                                      
   \
+       uint32_t bucket10_index, bucket11_index;                                
   \
+                                                                               
   \
+       signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);    
   \
+       bucket10_index = signature10 & (f->n_buckets - 1);                      
   \
+       bucket10 = (struct rte_bucket_4_16 *)                                   
   \
+                       &f->memory[bucket10_index * f->bucket_size];            
           \
+       rte_prefetch0(bucket10);                                                
   \
+       rte_prefetch0((void *)(((uintptr_t) bucket10) + CACHE_LINE_SIZE));      
   \
+                                                                               
   \
+       signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);    
   \
+       bucket11_index = signature11 & (f->n_buckets - 1);                      
   \
+       bucket11 = (struct rte_bucket_4_16 *)                                   
   \
+                       &f->memory[bucket11_index * f->bucket_size];            
           \
+       rte_prefetch0(bucket11);                                                
   \
+       rte_prefetch0((void *)(((uintptr_t) bucket11) + CACHE_LINE_SIZE));      
   \
+}
+
+/*
+ * Two-packet lookup, stage 2 (LRU table): search both buckets.
+ *
+ * For each of the two packets: reads the key from the mbuf meta-data at
+ * f->key_offset, compares it against the four slots of its bucket with
+ * lookup_key16_cmp(), sets the packet's bit in pkts_mask_out on a hit,
+ * always writes the entries[] element of the packet with the (prefetched)
+ * address of the data slot at the resulting position, and calls
+ * lru_update() for that position.
+ */
+#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,           
\
+               bucket20, bucket21, pkts_mask_out, entries, f)                  
       \
+{                                                                              
\
+       void *a20, *a21;                                                        
   \
+       uint64_t pkt20_mask, pkt21_mask;                                        
   \
+       uint64_t *key20, *key21;                                                
   \
+       uint32_t pos20, pos21;                                                  
   \
+                                                                               
   \
+       key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);            
   \
+       key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);            
   \
+                                                                               
   \
+       lookup_key16_cmp(key20, bucket20, pos20);                               
   \
+       lookup_key16_cmp(key21, bucket21, pos21);                               
   \
+                                                                               
   \
+       pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;        
   \
+       pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;        
   \
+       pkts_mask_out |= pkt20_mask | pkt21_mask;                               
   \
+                                                                               
   \
+       a20 = (void *) &bucket20->data[pos20 * f->entry_size];                  
   \
+       a21 = (void *) &bucket21->data[pos21 * f->entry_size];                  
   \
+       rte_prefetch0(a20);                                                     
   \
+       rte_prefetch0(a21);                                                     
   \
+       entries[pkt20_index] = a20;                                             
   \
+       entries[pkt21_index] = a21;                                             
   \
+       lru_update(bucket20, pos20);                                            
   \
+       lru_update(bucket21, pos21);                                            
   \
+}
+
+/*
+ * Two-packet lookup, stage 2 (extendible bucket table): search both head
+ * buckets.
+ *
+ * For each of the two packets: reads the key from the mbuf meta-data at
+ * f->key_offset, compares it against the four slots of its bucket with
+ * lookup_key16_cmp(), sets the packet's bit in pkts_mask_out on a hit and
+ * always writes the entries[] element of the packet with the (prefetched)
+ * address of the data slot at the resulting position.
+ *
+ * The packet's bit is set in buckets_mask when the packet missed and the
+ * bucket's next_valid flag is set. The bucket's next pointer and the key
+ * pointer are saved in buckets[] and keys[] so the search can continue in
+ * the next bucket of the chain.
+ */
+#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, 
\
+               bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, 
f) \
+{                                                                              
\
+       struct rte_bucket_4_16 *bucket20_next, *bucket21_next;                  
   \
+       void *a20, *a21;                                                        
   \
+       uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;          
   \
+       uint64_t *key20, *key21;                                                
   \
+       uint32_t pos20, pos21;                                                  
   \
+                                                                               
   \
+       key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);            
   \
+       key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);            
   \
+                                                                               
   \
+       lookup_key16_cmp(key20, bucket20, pos20);                               
   \
+       lookup_key16_cmp(key21, bucket21, pos21);                               
   \
+                                                                               
   \
+       pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;        
   \
+       pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;        
   \
+       pkts_mask_out |= pkt20_mask | pkt21_mask;                               
   \
+                                                                               
   \
+       a20 = (void *) &bucket20->data[pos20 * f->entry_size];                  
   \
+       a21 = (void *) &bucket21->data[pos21 * f->entry_size];                  
   \
+       rte_prefetch0(a20);                                                     
   \
+       rte_prefetch0(a21);                                                     
   \
+       entries[pkt20_index] = a20;                                             
   \
+       entries[pkt21_index] = a21;                                             
   \
+                                                                               
   \
+       bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);  
   \
+       bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);  
   \
+       buckets_mask |= bucket20_mask | bucket21_mask;                          
   \
+       bucket20_next = bucket20->next;                                         
   \
+       bucket21_next = bucket21->next;                                         
   \
+       buckets[pkt20_index] = bucket20_next;                                   
   \
+       buckets[pkt21_index] = bucket21_next;                                   
   \
+       keys[pkt20_index] = key20;                                              
   \
+       keys[pkt21_index] = key21;                                              
   \
+}
+
+/*
+ * Bulk lookup for the 16-byte key LRU hash table.
+ *
+ * table:           hash table handle (struct rte_table_hash *)
+ * pkts:            burst of packets; the key and its signature are read from
+ *                  each packet's meta-data at f->key_offset and
+ *                  f->signature_offset
+ * pkts_mask:       bit i set means pkts[i] takes part in the lookup
+ * lookup_hit_mask: on return, bit i set means the key of pkts[i] was found
+ * entries:         entries[i] is written for every packet in pkts_mask and
+ *                  points to the matching entry data only when bit i of
+ *                  *lookup_hit_mask is set
+ *
+ * Bursts of fewer than 5 packets are handled one packet at a time. Larger
+ * bursts go through a 3-stage pipeline (pick packets, prefetch buckets,
+ * search buckets) that handles two packets per stage; when the number of
+ * packets is odd, the last packet is processed twice.
+ *
+ * Returns 0 in all cases.
+ */
+static int
+rte_table_hash_lookup_key16_lru(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0;
+
+       /* Cannot run the pipeline with less than 5 packets */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_16 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1(mbuf, bucket, f);
+                       lookup1_stage2_lru(pkt_index, mbuf, bucket, 
pkts_mask_out,
+                                       entries, f);
+               }
+
+               *lookup_hit_mask = pkts_mask_out;
+               return 0;
+       }
+
+       /*
+        * Pipeline fill
+        *
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, 
pkts_mask);
+
+       /* Pipeline feed */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, 
pkts_mask);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        *
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                               bucket20, bucket21, pkts_mask_out, entries, f);
+       }
+
+       /*
+        * Pipeline flush
+        *
+        */
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key16_lru() */
+
+/*
+ * Bulk lookup for the 16-byte key extendible bucket hash table.
+ *
+ * table:           hash table handle (struct rte_table_hash *)
+ * pkts:            burst of packets; the key and its signature are read from
+ *                  each packet's meta-data at f->key_offset and
+ *                  f->signature_offset
+ * pkts_mask:       bit i set means pkts[i] takes part in the lookup
+ * lookup_hit_mask: on return, bit i set means the key of pkts[i] was found
+ * entries:         entries[i] is written for every packet in pkts_mask and
+ *                  points to the matching entry data only when bit i of
+ *                  *lookup_hit_mask is set
+ *
+ * The head bucket of each packet is searched first, either one packet at a
+ * time (fewer than 5 packets) or through a 3-stage pipeline that handles two
+ * packets per stage. Packets that missed in a bucket whose next_valid flag
+ * is set are recorded in buckets_mask and are then walked along their bucket
+ * chain by the grinder loop. Both paths end in the grinder loop, so keys
+ * stored in extension buckets are found whatever the burst size.
+ *
+ * Returns 0 in all cases.
+ */
+static int
+rte_table_hash_lookup_key16_ext(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0, buckets_mask = 0;
+       struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+       uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+       /* Cannot run the pipeline with less than 5 packets */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_16 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1(mbuf, bucket, f);
+                       lookup1_stage2_ext(pkt_index, mbuf, bucket,
+                                       pkts_mask_out, entries, buckets_mask,
+                                       buckets, keys, f);
+               }
+
+               /*
+                * Only the head buckets have been searched so far: the
+                * extension buckets still have to be visited.
+                */
+               goto grind_next_buckets;
+       }
+
+       /*
+        * Pipeline fill
+        *
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline feed */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        *
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                               bucket20, bucket21, pkts_mask_out, entries,
+                               buckets_mask, buckets, keys, f);
+       }
+
+       /*
+        * Pipeline flush
+        *
+        */
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+       /* Grind next buckets */
+       for ( ; buckets_mask; ) {
+               uint64_t buckets_mask_next = 0;
+
+               for ( ; buckets_mask; ) {
+                       uint64_t pkt_mask;
+                       uint32_t pkt_index;
+
+                       pkt_index = __builtin_ctzll(buckets_mask);
+                       pkt_mask = 1LLU << pkt_index;
+                       buckets_mask &= ~pkt_mask;
+
+                       lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+                                       entries, buckets_mask_next, f);
+               }
+
+               buckets_mask = buckets_mask_next;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key16_ext() */
+
+/* Operations of the 16-byte key hash table with LRU buckets */
+struct rte_table_ops rte_table_hash_key16_lru_ops = {
+       .f_create = rte_table_hash_create_key16_lru,
+       .f_free   = rte_table_hash_free_key16_lru,
+       .f_add    = rte_table_hash_entry_add_key16_lru,
+       .f_delete = rte_table_hash_entry_delete_key16_lru,
+       .f_lookup = rte_table_hash_lookup_key16_lru,
+};
+
+/* Operations of the 16-byte key hash table with extendible buckets */
+struct rte_table_ops rte_table_hash_key16_ext_ops = {
+       .f_create = rte_table_hash_create_key16_ext,
+       .f_free   = rte_table_hash_free_key16_ext,
+       .f_add    = rte_table_hash_entry_add_key16_ext,
+       .f_delete = rte_table_hash_entry_delete_key16_ext,
+       .f_lookup = rte_table_hash_lookup_key16_ext,
+};
diff --git a/lib/librte_table/rte_table_hash_key32.c 
b/lib/librte_table/rte_table_hash_key32.c
new file mode 100644
index 0000000..4d56d6d
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_key32.c
@@ -0,0 +1,1100 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+/*
+ * Key size, in bytes, of this table variant.
+ * NOTE(review): not referenced by the code visible in this part of the file,
+ * which hard-codes 32 in the create functions -- confirm whether it is used.
+ */
+#define RTE_TABLE_HASH_KEY_SIZE                            32
+
+/*
+ * Bit 0 of every stored signature is forced to 1 by the add functions, so a
+ * stored signature of 0 always denotes a free bucket slot.
+ */
+#define RTE_BUCKET_ENTRY_VALID                             0x1LLU
+
+/*
+ * Hash bucket holding up to four 32-byte keys.
+ *
+ * The entry data of the four slots follows the fixed-size part of the
+ * bucket; slot i occupies entry_size bytes starting at
+ * data[i * entry_size]. The create functions require sizeof() of this
+ * structure to be a multiple of CACHE_LINE_SIZE.
+ */
+struct rte_bucket_4_32 {
+       /* Cache line 0 */
+       /*
+        * Slots 0..3: key signature with bit 0 set when the slot is in use,
+        * 0 when the slot is free. Slot 4 is never written by the code in
+        * this file; the lookup macros read it when the key comparison
+        * reports a miss (pos == 4), which yields a cleared hit bit.
+        */
+       uint64_t signature[4 + 1];
+       /* LRU state; set to 0x0000000100020003 by the LRU create function */
+       uint64_t lru_list;
+       /* Next bucket in the chain (extendible bucket tables), or NULL */
+       struct rte_bucket_4_32 *next;
+       /* Set to 1 when next has been linked by the ext add function */
+       uint64_t next_valid;
+
+       /* Cache lines 1 and 2 */
+       /* Four keys, each stored as four 64-bit words */
+       uint64_t key[4][4];
+
+       /* Cache line 3 */
+       /* Start of the per-slot entry data (4 * entry_size bytes) */
+       uint8_t data[0];
+};
+
+/*
+ * Hash table instance shared by the LRU and the extendible bucket flavors.
+ * The structure header is followed, in the same allocation, by the bucket
+ * array (and, for extendible bucket tables, by the extension buckets and
+ * the stack of free extension bucket indexes).
+ */
+struct rte_table_hash {
+       /* Input parameters */
+       /* Number of primary buckets; rounded up to a power of 2 at creation */
+       uint32_t n_buckets;
+       /* Always 4 for this table variant */
+       uint32_t n_entries_per_bucket;
+       /* Always 32 for this table variant */
+       uint32_t key_size;
+       /* Size in bytes of the data stored with each key */
+       uint32_t entry_size;
+       /* Size in bytes of one bucket including its entry data */
+       uint32_t bucket_size;
+       /* Offset of the 32-bit key signature within the packet meta-data */
+       uint32_t signature_offset;
+       /* Offset of the key within the packet meta-data */
+       uint32_t key_offset;
+       /* Hash function used by the add and delete operations */
+       rte_table_hash_op_hash f_hash;
+       /* Seed passed to f_hash */
+       uint64_t seed;
+
+       /* Extendible buckets */
+       /* Number of extension buckets (left at zero by the LRU create) */
+       uint32_t n_buckets_ext;
+       /* Number of free extension bucket indexes currently on the stack */
+       uint32_t stack_pos;
+       /* Stack of free extension bucket indexes, located after the buckets */
+       uint32_t *stack;
+
+       /* Lookup table */
+       uint8_t memory[0] __rte_cache_aligned;
+};
+
+/*
+ * Validate the parameters used to create a 32-byte key LRU hash table.
+ *
+ * Checks, in this order: params is not NULL, n_entries is non-zero,
+ * signature_offset is a multiple of 4, key_offset is a multiple of 8 and
+ * f_hash is set. The first failing check is logged.
+ *
+ * Returns 0 when every check passes, -EINVAL otherwise.
+ */
+static int
+check_params_create_lru(struct rte_table_hash_key32_lru_params *params)
+{
+       /* params: reject NULL instead of dereferencing it below */
+       if (params == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: params is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_entries */
+       if (params->n_entries == 0) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* signature offset: the lookup path reads the signature as uint32_t */
+       if ((params->signature_offset & 0x3) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* key offset: the lookup path reads the key as uint64_t words */
+       if ((params->key_offset & 0x7) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* f_hash */
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: f_hash function pointer is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create a 32-byte key hash table with LRU bucket replacement.
+ *
+ * params:     pointer to struct rte_table_hash_key32_lru_params
+ * socket_id:  passed to rte_zmalloc_socket() for the table memory
+ * entry_size: size in bytes of the data stored with each key
+ *
+ * The number of buckets is n_entries / 4 rounded up, then rounded up to a
+ * power of 2. Each bucket is padded to a multiple of CACHE_LINE_SIZE.
+ *
+ * Returns the table handle, or NULL when the parameters are invalid, the
+ * table would not fit the 32-bit offsets used throughout this file, or the
+ * memory allocation fails.
+ */
+static void *
+rte_table_hash_create_key32_lru(void *params,
+               int socket_id,
+               uint32_t entry_size)
+{
+       struct rte_table_hash_key32_lru_params *p =
+                       (struct rte_table_hash_key32_lru_params *) params;
+       struct rte_table_hash *f;
+       uint64_t n_buckets_min, bucket_size, table_size;
+       uint32_t n_buckets, n_entries_per_bucket, key_size, total_size, i;
+
+       /* Check input parameters */
+       if ((check_params_create_lru(p) != 0) ||
+               ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+               ((sizeof(struct rte_bucket_4_32) % CACHE_LINE_SIZE) != 0)) {
+               return NULL;
+       }
+       n_entries_per_bucket = 4;
+       key_size = 32;
+
+       /*
+        * Memory allocation. All sizes are computed in 64 bits: in 32 bits
+        * n_entries + 3, 4 * entry_size and n_buckets * bucket_size can wrap
+        * around, which would yield an undersized allocation that the bucket
+        * initialization loop below then overruns.
+        */
+       n_buckets_min = ((uint64_t) p->n_entries + n_entries_per_bucket - 1) /
+                       n_entries_per_bucket;
+       n_buckets = rte_align32pow2((uint32_t) n_buckets_min);
+       bucket_size = (sizeof(struct rte_bucket_4_32) +
+                       (uint64_t) n_entries_per_bucket * entry_size +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE * CACHE_LINE_SIZE;
+
+       /*
+        * Bucket offsets are computed as 32-bit products elsewhere in this
+        * file (index * f->bucket_size), so the whole table has to fit in
+        * 32 bits. bucket_size is checked first so that the product below
+        * cannot overflow 64 bits.
+        */
+       table_size = UINT64_MAX;
+       if (bucket_size <= UINT32_MAX)
+               table_size = sizeof(struct rte_table_hash) +
+                               n_buckets * bucket_size;
+       if (table_size > UINT32_MAX) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit limit\n",
+                               __func__);
+               return NULL;
+       }
+       total_size = (uint32_t) table_size;
+
+       f = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (f == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: Cannot allocate %u bytes for hash table\n",
+                               __func__, total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+                       "%s: Hash table memory footprint is %u bytes\n",
+                       __func__, total_size);
+
+       /* Memory initialization */
+       f->n_buckets = n_buckets;
+       f->n_entries_per_bucket = n_entries_per_bucket;
+       f->key_size = key_size;
+       f->entry_size = entry_size;
+       f->bucket_size = (uint32_t) bucket_size;
+       f->signature_offset = p->signature_offset;
+       f->key_offset = p->key_offset;
+       f->f_hash = p->f_hash;
+       f->seed = p->seed;
+
+       /* Every bucket starts with the same initial LRU state */
+       for (i = 0; i < n_buckets; i++) {
+               struct rte_bucket_4_32 *bucket;
+
+               bucket = (struct rte_bucket_4_32 *)
+                               &f->memory[i * f->bucket_size];
+               bucket->lru_list = 0x0000000100020003LLU;
+       }
+
+       return f;
+}
+
+/*
+ * Release a table created by rte_table_hash_create_key32_lru().
+ *
+ * Returns 0 on success, -EINVAL (after logging) when table is NULL.
+ */
+static int
+rte_table_hash_free_key32_lru(void *table)
+{
+       /* A NULL handle is reported as an error rather than silently ignored */
+       if (table == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* Header, buckets and entry data live in this single allocation */
+       rte_free(table);
+       return 0;
+}
+
+/*
+ * Add a key to the LRU table, or update its entry if the key is present.
+ *
+ * table:     handle returned by rte_table_hash_create_key32_lru()
+ * key:       key to add; f->key_size bytes are read
+ * entry:     entry data; f->entry_size bytes are copied into the bucket
+ * key_found: set to 1 when the key was already present, 0 otherwise
+ * entry_ptr: set to the address of the entry data stored in the bucket
+ *
+ * When the key is absent and the bucket has no free slot, the slot returned
+ * by lru_pos() is overwritten. No argument is checked for NULL.
+ *
+ * NOTE(review): lru_update() and lru_pos() come from rte_lru.h, which is not
+ * visible here; their exact effect on bucket->lru_list should be confirmed
+ * there.
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_entry_add_key32_lru(
+       void *table,
+       void *key,
+       void *entry,
+       int *key_found,
+       void **entry_ptr)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_32 *bucket;
+       uint64_t signature, pos;
+       uint32_t bucket_index, i;
+
+       /* n_buckets is a power of 2, so the mask selects the bucket */
+       signature = f->f_hash(key, f->key_size, f->seed);
+       bucket_index = signature & (f->n_buckets - 1);
+       bucket = (struct rte_bucket_4_32 *)
+                       &f->memory[bucket_index * f->bucket_size];
+       /* Stored signatures always have bit 0 set; 0 marks a free slot */
+       signature |= RTE_BUCKET_ENTRY_VALID;
+
+       /* Key is present in the bucket */
+       for (i = 0; i < 4; i ++) {
+               uint64_t bucket_signature = bucket->signature[i];
+               uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+               if ((bucket_signature == signature) &&
+                               (memcmp(key, bucket_key, f->key_size) == 0)) {
+                       uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+                       memcpy(bucket_data, entry, f->entry_size);
+                       lru_update(bucket, i);
+                       *key_found = 1;
+                       *entry_ptr = (void *) bucket_data;
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       /* Take the first free slot, if any */
+       for (i = 0; i < 4; i ++) {
+               uint64_t bucket_signature = bucket->signature[i];
+               uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+
+               if (bucket_signature == 0) {
+                       uint8_t *bucket_data = &bucket->data[i * f->entry_size];
+
+                       bucket->signature[i] = signature;
+                       memcpy(bucket_key, key, f->key_size);
+                       memcpy(bucket_data, entry, f->entry_size);
+                       lru_update(bucket, i);
+                       *key_found = 0;
+                       *entry_ptr = (void *) bucket_data;
+
+                       return 0;
+               }
+       }
+
+       /* Bucket full: replace LRU entry */
+       pos = lru_pos(bucket);
+       bucket->signature[pos] = signature;
+       memcpy(bucket->key[pos], key, f->key_size);
+       memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size);
+       lru_update(bucket, pos);
+       *key_found  = 0;
+       *entry_ptr = (void *) &bucket->data[pos * f->entry_size];
+
+       return 0;
+}
+
+/*
+ * Remove a key from the LRU table.
+ *
+ * table:     handle returned by rte_table_hash_create_key32_lru()
+ * key:       key to delete; f->key_size bytes are read
+ * key_found: set to 1 when the key was present (and removed), 0 otherwise
+ * entry:     when not NULL and the key was present, receives a copy of the
+ *            f->entry_size bytes of entry data that were stored with the key
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_entry_delete_key32_lru(
+       void *table,
+       void *key,
+       int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct rte_bucket_4_32 *bkt;
+       uint64_t sig;
+       uint32_t index, slot;
+
+       /* Locate the bucket selected by the low bits of the hash */
+       sig = t->f_hash(key, t->key_size, t->seed);
+       index = sig & (t->n_buckets - 1);
+       bkt = (struct rte_bucket_4_32 *) &t->memory[index * t->bucket_size];
+
+       /* Stored signatures always carry the valid bit */
+       sig |= RTE_BUCKET_ENTRY_VALID;
+
+       for (slot = 0; slot < 4; slot++) {
+               if (bkt->signature[slot] != sig)
+                       continue;
+               if (memcmp(key, (uint8_t *) bkt->key[slot], t->key_size) != 0)
+                       continue;
+
+               /* Match: clearing the signature frees the slot */
+               bkt->signature[slot] = 0;
+               *key_found = 1;
+               if (entry != NULL)
+                       memcpy(entry, &bkt->data[slot * t->entry_size],
+                                       t->entry_size);
+
+               return 0;
+       }
+
+       /* No slot of the bucket holds this key */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Validate the parameters used to create a 32-byte key extendible bucket
+ * hash table.
+ *
+ * Checks, in this order: params is not NULL, n_entries and n_entries_ext
+ * are non-zero, signature_offset is a multiple of 4, key_offset is a
+ * multiple of 8 and f_hash is set. The first failing check is logged.
+ *
+ * Returns 0 when every check passes, -EINVAL otherwise.
+ */
+static int
+check_params_create_ext(struct rte_table_hash_key32_ext_params *params)
+{
+       /* params: reject NULL instead of dereferencing it below */
+       if (params == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: params is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_entries */
+       if (params->n_entries == 0) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_entries_ext */
+       if (params->n_entries_ext == 0) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* signature offset: the lookup path reads the signature as uint32_t */
+       if ((params->signature_offset & 0x3) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid signature offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* key offset: the lookup path reads the key as uint64_t words */
+       if ((params->key_offset & 0x7) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: invalid key offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* f_hash */
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: f_hash function pointer is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create a 32-byte key hash table with extendible buckets.
+ *
+ * params:     pointer to struct rte_table_hash_key32_ext_params
+ * socket_id:  passed to rte_zmalloc_socket() for the table memory
+ * entry_size: size in bytes of the data stored with each key
+ *
+ * Memory layout after the table header: n_buckets primary buckets,
+ * n_buckets_ext extension buckets, then a stack of free extension bucket
+ * indexes. n_buckets is n_entries / 4 rounded up, then rounded up to a
+ * power of 2; n_buckets_ext is n_entries_ext / 4 rounded up.
+ *
+ * Returns the table handle, or NULL when the parameters are invalid, the
+ * table would not fit the 32-bit offsets used throughout this file, or the
+ * memory allocation fails.
+ */
+static void *
+rte_table_hash_create_key32_ext(void *params,
+               int socket_id,
+               uint32_t entry_size)
+{
+       struct rte_table_hash_key32_ext_params *p =
+                       (struct rte_table_hash_key32_ext_params *) params;
+       struct rte_table_hash *f;
+       uint64_t n_buckets_min, bucket_size, stack_size, table_size;
+       uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket,
+                       key_size, total_size, i;
+
+       /* Check input parameters */
+       if ((check_params_create_ext(p) != 0) ||
+               ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+               ((sizeof(struct rte_bucket_4_32) % CACHE_LINE_SIZE) != 0)) {
+               return NULL;
+       }
+       n_entries_per_bucket = 4;
+       key_size = 32;
+
+       /*
+        * Memory allocation. All sizes are computed in 64 bits: in 32 bits
+        * the entry counts plus 3, 4 * entry_size and the bucket count times
+        * the bucket size can wrap around, which would yield an undersized
+        * allocation that is then overrun when the stack is initialized.
+        */
+       n_buckets_min = ((uint64_t) p->n_entries + n_entries_per_bucket - 1) /
+                       n_entries_per_bucket;
+       n_buckets = rte_align32pow2((uint32_t) n_buckets_min);
+       n_buckets_ext = (uint32_t) (((uint64_t) p->n_entries_ext +
+                       n_entries_per_bucket - 1) / n_entries_per_bucket);
+       bucket_size = (sizeof(struct rte_bucket_4_32) +
+                       (uint64_t) n_entries_per_bucket * entry_size +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE * CACHE_LINE_SIZE;
+       stack_size = ((uint64_t) n_buckets_ext * sizeof(uint32_t) +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE * CACHE_LINE_SIZE;
+
+       /*
+        * Bucket offsets are computed as 32-bit products elsewhere in this
+        * file (index * f->bucket_size), so the whole table has to fit in
+        * 32 bits. bucket_size is checked first so that the product below
+        * cannot overflow 64 bits.
+        */
+       table_size = UINT64_MAX;
+       if (bucket_size <= UINT32_MAX)
+               table_size = sizeof(struct rte_table_hash) +
+                               ((uint64_t) n_buckets + n_buckets_ext) *
+                               bucket_size + stack_size;
+       if (table_size > UINT32_MAX) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit limit\n",
+                               __func__);
+               return NULL;
+       }
+       total_size = (uint32_t) table_size;
+
+       f = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (f == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: Cannot allocate %u bytes for hash table\n",
+                               __func__, total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+                       "%s: Hash table memory footprint is %u bytes\n",
+                       __func__, total_size);
+
+       /* Memory initialization */
+       f->n_buckets = n_buckets;
+       f->n_entries_per_bucket = n_entries_per_bucket;
+       f->key_size = key_size;
+       f->entry_size = entry_size;
+       f->bucket_size = (uint32_t) bucket_size;
+       f->signature_offset = p->signature_offset;
+       f->key_offset = p->key_offset;
+       f->f_hash = p->f_hash;
+       f->seed = p->seed;
+
+       /* The free stack sits right after the last extension bucket */
+       f->n_buckets_ext = n_buckets_ext;
+       f->stack_pos = n_buckets_ext;
+       f->stack = (uint32_t *)
+                       &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size];
+
+       /* Initially every extension bucket is free */
+       for (i = 0; i < n_buckets_ext; i++) {
+               f->stack[i] = i;
+       }
+
+       return f;
+}
+
+/*
+ * Release a table created by rte_table_hash_create_key32_ext().
+ *
+ * Returns 0 on success, -EINVAL (after logging) when table is NULL.
+ */
+static int
+rte_table_hash_free_key32_ext(void *table)
+{
+       /* A NULL handle is reported as an error rather than silently ignored */
+       if (table == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* Header, buckets and free stack live in this single allocation */
+       rte_free(table);
+       return 0;
+}
+
+/*
+ * Add a key to the extendible bucket table, or update its entry if the key
+ * is already present anywhere in the bucket chain.
+ *
+ * table:     handle returned by rte_table_hash_create_key32_ext()
+ * key:       key to add; f->key_size bytes are read
+ * entry:     entry data; f->entry_size bytes are copied into the bucket
+ * key_found: set to 1 when the key was already present, 0 when it was added
+ * entry_ptr: set to the address of the entry data stored in the bucket
+ *
+ * Returns 0 on success, -ENOSPC when the chain has no free slot and no
+ * extension bucket is left (key_found and entry_ptr are then untouched).
+ */
+static int
+rte_table_hash_entry_add_key32_ext(
+       void *table,
+       void *key,
+       void *entry,
+       int *key_found,
+       void **entry_ptr)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct rte_bucket_4_32 *head, *cur, *tail, *free_bucket;
+       uint64_t sig;
+       uint32_t index, slot, free_slot;
+       uint8_t *data;
+
+       sig = t->f_hash(key, t->key_size, t->seed);
+       index = sig & (t->n_buckets - 1);
+       head = (struct rte_bucket_4_32 *) &t->memory[index * t->bucket_size];
+       /* Stored signatures always carry the valid bit; 0 marks a free slot */
+       sig |= RTE_BUCKET_ENTRY_VALID;
+
+       /*
+        * Walk the chain once: look for the key while remembering the first
+        * free slot and the last bucket of the chain.
+        */
+       tail = NULL;
+       free_bucket = NULL;
+       free_slot = 0;
+       for (cur = head; cur != NULL; cur = cur->next) {
+               tail = cur;
+
+               for (slot = 0; slot < 4; slot++) {
+                       uint64_t slot_sig = cur->signature[slot];
+
+                       if (slot_sig == 0) {
+                               if (free_bucket == NULL) {
+                                       free_bucket = cur;
+                                       free_slot = slot;
+                               }
+                               continue;
+                       }
+
+                       if ((slot_sig != sig) ||
+                                       (memcmp(key, (uint8_t *) cur->key[slot],
+                                       t->key_size) != 0))
+                               continue;
+
+                       /* Key already stored: only the entry data changes */
+                       data = &cur->data[slot * t->entry_size];
+                       memcpy(data, entry, t->entry_size);
+                       *key_found = 1;
+                       *entry_ptr = (void *) data;
+
+                       return 0;
+               }
+       }
+
+       /* Key absent: use the first free slot of the chain when there is one */
+       if (free_bucket != NULL) {
+               data = &free_bucket->data[free_slot * t->entry_size];
+
+               free_bucket->signature[free_slot] = sig;
+               memcpy((uint8_t *) free_bucket->key[free_slot], key, t->key_size);
+               memcpy(data, entry, t->entry_size);
+               *key_found = 0;
+               *entry_ptr = (void *) data;
+
+               return 0;
+       }
+
+       /* Chain full: without a spare extension bucket the add fails */
+       if (t->stack_pos == 0)
+               return -ENOSPC;
+
+       /* Pop a free extension bucket and link it after the chain tail */
+       index = t->stack[-- t->stack_pos];
+       cur = (struct rte_bucket_4_32 *)
+                       &t->memory[(t->n_buckets + index) * t->bucket_size];
+       tail->next = cur;
+       tail->next_valid = 1;
+
+       cur->signature[0] = sig;
+       memcpy(cur->key[0], key, t->key_size);
+       memcpy(&cur->data[0], entry, t->entry_size);
+       *key_found = 0;
+       *entry_ptr = (void *) &cur->data[0];
+
+       return 0;
+}
+
+/*
+ * Remove a key from the extendible bucket table.
+ *
+ * table:     handle returned by rte_table_hash_create_key32_ext()
+ * key:       key to delete; f->key_size bytes are read
+ * key_found: set to 1 when the key was present (and removed), 0 otherwise
+ * entry:     when not NULL and the key was present, receives a copy of the
+ *            f->entry_size bytes of entry data that were stored with the key
+ *
+ * When the removal leaves an extension bucket (any bucket but the first of
+ * the chain) empty, the bucket is unlinked, its header and keys are zeroed
+ * and its index is pushed back on the free stack.
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_entry_delete_key32_ext(
+       void *table,
+       void *key,
+       int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev;
+       uint64_t signature;
+       uint32_t bucket_index, i;
+
+       signature = f->f_hash(key, f->key_size, f->seed);
+       bucket_index = signature & (f->n_buckets - 1);
+       bucket0 = (struct rte_bucket_4_32 *)
+                       &f->memory[bucket_index * f->bucket_size];
+       /* Stored signatures always carry the valid bit */
+       signature |= RTE_BUCKET_ENTRY_VALID;
+
+       /* Key is present in the bucket */
+       for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+                       bucket_prev = bucket, bucket = bucket->next) {
+               for (i = 0; i < 4; i++) {
+                       uint64_t bucket_signature = bucket->signature[i];
+                       uint8_t *bucket_key = (uint8_t *) bucket->key[i];
+                       uint8_t *bucket_data;
+
+                       if ((bucket_signature != signature) ||
+                                       (memcmp(key, bucket_key, f->key_size) != 0))
+                               continue;
+
+                       bucket_data = &bucket->data[i * f->entry_size];
+
+                       /* Clearing the signature frees the slot */
+                       bucket->signature[i] = 0;
+                       *key_found = 1;
+                       if (entry) {
+                               memcpy(entry, bucket_data, f->entry_size);
+                       }
+
+                       /* Recycle an extension bucket that became empty */
+                       if ((bucket->signature[0] == 0) &&
+                                       (bucket->signature[1] == 0) &&
+                                       (bucket->signature[2] == 0) &&
+                                       (bucket->signature[3] == 0) &&
+                                       (bucket_prev != NULL)) {
+                               bucket_prev->next = bucket->next;
+                               bucket_prev->next_valid = bucket->next_valid;
+
+                               memset(bucket, 0, sizeof(struct rte_bucket_4_32));
+
+                               /*
+                                * Buckets are f->bucket_size bytes apart
+                                * (header plus entry data), not
+                                * sizeof(struct rte_bucket_4_32), so the
+                                * index has to be derived from the byte
+                                * offset rather than from a struct pointer
+                                * difference.
+                                */
+                               bucket_index = (uint32_t) (((uint8_t *) bucket -
+                                               (uint8_t *) f->memory) /
+                                               f->bucket_size) - f->n_buckets;
+                               f->stack[f->stack_pos ++] = bucket_index;
+                       }
+
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Compare the 32-byte key key_in (four uint64_t words) with the four keys
+ * stored in bucket.
+ *
+ * pos is set to the index of the slot whose key equals key_in and whose
+ * signature has bit 0 set; when several slots qualify the highest index
+ * wins, and when none does pos is set to 4.
+ *
+ * The arguments are expanded several times and without parentheses, so pass
+ * plain identifiers only.
+ */
+#define lookup_key32_cmp(key_in, bucket, pos) \
+{ \
+       uint64_t xor[4][4], or[4], signature[4]; \
+       \
+       signature[0] = ((~bucket->signature[0]) & 1); \
+       signature[1] = ((~bucket->signature[1]) & 1); \
+       signature[2] = ((~bucket->signature[2]) & 1); \
+       signature[3] = ((~bucket->signature[3]) & 1); \
+       \
+       xor[0][0] = key_in[0] ^  bucket->key[0][0]; \
+       xor[0][1] = key_in[1] ^  bucket->key[0][1]; \
+       xor[0][2] = key_in[2] ^  bucket->key[0][2]; \
+       xor[0][3] = key_in[3] ^  bucket->key[0][3]; \
+       \
+       xor[1][0] = key_in[0] ^  bucket->key[1][0]; \
+       xor[1][1] = key_in[1] ^  bucket->key[1][1]; \
+       xor[1][2] = key_in[2] ^  bucket->key[1][2]; \
+       xor[1][3] = key_in[3] ^  bucket->key[1][3]; \
+       \
+       xor[2][0] = key_in[0] ^  bucket->key[2][0]; \
+       xor[2][1] = key_in[1] ^  bucket->key[2][1]; \
+       xor[2][2] = key_in[2] ^  bucket->key[2][2]; \
+       xor[2][3] = key_in[3] ^  bucket->key[2][3]; \
+       \
+       xor[3][0] = key_in[0] ^  bucket->key[3][0]; \
+       xor[3][1] = key_in[1] ^  bucket->key[3][1]; \
+       xor[3][2] = key_in[2] ^  bucket->key[3][2]; \
+       xor[3][3] = key_in[3] ^  bucket->key[3][3]; \
+       \
+       or[0] = xor[0][0] | xor[0][1] | xor[0][2] | xor[0][3] | signature[0]; \
+       or[1] = xor[1][0] | xor[1][1] | xor[1][2] | xor[1][3] | signature[1]; \
+       or[2] = xor[2][0] | xor[2][1] | xor[2][2] | xor[2][3] | signature[2]; \
+       or[3] = xor[3][0] | xor[3][1] | xor[3][2] | xor[3][3] | signature[3]; \
+       \
+       pos = 4; \
+       if (or[0] == 0) {pos = 0;} \
+       if (or[1] == 0) {pos = 1;} \
+       if (or[2] == 0) {pos = 2;} \
+       if (or[3] == 0) {pos = 3;} \
+}
+
+/*
+ * Lookup pipeline stage 0, one packet at a time: take the lowest set bit of
+ * pkts_mask as pkt0_index, clear it from pkts_mask, load the matching mbuf
+ * into mbuf0 and prefetch the start of its meta-data.
+ *
+ * pkts_mask must be non-zero (__builtin_ctzll is undefined for 0).
+ */
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask) \
+{ \
+       uint64_t pkt_mask; \
+       \
+       pkt0_index = __builtin_ctzll(pkts_mask); \
+       pkt_mask = 1LLU << pkt0_index; \
+       pkts_mask &= ~pkt_mask; \
+       \
+       mbuf0 = pkts[pkt0_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0)); \
+}
+
+/*
+ * Lookup pipeline stage 1, one packet at a time: read the 32-bit key
+ * signature from the meta-data of mbuf1 at f->signature_offset, select the
+ * bucket with the low bits of the signature, store it in bucket1 and
+ * prefetch the first three cache lines of the bucket.
+ */
+#define lookup1_stage1(mbuf1, bucket1, f) \
+{ \
+       uint64_t signature; \
+       uint32_t bucket_index; \
+       \
+       signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset); \
+       bucket_index = signature & (f->n_buckets - 1); \
+       bucket1 = (struct rte_bucket_4_32 *) \
+                       &f->memory[bucket_index * f->bucket_size]; \
+       rte_prefetch0(bucket1); \
+       rte_prefetch0((void *)(((uintptr_t) bucket1) + CACHE_LINE_SIZE)); \
+       rte_prefetch0((void *)(((uintptr_t) bucket1) + 2 * CACHE_LINE_SIZE)); \
+}
+
+/*
+ * Lookup pipeline stage 2, one packet at a time, LRU table: compare the key
+ * found in the meta-data of mbuf2 at f->key_offset with the keys of
+ * bucket2.
+ *
+ * On a hit, bit pkt2_index is set in pkts_mask_out. In every case
+ * entries[pkt2_index] is set to (and a prefetch issued for) the entry data
+ * address of the reported position, and lru_update() is invoked with that
+ * position; on a miss the position is 4, so the stored address is only
+ * meaningful when the hit bit is set.
+ */
+#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \
+               pkts_mask_out, entries, f) \
+{ \
+       void *a; \
+       uint64_t pkt_mask; \
+       uint64_t *key; \
+       uint32_t pos; \
+       \
+       key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
+       \
+       lookup_key32_cmp(key, bucket2, pos); \
+       \
+       pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index; \
+       pkts_mask_out |= pkt_mask; \
+       \
+       a = (void *) &bucket2->data[pos * f->entry_size]; \
+       rte_prefetch0(a); \
+       entries[pkt2_index] = a; \
+       lru_update(bucket2, pos); \
+}
+
+/*
+ * Lookup pipeline stage 2, one packet at a time, extendible bucket table:
+ * compare the key found in the meta-data of mbuf2 at f->key_offset with the
+ * keys of bucket2.
+ *
+ * On a hit, bit pkt2_index is set in pkts_mask_out. On a miss with
+ * bucket2->next_valid set, bit pkt2_index is set in buckets_mask so that the
+ * next bucket of the chain gets searched later. In every case
+ * entries[pkt2_index], buckets[pkt2_index] (next bucket) and
+ * keys[pkt2_index] are written.
+ */
+#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, \
+               entries, buckets_mask, buckets, keys, f) \
+{ \
+       struct rte_bucket_4_32 *bucket_next; \
+       void *a; \
+       uint64_t pkt_mask, bucket_mask; \
+       uint64_t *key; \
+       uint32_t pos; \
+       \
+       key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
+       \
+       lookup_key32_cmp(key, bucket2, pos); \
+       \
+       pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index; \
+       pkts_mask_out |= pkt_mask; \
+       \
+       a = (void *) &bucket2->data[pos * f->entry_size]; \
+       rte_prefetch0(a); \
+       entries[pkt2_index] = a; \
+       \
+       bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index); \
+       buckets_mask |= bucket_mask; \
+       bucket_next = bucket2->next; \
+       buckets[pkt2_index] = bucket_next; \
+       keys[pkt2_index] = key; \
+}
+
+/*
+ * Search one more bucket of the chain for packet pkt_index (extendible
+ * bucket table): compare keys[pkt_index] with the keys of
+ * buckets[pkt_index].
+ *
+ * On a hit, bit pkt_index is set in pkts_mask_out. On a miss with the
+ * bucket's next_valid set, bit pkt_index is set in buckets_mask so that the
+ * caller runs another round for this packet. buckets[pkt_index] is advanced
+ * to the next bucket, whose first three cache lines are prefetched.
+ */
+#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, \
+               entries, buckets_mask, f) \
+{ \
+       struct rte_bucket_4_32 *bucket, *bucket_next; \
+       void *a; \
+       uint64_t pkt_mask, bucket_mask; \
+       uint64_t *key; \
+       uint32_t pos; \
+       \
+       bucket = buckets[pkt_index]; \
+       key = keys[pkt_index]; \
+       \
+       lookup_key32_cmp(key, bucket, pos); \
+       \
+       pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index; \
+       pkts_mask_out |= pkt_mask; \
+       \
+       a = (void *) &bucket->data[pos * f->entry_size]; \
+       rte_prefetch0(a); \
+       entries[pkt_index] = a; \
+       \
+       bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index); \
+       buckets_mask |= bucket_mask; \
+       bucket_next = bucket->next; \
+       rte_prefetch0(bucket_next); \
+       rte_prefetch0((void *)(((uintptr_t) bucket_next) + CACHE_LINE_SIZE)); \
+       rte_prefetch0((void *)(((uintptr_t) bucket_next) + 2 * CACHE_LINE_SIZE)); \
+       buckets[pkt_index] = bucket_next; \
+       keys[pkt_index] = key; \
+}
+
+/*
+ * Lookup pipeline stage 0, two packets at a time: take the two lowest set
+ * bits of pkts_mask as pkt00_index and pkt01_index, clear them from
+ * pkts_mask, load the matching mbufs into mbuf00 and mbuf01 and prefetch
+ * the start of their meta-data.
+ *
+ * pkts_mask must have at least two bits set (__builtin_ctzll is undefined
+ * for 0).
+ */
+#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, \
+               pkts, pkts_mask) \
+{ \
+       uint64_t pkt00_mask, pkt01_mask; \
+       \
+       pkt00_index = __builtin_ctzll(pkts_mask); \
+       pkt00_mask = 1LLU << pkt00_index; \
+       pkts_mask &= ~pkt00_mask; \
+       \
+       mbuf00 = pkts[pkt00_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
+       \
+       pkt01_index = __builtin_ctzll(pkts_mask); \
+       pkt01_mask = 1LLU << pkt01_index; \
+       pkts_mask &= ~pkt01_mask; \
+       \
+       mbuf01 = pkts[pkt01_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
+}
+
+/*
+ * Lookup pipeline, stage 0, two packets per iteration, tolerating a mask
+ * with a single bit left.
+ *
+ * Same contract as lookup2_stage0(), except that when only one packet remains
+ * in pkts_mask the second slot (pkt01_index / mbuf01) is filled with a copy
+ * of the first one, so the following stages always see two valid packets.
+ *
+ * pkts_mask needs at least one bit set on entry.
+ */
+#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, \
+               mbuf00, mbuf01, pkts, pkts_mask) \
+{ \
+       uint64_t pkt00_mask, pkt01_mask; \
+       \
+       pkt00_index = __builtin_ctzll(pkts_mask); \
+       pkt00_mask = 1LLU << pkt00_index; \
+       pkts_mask &= ~pkt00_mask; \
+       \
+       mbuf00 = pkts[pkt00_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
+       \
+       /* \
+        * Test for the empty mask BEFORE counting trailing zeros: \
+        * __builtin_ctzll(0) has an undefined result. \
+        */ \
+       if (pkts_mask == 0) { \
+               pkt01_index = pkt00_index; \
+       } else { \
+               pkt01_index = __builtin_ctzll(pkts_mask); \
+       } \
+       pkt01_mask = 1LLU << pkt01_index; \
+       /* No-op when the first packet was duplicated (bit already clear) */ \
+       pkts_mask &= ~pkt01_mask; \
+       \
+       mbuf01 = pkts[pkt01_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
+}
+
+/*
+ * Lookup pipeline, stage 1, two packets per iteration.
+ *
+ * For each of the two mbufs: reads the 32-bit value found at
+ * f->signature_offset in the mbuf meta-data, masks it with
+ * (f->n_buckets - 1) to obtain a bucket index, stores the address of that
+ * bucket in bucket10 / bucket11 and prefetches three consecutive cache lines
+ * starting at the bucket address.
+ */
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
+{ \
+       uint64_t sig10, sig11; \
+       uint32_t idx10, idx11; \
+       \
+       /* First packet */ \
+       sig10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset); \
+       idx10 = sig10 & (f->n_buckets - 1); \
+       bucket10 = (struct rte_bucket_4_32 *) \
+                       &f->memory[idx10 * f->bucket_size]; \
+       rte_prefetch0(bucket10); \
+       rte_prefetch0((void *)(((uintptr_t) bucket10) + CACHE_LINE_SIZE)); \
+       rte_prefetch0((void *)(((uintptr_t) bucket10) + \
+                       2 * CACHE_LINE_SIZE)); \
+       \
+       /* Second packet */ \
+       sig11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset); \
+       idx11 = sig11 & (f->n_buckets - 1); \
+       bucket11 = (struct rte_bucket_4_32 *) \
+                       &f->memory[idx11 * f->bucket_size]; \
+       rte_prefetch0(bucket11); \
+       rte_prefetch0((void *)(((uintptr_t) bucket11) + CACHE_LINE_SIZE)); \
+       rte_prefetch0((void *)(((uintptr_t) bucket11) + \
+                       2 * CACHE_LINE_SIZE)); \
+}
+
+/*
+ * Lookup pipeline, stage 2, two packets per iteration, LRU flavor.
+ *
+ * For each packet: fetches the key pointer at f->key_offset in the mbuf
+ * meta-data, lets lookup_key32_cmp() select a position within the bucket,
+ * ORs bit 0 of the signature at that position (shifted to the packet index)
+ * into pkts_mask_out, stores and prefetches the address of the entry data at
+ * that position, then calls lru_update() for the bucket / position pair.
+ *
+ * The key / position locals keep their names because they are handed to
+ * macros defined elsewhere.
+ */
+#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, \
+               bucket20, bucket21, pkts_mask_out, entries, f) \
+{ \
+       uint64_t *key20, *key21; \
+       uint32_t pos20, pos21; \
+       uint64_t hit20, hit21; \
+       void *entry20, *entry21; \
+       \
+       /* Key pointers from the packet meta-data */ \
+       key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset); \
+       key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset); \
+       \
+       /* Bucket search */ \
+       lookup_key32_cmp(key20, bucket20, pos20); \
+       lookup_key32_cmp(key21, bucket21, pos21); \
+       \
+       /* Hit bit of each packet, placed at the packet index */ \
+       hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index; \
+       hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index; \
+       pkts_mask_out |= hit20; \
+       pkts_mask_out |= hit21; \
+       \
+       /* Entry data */ \
+       entry20 = (void *) &bucket20->data[pos20 * f->entry_size]; \
+       entry21 = (void *) &bucket21->data[pos21 * f->entry_size]; \
+       rte_prefetch0(entry20); \
+       rte_prefetch0(entry21); \
+       entries[pkt20_index] = entry20; \
+       entries[pkt21_index] = entry21; \
+       \
+       /* LRU bookkeeping */ \
+       lru_update(bucket20, pos20); \
+       lru_update(bucket21, pos21); \
+}
+
+/*
+ * Lookup pipeline, stage 2, two packets per iteration, extendible bucket
+ * flavor.
+ *
+ * For each packet: fetches the key pointer at f->key_offset in the mbuf
+ * meta-data, lets lookup_key32_cmp() select a position within the bucket,
+ * ORs bit 0 of the signature at that position (shifted to the packet index)
+ * into pkts_mask_out and stores / prefetches the entry data address.
+ *
+ * A packet that did not hit and whose bucket has next_valid set gets its bit
+ * raised in buckets_mask; its next bucket and key pointer are saved in
+ * buckets[] / keys[] (indexed by packet index) for a later pass.
+ *
+ * The key / position locals keep their names because they are handed to a
+ * macro defined elsewhere.
+ */
+#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, \
+               bucket20, bucket21, pkts_mask_out, entries, buckets_mask, \
+               buckets, keys, f) \
+{ \
+       uint64_t *key20, *key21; \
+       uint32_t pos20, pos21; \
+       uint64_t hit20, hit21, more20, more21; \
+       void *entry20, *entry21; \
+       \
+       /* Key pointers from the packet meta-data */ \
+       key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset); \
+       key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset); \
+       \
+       /* Bucket search */ \
+       lookup_key32_cmp(key20, bucket20, pos20); \
+       lookup_key32_cmp(key21, bucket21, pos21); \
+       \
+       /* Hit bit of each packet, placed at the packet index */ \
+       hit20 = (bucket20->signature[pos20] & 1LLU) << pkt20_index; \
+       hit21 = (bucket21->signature[pos21] & 1LLU) << pkt21_index; \
+       pkts_mask_out |= hit20; \
+       pkts_mask_out |= hit21; \
+       \
+       /* Entry data */ \
+       entry20 = (void *) &bucket20->data[pos20 * f->entry_size]; \
+       entry21 = (void *) &bucket21->data[pos21 * f->entry_size]; \
+       rte_prefetch0(entry20); \
+       rte_prefetch0(entry21); \
+       entries[pkt20_index] = entry20; \
+       entries[pkt21_index] = entry21; \
+       \
+       /* Packets that missed and still have a bucket to visit */ \
+       more20 = (~hit20) & (bucket20->next_valid << pkt20_index); \
+       more21 = (~hit21) & (bucket21->next_valid << pkt21_index); \
+       buckets_mask |= more20; \
+       buckets_mask |= more21; \
+       \
+       /* State saved for the next-bucket pass */ \
+       buckets[pkt20_index] = bucket20->next; \
+       buckets[pkt21_index] = bucket21->next; \
+       keys[pkt20_index] = key20; \
+       keys[pkt21_index] = key21; \
+}
+
+/*
+ * Burst lookup for the 32-byte key LRU hash table.
+ *
+ * table           - table handle (struct rte_table_hash)
+ * pkts            - burst of packets, indexed by bit position in pkts_mask
+ * pkts_mask       - bit n set means pkts[n] takes part in the lookup
+ * lookup_hit_mask - output; receives the hit mask built by the stage macros
+ * entries         - output; entries[n] receives the entry address selected
+ *                   for packet n
+ *
+ * Always returns 0.
+ *
+ * Bursts of at least 5 packets go through a 3-stage software pipeline that
+ * handles two packets per stage; smaller bursts are looked up one packet at
+ * a time.
+ */
+static int
+rte_table_hash_lookup_key32_lru(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0;
+
+       /* Too few packets to fill the pipeline: one packet at a time */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               while (pkts_mask) {
+                       struct rte_bucket_4_32 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1(mbuf, bucket, f);
+                       lookup1_stage2_lru(pkt_index, mbuf, bucket,
+                                       pkts_mask_out, entries, f);
+               }
+
+               *lookup_hit_mask = pkts_mask_out;
+               return 0;
+       }
+
+       /*
+        * Fill: push two pairs of packets into the pipeline.
+        */
+
+       /* Stage 0, first pair */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Advance: stage 0 -> stage 1 */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Stage 0, second pair */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Stage 1, first pair */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Run: all three stages busy until the input mask is empty.
+        */
+       while (pkts_mask) {
+               /* Advance: stage 1 -> stage 2, stage 0 -> stage 1 */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Stage 0 (copes with a single packet left) */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Stage 1 */
+               lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Stage 2 */
+               lookup2_stage2_lru(pkt20_index, pkt21_index,
+                               mbuf20, mbuf21, bucket20, bucket21,
+                               pkts_mask_out, entries, f);
+       }
+
+       /*
+        * Flush: drain the two pairs still inside the pipeline.
+        */
+
+       /* Advance: stage 1 -> stage 2, stage 0 -> stage 1 */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index,
+                       mbuf20, mbuf21, bucket20, bucket21,
+                       pkts_mask_out, entries, f);
+
+       /* Advance: stage 1 -> stage 2 */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index,
+                       mbuf20, mbuf21, bucket20, bucket21,
+                       pkts_mask_out, entries, f);
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key32_lru() */
+
+/*
+ * Burst lookup for the 32-byte key extendible bucket hash table.
+ *
+ * table           - table handle (struct rte_table_hash)
+ * pkts            - burst of packets, indexed by bit position in pkts_mask
+ * pkts_mask       - bit n set means pkts[n] takes part in the lookup
+ * lookup_hit_mask - output; receives the hit mask built by the stage macros
+ * entries         - output; entries[n] receives the entry address selected
+ *                   for packet n
+ *
+ * Always returns 0.
+ *
+ * Bursts of at least 5 packets go through a 3-stage software pipeline that
+ * handles two packets per stage; smaller bursts are looked up one packet at
+ * a time. Either way only the first bucket of each chain is searched by
+ * these stages: packets that missed there and have a valid next bucket are
+ * recorded in buckets_mask / buckets[] / keys[] and are resolved afterwards
+ * by the "grind next buckets" loop, which therefore has to run for BOTH
+ * paths.
+ */
+static int
+rte_table_hash_lookup_key32_ext(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0, buckets_mask = 0;
+       struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+       uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+       /* Cannot run the pipeline with less than 5 packets */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_32 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1(mbuf, bucket, f);
+                       lookup1_stage2_ext(pkt_index, mbuf, bucket,
+                                       pkts_mask_out, entries, buckets_mask,
+                                       buckets, keys, f);
+               }
+
+               /*
+                * Do not return yet: keys living in extension buckets are
+                * only found by the next-bucket pass below.
+                */
+               goto grind_next_buckets;
+       }
+
+       /*
+        * Pipeline fill
+        *
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline feed */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        *
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+       }
+
+       /*
+        * Pipeline flush
+        *
+        */
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+       /*
+        * Grind next buckets: each outer iteration moves every pending packet
+        * one bucket further along its chain; packets that still miss and
+        * have yet another bucket are collected for the following iteration.
+        */
+       for ( ; buckets_mask; ) {
+               uint64_t buckets_mask_next = 0;
+
+               for ( ; buckets_mask; ) {
+                       uint64_t pkt_mask;
+                       uint32_t pkt_index;
+
+                       pkt_index = __builtin_ctzll(buckets_mask);
+                       pkt_mask = 1LLU << pkt_index;
+                       buckets_mask &= ~pkt_mask;
+
+                       lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+                                       entries, buckets_mask_next, f);
+               }
+
+               buckets_mask = buckets_mask_next;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key32_ext() */
+
+/*
+ * Operations table for the 32-byte key LRU hash table: wires the create,
+ * free, entry add, entry delete and lookup callbacks of this file into a
+ * struct rte_table_ops.
+ */
+struct rte_table_ops rte_table_hash_key32_lru_ops = {
+       .f_create = rte_table_hash_create_key32_lru,
+       .f_free = rte_table_hash_free_key32_lru,
+       .f_add = rte_table_hash_entry_add_key32_lru,
+       .f_delete = rte_table_hash_entry_delete_key32_lru,
+       .f_lookup = rte_table_hash_lookup_key32_lru,
+};
+
+/*
+ * Operations table for the 32-byte key extendible bucket hash table: wires
+ * the create, free, entry add, entry delete and lookup callbacks of this
+ * file into a struct rte_table_ops.
+ */
+struct rte_table_ops rte_table_hash_key32_ext_ops = {
+       .f_create = rte_table_hash_create_key32_ext,
+       .f_free = rte_table_hash_free_key32_ext,
+       .f_add = rte_table_hash_entry_add_key32_ext,
+       .f_delete = rte_table_hash_entry_delete_key32_ext,
+       .f_lookup = rte_table_hash_lookup_key32_ext,
+};
diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c
new file mode 100644
index 0000000..5df3c56
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_key8.c
@@ -0,0 +1,1372 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+/* Key size constant for this file; presumably in bytes - TODO confirm */
+#define RTE_TABLE_HASH_KEY_SIZE                            8
+
+/*
+ * Hash bucket holding up to 4 keys of 8 bytes each, followed by the entry
+ * data of those keys.
+ *
+ * The fixed part is 4 + 4 64-bit words; the create functions refuse to build
+ * a table unless sizeof(struct rte_bucket_4_8) is a multiple of
+ * CACHE_LINE_SIZE.
+ */
+struct rte_bucket_4_8 {
+       /* Cache line 0 */
+       /* Validity bitmask: bit i is set while key[i] / entry i is in use */
+       uint64_t signature;
+       /*
+        * LRU state, read and written through lru_pos() / lru_update();
+        * initialized to 0x0000000100020003 when an LRU table is created.
+        */
+       uint64_t lru_list;
+       /*
+        * NOTE(review): next / next_valid look like the link to an extension
+        * bucket and its validity flag (extendible bucket tables) - confirm
+        * against the ext add / lookup code.
+        */
+       struct rte_bucket_4_8 *next;
+       uint64_t next_valid;
+
+       /* The 4 keys stored in this bucket, one 64-bit word per key */
+       uint64_t key[4];
+
+       /* Cache line 1 */
+       /* Entry data: entry i starts at data[i * entry_size] */
+       uint8_t data[0];
+};
+
+/*
+ * Hash table handle for the 8-byte key tables (LRU and extendible bucket).
+ *
+ * The header is immediately followed by the bucket memory; the create
+ * functions refuse to build a table unless sizeof(struct rte_table_hash) is
+ * a multiple of CACHE_LINE_SIZE.
+ */
+struct rte_table_hash {
+       /* Input parameters */
+       /* Number of regular buckets; rounded up to a power of 2 on create */
+       uint32_t n_buckets;
+       /* Keys per bucket (set to 4 on create) */
+       uint32_t n_entries_per_bucket;
+       /* Key size handed to f_hash (set to 8 on create) */
+       uint32_t key_size;
+       /* Size in bytes of the data stored with each key */
+       uint32_t entry_size;
+       /* Size in bytes of one bucket, a multiple of CACHE_LINE_SIZE */
+       uint32_t bucket_size;
+       /*
+        * Copied from the create parameters, where they are required to be
+        * multiples of 4 and 8 respectively.
+        * NOTE(review): presumably offsets into the mbuf meta-data - confirm
+        * against the lookup code.
+        */
+       uint32_t signature_offset;
+       uint32_t key_offset;
+       /* Hash function and seed used by entry add / delete */
+       rte_table_hash_op_hash f_hash;
+       uint64_t seed;
+
+       /* Extendible buckets */
+       /* Number of extension buckets (0 for LRU tables) */
+       uint32_t n_buckets_ext;
+       /* Initialized to n_buckets_ext on create */
+       uint32_t stack_pos;
+       /*
+        * Array of n_buckets_ext indices placed right after the last bucket,
+        * initialized to 0 .. n_buckets_ext - 1.
+        * NOTE(review): looks like a free list of extension buckets - confirm
+        * against the ext add / delete code.
+        */
+       uint32_t *stack;
+
+       /* Lookup table */
+       /* Bucket i starts at memory[i * bucket_size] */
+       uint8_t memory[0] __rte_cache_aligned;
+};
+
+/*
+ * Validate the creation parameters of the 8-byte key LRU hash table.
+ *
+ * Returns 0 when every check passes; otherwise logs the first failing check
+ * and returns -EINVAL.
+ */
+static int
+check_params_create_lru(struct rte_table_hash_key8_lru_params *params)
+{
+       /* The table must be able to hold at least one entry */
+       if (!params->n_entries) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* The signature offset must be a multiple of 4 */
+       if (params->signature_offset & 0x3) {
+               RTE_LOG(ERR, TABLE, "%s: invalid signature_offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* The key offset must be a multiple of 8 */
+       if (params->key_offset & 0x7) {
+               RTE_LOG(ERR, TABLE, "%s: invalid key_offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* A hash function is mandatory */
+       if (!params->f_hash) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: f_hash function pointer is NULL\n",
+                               __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create an 8-byte key LRU hash table.
+ *
+ * params     - pointer to a struct rte_table_hash_key8_lru_params
+ * socket_id  - socket passed to rte_zmalloc_socket() for the table memory
+ * entry_size - size in bytes of the data stored with each key
+ *
+ * Returns the table handle, or NULL when the parameters are invalid, the
+ * table would not fit the 32-bit size limit, or the allocation fails.
+ */
+static void *
+rte_table_hash_create_key8_lru(
+       void *params,
+       int socket_id,
+       uint32_t entry_size)
+{
+       struct rte_table_hash_key8_lru_params *p =
+                       (struct rte_table_hash_key8_lru_params *) params;
+       struct rte_table_hash *f;
+       uint64_t bucket_size, total_size64;
+       uint32_t n_buckets, n_entries_per_bucket, key_size, total_size, i;
+
+       /* Check input parameters */
+       if (p == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: params is NULL\n", __func__);
+               return NULL;
+       }
+       if ((check_params_create_lru(p) != 0) ||
+               ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+               ((sizeof(struct rte_bucket_4_8) % CACHE_LINE_SIZE) != 0)) {
+               return NULL;
+       }
+       n_entries_per_bucket = 4;
+       key_size = 8;
+
+       /*
+        * Memory sizing. Everything is computed in 64 bits first: the table
+        * is addressed with 32-bit offsets (bucket index * bucket size), so a
+        * size that does not fit in 32 bits has to be rejected rather than
+        * silently wrapped into an undersized allocation.
+        */
+       n_buckets = rte_align32pow2((uint32_t) (((uint64_t) p->n_entries +
+                       n_entries_per_bucket - 1) / n_entries_per_bucket));
+       bucket_size = ((sizeof(struct rte_bucket_4_8) +
+                       (uint64_t) n_entries_per_bucket * entry_size +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) *
+                       CACHE_LINE_SIZE;
+       if (bucket_size > UINT32_MAX) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit limit\n",
+                               __func__);
+               return NULL;
+       }
+       total_size64 = sizeof(struct rte_table_hash) + n_buckets * bucket_size;
+       if (total_size64 > UINT32_MAX) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit limit\n",
+                               __func__);
+               return NULL;
+       }
+       total_size = (uint32_t) total_size64;
+
+       /* Memory allocation (zero-filled) */
+       f = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (f == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: Cannot allocate %u bytes for hash table\n",
+                               __func__, total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+                       "%s: Hash table memory footprint is %u bytes\n",
+                       __func__, total_size);
+
+       /* Memory initialization */
+       f->n_buckets = n_buckets;
+       f->n_entries_per_bucket = n_entries_per_bucket;
+       f->key_size = key_size;
+       f->entry_size = entry_size;
+       f->bucket_size = (uint32_t) bucket_size;
+       f->signature_offset = p->signature_offset;
+       f->key_offset = p->key_offset;
+       f->f_hash = p->f_hash;
+       f->seed = p->seed;
+
+       /* Every bucket starts with the same initial LRU list */
+       for (i = 0; i < n_buckets; i++) {
+               struct rte_bucket_4_8 *bucket;
+
+               bucket = (struct rte_bucket_4_8 *)
+                               &f->memory[i * f->bucket_size];
+               bucket->lru_list = 0x0000000100020003LLU;
+       }
+
+       return f;
+}
+
+/*
+ * Free an 8-byte key LRU hash table.
+ *
+ * Returns 0 on success, -EINVAL (after logging) when table is NULL.
+ */
+static int
+rte_table_hash_free_key8_lru(void *table)
+{
+       /* Nothing to release without a table */
+       if (table == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       /* Header and buckets share a single allocation */
+       rte_free(table);
+       return 0;
+}
+
+/*
+ * Add (or update) a key in an 8-byte key LRU hash table.
+ *
+ * table     - table handle
+ * key       - pointer to the 8-byte key
+ * entry     - data to store with the key (f->entry_size bytes are copied)
+ * key_found - output; 1 when the key was already present (its data is
+ *             overwritten), 0 when the key was inserted
+ * entry_ptr - output; address of the stored data inside the bucket
+ *
+ * When the bucket has no free slot, the slot returned by lru_pos() is
+ * overwritten with the new key. Always returns 0.
+ */
+static int
+rte_table_hash_entry_add_key8_lru(
+       void *table,
+       void *key,
+       void *entry,
+       int *key_found,
+       void **entry_ptr)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket;
+       uint8_t *slot;
+       uint64_t sig, pos;
+       uint32_t bucket_idx, i;
+
+       /* Locate the bucket the key hashes to */
+       sig = f->f_hash(key, f->key_size, f->seed);
+       bucket_idx = sig & (f->n_buckets - 1);
+       bucket = (struct rte_bucket_4_8 *)
+                       &f->memory[bucket_idx * f->bucket_size];
+
+       /* Pass 1: the key is already stored, refresh its data */
+       for (i = 0; i < 4; i++) {
+               if ((bucket->signature & (1LLU << i)) == 0)
+                       continue;
+               if (bucket->key[i] != *((uint64_t *) key))
+                       continue;
+
+               slot = &bucket->data[i * f->entry_size];
+               memcpy(slot, entry, f->entry_size);
+               lru_update(bucket, i);
+               *key_found = 1;
+               *entry_ptr = (void *) slot;
+               return 0;
+       }
+
+       /* Pass 2: take the first free slot */
+       for (i = 0; i < 4; i++) {
+               if (bucket->signature & (1LLU << i))
+                       continue;
+
+               slot = &bucket->data[i * f->entry_size];
+               bucket->signature |= 1LLU << i;
+               bucket->key[i] = *((uint64_t *) key);
+               memcpy(slot, entry, f->entry_size);
+               lru_update(bucket, i);
+               *key_found = 0;
+               *entry_ptr = (void *) slot;
+               return 0;
+       }
+
+       /* Bucket full: overwrite the slot designated by the LRU list */
+       pos = lru_pos(bucket);
+       slot = &bucket->data[pos * f->entry_size];
+       bucket->key[pos] = *((uint64_t *) key);
+       memcpy(slot, entry, f->entry_size);
+       lru_update(bucket, pos);
+       *key_found = 0;
+       *entry_ptr = (void *) slot;
+
+       return 0;
+}
+
+/*
+ * Delete a key from an 8-byte key LRU hash table.
+ *
+ * table     - table handle
+ * key       - pointer to the 8-byte key
+ * key_found - output; 1 when the key was present and removed, 0 otherwise
+ * entry     - optional output; when not NULL and the key was present, the
+ *             data stored with the key (f->entry_size bytes) is copied here
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_entry_delete_key8_lru(
+       void *table,
+       void *key,
+       int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket;
+       uint64_t sig;
+       uint32_t bucket_idx, i;
+
+       /* Locate the bucket the key hashes to */
+       sig = f->f_hash(key, f->key_size, f->seed);
+       bucket_idx = sig & (f->n_buckets - 1);
+       bucket = (struct rte_bucket_4_8 *)
+                       &f->memory[bucket_idx * f->bucket_size];
+
+       /* Search the valid slots for the key */
+       for (i = 0; i < 4; i++) {
+               const uint64_t bit = 1LLU << i;
+
+               if ((bucket->signature & bit) == 0)
+                       continue;
+               if (bucket->key[i] != *((uint64_t *) key))
+                       continue;
+
+               /* Found: invalidate the slot, hand back its data if asked */
+               bucket->signature &= ~bit;
+               *key_found = 1;
+               if (entry != NULL)
+                       memcpy(entry, &bucket->data[i * f->entry_size],
+                                       f->entry_size);
+
+               return 0;
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Validate the creation parameters of the 8-byte key extendible bucket hash
+ * table.
+ *
+ * Returns 0 when every check passes; otherwise logs the first failing check
+ * and returns -EINVAL.
+ */
+static int
+check_params_create_ext(struct rte_table_hash_key8_ext_params *params)
+{
+       /* The table must be able to hold at least one entry */
+       if (!params->n_entries) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* At least one extension entry is required as well */
+       if (!params->n_entries_ext) {
+               RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__);
+               return -EINVAL;
+       }
+
+       /* The signature offset must be a multiple of 4 */
+       if (params->signature_offset & 0x3) {
+               RTE_LOG(ERR, TABLE, "%s: invalid signature_offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* The key offset must be a multiple of 8 */
+       if (params->key_offset & 0x7) {
+               RTE_LOG(ERR, TABLE, "%s: invalid key_offset\n", __func__);
+               return -EINVAL;
+       }
+
+       /* A hash function is mandatory */
+       if (!params->f_hash) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: f_hash function pointer is NULL\n",
+                               __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create an 8-byte key extendible bucket hash table.
+ *
+ * params     - pointer to a struct rte_table_hash_key8_ext_params
+ * socket_id  - socket passed to rte_zmalloc_socket() for the table memory
+ * entry_size - size in bytes of the data stored with each key
+ *
+ * Memory layout after the header: n_buckets regular buckets, n_buckets_ext
+ * extension buckets, then an array of n_buckets_ext 32-bit indices (the
+ * "stack").
+ *
+ * Returns the table handle, or NULL when the parameters are invalid, the
+ * table would not fit the 32-bit size limit, or the allocation fails.
+ */
+static void *
+rte_table_hash_create_key8_ext(
+       void *params,
+       int socket_id,
+       uint32_t entry_size)
+{
+       struct rte_table_hash_key8_ext_params *p =
+                       (struct rte_table_hash_key8_ext_params *) params;
+       struct rte_table_hash *f;
+       uint64_t bucket_size, stack_size, total_size64;
+       uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size,
+                       total_size, i;
+
+       /* Check input parameters */
+       if (p == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: params is NULL\n", __func__);
+               return NULL;
+       }
+       if ((check_params_create_ext(p) != 0) ||
+               ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+               ((sizeof(struct rte_bucket_4_8) % CACHE_LINE_SIZE) != 0)) {
+               return NULL;
+       }
+       n_entries_per_bucket = 4;
+       key_size = 8;
+
+       /*
+        * Memory sizing. Everything is computed in 64 bits first: the table
+        * is addressed with 32-bit offsets (bucket index * bucket size), so a
+        * size that does not fit in 32 bits has to be rejected rather than
+        * silently wrapped into an undersized allocation.
+        */
+       n_buckets = rte_align32pow2((uint32_t) (((uint64_t) p->n_entries +
+                       n_entries_per_bucket - 1) / n_entries_per_bucket));
+       n_buckets_ext = (uint32_t) (((uint64_t) p->n_entries_ext +
+                       n_entries_per_bucket - 1) / n_entries_per_bucket);
+       bucket_size = ((sizeof(struct rte_bucket_4_8) +
+                       (uint64_t) n_entries_per_bucket * entry_size +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) *
+                       CACHE_LINE_SIZE;
+       if (bucket_size > UINT32_MAX) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit limit\n",
+                               __func__);
+               return NULL;
+       }
+       stack_size = (((uint64_t) n_buckets_ext * sizeof(uint32_t) +
+                       CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE) *
+                       CACHE_LINE_SIZE;
+       total_size64 = sizeof(struct rte_table_hash) +
+                       ((uint64_t) n_buckets + n_buckets_ext) * bucket_size +
+                       stack_size;
+       if (total_size64 > UINT32_MAX) {
+               RTE_LOG(ERR, TABLE,
+                               "%s: hash table size exceeds the 32-bit limit\n",
+                               __func__);
+               return NULL;
+       }
+       total_size = (uint32_t) total_size64;
+
+       /* Memory allocation (zero-filled) */
+       f = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (f == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: Cannot allocate %u bytes for hash table\n",
+                               __func__, total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+                       "%s: Hash table memory footprint is %u bytes\n",
+                       __func__, total_size);
+
+       /* Memory initialization */
+       f->n_buckets = n_buckets;
+       f->n_entries_per_bucket = n_entries_per_bucket;
+       f->key_size = key_size;
+       f->entry_size = entry_size;
+       f->bucket_size = (uint32_t) bucket_size;
+       f->signature_offset = p->signature_offset;
+       f->key_offset = p->key_offset;
+       f->f_hash = p->f_hash;
+       f->seed = p->seed;
+
+       /* The index stack sits right after the last extension bucket */
+       f->n_buckets_ext = n_buckets_ext;
+       f->stack_pos = n_buckets_ext;
+       f->stack = (uint32_t *)
+                       &f->memory[(n_buckets + n_buckets_ext) *
+                       f->bucket_size];
+
+       for (i = 0; i < n_buckets_ext; i++) {
+               f->stack[i] = i;
+       }
+
+       return f;
+}
+
+/*
+ * Free the memory of an 8-byte key extendible bucket hash table.
+ *
+ * Returns 0 after the table was handed to rte_free(), or -EINVAL (and logs
+ * an error) when table is NULL.
+ */
+static int
+rte_table_hash_free_key8_ext(void *table)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+
+       /* Nothing to release without a table handle */
+       if (f != NULL) {
+               rte_free(f);
+               return 0;
+       }
+
+       RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__);
+       return -EINVAL;
+}
+
+/*
+ * Add a key to the 8-byte key extendible bucket hash table, or overwrite
+ * the entry of a key that is already stored.
+ *
+ * table: hash table handle
+ * key: pointer to the 8-byte key
+ * entry: f->entry_size bytes copied into the table next to the key
+ * key_found: set to 1 when an existing key was updated, 0 when the key was
+ *    newly inserted
+ * entry_ptr: set to the address of the entry data inside the table
+ *
+ * Returns 0 on success, -ENOSPC when every slot of the bucket chain is in
+ * use and no free extension bucket is left.
+ */
+static int
+rte_table_hash_entry_add_key8_ext(
+       void *table,
+       void *key,
+       void *entry,
+       int *key_found,
+       void **entry_ptr)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *head, *cur, *tail;
+       uint8_t *slot_data;
+       uint64_t hash, key_value;
+       uint32_t index, slot;
+
+       hash = f->f_hash(key, f->key_size, f->seed);
+       index = hash & (f->n_buckets - 1);
+       head = (struct rte_bucket_4_8 *) &f->memory[index * f->bucket_size];
+       key_value = *((uint64_t *) key);
+
+       /* Pass 1: the key is already stored somewhere along the chain */
+       for (cur = head; cur != NULL; cur = cur->next) {
+               for (slot = 0; slot < 4; slot ++) {
+                       if (((cur->signature >> slot) & 1LLU) == 0) {
+                               continue;
+                       }
+                       if (cur->key[slot] != key_value) {
+                               continue;
+                       }
+
+                       slot_data = &cur->data[slot * f->entry_size];
+                       memcpy(slot_data, entry, f->entry_size);
+                       *key_found = 1;
+                       *entry_ptr = (void *) slot_data;
+                       return 0;
+               }
+       }
+
+       /* Pass 2: take the first unused slot along the chain */
+       tail = NULL;
+       for (cur = head; cur != NULL; cur = cur->next) {
+               for (slot = 0; slot < 4; slot ++) {
+                       if ((cur->signature >> slot) & 1LLU) {
+                               continue;
+                       }
+
+                       slot_data = &cur->data[slot * f->entry_size];
+                       cur->signature |= 1LLU << slot;
+                       cur->key[slot] = key_value;
+                       memcpy(slot_data, entry, f->entry_size);
+                       *key_found = 0;
+                       *entry_ptr = (void *) slot_data;
+                       return 0;
+               }
+
+               /* Remember the last bucket visited, i.e. the chain tail */
+               tail = cur;
+       }
+
+       /* Pass 3: chain full, link a free extension bucket after the tail */
+       if (f->stack_pos > 0) {
+               index = f->stack[-- f->stack_pos];
+               cur = (struct rte_bucket_4_8 *)
+                       &f->memory[(f->n_buckets + index) * f->bucket_size];
+
+               tail->next = cur;
+               tail->next_valid = 1;
+
+               /* The new key goes into slot 0 of the fresh bucket */
+               cur->signature = 1;
+               cur->key[0] = key_value;
+               memcpy(&cur->data[0], entry, f->entry_size);
+               *key_found = 0;
+               *entry_ptr = (void *) &cur->data[0];
+               return 0;
+       }
+
+       return -ENOSPC;
+}
+
+/*
+ * Delete a key from the 8-byte key extendible bucket hash table.
+ *
+ * table: hash table handle
+ * key: pointer to the 8-byte key
+ * key_found: set to 1 when the key was stored in the table, 0 otherwise
+ * entry: when not NULL and the key is found, receives a copy of the
+ *    f->entry_size bytes stored with the key
+ *
+ * Always returns 0. An extension bucket left without any key is unlinked
+ * from its chain, cleared and its index is pushed back on the stack of
+ * free extension buckets. The first bucket of a chain is never released.
+ */
+static int
+rte_table_hash_entry_delete_key8_ext(
+       void *table,
+       void *key,
+       int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev;
+       uint64_t signature;
+       uint32_t bucket_index, i;
+
+       signature = f->f_hash(key, f->key_size, f->seed);
+       bucket_index = signature & (f->n_buckets - 1);
+       bucket0 = (struct rte_bucket_4_8 *)
+                       &f->memory[bucket_index * f->bucket_size];
+
+       /* Key is present in the bucket */
+       for (bucket_prev = NULL, bucket = bucket0; bucket != NULL;
+                       bucket_prev = bucket, bucket = bucket->next) {
+               uint64_t mask;
+
+               for (i = 0, mask = 1LLU; i < 4; i ++, mask <<= 1) {
+                       uint8_t *bucket_data;
+
+                       if (((bucket->signature & mask) == 0) ||
+                               (*((uint64_t *) key) != bucket->key[i])) {
+                               continue;
+                       }
+
+                       bucket_data = &bucket->data[i * f->entry_size];
+
+                       bucket->signature &= ~mask;
+                       *key_found = 1;
+                       if (entry) {
+                               memcpy(entry, bucket_data, f->entry_size);
+                       }
+
+                       if ((bucket->signature == 0) && (bucket_prev != NULL)) {
+                               bucket_prev->next = bucket->next;
+                               bucket_prev->next_valid = bucket->next_valid;
+
+                               memset(bucket, 0, sizeof(struct rte_bucket_4_8));
+
+                               /*
+                                * Buckets are f->bucket_size bytes apart
+                                * (header plus entry data), so the index has
+                                * to come from the byte offset and not from
+                                * struct rte_bucket_4_8 pointer arithmetic.
+                                */
+                               bucket_index = (uint32_t)
+                                       ((((uint8_t *) bucket) -
+                                       ((uint8_t *) f->memory)) /
+                                       f->bucket_size) - f->n_buckets;
+                               f->stack[f->stack_pos ++] = bucket_index;
+                       }
+
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Compare the 8-byte key key_in[0] with the four key slots of bucket.
+ *
+ * Bits 0 .. 3 of bucket->signature flag the slots taken into account; the
+ * inverted bit is OR-ed into the XOR of the two keys, so a slot whose bit
+ * is clear can never compare equal. pos receives the index (0 .. 3) of the
+ * matching slot, the highest index winning, or 4 when no slot matches.
+ */
+#define lookup_key8_cmp(key_in, bucket, pos) \
+{ \
+       uint64_t unused_slots = ~bucket->signature; \
+       uint64_t diff0, diff1, diff2, diff3; \
+       \
+       diff0 = (key_in[0] ^ bucket->key[0]) | (unused_slots & 1); \
+       diff1 = (key_in[0] ^ bucket->key[1]) | (unused_slots & 2); \
+       diff2 = (key_in[0] ^ bucket->key[2]) | (unused_slots & 4); \
+       diff3 = (key_in[0] ^ bucket->key[3]) | (unused_slots & 8); \
+       \
+       pos = (diff3 == 0) ? 3 : \
+               ((diff2 == 0) ? 2 : \
+               ((diff1 == 0) ? 1 : \
+               ((diff0 == 0) ? 0 : 4))); \
+}
+
+/*
+ * Single packet pipeline, stage 0: take the packet with the lowest index
+ * out of pkts_mask (which must not be zero). pkt0_index receives the index,
+ * its bit is cleared from pkts_mask, mbuf0 receives pkts[pkt0_index] and
+ * the start of the packet meta-data is prefetched.
+ */
+#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask) \
+{ \
+       pkt0_index = __builtin_ctzll(pkts_mask); \
+       pkts_mask &= ~(1LLU << pkt0_index); \
+       \
+       mbuf0 = pkts[pkt0_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, 0)); \
+}
+
+/*
+ * Single packet pipeline, stage 1 (pre-computed signature): read the 32-bit
+ * key signature from the meta-data of mbuf1 at f->signature_offset, mask it
+ * with f->n_buckets - 1 to get the bucket index, store the bucket address
+ * in bucket1 and prefetch the bucket.
+ */
+#define lookup1_stage1(mbuf1, bucket1, f) \
+{ \
+       uint64_t signature = \
+               RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset); \
+       uint32_t bucket_index = signature & (f->n_buckets - 1); \
+       \
+       bucket1 = (struct rte_bucket_4_8 *) \
+               &f->memory[bucket_index * f->bucket_size]; \
+       rte_prefetch0(bucket1); \
+}
+
+/*
+ * Single packet pipeline, stage 1 (signature computed on the fly): hash
+ * the key found in the meta-data of mbuf1 at f->key_offset with f->f_hash
+ * and f->seed, mask the result with f->n_buckets - 1 to get the bucket
+ * index, store the bucket address in bucket1 and prefetch the bucket.
+ */
+#define lookup1_stage1_dosig(mbuf1, bucket1, f) \
+{ \
+       uint64_t *key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset); \
+       uint64_t signature = \
+               f->f_hash(key, RTE_TABLE_HASH_KEY_SIZE, f->seed); \
+       uint32_t bucket_index = signature & (f->n_buckets - 1); \
+       \
+       bucket1 = (struct rte_bucket_4_8 *) \
+               &f->memory[bucket_index * f->bucket_size]; \
+       rte_prefetch0(bucket1); \
+}
+
+/*
+ * Single packet pipeline, stage 2 (LRU table): search bucket2 for the key
+ * stored in the meta-data of mbuf2 at f->key_offset.
+ *
+ * Bit pkt2_index of pkts_mask_out is set when the key is found.
+ * entries[pkt2_index] is written and prefetched in every case, so it is
+ * only meaningful when that bit is set. lru_update() is then invoked with
+ * the bucket and the position returned by the key comparison.
+ */
+#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \
+               pkts_mask_out, entries, f) \
+{ \
+       uint64_t *key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
+       uint32_t pos; \
+       void *a; \
+       \
+       lookup_key8_cmp(key, bucket2, pos); \
+       \
+       /* pos is 4 on a miss: the shifted signature bit selects hit/miss */ \
+       pkts_mask_out |= ((bucket2->signature >> pos) & 1LLU) << pkt2_index; \
+       \
+       a = (void *) &bucket2->data[pos * f->entry_size]; \
+       rte_prefetch0(a); \
+       entries[pkt2_index] = a; \
+       lru_update(bucket2, pos); \
+}
+
+/*
+ * Single packet pipeline, stage 2 (extendible bucket table): search
+ * bucket2 for the key stored in the meta-data of mbuf2 at f->key_offset.
+ *
+ * Bit pkt2_index of pkts_mask_out is set when the key is found;
+ * entries[pkt2_index] is written and prefetched in every case. When the
+ * key is not found and bucket2->next_valid is set, bit pkt2_index of
+ * buckets_mask is set. buckets[pkt2_index] and keys[pkt2_index] always
+ * receive the next bucket of the chain and the key pointer, which is the
+ * state lookup_grinder() reads to continue the search.
+ */
+#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, \
+               entries, buckets_mask, buckets, keys, f) \
+{ \
+       uint64_t *key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset); \
+       uint64_t pkt_mask; \
+       uint32_t pos; \
+       void *a; \
+       \
+       lookup_key8_cmp(key, bucket2, pos); \
+       \
+       pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index; \
+       pkts_mask_out |= pkt_mask; \
+       \
+       a = (void *) &bucket2->data[pos * f->entry_size]; \
+       rte_prefetch0(a); \
+       entries[pkt2_index] = a; \
+       \
+       /* Miss with a chained bucket: flag the packet for another round */ \
+       buckets_mask |= (~pkt_mask) & (bucket2->next_valid << pkt2_index); \
+       buckets[pkt2_index] = bucket2->next; \
+       keys[pkt2_index] = key; \
+}
+
+/*
+ * Continue the lookup of packet pkt_index in the next bucket of its chain.
+ *
+ * The bucket and the key are read from buckets[pkt_index] and
+ * keys[pkt_index]. Bit pkt_index of pkts_mask_out is set when the key is
+ * found; entries[pkt_index] is written and prefetched in every case. When
+ * the key is not found and bucket->next_valid is set, bit pkt_index of
+ * buckets_mask is set. buckets[pkt_index] is advanced to (and prefetches)
+ * the next bucket of the chain; keys[pkt_index] is written back unchanged.
+ */
+#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries, \
+               buckets_mask, f) \
+{ \
+       struct rte_bucket_4_8 *bucket = buckets[pkt_index]; \
+       struct rte_bucket_4_8 *bucket_next; \
+       uint64_t *key = keys[pkt_index]; \
+       uint64_t pkt_mask; \
+       uint32_t pos; \
+       void *a; \
+       \
+       lookup_key8_cmp(key, bucket, pos); \
+       \
+       pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index; \
+       pkts_mask_out |= pkt_mask; \
+       \
+       a = (void *) &bucket->data[pos * f->entry_size]; \
+       rte_prefetch0(a); \
+       entries[pkt_index] = a; \
+       \
+       /* Miss with a chained bucket: keep the packet in the grinder */ \
+       buckets_mask |= (~pkt_mask) & (bucket->next_valid << pkt_index); \
+       bucket_next = bucket->next; \
+       rte_prefetch0(bucket_next); \
+       buckets[pkt_index] = bucket_next; \
+       keys[pkt_index] = key; \
+}
+
+/*
+ * Two packet pipeline, stage 0: take the two packets with the lowest
+ * indexes out of pkts_mask, which must have at least two bits set.
+ * pkt00_index/pkt01_index receive the indexes, both bits are cleared from
+ * pkts_mask, mbuf00/mbuf01 receive the packets and the start of their
+ * meta-data is prefetched.
+ */
+#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, \
+               pkts, pkts_mask) \
+{ \
+       /* First packet */ \
+       pkt00_index = __builtin_ctzll(pkts_mask); \
+       pkts_mask &= ~(1LLU << pkt00_index); \
+       mbuf00 = pkts[pkt00_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
+       \
+       /* Second packet */ \
+       pkt01_index = __builtin_ctzll(pkts_mask); \
+       pkts_mask &= ~(1LLU << pkt01_index); \
+       mbuf01 = pkts[pkt01_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
+}
+
+/*
+ * Two packet pipeline, stage 0, tolerating a single remaining packet.
+ *
+ * pkts_mask must not be zero on entry. The packet with the lowest index is
+ * taken out of pkts_mask into pkt00_index/mbuf00. When another packet is
+ * left it is taken into pkt01_index/mbuf01; otherwise the second slot
+ * duplicates the first one (pkt01_index == pkt00_index), so that the later
+ * stages can keep working on pairs. The start of the meta-data of both
+ * packets is prefetched.
+ */
+#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, \
+               mbuf00, mbuf01, pkts, pkts_mask) \
+{ \
+       uint64_t pkt00_mask, pkt01_mask; \
+       \
+       pkt00_index = __builtin_ctzll(pkts_mask); \
+       pkt00_mask = 1LLU << pkt00_index; \
+       pkts_mask &= ~pkt00_mask; \
+       \
+       mbuf00 = pkts[pkt00_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0)); \
+       \
+       /* __builtin_ctzll(0) has an undefined result: never evaluate it */ \
+       pkt01_index = (pkts_mask != 0) ? \
+               __builtin_ctzll(pkts_mask) : pkt00_index; \
+       pkt01_mask = 1LLU << pkt01_index; \
+       pkts_mask &= ~pkt01_mask; \
+       \
+       mbuf01 = pkts[pkt01_index]; \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0)); \
+}
+
+/*
+ * Two packet pipeline, stage 1 (pre-computed signature): for each of the
+ * two packets, read the 32-bit key signature from the packet meta-data at
+ * f->signature_offset, mask it with f->n_buckets - 1 to get the bucket
+ * index, store the bucket address in bucket10/bucket11 and prefetch it.
+ */
+#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \
+{ \
+       uint64_t signature10, signature11; \
+       uint32_t bucket10_index, bucket11_index; \
+       \
+       /* First packet */ \
+       signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset); \
+       bucket10_index = signature10 & (f->n_buckets - 1); \
+       bucket10 = (struct rte_bucket_4_8 *) \
+               &f->memory[bucket10_index * f->bucket_size]; \
+       rte_prefetch0(bucket10); \
+       \
+       /* Second packet */ \
+       signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset); \
+       bucket11_index = signature11 & (f->n_buckets - 1); \
+       bucket11 = (struct rte_bucket_4_8 *) \
+               &f->memory[bucket11_index * f->bucket_size]; \
+       rte_prefetch0(bucket11); \
+}
+
+/*
+ * Two packet pipeline, stage 1 (signature computed on the fly): for each
+ * of the two packets, hash the key found in the packet meta-data at
+ * f->key_offset with f->f_hash and f->seed, mask the result with
+ * f->n_buckets - 1 to get the bucket index, store the bucket address in
+ * bucket10/bucket11 and prefetch it. The hash function, the seed and the
+ * key offset are read from the table once and shared by both packets.
+ */
+#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f) \
+{ \
+       rte_table_hash_op_hash f_hash = f->f_hash; \
+       uint64_t seed = f->seed; \
+       uint32_t key_offset = f->key_offset; \
+       uint64_t *key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset); \
+       uint64_t *key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset); \
+       uint64_t signature10, signature11; \
+       uint32_t bucket10_index, bucket11_index; \
+       \
+       /* First packet */ \
+       signature10 = f_hash(key10, RTE_TABLE_HASH_KEY_SIZE, seed); \
+       bucket10_index = signature10 & (f->n_buckets - 1); \
+       bucket10 = (struct rte_bucket_4_8 *) \
+               &f->memory[bucket10_index * f->bucket_size]; \
+       rte_prefetch0(bucket10); \
+       \
+       /* Second packet */ \
+       signature11 = f_hash(key11, RTE_TABLE_HASH_KEY_SIZE, seed); \
+       bucket11_index = signature11 & (f->n_buckets - 1); \
+       bucket11 = (struct rte_bucket_4_8 *) \
+               &f->memory[bucket11_index * f->bucket_size]; \
+       rte_prefetch0(bucket11); \
+}
+
+/*
+ * Two packet pipeline, stage 2 (LRU table): search bucket20/bucket21 for
+ * the keys stored in the meta-data of mbuf20/mbuf21 at f->key_offset.
+ *
+ * Bits pkt20_index/pkt21_index of pkts_mask_out are set for the keys that
+ * are found. entries[] is written and prefetched for both packets in every
+ * case, so an entry is only meaningful when its bit is set. lru_update()
+ * is then invoked for each bucket with the position returned by the key
+ * comparison.
+ */
+#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, \
+               bucket20, bucket21, pkts_mask_out, entries, f) \
+{ \
+       uint64_t *key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset); \
+       uint64_t *key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset); \
+       uint32_t pos20, pos21; \
+       void *a20, *a21; \
+       \
+       lookup_key8_cmp(key20, bucket20, pos20); \
+       lookup_key8_cmp(key21, bucket21, pos21); \
+       \
+       /* pos is 4 on a miss: the shifted signature bit selects hit/miss */ \
+       pkts_mask_out |= \
+               (((bucket20->signature >> pos20) & 1LLU) << pkt20_index) | \
+               (((bucket21->signature >> pos21) & 1LLU) << pkt21_index); \
+       \
+       a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \
+       a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \
+       rte_prefetch0(a20); \
+       rte_prefetch0(a21); \
+       entries[pkt20_index] = a20; \
+       entries[pkt21_index] = a21; \
+       lru_update(bucket20, pos20); \
+       lru_update(bucket21, pos21); \
+}
+
+/*
+ * Two packet pipeline, stage 2 (extendible bucket table): search
+ * bucket20/bucket21 for the keys stored in the meta-data of mbuf20/mbuf21
+ * at f->key_offset.
+ *
+ * Bits pkt20_index/pkt21_index of pkts_mask_out are set for the keys that
+ * are found; entries[] is written and prefetched for both packets in every
+ * case. For a key that is not found while the bucket has next_valid set,
+ * the packet bit is set in buckets_mask. buckets[] and keys[] always
+ * receive the next bucket of the chain and the key pointer of each packet,
+ * which is the state lookup_grinder() reads to continue the search.
+ */
+#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, \
+               bucket20, bucket21, pkts_mask_out, entries, buckets_mask, \
+               buckets, keys, f) \
+{ \
+       uint64_t *key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset); \
+       uint64_t *key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset); \
+       uint64_t pkt20_mask, pkt21_mask; \
+       uint32_t pos20, pos21; \
+       void *a20, *a21; \
+       \
+       lookup_key8_cmp(key20, bucket20, pos20); \
+       lookup_key8_cmp(key21, bucket21, pos21); \
+       \
+       pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index; \
+       pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index; \
+       pkts_mask_out |= pkt20_mask | pkt21_mask; \
+       \
+       a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \
+       a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \
+       rte_prefetch0(a20); \
+       rte_prefetch0(a21); \
+       entries[pkt20_index] = a20; \
+       entries[pkt21_index] = a21; \
+       \
+       /* Miss with a chained bucket: flag the packet for another round */ \
+       buckets_mask |= \
+               ((~pkt20_mask) & (bucket20->next_valid << pkt20_index)) | \
+               ((~pkt21_mask) & (bucket21->next_valid << pkt21_index)); \
+       buckets[pkt20_index] = bucket20->next; \
+       buckets[pkt21_index] = bucket21->next; \
+       keys[pkt20_index] = key20; \
+       keys[pkt21_index] = key21; \
+}
+
+/*
+ * Bulk lookup in the LRU hash table with 8-byte keys, using the key
+ * signature already stored in the packet meta-data.
+ *
+ * table: hash table handle
+ * pkts: packet array, indexed by bit position of pkts_mask
+ * pkts_mask: bit n set means pkts[n] takes part in the lookup
+ * lookup_hit_mask: receives the mask of the packets whose key was found
+ * entries: entries[n] receives the address of the table entry selected for
+ *    pkts[n]; it is written for every packet of pkts_mask but is only
+ *    meaningful when bit n of *lookup_hit_mask is set
+ *
+ * Always returns 0.
+ *
+ * With 5 packets or more the lookup runs as a 3 stage software pipeline
+ * handling two packets per stage (stage 0: packet meta-data prefetch,
+ * stage 1: bucket prefetch, stage 2: key comparison). With fewer packets
+ * the three stages are simply run back to back for one packet at a time.
+ */
+static int
+rte_table_hash_lookup_key8_lru(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0;
+
+       /* Cannot run the pipeline with less than 5 packets */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               while (pkts_mask != 0) {
+                       struct rte_bucket_4_8 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1(mbuf, bucket, f);
+                       lookup1_stage2_lru(pkt_index, mbuf, bucket,
+                                       pkts_mask_out, entries, f);
+               }
+
+               *lookup_hit_mask = pkts_mask_out;
+               return 0;
+       }
+
+       /*
+        * Pipeline fill: the first two pairs of packets enter the pipeline
+        */
+       /* Stage 0 for the first pair */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Feed: stage 0 -> stage 1 */
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+
+       /* Stage 0 for the second pair */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Stage 1 for the first pair */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run: all three stages are busy while packets are left
+        */
+       while (pkts_mask != 0) {
+               /* Feed: stage 1 -> stage 2 (before stage 1 is overwritten) */
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+
+               /* Feed: stage 0 -> stage 1 */
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+
+               /* Stage 0: the last pair may hold a single packet */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Stage 1 */
+               lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Stage 2 */
+               lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                               bucket20, bucket21, pkts_mask_out, entries, f);
+       }
+
+       /*
+        * Pipeline flush: drain the two pairs still in flight
+        */
+       /* Feed: stage 1 -> stage 2 */
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+
+       /* Feed: stage 0 -> stage 1 */
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+
+       /* Stage 1 for the last pair */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Stage 2 for the pair before last */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+
+       /* Feed: stage 1 -> stage 2 */
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+
+       /* Stage 2 for the last pair */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key8_lru() */
+/*
+ * Burst lookup for the 8-byte key LRU hash table, "dosig" flavour: stage 1
+ * is performed with the *_dosig helper macros (signature computed
+ * on-the-fly, per the description of the do-sig tables).
+ *
+ * table:           table handle, used as struct rte_table_hash *
+ * pkts:            input packet burst, handed to the stage-0 macros
+ * pkts_mask:       bitmask of the packets to look up; its population count
+ *                  selects the scalar path (< 5) or the pipelined path
+ * lookup_hit_mask: output, receives the accumulated pkts_mask_out
+ * entries:         output array handed to the stage-2 macros
+ *                  (presumably one slot per hit packet; the macros are
+ *                  defined outside this section - TODO confirm)
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_lookup_key8_lru_dosig(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0;
+
+       /* Cannot run the pipeline with less than 5 packets: such bursts are
+        * handled one packet at a time with the single-packet stage macros */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_8 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1_dosig(mbuf, bucket, f);
+                       lookup1_stage2_lru(pkt_index, mbuf, bucket, 
pkts_mask_out,
+                                       entries, f);
+               }
+
+               *lookup_hit_mask = pkts_mask_out;
+               return 0;
+       }
+
+       /*
+        * Pipeline fill
+        * Stage 0 is run twice and stage 1 once before the steady-state loop;
+        * each stage invocation is handed a pair of packets (x0 / x1 names).
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, 
pkts_mask);
+
+       /* Pipeline feed: stage-0 results become the stage-1 inputs */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, 
pkts_mask);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        * Loops until pkts_mask is empty (the stage-0 macro is presumably what
+        * clears bits from pkts_mask - TODO confirm against its definition).
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed: shift stage 1 -> stage 2 first, then
+                * stage 0 -> stage 1, so no value is overwritten before use */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 (variant that, going by its name, copes
+                * with an odd number of remaining packets) */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                               bucket20, bucket21, pkts_mask_out, entries, f);
+       }
+
+       /*
+        * Pipeline flush
+        * No new packets enter: the two packet pairs still in flight are
+        * pushed through the remaining stages.
+        */
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+
+       /* Pipeline feed: only stage 1 -> stage 2 is left to shift */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries, f);
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key8_lru_dosig() */
+
+/*
+ * Burst lookup for the 8-byte key extendible-bucket hash table (stage 1 uses
+ * the plain, non-dosig helper macros).
+ *
+ * table:           table handle, used as struct rte_table_hash *
+ * pkts:            input packet burst, handed to the stage-0 macros
+ * pkts_mask:       bitmask of the packets to look up; its population count
+ *                  selects the scalar path (< 5) or the pipelined path
+ * lookup_hit_mask: output, receives the accumulated pkts_mask_out
+ * entries:         output array handed to the stage-2 / grinder macros
+ *
+ * Packets that the stage-2 macro flags in buckets_mask are processed again
+ * by lookup_grinder() until no packet is flagged any more. This is done for
+ * both the scalar and the pipelined path.
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_lookup_key8_ext(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0, buckets_mask = 0;
+       struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+       uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+       /* Cannot run the pipeline with less than 5 packets */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_8 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1(mbuf, bucket, f);
+                       lookup1_stage2_ext(pkt_index, mbuf, bucket,
+                                       pkts_mask_out, entries,
+                                       buckets_mask, buckets, keys, f);
+               }
+
+               /* The single-packet stage 2 may have queued packets in
+                * buckets_mask; they still have to go through the grinder,
+                * otherwise keys stored in extension buckets are missed */
+               goto grind_next_buckets;
+       }
+
+       /*
+        * Pipeline fill
+        *
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline feed */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        *
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed: shift stage 1 -> stage 2 first, then
+                * stage 0 -> stage 1, so no value is overwritten before use */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                               bucket20, bucket21, pkts_mask_out, entries,
+                               buckets_mask, buckets, keys, f);
+       }
+
+       /*
+        * Pipeline flush
+        *
+        */
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+       /* Grind next buckets: every pass handles the packets flagged in
+        * buckets_mask and collects those flagged again for the next pass */
+       for ( ; buckets_mask; ) {
+               uint64_t buckets_mask_next = 0;
+
+               for ( ; buckets_mask; ) {
+                       uint64_t pkt_mask;
+                       uint32_t pkt_index;
+
+                       pkt_index = __builtin_ctzll(buckets_mask);
+                       pkt_mask = 1LLU << pkt_index;
+                       buckets_mask &= ~pkt_mask;
+
+                       lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+                                       entries, buckets_mask_next, f);
+               }
+
+               buckets_mask = buckets_mask_next;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key8_ext() */
+
+/*
+ * Burst lookup for the 8-byte key extendible-bucket hash table, "dosig"
+ * flavour: stage 1 is performed with the *_dosig helper macros (signature
+ * computed on-the-fly, per the description of the do-sig tables).
+ *
+ * table:           table handle, used as struct rte_table_hash *
+ * pkts:            input packet burst, handed to the stage-0 macros
+ * pkts_mask:       bitmask of the packets to look up; its population count
+ *                  selects the scalar path (< 5) or the pipelined path
+ * lookup_hit_mask: output, receives the accumulated pkts_mask_out
+ * entries:         output array handed to the stage-2 / grinder macros
+ *
+ * Packets that the stage-2 macro flags in buckets_mask are processed again
+ * by lookup_grinder() until no packet is flagged any more. This is done for
+ * both the scalar and the pipelined path.
+ *
+ * Always returns 0.
+ */
+static int
+rte_table_hash_lookup_key8_ext_dosig(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *f = (struct rte_table_hash *) table;
+       struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21;
+       struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21;
+       uint32_t pkt00_index, pkt01_index, pkt10_index,
+                       pkt11_index, pkt20_index, pkt21_index;
+       uint64_t pkts_mask_out = 0, buckets_mask = 0;
+       struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX];
+       uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX];
+
+       /* Cannot run the pipeline with less than 5 packets */
+       if (__builtin_popcountll(pkts_mask) < 5) {
+               for ( ; pkts_mask; ) {
+                       struct rte_bucket_4_8 *bucket;
+                       struct rte_mbuf *mbuf;
+                       uint32_t pkt_index;
+
+                       lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask);
+                       lookup1_stage1_dosig(mbuf, bucket, f);
+                       lookup1_stage2_ext(pkt_index, mbuf, bucket,
+                                       pkts_mask_out, entries,
+                                       buckets_mask, buckets, keys, f);
+               }
+
+               /* The single-packet stage 2 may have queued packets in
+                * buckets_mask; they still have to go through the grinder,
+                * otherwise keys stored in extension buckets are missed */
+               goto grind_next_buckets;
+       }
+
+       /*
+        * Pipeline fill
+        *
+        */
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline feed */
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0 */
+       lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts,
+                       pkts_mask);
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /*
+        * Pipeline run
+        *
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed: shift stage 1 -> stage 2 first, then
+                * stage 0 -> stage 1, so no value is overwritten before use */
+               bucket20 = bucket10;
+               bucket21 = bucket11;
+               mbuf20 = mbuf10;
+               mbuf21 = mbuf11;
+               mbuf10 = mbuf00;
+               mbuf11 = mbuf01;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /* Pipeline stage 0 */
+               lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,
+                               mbuf00, mbuf01, pkts, pkts_mask);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                               bucket20, bucket21, pkts_mask_out, entries,
+                               buckets_mask, buckets, keys, f);
+       }
+
+       /*
+        * Pipeline flush
+        *
+        */
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       mbuf10 = mbuf00;
+       mbuf11 = mbuf01;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1 */
+       lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f);
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+       /* Pipeline feed */
+       bucket20 = bucket10;
+       bucket21 = bucket11;
+       mbuf20 = mbuf10;
+       mbuf21 = mbuf11;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+
+       /* Pipeline stage 2 */
+       lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21,
+                       bucket20, bucket21, pkts_mask_out, entries,
+                       buckets_mask, buckets, keys, f);
+
+grind_next_buckets:
+       /* Grind next buckets: every pass handles the packets flagged in
+        * buckets_mask and collects those flagged again for the next pass */
+       for ( ; buckets_mask; ) {
+               uint64_t buckets_mask_next = 0;
+
+               for ( ; buckets_mask; ) {
+                       uint64_t pkt_mask;
+                       uint32_t pkt_index;
+
+                       pkt_index = __builtin_ctzll(buckets_mask);
+                       pkt_mask = 1LLU << pkt_index;
+                       buckets_mask &= ~pkt_mask;
+
+                       lookup_grinder(pkt_index, buckets, keys, pkts_mask_out,
+                                       entries, buckets_mask_next, f);
+               }
+
+               buckets_mask = buckets_mask_next;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+} /* rte_table_hash_lookup_key8_ext_dosig() */
+
+/* Operations of the 8-byte key LRU hash table, plain (non-dosig) lookup. */
+struct rte_table_ops rte_table_hash_key8_lru_ops = {
+       .f_create = rte_table_hash_create_key8_lru,
+       .f_free   = rte_table_hash_free_key8_lru,
+       .f_add    = rte_table_hash_entry_add_key8_lru,
+       .f_delete = rte_table_hash_entry_delete_key8_lru,
+       .f_lookup = rte_table_hash_lookup_key8_lru,
+};
+
+/*
+ * Operations of the 8-byte key LRU hash table, dosig lookup. Only f_lookup
+ * differs from the non-dosig table; create/free/add/delete are shared.
+ */
+struct rte_table_ops rte_table_hash_key8_lru_dosig_ops = {
+       .f_create = rte_table_hash_create_key8_lru,
+       .f_free   = rte_table_hash_free_key8_lru,
+       .f_add    = rte_table_hash_entry_add_key8_lru,
+       .f_delete = rte_table_hash_entry_delete_key8_lru,
+       .f_lookup = rte_table_hash_lookup_key8_lru_dosig,
+};
+
+/*
+ * Operations of the 8-byte key extendible-bucket hash table, plain
+ * (non-dosig) lookup.
+ */
+struct rte_table_ops rte_table_hash_key8_ext_ops = {
+       .f_create = rte_table_hash_create_key8_ext,
+       .f_free   = rte_table_hash_free_key8_ext,
+       .f_add    = rte_table_hash_entry_add_key8_ext,
+       .f_delete = rte_table_hash_entry_delete_key8_ext,
+       .f_lookup = rte_table_hash_lookup_key8_ext,
+};
+
+/*
+ * Operations of the 8-byte key extendible-bucket hash table, dosig lookup.
+ * Only f_lookup differs from the non-dosig table; the rest is shared.
+ */
+struct rte_table_ops rte_table_hash_key8_ext_dosig_ops = {
+       .f_create = rte_table_hash_create_key8_ext,
+       .f_free   = rte_table_hash_free_key8_ext,
+       .f_add    = rte_table_hash_entry_add_key8_ext,
+       .f_delete = rte_table_hash_entry_delete_key8_ext,
+       .f_lookup = rte_table_hash_lookup_key8_ext_dosig,
+};
diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c
new file mode 100644
index 0000000..be7b380
--- /dev/null
+++ b/lib/librte_table/rte_table_hash_lru.c
@@ -0,0 +1,1021 @@
+/*-
+ *   BSD LICENSE
+ * 
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ * 
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ * 
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+#include "rte_table_hash.h"
+#include "rte_lru.h"
+
+#define KEYS_PER_BUCKET    4 /* key slots (sig[] / key_pos[] pairs) per bucket */
+/*
+ * One hash bucket holding up to KEYS_PER_BUCKET keys.
+ * rte_table_hash_lru_create() refuses to build a table unless
+ * sizeof(struct bucket) is exactly CACHE_LINE_SIZE / 2, so the field layout
+ * must not be changed casually.
+ */
+struct bucket {
+       union {
+               struct bucket *next; /* NOTE(review): not used by the LRU code in this file section */
+               uint64_t lru_list; /* LRU state, handled by lru_init/lru_update/lru_pos */
+       };
+       uint16_t sig[KEYS_PER_BUCKET]; /* 16-bit key signature per slot; 0 marks a free slot */
+       uint32_t key_pos[KEYS_PER_BUCKET]; /* per slot: index of the key in key_mem / data_mem */
+};
+/*
+ * Per-packet lookup scratch state; the table keeps one instance per burst
+ * slot (see grinders[] in struct rte_table_hash).
+ * NOTE(review): the code using these fields is outside this section, so the
+ * field roles below are inferred from their names - confirm.
+ */
+struct grinder {
+       struct bucket *bkt; /* presumably the bucket being searched */
+       uint64_t sig; /* presumably the packet's key signature */
+       uint64_t match; /* presumably the signature match indication */
+       uint64_t match_pos; /* presumably the matching slot within the bucket */
+       uint32_t key_index; /* presumably the index of the candidate key */
+};
+/*
+ * LRU hash table instance. The header below is followed by memory[], which
+ * rte_table_hash_lru_create() allocates in the same block and carves into
+ * the bucket array, the key array, the free-key stack and the data array.
+ */
+struct rte_table_hash {
+       /* Input parameters (copied from the create() arguments) */
+       uint32_t key_size;
+       uint32_t entry_size;
+       uint32_t n_keys;
+       uint32_t n_buckets;
+       rte_table_hash_op_hash f_hash;
+       uint64_t seed;
+       uint32_t signature_offset;
+       uint32_t key_offset;
+
+       /* Internal (derived once in create()) */
+       uint64_t bucket_mask; /* n_buckets - 1, applied to the signature to pick a bucket */
+       uint32_t key_size_shl; /* shift turning a key index into a key_mem byte offset */
+       uint32_t data_size_shl; /* shift turning a key index into a data_mem byte offset */
+       uint32_t key_stack_tos; /* number of free key indices currently on key_stack */
+
+       /* Grinder: one scratch slot per packet of a burst */
+       struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX];
+
+       /* Tables: pointers into memory[], set up by create() */
+       struct bucket *buckets;
+       uint8_t *key_mem;
+       uint8_t *data_mem;
+       uint32_t *key_stack; /* stack of free key indices; pop on add, push on delete */
+       
+       /* Table memory: cache line aligned, starts right after the header */
+       uint8_t memory[0] __rte_cache_aligned;
+};
+
+/*
+ * Validate the creation parameters of an LRU hash table.
+ *
+ * Checks performed:
+ *   - key_size:         non-zero power of two
+ *   - n_keys:           non-zero power of two
+ *   - n_buckets:        non-zero power of two (the table derives its bucket
+ *                       mask as n_buckets - 1) and large enough to hold
+ *                       n_keys keys at KEYS_PER_BUCKET keys per bucket
+ *   - f_hash:           not NULL
+ *   - signature_offset: multiple of 4
+ *   - key_offset:       multiple of 8
+ *
+ * Returns 0 when all parameters are valid, -EINVAL otherwise (the offending
+ * parameter is reported through RTE_LOG).
+ */
+static int
+check_params_create(struct rte_table_hash_lru_params *params)
+{
+       uint32_t n_buckets_min;
+
+       /* key_size */
+       if ((params->key_size == 0) ||
+           (!rte_is_power_of_2(params->key_size))) {
+               RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_keys */
+       if ((params->n_keys == 0) ||
+           (!rte_is_power_of_2(params->n_keys))) {
+               RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* n_buckets: minimum is ceil(n_keys / KEYS_PER_BUCKET); the
+        * power-of-two test must look at n_buckets itself, not at n_keys */
+       n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) /
+               KEYS_PER_BUCKET;
+       if ((params->n_buckets == 0) ||
+           (!rte_is_power_of_2(params->n_buckets)) ||
+           (params->n_buckets < n_buckets_min)) {
+               RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* f_hash */
+       if (params->f_hash == NULL) {
+               RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       /* signature offset */
+       if ((params->signature_offset & 0x3) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: signature_offset invalid value\n",
+                       __func__);
+               return -EINVAL;
+       }
+
+       /* key offset */
+       if ((params->key_offset & 0x7) != 0) {
+               RTE_LOG(ERR, TABLE, "%s: key_offset invalid value\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Create an LRU hash table.
+ *
+ * params:     pointer to a struct rte_table_hash_lru_params
+ * socket_id:  socket to allocate the table memory from
+ * entry_size: size in bytes of the data stored with each key; must be a
+ *             non-zero power of two
+ *
+ * The table header and all of its arrays (buckets, keys, free-key stack,
+ * data) live in a single zeroed, cache line aligned allocation.
+ *
+ * Returns the table handle, or NULL on invalid parameters or allocation
+ * failure.
+ */
+static void *
+rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size)
+{
+       struct rte_table_hash_lru_params *p =
+               (struct rte_table_hash_lru_params *) params;
+       struct rte_table_hash *t;
+       uint32_t total_size, table_meta_sz, bucket_sz, key_sz, key_stack_sz,
+               data_sz;
+       uint32_t bucket_offset, key_offset, key_stack_offset, data_offset;
+       uint32_t i;
+
+       /* Check input parameters; the two sizeof() tests protect the layout
+        * assumptions made by the offset computation below */
+       if ((p == NULL) ||
+           (check_params_create(p) != 0) ||
+           (entry_size == 0) ||
+           (!rte_is_power_of_2(entry_size)) ||
+           ((sizeof(struct rte_table_hash) % CACHE_LINE_SIZE) != 0) ||
+           (sizeof(struct bucket) != (CACHE_LINE_SIZE / 2))) {
+               return NULL;
+       }
+
+       /* Memory allocation
+        * NOTE(review): sizes are 32-bit; very large n_keys * entry_size
+        * products would wrap - not addressed here */
+       table_meta_sz = CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash));
+       bucket_sz = CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket));
+       key_sz = CACHE_LINE_ROUNDUP(p->n_keys * p->key_size);
+       key_stack_sz = CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t));
+       data_sz = CACHE_LINE_ROUNDUP(p->n_keys * entry_size);
+       total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz +
+               data_sz;
+
+       t = rte_zmalloc_socket("TABLE", total_size, CACHE_LINE_SIZE, socket_id);
+       if (t == NULL) {
+               RTE_LOG(ERR, TABLE,
+                       "%s: Cannot allocate %u bytes for hash table\n",
+                       __func__, total_size);
+               return NULL;
+       }
+       RTE_LOG(INFO, TABLE,
+               "%s (%u-byte key): Hash table memory footprint is %u bytes\n",
+               __func__, p->key_size, total_size);
+
+       /* Memory initialization */
+       t->key_size = p->key_size;
+       t->entry_size = entry_size;
+       t->n_keys = p->n_keys;
+       t->n_buckets = p->n_buckets;
+       t->f_hash = p->f_hash;
+       t->seed = p->seed;
+       t->signature_offset = p->signature_offset;
+       t->key_offset = p->key_offset;
+
+       /* Internal: the data shift is derived from the entry size, since
+        * data_mem is indexed in units of entry_size bytes */
+       t->bucket_mask = t->n_buckets - 1;
+       t->key_size_shl = __builtin_ctzl(p->key_size);
+       t->data_size_shl = __builtin_ctzl(entry_size);
+
+       /* Tables: memory[] already begins right after the header, so the
+        * offsets into it start at 0. Adding table_meta_sz here would push
+        * the last array table_meta_sz bytes past the allocation. */
+       bucket_offset = 0;
+       key_offset = bucket_offset + bucket_sz;
+       key_stack_offset = key_offset + key_sz;
+       data_offset = key_stack_offset + key_stack_sz;
+
+       t->buckets = (struct bucket *) &t->memory[bucket_offset];
+       t->key_mem = &t->memory[key_offset];
+       t->key_stack = (uint32_t *) &t->memory[key_stack_offset];
+       t->data_mem = &t->memory[data_offset];
+
+       /* Key stack: all key indices are free, index 0 is popped first */
+       for (i = 0; i < t->n_keys; i++)
+               t->key_stack[i] = t->n_keys - 1 - i;
+       t->key_stack_tos = t->n_keys;
+
+       /* LRU */
+       for (i = 0; i < t->n_buckets; i++) {
+               struct bucket *bkt = &t->buckets[i];
+
+               lru_init(bkt);
+       }
+
+       return t;
+}
+
+/*
+ * Release an LRU hash table.
+ *
+ * Returns 0 after handing the table to rte_free(), or -EINVAL when table is
+ * NULL.
+ */
+static int
+rte_table_hash_lru_free(void *table)
+{
+       if (table == NULL)
+               return -EINVAL;
+
+       rte_free(table);
+       return 0;
+}
+
+/*
+ * Add a key (or update its data if already present) in an LRU hash table.
+ *
+ * table:     table handle
+ * key:       key to add, t->key_size bytes
+ * entry:     data to store with the key, t->entry_size bytes
+ * key_found: output, 1 if the key was already present, 0 otherwise
+ * entry_ptr: output, address of the data stored in the table
+ *
+ * Three cases, tried in order:
+ *   1. key already in its bucket: data overwritten, key_found = 1;
+ *   2. free slot in the bucket: a key index is popped from the free-key
+ *      stack and the key is installed;
+ *   3. bucket full: the slot returned by lru_pos() is overwritten with the
+ *      new key and data (its key index is reused).
+ * The touched slot is passed to lru_update() in all three cases.
+ *
+ * Returns 0 on success, -ENOSPC when a free slot exists but the free-key
+ * stack is empty.
+ */
+static int
+rte_table_hash_lru_entry_add(void *table, void *key, void *entry,
+       int *key_found, void **entry_ptr)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct bucket *bkt;
+       uint64_t sig;
+       uint32_t bkt_index, i;
+
+       sig = t->f_hash(key, t->key_size, t->seed);
+       bkt_index = sig & t->bucket_mask;
+       bkt = &t->buckets[bkt_index];
+       /* Buckets store 16-bit signatures: keep only those 16 bits so the
+        * comparison below also works for hash values wider than 32 bits.
+        * Bit 0 is forced to 1 because signature 0 marks a free slot. */
+       sig = ((sig >> 16) & 0xFFFFLLU) | 1LLU;
+
+       /* Key is present in the bucket */
+       for (i = 0; i < KEYS_PER_BUCKET; i++) {
+               uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+               uint32_t bkt_key_index = bkt->key_pos[i];
+               uint8_t *bkt_key =
+                       &t->key_mem[bkt_key_index << t->key_size_shl];
+
+               if ((sig == bkt_sig) &&
+                   (memcmp(key, bkt_key, t->key_size) == 0)) {
+                       uint8_t *data =
+                               &t->data_mem[bkt_key_index << t->data_size_shl];
+
+                       memcpy(data, entry, t->entry_size);
+                       lru_update(bkt, i);
+                       *key_found = 1;
+                       *entry_ptr = (void *) data;
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       for (i = 0; i < KEYS_PER_BUCKET; i++) {
+               uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+
+               if (bkt_sig == 0) {
+                       uint32_t bkt_key_index;
+                       uint8_t *bkt_key, *data;
+
+                       /* Allocate new key */
+                       if (t->key_stack_tos == 0) {
+                               /* No keys available */
+                               return -ENOSPC;
+                       }
+                       bkt_key_index = t->key_stack[--t->key_stack_tos];
+
+                       /* Install new key */
+                       bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl];
+                       data = &t->data_mem[bkt_key_index << t->data_size_shl];
+
+                       bkt->sig[i] = (uint16_t) sig;
+                       bkt->key_pos[i] = bkt_key_index;
+                       memcpy(bkt_key, key, t->key_size);
+                       memcpy(data, entry, t->entry_size);
+                       lru_update(bkt, i);
+
+                       *key_found = 0;
+                       *entry_ptr = (void *) data;
+                       return 0;
+               }
+       }
+
+       /* Bucket full: recycle the slot designated by lru_pos(), keeping its
+        * key index and overwriting signature, key and data */
+       {
+               uint64_t pos = lru_pos(bkt);
+               uint32_t bkt_key_index = bkt->key_pos[pos];
+               uint8_t *bkt_key =
+                       &t->key_mem[bkt_key_index << t->key_size_shl];
+               uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl];
+
+               bkt->sig[pos] = (uint16_t) sig;
+               memcpy(bkt_key, key, t->key_size);
+               memcpy(data, entry, t->entry_size);
+               lru_update(bkt, pos);
+
+               *key_found = 0;
+               *entry_ptr = (void *) data;
+               return 0;
+       }
+}
+
+/*
+ * Delete a key from an LRU hash table.
+ *
+ * table:     table handle
+ * key:       key to delete, t->key_size bytes
+ * key_found: output, 1 if the key was present (and is now removed), 0
+ *            otherwise
+ * entry:     optional output buffer of t->entry_size bytes that receives the
+ *            data that was stored with the key; may be NULL when the caller
+ *            does not need it
+ *
+ * On a hit the slot's signature is cleared (marking the slot free) and the
+ * key index is pushed back on the free-key stack.
+ *
+ * Always returns 0; a missing key is reported through *key_found only.
+ */
+static int
+rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found,
+       void *entry)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct bucket *bkt;
+       uint64_t sig;
+       uint32_t bkt_index, i;
+
+       sig = t->f_hash(key, t->key_size, t->seed);
+       bkt_index = sig & t->bucket_mask;
+       bkt = &t->buckets[bkt_index];
+       /* Buckets store 16-bit signatures: keep only those 16 bits so the
+        * comparison below also works for hash values wider than 32 bits.
+        * Bit 0 is forced to 1 because signature 0 marks a free slot. */
+       sig = ((sig >> 16) & 0xFFFFLLU) | 1LLU;
+
+       /* Key is present in the bucket */
+       for (i = 0; i < KEYS_PER_BUCKET; i++) {
+               uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+               uint32_t bkt_key_index = bkt->key_pos[i];
+               uint8_t *bkt_key =
+                       &t->key_mem[bkt_key_index << t->key_size_shl];
+
+               if ((sig == bkt_sig) &&
+                   (memcmp(key, bkt_key, t->key_size) == 0)) {
+                       uint8_t *data =
+                               &t->data_mem[bkt_key_index << t->data_size_shl];
+
+                       bkt->sig[i] = 0;
+                       t->key_stack[t->key_stack_tos++] = bkt_key_index;
+                       *key_found = 1;
+                       /* Handing the old data back is optional */
+                       if (entry != NULL)
+                               memcpy(entry, data, t->entry_size);
+                       return 0;
+               }
+       }
+
+       /* Key is not present in the bucket */
+       *key_found = 0;
+       return 0;
+}
+
+/*
+ * Straightforward (non-pipelined) burst lookup for the LRU hash table.
+ *
+ * table:           table handle
+ * pkts:            input packet burst
+ * pkts_mask:       bitmask selecting the packets of pkts to look up
+ * lookup_hit_mask: output, bit i set iff packet i hit
+ * entries:         output, entries[i] points to the table data of packet i
+ *                  for every hit packet; untouched for misses
+ * dosig:           non-zero: compute the signature with f_hash on the key
+ *                  read from the packet meta-data; zero: read a 32-bit
+ *                  pre-computed signature from the packet meta-data
+ *
+ * Every hit is passed to lru_update() for its bucket slot.
+ *
+ * Always returns 0.
+ */
+static int rte_table_hash_lru_lookup_unoptimized(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries,
+       int dosig)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       uint64_t pkts_mask_out = 0;
+
+       for ( ; pkts_mask; ) {
+               struct bucket *bkt;
+               struct rte_mbuf *pkt;
+               uint8_t *key;
+               uint64_t pkt_mask, sig;
+               uint32_t pkt_index, bkt_index, i;
+
+               /* Take the lowest-numbered packet still pending */
+               pkt_index = __builtin_ctzll(pkts_mask);
+               pkt_mask = 1LLU << pkt_index;
+               pkts_mask &= ~pkt_mask;
+
+               pkt = pkts[pkt_index];
+               key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset);
+               if (dosig)
+                       sig = (uint64_t) t->f_hash(key, t->key_size, t->seed);
+               else
+                       sig = RTE_MBUF_METADATA_UINT32(pkt,
+                               t->signature_offset);
+
+               bkt_index = sig & t->bucket_mask;
+               bkt = &t->buckets[bkt_index];
+               /* Buckets store 16-bit signatures: keep only those 16 bits so
+                * the comparison below also works for hash values wider than
+                * 32 bits. Bit 0 is forced to 1 because signature 0 marks a
+                * free slot. */
+               sig = ((sig >> 16) & 0xFFFFLLU) | 1LLU;
+
+               /* Key is present in the bucket */
+               for (i = 0; i < KEYS_PER_BUCKET; i++) {
+                       uint64_t bkt_sig = (uint64_t) bkt->sig[i];
+                       uint32_t bkt_key_index = bkt->key_pos[i];
+                       uint8_t *bkt_key =
+                               &t->key_mem[bkt_key_index << t->key_size_shl];
+
+                       if ((sig == bkt_sig) &&
+                           (memcmp(key, bkt_key, t->key_size) == 0)) {
+                               uint8_t *data = &t->data_mem[bkt_key_index <<
+                                       t->data_size_shl];
+
+                               lru_update(bkt, i);
+                               pkts_mask_out |= pkt_mask;
+                               entries[pkt_index] = (void *) data;
+                               break;
+                       }
+               }
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return 0;
+}
+
+/***
+ * Lookup tables used by lookup_cmp_sig() to summarize the result of comparing
+ * one signature against the four signatures stored in a bucket.
+ * 
+ * mask = match bitmask (bit i is set when bucket signature slot i equals the
+ *        signature being looked up)
+ * match = at least one match
+ * match_many = more than one match
+ * match_pos = position of first match (lowest set bit of mask; 00 when there
+ *        is no match at all)
+ * 
+ * ----------------------------------------
+ * mask      match   match_many   match_pos
+ * ----------------------------------------
+ * 0000      0       0            00
+ * 0001      1       0            00
+ * 0010      1       0            01
+ * 0011      1       1            00
+ * ----------------------------------------
+ * 0100      1       0            10
+ * 0101      1       1            00
+ * 0110      1       1            01
+ * 0111      1       1            00
+ * ----------------------------------------
+ * 1000      1       0            11
+ * 1001      1       1            00
+ * 1010      1       1            01
+ * 1011      1       1            00
+ * ----------------------------------------
+ * 1100      1       1            10
+ * 1101      1       1            00
+ * 1110      1       1            01
+ * 1111      1       1            00
+ * ----------------------------------------
+ *
+ * Each column is packed into a single integer constant that is indexed by the
+ * value of mask (0 .. 15), with the entry for mask = 0 stored in the least
+ * significant bits: match and match_many use one bit per entry (shift right
+ * by mask), match_pos uses two bits per entry (shift right by 2 * mask). The
+ * binary strings below therefore list the entries from mask = 15 (leftmost)
+ * down to mask = 0 (rightmost).
+ *
+ * match = 1111_1111_1111_1110
+ * match_many = 1111_1110_1110_1000
+ * match_pos = 0001_0010_0001_0011__0001_0010_0001_0000
+ *
+ * match = 0xFFFELLU
+ * match_many = 0xFEE8LLU
+ * match_pos = 0x12131210LLU
+ *
+ ***/
+
+#define LUT_MATCH                                          0xFFFELLU
+#define LUT_MATCH_MANY                                     0xFEE8LLU
+#define LUT_MATCH_POS                                      0x12131210LLU
+
+#define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)         
\
+{                                                                              
\
+       uint64_t bucket_sig[4], mask[4], mask_all;                              
   \
+                                                                               
\
+       bucket_sig[0] = bucket->sig[0];                                         
   \
+       bucket_sig[1] = bucket->sig[1];                                         
   \
+       bucket_sig[2] = bucket->sig[2];                                         
   \
+       bucket_sig[3] = bucket->sig[3];                                         
   \
+                                                                               
\
+       bucket_sig[0] ^= mbuf_sig;                                              
   \
+       bucket_sig[1] ^= mbuf_sig;                                              
   \
+       bucket_sig[2] ^= mbuf_sig;                                              
   \
+       bucket_sig[3] ^= mbuf_sig;                                              
   \
+                                                                               
\
+       mask[0] = 0;                                                            
   \
+       mask[1] = 0;                                                            
   \
+       mask[2] = 0;                                                            
   \
+       mask[3] = 0;                                                            
   \
+                                                                               
\
+       if (bucket_sig[0] == 0) mask[0] = 1;                                    
   \
+       if (bucket_sig[1] == 0) mask[1] = 2;                                    
   \
+       if (bucket_sig[2] == 0) mask[2] = 4;                                    
   \
+       if (bucket_sig[3] == 0) mask[3] = 8;                                    
   \
+                                                                               
\
+       mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]);                   
   \
+                                                                               
\
+       match = (LUT_MATCH >> mask_all) & 1;                                    
   \
+       match_many = (LUT_MATCH_MANY >> mask_all) & 1;                          
   \
+       match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3;                     
   \
+}
+
+#define lookup_cmp_key(mbuf, key, match_key, f)                                
\
+{                                                                              
\
+       uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);  
   \
+       uint64_t *bkt_key = (uint64_t *) key;                                   
   \
+                                                                               
   \
+       switch (f->key_size)                                                    
   \
+       {                                                                       
   \
+               case 8:                                                         
       \
+               {                                                               
       \
+                       uint64_t xor = pkt_key[0] ^ bkt_key[0];                 
           \
+                       match_key = 0;                                          
           \
+                       if (xor == 0) match_key = 1;                            
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
       \
+               case 16:                                                        
       \
+               {                                                               
       \
+                       uint64_t xor[2], or;                                    
           \
+                                                                               
\
+                       xor[0] = pkt_key[0] ^ bkt_key[0];                       
           \
+                       xor[1] = pkt_key[1] ^ bkt_key[1];                       
           \
+                       or = xor[0] | xor[1];                                   
           \
+                       match_key = 0;                                          
           \
+                       if (or == 0) match_key = 1;                             
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
   \
+               case 32:                                                        
       \
+               {                                                               
       \
+                       uint64_t xor[4], or;                                    
           \
+                                                                               
\
+                       xor[0] = pkt_key[0] ^ bkt_key[0];                       
           \
+                       xor[1] = pkt_key[1] ^ bkt_key[1];                       
           \
+                       xor[2] = pkt_key[2] ^ bkt_key[2];                       
           \
+                       xor[3] = pkt_key[3] ^ bkt_key[3];                       
           \
+                       or = xor[0] | xor[1] | xor[2] | xor[3];                 
           \
+                       match_key = 0;                                          
           \
+                       if (or == 0) match_key = 1;                             
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
\
+               case 64:                                                        
       \
+               {                                                               
       \
+                       uint64_t xor[8], or;                                    
           \
+                                                                               
\
+                       xor[0] = pkt_key[0] ^ bkt_key[0];                       
           \
+                       xor[1] = pkt_key[1] ^ bkt_key[1];                       
           \
+                       xor[2] = pkt_key[2] ^ bkt_key[2];                       
           \
+                       xor[3] = pkt_key[3] ^ bkt_key[3];                       
           \
+                       xor[4] = pkt_key[4] ^ bkt_key[4];                       
           \
+                       xor[5] = pkt_key[5] ^ bkt_key[5];                       
           \
+                       xor[6] = pkt_key[6] ^ bkt_key[6];                       
           \
+                       xor[7] = pkt_key[7] ^ bkt_key[7];                       
           \
+                       or = xor[0] | xor[1] | xor[2] | xor[3] | xor[4] | 
xor[5] | xor[6] | xor[7]; \
+                       match_key = 0;                                          
           \
+                       if (or == 0) match_key = 1;                             
           \
+               }                                                               
       \
+               break;                                                          
       \
+                                                                               
   \
+               default:                                                        
       \
+                       match_key = 0;                                          
           \
+                       if (memcmp(pkt_key, bkt_key, f->key_size) == 0) 
{match_key = 1;}   \
+       }                                                                       
   \
+}
+
+#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)        
\
+{                                                                              
\
+       uint64_t pkt00_mask, pkt01_mask;                                        
   \
+       struct rte_mbuf *mbuf00, *mbuf01;                                       
   \
+                                                                               
   \
+       pkt00_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt00_mask = 1LLU << pkt00_index;                                       
   \
+       pkts_mask &= ~pkt00_mask;                                               
   \
+       mbuf00 = pkts[pkt00_index];                                             
   \
+                                                                               
   \
+       pkt01_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt01_mask = 1LLU << pkt01_index;                                       
   \
+       pkts_mask &= ~pkt01_mask;                                               
   \
+       mbuf01 = pkts[pkt01_index];                                             
   \
+                                                                               
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));                  
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));                  
   \
+}
+
+#define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, 
pkt01_index) \
+{                                                                              
\
+       uint64_t pkt00_mask, pkt01_mask;                                        
   \
+       struct rte_mbuf *mbuf00, *mbuf01;                                       
   \
+                                                                               
   \
+       pkt00_index = __builtin_ctzll(pkts_mask);                               
   \
+       pkt00_mask = 1LLU << pkt00_index;                                       
   \
+       pkts_mask &= ~pkt00_mask;                                               
   \
+       mbuf00 = pkts[pkt00_index];                                             
   \
+                                                                               
   \
+       pkt01_index = __builtin_ctzll(pkts_mask);                               
   \
+       if (pkts_mask == 0) {                                                   
   \
+               pkt01_index = pkt00_index;                                      
       \
+       }                                                                       
   \
+       pkt01_mask = 1LLU << pkt01_index;                                       
   \
+       pkts_mask &= ~pkt01_mask;                                               
   \
+       mbuf01 = pkts[pkt01_index];                                             
   \
+                                                                               
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, 0));                  
   \
+       rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, 0));                  
   \
+}
+
+#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index)                   
\
+{                                                                              
\
+       struct grinder *g10, *g11;                                              
   \
+       uint64_t sig10, sig11, bkt10_index, bkt11_index;                        
   \
+       struct rte_mbuf *mbuf10, *mbuf11;                                       
   \
+       struct bucket *bkt10, *bkt11, *buckets = t->buckets;                    
   \
+       uint64_t bucket_mask = t->bucket_mask;                                  
   \
+       uint32_t signature_offset = t->signature_offset;                        
   \
+                                                                               
   \
+       mbuf10 = pkts[pkt10_index];                                             
   \
+       sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);  
   \
+       bkt10_index = sig10 & bucket_mask;                                      
   \
+       bkt10 = &buckets[bkt10_index];                                          
   \
+                                                                               
   \
+       mbuf11 = pkts[pkt11_index];                                             
   \
+       sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);  
   \
+       bkt11_index = sig11 & bucket_mask;                                      
   \
+       bkt11 = &buckets[bkt11_index];                                          
   \
+                                                                               
   \
+       rte_prefetch0(bkt10);                                                   
   \
+       rte_prefetch0(bkt11);                                                   
   \
+                                                                               
   \
+       g10 = &g[pkt10_index];                                                  
   \
+       g10->sig = sig10;                                                       
   \
+       g10->bkt = bkt10;                                                       
   \
+                                                                               
   \
+       g11 = &g[pkt11_index];                                                  
   \
+       g11->sig = sig11;                                                       
   \
+       g11->bkt = bkt11;                                                       
   \
+}
+
+#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)             
\
+{                                                                              
\
+       struct grinder *g10, *g11;                                              
   \
+       uint64_t sig10, sig11, bkt10_index, bkt11_index;                        
   \
+       struct rte_mbuf *mbuf10, *mbuf11;                                       
   \
+       struct bucket *bkt10, *bkt11, *buckets = t->buckets;                    
   \
+       uint8_t *key10, *key11;                                                 
   \
+       uint64_t bucket_mask = t->bucket_mask;                                  
   \
+       rte_table_hash_op_hash f_hash = t->f_hash;                              
   \
+       uint64_t seed = t->seed;                                                
   \
+       uint32_t key_size = t->key_size;                                        
   \
+       uint32_t key_offset = t->key_offset;                                    
   \
+                                                                               
   \
+       mbuf10 = pkts[pkt10_index];                                             
   \
+       key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);                
   \
+       sig10 = (uint64_t) f_hash(key10, key_size, seed);                       
   \
+       bkt10_index = sig10 & bucket_mask;                                      
   \
+       bkt10 = &buckets[bkt10_index];                                          
   \
+                                                                               
   \
+       mbuf11 = pkts[pkt11_index];                                             
   \
+       key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);                
   \
+       sig11 = (uint64_t) f_hash(key11, key_size, seed);                       
   \
+       bkt11_index = sig11 & bucket_mask;                                      
   \
+       bkt11 = &buckets[bkt11_index];                                          
   \
+                                                                               
   \
+       rte_prefetch0(bkt10);                                                   
   \
+       rte_prefetch0(bkt11);                                                   
   \
+                                                                               
   \
+       g10 = &g[pkt10_index];                                                  
   \
+       g10->sig = sig10;                                                       
   \
+       g10->bkt = bkt10;                                                       
   \
+                                                                               
   \
+       g11 = &g[pkt11_index];                                                  
   \
+       g11->sig = sig11;                                                       
   \
+       g11->bkt = bkt11;                                                       
   \
+}
+
+#define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)   
\
+{                                                                              
\
+       struct grinder *g20, *g21;                                              
   \
+       uint64_t sig20, sig21;                                                  
   \
+       struct bucket *bkt20, *bkt21;                                           
   \
+       uint8_t *key20, *key21, *key_mem = t->key_mem;                          
   \
+       uint64_t match20, match21, match_many20, match_many21, match_pos20, 
match_pos21; \
+       uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;      
   \
+                                                                               
   \
+       g20 = &g[pkt20_index];                                                  
   \
+       sig20 = g20->sig;                                                       
   \
+       bkt20 = g20->bkt;                                                       
   \
+       sig20 = (sig20 >> 16) | 1LLU;                                           
   \
+       lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);       
   \
+       match20 <<= pkt20_index;                                                
   \
+       match_many20 <<= pkt20_index;                                           
   \
+       key20_index = bkt20->key_pos[match_pos20];                              
   \
+       key20 = &key_mem[key20_index << key_size_shl];                          
   \
+                                                                               
   \
+       g21 = &g[pkt21_index];                                                  
   \
+       sig21 = g21->sig;                                                       
   \
+       bkt21 = g21->bkt;                                                       
   \
+       sig21 = (sig21 >> 16) | 1LLU;                                           
   \
+       lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);       
   \
+       match21 <<= pkt21_index;                                                
   \
+       match_many21 <<= pkt21_index;                                           
   \
+       key21_index = bkt21->key_pos[match_pos21];                              
   \
+       key21 = &key_mem[key21_index << key_size_shl];                          
   \
+                                                                               
   \
+       rte_prefetch0(key20);                                                   
   \
+       rte_prefetch0(key21);                                                   
   \
+                                                                               
   \
+       pkts_mask_match_many |= match_many20 | match_many21;                    
   \
+                                                                               
   \
+       g20->match = match20;                                                   
   \
+       g20->match_pos = match_pos20;                                           
   \
+       g20->key_index = key20_index;                                           
   \
+                                                                               
   \
+       g21->match = match21;                                                   
   \
+       g21->match_pos = match_pos21;                                           
   \
+       g21->key_index = key21_index;                                           
   \
+}
+
+#define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries) \
+{                                                                              
\
+       struct grinder *g30, *g31;                                              
   \
+       struct rte_mbuf *mbuf30, *mbuf31;                                       
   \
+       struct bucket *bkt30, *bkt31;                                           
   \
+       uint8_t *key30, *key31, *key_mem = t->key_mem;                          
   \
+       uint8_t *data30, *data31, *data_mem = t->data_mem;                      
   \
+       uint64_t match30, match31, match_pos30, match_pos31, match_key30, 
match_key31, match_keys; \
+       uint32_t key30_index, key31_index;                                      
   \
+       uint32_t key_size_shl = t->key_size_shl;                                
   \
+       uint32_t data_size_shl = t->data_size_shl;                              
   \
+                                                                               
   \
+       mbuf30 = pkts[pkt30_index];                                             
   \
+       g30 = &g[pkt30_index];                                                  
   \
+       bkt30 = g30->bkt;                                                       
   \
+       match30 = g30->match;                                                   
   \
+       match_pos30 = g30->match_pos;                                           
   \
+       key30_index = g30->key_index;                                           
   \
+       key30 = &key_mem[key30_index << key_size_shl];                          
   \
+       lookup_cmp_key(mbuf30, key30, match_key30, t);                          
   \
+       match_key30 <<= pkt30_index;                                            
   \
+       match_key30 &= match30;                                                 
   \
+       data30 = &data_mem[key30_index << data_size_shl];                       
   \
+       entries[pkt30_index] = data30;                                          
   \
+                                                                               
\
+       mbuf31 = pkts[pkt31_index];                                             
   \
+       g31 = &g[pkt31_index];                                                  
   \
+       bkt31 = g31->bkt;                                                       
   \
+       match31 = g31->match;                                                   
   \
+       match_pos31 = g31->match_pos;                                           
   \
+       key31_index = g31->key_index;                                           
   \
+       key31 = &key_mem[key31_index << key_size_shl];                          
   \
+       lookup_cmp_key(mbuf31, key31, match_key31, t);                          
   \
+       match_key31 <<= pkt31_index;                                            
   \
+       match_key31 &= match31;                                                 
   \
+       data31 = &data_mem[key31_index << data_size_shl];                       
   \
+       entries[pkt31_index] = data31;                                          
   \
+                                                                               
   \
+       rte_prefetch0(data30);                                                  
   \
+       rte_prefetch0(data31);                                                  
   \
+                                                                               
   \
+       match_keys = match_key30 | match_key31;                                 
   \
+       pkts_mask_out |= match_keys;                                            
   \
+                                                                               
   \
+       if (match_key30 == 0) {                                                 
   \
+               match_pos30 = 4;                                                
       \
+       }                                                                       
   \
+       lru_update(bkt30, match_pos30);                                         
   \
+                                                                               
   \
+       if (match_key31 == 0) {                                                 
   \
+               match_pos31 = 4;                                                
       \
+       }                                                                       
   \
+       lru_update(bkt31, match_pos31);                                         
   \
+}
+
+/*
+ * Bulk lookup for packets that carry a pre-computed key signature in their
+ * meta-data.
+ *
+ * The lookup is implemented as a 4-stage pipeline, with each stage processing
+ * two different packets. The purpose of the pipelined implementation is to
+ * hide the latency of prefetching the data structures and to loosen the data
+ * dependency between instructions.
+ *
+ *   p00  _______   p10  _______   p20  _______   p30  _______
+ * ----->|       |----->|       |----->|       |----->|       |----->
+ *       |   0   |      |   1   |      |   2   |      |   3   |
+ * ----->|_______|----->|_______|----->|_______|----->|_______|----->
+ *   p01            p11            p21            p31
+ *
+ * The naming convention is:
+ *    pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1
+ *
+ * table:           the hash table (struct rte_table_hash)
+ * pkts:            burst of packets, indexed by bit position in pkts_mask
+ * pkts_mask:       bitmask of the valid packets in pkts
+ * lookup_hit_mask: output, bitmask of the packets for which an entry was found
+ * entries:         output, entries[i] is the data of the entry found for
+ *                  packet i
+ *
+ * Returns 0 when only the pipeline was used; otherwise the status returned by
+ * rte_table_hash_lru_lookup_unoptimized(), which handles bursts of fewer than
+ * 7 packets as well as the packets whose bucket held more than one matching
+ * signature without the first candidate key matching.
+ */
+static int rte_table_hash_lru_lookup(
+       void *table,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *t = table;
+       struct grinder *g = t->grinders;
+       uint64_t pkt00_index, pkt01_index;
+       uint64_t pkt10_index, pkt11_index;
+       uint64_t pkt20_index, pkt21_index;
+       uint64_t pkt30_index, pkt31_index;
+       uint64_t pkts_mask_out = 0;
+       uint64_t pkts_mask_match_many = 0;
+       int status = 0;
+
+       /* Cannot run the pipeline with less than 7 packets */
+       if (__builtin_popcountll(pkts_mask) < 7)
+               return rte_table_hash_lru_lookup_unoptimized(table, pkts,
+                       pkts_mask, lookup_hit_mask, entries, 0);
+
+       /*
+        * Pipeline fill: the first three packet pairs enter the pipeline
+        */
+
+       /* Stage 0 (pair 1) */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Feed: stage 0 -> stage 1 */
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Stage 0 (pair 2), stage 1 (pair 1) */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+       lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+
+       /* Feed: stage 1 -> stage 2, stage 0 -> stage 1 */
+       pkt20_index = pkt10_index;
+       pkt10_index = pkt00_index;
+       pkt21_index = pkt11_index;
+       pkt11_index = pkt01_index;
+
+       /* Stage 0 (pair 3), stage 1 (pair 2), stage 2 (pair 1) */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+       lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+       /*
+        * Pipeline run: all four stages are busy while packets remain
+        */
+       while (pkts_mask != 0) {
+               /* Feed: every pair advances by one stage */
+               pkt30_index = pkt20_index;
+               pkt20_index = pkt10_index;
+               pkt10_index = pkt00_index;
+               pkt31_index = pkt21_index;
+               pkt21_index = pkt11_index;
+               pkt11_index = pkt01_index;
+
+               /* The last pair may consist of a single packet */
+               lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask,
+                       pkt00_index, pkt01_index);
+               lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+               lookup2_stage2(t, g, pkt20_index, pkt21_index,
+                       pkts_mask_match_many);
+               lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index,
+                       pkts_mask_out, entries);
+       }
+
+       /*
+        * Pipeline flush: no new packets, drain the pairs still in flight
+        */
+
+       /* Feed: every pair advances by one stage */
+       pkt30_index = pkt20_index;
+       pkt20_index = pkt10_index;
+       pkt10_index = pkt00_index;
+       pkt31_index = pkt21_index;
+       pkt21_index = pkt11_index;
+       pkt11_index = pkt01_index;
+
+       /* Stages 1, 2 and 3 */
+       lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index);
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+               entries);
+
+       /* Feed: stage 2 -> stage 3, stage 1 -> stage 2 */
+       pkt30_index = pkt20_index;
+       pkt20_index = pkt10_index;
+       pkt31_index = pkt21_index;
+       pkt21_index = pkt11_index;
+
+       /* Stages 2 and 3 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+               entries);
+
+       /* Feed: stage 2 -> stage 3 */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+
+       /* Stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out,
+               entries);
+
+       /*
+        * Slow path: packets with several matching signatures that were not
+        * resolved by the pipeline are looked up again one by one
+        */
+       pkts_mask_match_many &= ~pkts_mask_out;
+       if (pkts_mask_match_many != 0) {
+               uint64_t slow_hits = 0;
+
+               status = rte_table_hash_lru_lookup_unoptimized(table, pkts,
+                       pkts_mask_match_many, &slow_hits, entries, 0);
+               pkts_mask_out |= slow_hits;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return status;
+}
+/*
+ * LRU hash table lookup for a burst of packets, "do-sig" variant.
+ *
+ * Runs the lookup2_stage0 .. lookup2_stage3 helpers as a four-stage software
+ * pipeline that carries two packets per stage. Stage 1 is
+ * lookup2_stage1_dosig() and every call to
+ * rte_table_hash_lru_lookup_unoptimized() made from here passes 1 as its last
+ * argument (presumably selecting "compute the key signature here" rather than
+ * "read it from packet meta-data"; the helpers are defined outside this
+ * function -- confirm there).
+ *
+ * Local naming: pktSN_index is the index of packet N (0 or 1) of the pair
+ * currently owned by pipeline stage S (0..3).
+ *
+ * table:           opaque handle, cast to struct rte_table_hash
+ * pkts:            packet burst, passed through to the stage helpers
+ * pkts_mask:       bitmask of the packets in pkts[] to be looked up
+ * lookup_hit_mask: output; on the pipelined path it receives pkts_mask_out,
+ *                  on the short-burst path it is filled in by the
+ *                  unoptimized lookup
+ * entries:         output array handed to stage 3 and to the unoptimized
+ *                  lookup
+ *
+ * Returns 0 when only the pipeline ran; otherwise the value returned by
+ * rte_table_hash_lru_lookup_unoptimized().
+ */
+static int rte_table_hash_lru_lookup_dosig(
+       void *table, 
+       struct rte_mbuf **pkts, 
+       uint64_t pkts_mask,
+       uint64_t *lookup_hit_mask,
+       void **entries)
+{
+       struct rte_table_hash *t = (struct rte_table_hash *) table;
+       struct grinder *g = t->grinders;
+       uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index, 
pkt20_index, pkt21_index, pkt30_index, pkt31_index;
+       uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0;
+       int status = 0;
+
+       /*
+        * Cannot run the pipeline with less than 7 packets: hand the whole
+        * burst to the unoptimized lookup instead. (The prologue below issues
+        * three stage 0 calls before the main loop is entered.)
+        */
+       if (__builtin_popcountll(pkts_mask) < 7) {
+               return rte_table_hash_lru_lookup_unoptimized(table, pkts, 
pkts_mask, lookup_hit_mask, entries, 1);
+       }
+       
+       /*
+        * Pipeline stage 0: first packet pair enters the pipeline.
+        * pkt00_index/pkt01_index are uninitialised before this call and read
+        * right after it, so the helper is what assigns them.
+        */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline feed: pair moves from stage 0 to stage 1 */
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0: second packet pair enters */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline stage 1: first pair */
+       lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+       
+       /*
+        * Pipeline feed: shift every pair one stage forward. The order of the
+        * assignments matters (oldest stage first) so no index is overwritten
+        * before it has been copied.
+        */
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 0: third packet pair enters */
+       lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index);
+
+       /* Pipeline stage 1: second pair */
+       lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+       /*
+        * Pipeline stage 2: first pair. pkts_mask_match_many is handed to this
+        * stage by name; it starts at 0 and is only ever passed to stage 2, so
+        * that is where it can gain bits.
+        */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+
+       /*
+        * Pipeline run
+        *
+        * Steady state: all four stages are busy on each iteration. The loop
+        * ends when pkts_mask reaches zero; nothing in this function clears
+        * bits of pkts_mask directly, so that is presumably done by the stage 0
+        * helpers (defined elsewhere -- confirm).
+        */
+       for ( ; pkts_mask; ) {
+               /* Pipeline feed: shift every pair one stage forward */
+               pkt30_index = pkt20_index;
+               pkt31_index = pkt21_index;
+               pkt20_index = pkt10_index;
+               pkt21_index = pkt11_index;
+               pkt10_index = pkt00_index;
+               pkt11_index = pkt01_index;
+
+               /*
+                * Pipeline stage 0, "odd support" variant (by its name it
+                * copes with a single packet being left in pkts_mask --
+                * confirm against the helper's definition).
+                */
+               lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, 
pkt00_index, pkt01_index);
+
+               /* Pipeline stage 1 */
+               lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+
+               /* Pipeline stage 2 */
+               lookup2_stage2(t, g, pkt20_index, pkt21_index, 
pkts_mask_match_many);
+
+               /*
+                * Pipeline stage 3: receives pkts_mask_out and entries, i.e.
+                * this is the stage that produces the per-packet results.
+                */
+               lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, 
pkts_mask_out, entries);
+       }
+       
+       /*
+        * Pipeline feed: pkts_mask is now zero, so no further stage 0 call is
+        * made. From here on the pipeline is drained: each round below shifts
+        * the pairs forward and runs one stage fewer than the previous round.
+        */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       pkt10_index = pkt00_index;
+       pkt11_index = pkt01_index;
+
+       /* Pipeline stage 1: last pair admitted by stage 0 */
+       lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index);
+       
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+
+       /* Pipeline feed: drain, stages 2 and 3 still hold a pair */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       pkt20_index = pkt10_index;
+       pkt21_index = pkt11_index;
+       
+       /* Pipeline stage 2 */
+       lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many);
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+       
+       /* Pipeline feed: drain, only the final pair is left */
+       pkt30_index = pkt20_index;
+       pkt31_index = pkt21_index;
+       
+       /* Pipeline stage 3 */
+       lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, 
entries);
+
+       /*
+        * Slow path: packets recorded in pkts_mask_match_many that are not
+        * already set in pkts_mask_out are re-submitted to the unoptimized
+        * lookup. Its result mask is merged into pkts_mask_out and its return
+        * value replaces status.
+        */
+       pkts_mask_match_many &= ~pkts_mask_out;
+       if (pkts_mask_match_many) {
+               uint64_t pkts_mask_out_slow = 0;
+
+               status = rte_table_hash_lru_lookup_unoptimized(table, pkts, 
pkts_mask_match_many, &pkts_mask_out_slow, entries, 1);
+               pkts_mask_out |= pkts_mask_out_slow;
+       }
+
+       *lookup_hit_mask = pkts_mask_out;
+       return status;
+}
+
+/*
+ * Operations table for the LRU hash table whose lookup entry point is
+ * rte_table_hash_lru_lookup(). Create, free, add and delete are the common
+ * LRU hash table handlers.
+ */
+struct rte_table_ops rte_table_hash_lru_ops = {
+       /* Table life cycle */
+       .f_create = rte_table_hash_lru_create,
+       .f_free   = rte_table_hash_lru_free,
+
+       /* Entry management */
+       .f_add    = rte_table_hash_lru_entry_add,
+       .f_delete = rte_table_hash_lru_entry_delete,
+
+       /* Burst lookup */
+       .f_lookup = rte_table_hash_lru_lookup,
+};
+
+/*
+ * Operations table for the "do-sig" flavour of the LRU hash table: same
+ * create, free, add and delete handlers as the common LRU hash table, with
+ * rte_table_hash_lru_lookup_dosig() as the lookup entry point.
+ */
+struct rte_table_ops rte_table_hash_lru_dosig_ops = {
+       /* Table life cycle */
+       .f_create = rte_table_hash_lru_create,
+       .f_free   = rte_table_hash_lru_free,
+
+       /* Entry management */
+       .f_add    = rte_table_hash_lru_entry_add,
+       .f_delete = rte_table_hash_lru_entry_delete,
+
+       /* Burst lookup */
+       .f_lookup = rte_table_hash_lru_lookup_dosig,
+};
-- 
1.7.7.6

Reply via email to