Re: [RFC 1/2] eal: add pointer compression functions

Thomas Monjalon Mon, 09 Oct 2023 08:54:11 -0700

27/09/2023 17:08, Paul Szczepanek:
> Add a new utility header for compressing pointers. Pointers are
> compressed by taking advantage of their locality. Instead of
> storing the full address only an offset from a known base is stored.


You probably need to insert some explanations from the cover letter.

> The provided functions can store pointers in 32bit offsets.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Signed-off-by: Paul Szczepanek <paul.szczepa...@arm.com>
> Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.alig...@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
[...]
> --- a/lib/eal/include/meson.build
> +++ b/lib/eal/include/meson.build
> @@ -35,6 +35,7 @@ headers += files(
>          'rte_pci_dev_feature_defs.h',
>          'rte_pci_dev_features.h',
>          'rte_per_lcore.h',
> +     'rte_ptr_compress.h',
>          'rte_pflock.h',
>          'rte_random.h',
>          'rte_reciprocal.h',

Did you try to sort alphabetically? failed :)

> +#ifndef _RTE_PTR_COMPRESS_H_
> +#define _RTE_PTR_COMPRESS_H_

No need extra underscores.

> +
> +/**
> + * @file
> + * RTE pointer compression and decompression.

RTE has no mean here I think.

> + */
> +
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include <rte_branch_prediction.h>
> +#include <rte_common.h>
> +#include <rte_debug.h>
> +#include <rte_vect.h>
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * Compress pointers into 32 bit offsets from base pointer.

I think it should be "32-bit".

> + *
> + * @note Offsets from the base pointer must fit within 32bits. Alignment 
> allows
> + * us to drop bits from the offsets - this means that for pointers aligned by
> + * 8 bytes they must be within 32GB of the base pointer. Unaligned pointers
> + * must be within 4GB.

Not clear what is "alignment".
> + *
> + * @param ptr_base
> + *   A pointer used to calculate offsets of pointers in src_table.
> + * @param src_table
> + *   A pointer to an array of pointers.
> + * @param dest_table
> + *   A pointer to an array of compressed pointers returned by this function.
> + * @param n
> + *   The number of objects to compress, must be strictly positive.
> + * @param bit_shift
> + *   Byte alignment of memory pointed to by the pointers allows for
> + *   bits to be dropped from the offset and hence widen the memory region 
> that
> + *   can be covered. This controls how many bits are right shifted.
> + **/
> +static __rte_always_inline void
> +rte_ptr_compress_32(void *ptr_base, void **src_table,
> +             uint32_t *dest_table, unsigned int n, unsigned int bit_shift)
> +{
> +     unsigned int i = 0;
> +#if defined RTE_HAS_SVE_ACLE
> +     svuint64_t v_src_table;
> +     svuint64_t v_dest_table;
> +     svbool_t pg = svwhilelt_b64(i, n);
> +     do {
> +             v_src_table = svld1_u64(pg, (uint64_t *)src_table + i);
> +             v_dest_table = svsub_x(pg, v_src_table, (uint64_t)ptr_base);
> +             v_dest_table = svlsr_x(pg, v_dest_table, bit_shift);
> +             svst1w(pg, &dest_table[i], v_dest_table);
> +             i += svcntd();
> +             pg = svwhilelt_b64(i, n);
> +     } while (svptest_any(svptrue_b64(), pg));
> +#elif defined __ARM_NEON
> +     uint64_t ptr_diff;
> +     uint64x2_t v_src_table;
> +     uint64x2_t v_dest_table;
> +     /* right shift is done by left shifting by negative int */
> +     int64x2_t v_shift = vdupq_n_s64(-bit_shift);
> +     uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
> +     for (; i < (n & ~0x1); i += 2) {
> +             v_src_table = vld1q_u64((const uint64_t *)src_table + i);
> +             v_dest_table = vsubq_u64(v_src_table, v_ptr_base);
> +             v_dest_table = vshlq_u64(v_dest_table, v_shift);
> +             vst1_u32(dest_table + i, vqmovn_u64(v_dest_table));
> +     }
> +     /* process leftover single item in case of odd number of n */
> +     if (unlikely(n & 0x1)) {
> +             ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
> +             dest_table[i] = (uint32_t) (ptr_diff >> bit_shift);
> +     }
> +#else
> +     uint64_t ptr_diff;
> +     for (; i < n; i++) {
> +             ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
> +             /* save extra bits that are redundant due to alignment */
> +             ptr_diff = ptr_diff >> bit_shift;
> +             /* make sure no truncation will happen when casting */
> +             RTE_ASSERT(ptr_diff <= UINT32_MAX);
> +             dest_table[i] = (uint32_t) ptr_diff;
> +     }
> +#endif
> +}

I see it is providing some per-CPU optimizations,
so it is in favor of having it in DPDK.
Other than that, it looks very generic, so it is questionable to have in DPDK.

Re: [RFC 1/2] eal: add pointer compression functions

Reply via email to