Hi Jan, Thanks for the comments. Please see my responses inline. On Friday 28 April 2017 03:25 PM, Jan Viktorin wrote: > Hello Ashwin Sekhar, > > some comments below... > > On Thu, 27 Apr 2017 07:10:20 -0700 > Ashwin Sekhar T K <ashwin.sek...@caviumnetworks.com> wrote: > >> * Added CRC compute APIs for arm64 utilizing the pmull capability >> * Added new file net_crc_neon.h to hold the arm64 pmull CRC >> implementation >> * Added crypto capability in compilation of generic armv8 and >> thunderx targets >> * pmull CRC version is used only after checking the pmull capability >> at runtime >> * Verified the changes with crc_autotest unit test case >> >> Signed-off-by: Ashwin Sekhar T K <ashwin.sek...@caviumnetworks.com> >> --- >> MAINTAINERS | 1 + >> lib/librte_eal/common/include/arch/arm/rte_vect.h | 45 +++ >> lib/librte_net/net_crc_neon.h | 357 >> ++++++++++++++++++++++ >> lib/librte_net/rte_net_crc.c | 32 +- >> lib/librte_net/rte_net_crc.h | 2 + >> mk/machine/armv8a/rte.vars.mk | 2 +- >> mk/machine/thunderx/rte.vars.mk | 2 +- >> mk/rte.cpuflags.mk | 3 + >> mk/toolchain/gcc/rte.toolchain-compat.mk | 1 + >> 9 files changed, 438 insertions(+), 7 deletions(-) >> create mode 100644 lib/librte_net/net_crc_neon.h >> >> diff --git a/MAINTAINERS b/MAINTAINERS >> index 576d60a..283743e 100644 >> --- a/MAINTAINERS >> +++ b/MAINTAINERS >> @@ -149,6 +149,7 @@ F: lib/librte_lpm/rte_lpm_neon.h >> F: lib/librte_hash/rte*_arm64.h >> F: lib/librte_efd/rte*_arm64.h >> F: lib/librte_table/rte*_arm64.h >> +F: lib/librte_net/net_crc_neon.h >> F: drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c >> F: drivers/net/i40e/i40e_rxtx_vec_neon.c >> F: drivers/net/virtio/virtio_rxtx_simple_neon.c >> diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h >> b/lib/librte_eal/common/include/arch/arm/rte_vect.h >> index 4107c99..9a3dfdf 100644 >> --- a/lib/librte_eal/common/include/arch/arm/rte_vect.h >> +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h >> @@ -34,9 +34,18 @@ >> #define _RTE_VECT_ARM_H_ >> >> #include <stdint.h> >> +#include <assert.h> >> + >> #include "generic/rte_vect.h" >> #include "arm_neon.h" >> >> +#ifdef GCC_VERSION >> +#undef GCC_VERSION >> +#endif > > Why are you doing this? What is wrong with GCC_VERSION? > This is just to avoid multiple definitions of GCC_VERSION. Not required really. Can be removed.
>> + >> +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 \ >> + + __GNUC_PATCHLEVEL__) >> + > > If you have any specific requirements for testing GCC version then it > should be done in a more elegant way. However, I do not understand your > intention. > GCC version is checked so as to define wrappers for some neon intrinsics which are not available in GCC versions < 7. Similar checks of GCC_VERSION done in ./lib/librte_table/rte_lru.h. Followed the same template here. Also, this is the suggested approach by GCC. Please see below link. https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html Please advise on more elegant ways of gcc version detection. >> #ifdef __cplusplus >> extern "C" { >> #endif >> @@ -78,6 +87,42 @@ vqtbl1q_u8(uint8x16_t a, uint8x16_t b) >> } >> #endif >> >> +#if (GCC_VERSION < 70000) > > Is this code is gcc-specific? In such case there should be check for > GCC compiler. We can also build e.g. by clang. > Yes, the code is GCC specific. Currently there are only GCC targets for arm and arm64. So no checks are done for other types of compilers. >> +/* >> + * NEON intrinsic vreinterpretq_u64_p128() is not supported >> + * in GCC versions < 7 >> + */ > > I'd be positive about those comments, like: > > NEON intrinsic vreinterpretq_u64_p128() is supported since GCC 7. > Thanks. Will make the comments positive. >> +static inline uint64x2_t >> +vreinterpretq_u64_p128(poly128_t x) >> +{ >> + return (uint64x2_t)x; >> +} >> + >> +/* >> + * NEON intrinsic vreinterpretq_p64_u64() is not supported >> + * in GCC versions < 7 >> + */ >> +static inline poly64x2_t >> +vreinterpretq_p64_u64(uint64x2_t x) >> +{ >> + return (poly64x2_t)x; >> +} >> + >> +/* >> + * NEON intrinsic vgetq_lane_p64() is not supported >> + * in GCC versions < 7 >> + */ >> +static inline poly64_t >> +vgetq_lane_p64(poly64x2_t x, const int lane) >> +{ >> + assert(lane >= 0 && lane <= 1); >> + >> + poly64_t *p = (poly64_t *)&x; >> + >> + return p[lane]; >> +} >> +#endif >> + >> #ifdef __cplusplus >> } >> #endif >> diff --git a/lib/librte_net/net_crc_neon.h b/lib/librte_net/net_crc_neon.h > > [...] > >> # CPU_LDFLAGS = >> # CPU_ASFLAGS = >> >> -MACHINE_CFLAGS += -march=armv8-a+crc >> +MACHINE_CFLAGS += -march=armv8-a+crc+crypto >> diff --git a/mk/machine/thunderx/rte.vars.mk >> b/mk/machine/thunderx/rte.vars.mk >> index ad5a379..6784105 100644 >> --- a/mk/machine/thunderx/rte.vars.mk >> +++ b/mk/machine/thunderx/rte.vars.mk >> @@ -55,4 +55,4 @@ >> # CPU_LDFLAGS = >> # CPU_ASFLAGS = >> >> -MACHINE_CFLAGS += -march=armv8-a+crc -mcpu=thunderx >> +MACHINE_CFLAGS += -march=armv8-a+crc+crypto -mcpu=thunderx >> diff --git a/mk/rte.cpuflags.mk b/mk/rte.cpuflags.mk >> index e634abc..6bbd742 100644 >> --- a/mk/rte.cpuflags.mk >> +++ b/mk/rte.cpuflags.mk >> @@ -119,6 +119,9 @@ ifneq ($(filter $(AUTO_CPUFLAGS),__ARM_FEATURE_CRC32),) >> CPUFLAGS += CRC32 >> endif >> >> +ifneq ($(filter $(AUTO_CPUFLAGS),__ARM_FEATURE_CRYPTO),) >> +CPUFLAGS += PMULL >> +endif >> >> MACHINE_CFLAGS += $(addprefix -DRTE_MACHINE_CPUFLAG_,$(CPUFLAGS)) >> >> diff --git a/mk/toolchain/gcc/rte.toolchain-compat.mk >> b/mk/toolchain/gcc/rte.toolchain-compat.mk >> index 280dde2..01ac7e2 100644 >> --- a/mk/toolchain/gcc/rte.toolchain-compat.mk >> +++ b/mk/toolchain/gcc/rte.toolchain-compat.mk >> @@ -60,6 +60,7 @@ else >> # >> ifeq ($(shell test $(GCC_VERSION) -le 49 && echo 1), 1) >> MACHINE_CFLAGS := $(patsubst >> -march=armv8-a+crc,-march=armv8-a+crc >> -D__ARM_FEATURE_CRC32=1,$(MACHINE_CFLAGS)) > > The line above is to be dropped, isn't it? > No. It is not to be dropped. For targets like xgene1, crypto is not defined. Above line is required for the substitution to happen in such targets. >> + MACHINE_CFLAGS := $(patsubst >> -march=armv8-a+crc+crypto,-march=armv8-a+crc+crypto >> -D__ARM_FEATURE_CRC32=1,$(MACHINE_CFLAGS)) > > Please, split the "feature-detection" changes into a separate commit and > explain it. In the code, you test for GCC 7. Here you are ok with GCC > 4.9. It's likely to be correct but it is not clear. Sure. Will split the feature detection changes to separate commit. > > Also, please explain why is the "crypto" feature required. crypto feature is required for using the vmull_p64 intrinsic. More specifically the PMULL instruction. Will add this as part of the commit message. > > Regards > Jan > >> endif >> ifeq ($(shell test $(GCC_VERSION) -le 47 && echo 1), 1) >> MACHINE_CFLAGS := $(patsubst >> -march=core-avx-i,-march=corei7-avx,$(MACHINE_CFLAGS)) > Thanks and Regards, Ashwin