This is only a first pass. On Wed, Aug 24, 2022 at 10:31 AM Min Zhou <zhou...@loongson.cn> wrote: > diff --git a/MAINTAINERS b/MAINTAINERS > index 32ffdd1a61..f00b82b5ce 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -311,6 +311,12 @@ F: config/riscv/ > F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst > F: lib/eal/riscv/ > > +LoongArch > +M: Min Zhou <zhou...@loongson.cn> > +F: config/loongarch/ > +F: doc/guides/linux_gsg/cross_build_dpdk_for_loongarch.rst > +F: lib/eal/loongarch/ > +
I tried to put entries in MAINTAINERS in a pseudo alphabetical order (ignoring the vendor name). We currently have: ARM, Power, RISC-V, X86. As a consequence, the block for LoongArch should be moved between ARM, and Power arches. > Intel x86 > M: Bruce Richardson <bruce.richard...@intel.com> > M: Konstantin Ananyev <konstantin.v.anan...@yandex.ru> [snip] > diff --git a/config/loongarch/meson.build b/config/loongarch/meson.build > new file mode 100644 > index 0000000000..e052fbad7f > --- /dev/null > +++ b/config/loongarch/meson.build > @@ -0,0 +1,43 @@ > +# SPDX-License-Identifier: BSD-3-Clause > +# Copyright(c) 2022 Loongson Technology Corporation Limited > + > +if not dpdk_conf.get('RTE_ARCH_64') > + error('Only 64-bit compiles are supported for this platform type') > +endif > +dpdk_conf.set('RTE_ARCH', 'loongarch') > +dpdk_conf.set('RTE_ARCH_LOONGARCH', 1) > +dpdk_conf.set('RTE_ARCH_NO_VECTOR', 1) ? RTE_ARCH_NO_VECTOR is not used anywhere, please remove. > + > +machine_args_generic = [ > + ['default', ['-march=loongarch64']], > +] > + > +flags_generic = [ > + ['RTE_MACHINE', '"loongarch64"'], > + ['RTE_MAX_LCORE', 64], > + ['RTE_MAX_NUMA_NODES', 16], > + ['RTE_CACHE_LINE_SIZE', 64]] > + > +impl_generic = ['Generic loongarch', flags_generic, machine_args_generic] > + > +machine = [] > +machine_args = [] > + > +machine = impl_generic > +impl_pn = 'default' > + > +message('Implementer : ' + machine[0]) > +foreach flag: machine[1] > + if flag.length() > 0 > + dpdk_conf.set(flag[0], flag[1]) > + endif > +endforeach > + > +foreach marg: machine[2] > + if marg[0] == impl_pn > + foreach f: marg[1] > + machine_args += f > + endforeach > + endif > +endforeach > +message(machine_args) [snip] > diff --git a/doc/guides/linux_gsg/index.rst b/doc/guides/linux_gsg/index.rst > index 747552c385..c34966b241 100644 > --- a/doc/guides/linux_gsg/index.rst > +++ b/doc/guides/linux_gsg/index.rst > @@ -15,6 +15,7 @@ Getting Started Guide for Linux > build_dpdk > cross_build_dpdk_for_arm64 > cross_build_dpdk_for_riscv > + cross_build_dpdk_for_loongarch In alphabetical order please. > linux_drivers > build_sample_apps > linux_eal_parameters > diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst > index 7f6cb914a5..8afa7ef7fd 100644 > --- a/doc/guides/nics/features.rst > +++ b/doc/guides/nics/features.rst > @@ -848,6 +848,12 @@ rv64 > Support 64-bit RISC-V architecture. > > > +LoongArch64 > +----------- > + > +Support 64-bit LoongArch architecture. > + > + Idem. > x86-32 > ------ > > diff --git a/doc/guides/nics/features/default.ini > b/doc/guides/nics/features/default.ini > index d1db0c256a..8bbc4600bd 100644 > --- a/doc/guides/nics/features/default.ini > +++ b/doc/guides/nics/features/default.ini > @@ -73,6 +73,7 @@ ARMv7 = > ARMv8 = > Power8 = > rv64 = > +LoongArch64 = Idem. > x86-32 = > x86-64 = > Usage doc = > diff --git a/doc/guides/rel_notes/release_22_11.rst > b/doc/guides/rel_notes/release_22_11.rst > index 8c021cf050..126160d683 100644 > --- a/doc/guides/rel_notes/release_22_11.rst > +++ b/doc/guides/rel_notes/release_22_11.rst > @@ -55,6 +55,12 @@ New Features > Also, make sure to start the actual text at the margin. > ======================================================= > > +* **Added initial LoongArch architecture support.** > + > + Added EAL implementation for LoongArch architecture. The initial devices > + the porting was tested on included Loongson 3A5000, Loongson 3C5000 and > + Loongson 3C5000L. In theory this implementation should work with any target > + based on ``LoongArch`` ISA. Sections in the release notes are separated with two empty lines. > > Removed Items > ------------- [snip] > diff --git a/lib/eal/loongarch/include/meson.build > b/lib/eal/loongarch/include/meson.build > new file mode 100644 > index 0000000000..d5699c5373 > --- /dev/null > +++ b/lib/eal/loongarch/include/meson.build > @@ -0,0 +1,21 @@ > +# SPDX-License-Identifier: BSD-3-Clause > +# Copyright(c) 2022 Loongson Technology Corporation Limited > + > +arch_headers = files( > + 'rte_atomic.h', > + 'rte_byteorder.h', > + 'rte_cpuflags.h', > + 'rte_cycles.h', > + 'rte_io.h', > + 'rte_mcslock.h', > + 'rte_memcpy.h', > + 'rte_pause.h', > + 'rte_pflock.h', > + 'rte_power_intrinsics.h', > + 'rte_prefetch.h', > + 'rte_rwlock.h', > + 'rte_spinlock.h', > + 'rte_ticketlock.h', > + 'rte_vect.h', > +) msclock, pflock and ticketlock are now non-arch specific headers. They can be removed from the loongarch include directory. See: e5e613f05b8c ("eal: remove unused arch-specific headers for locks") > +install_headers(arch_headers, subdir: get_option('include_subdir_arch')) > diff --git a/lib/eal/loongarch/include/rte_atomic.h > b/lib/eal/loongarch/include/rte_atomic.h > new file mode 100644 > index 0000000000..8e007e7f76 > --- /dev/null > +++ b/lib/eal/loongarch/include/rte_atomic.h > @@ -0,0 +1,253 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2022 Loongson Technology Corporation Limited > + */ > + > +#ifndef _RTE_ATOMIC_LOONGARCH_H_ > +#define _RTE_ATOMIC_LOONGARCH_H_ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include <stdint.h> > +#include "generic/rte_atomic.h" > + > +/** > + * LoongArch Synchronize > + */ > +static inline void synchronize(void) This name is too generic. Plus all memory barriers are implemented in the same way. I suggest defining rte_mb() as this inline helper. > +{ > + __asm__ __volatile__("dbar 0":::"memory"); > +} > + > +/** > + * General memory barrier. > + * > + * Guarantees that the LOAD and STORE operations generated before the > + * barrier occur before the LOAD and STORE operations generated after. > + * This function is architecture dependent. Those comments are copied from the generic header which is used for doxygen, but you don't need them in the arch specific header. Please remove. > + */ > +#define rte_mb() synchronize() > + > +/** > + * Write memory barrier. > + * > + * Guarantees that the STORE operations generated before the barrier > + * occur before the STORE operations generated after. > + * This function is architecture dependent. > + */ > +#define rte_wmb() synchronize() > + > +/** > + * Read memory barrier. > + * > + * Guarantees that the LOAD operations generated before the barrier > + * occur before the LOAD operations generated after. > + * This function is architecture dependent. > + */ > +#define rte_rmb() synchronize() > + > +#define rte_smp_mb() rte_mb() > + > +#define rte_smp_wmb() rte_mb() > + > +#define rte_smp_rmb() rte_mb() > + > +#define rte_io_mb() rte_mb() > + > +#define rte_io_wmb() rte_mb() > + > +#define rte_io_rmb() rte_mb() > + > +static __rte_always_inline void > +rte_atomic_thread_fence(int memorder) > +{ > + __atomic_thread_fence(memorder); > +} > + > +#ifndef RTE_FORCE_INTRINSICS Unless I missed something, there is no loongarch specific implementations when RTE_FORCE_INTRINSICS is unset. What is the point of supporting the case where RTE_FORCE_INTRINSICS is undefined? If there is no need, force-set RTE_FORCE_INTRINSICS in config and then update headers accordingly. > +/*------------------------- 16 bit atomic operations > -------------------------*/ > +static inline int > +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) > +{ > + return __sync_bool_compare_and_swap(dst, exp, src); > +} > + > +static inline uint16_t > +rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val) > +{ > +#if defined(__clang__) > + return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); > +#else > + return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST); > +#endif > +} > + > +static inline void > +rte_atomic16_inc(rte_atomic16_t *v) > +{ > + rte_atomic16_add(v, 1); > +} > + > +static inline void > +rte_atomic16_dec(rte_atomic16_t *v) > +{ > + rte_atomic16_sub(v, 1); > +} > + > +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) > +{ > + return __sync_add_and_fetch(&v->cnt, 1) == 0; > +} > + > +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) > +{ > + return __sync_sub_and_fetch(&v->cnt, 1) == 0; > +} > + > +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) > +{ > + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); > +} > + > +/*------------------------- 32 bit atomic operations > -------------------------*/ > +static inline int > +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) > +{ > + return __sync_bool_compare_and_swap(dst, exp, src); > +} > + > +static inline uint32_t > +rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val) > +{ > +#if defined(__clang__) > + return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); > +#else > + return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST); > +#endif > +} > + > +static inline void > +rte_atomic32_inc(rte_atomic32_t *v) > +{ > + rte_atomic32_add(v, 1); > +} > + > +static inline void > +rte_atomic32_dec(rte_atomic32_t *v) > +{ > + rte_atomic32_sub(v, 1); > +} > + > +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) > +{ > + return __sync_add_and_fetch(&v->cnt, 1) == 0; > +} > + > +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) > +{ > + return __sync_sub_and_fetch(&v->cnt, 1) == 0; > +} > + > +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) > +{ > + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); > +} > + > +/*------------------------- 64 bit atomic operations > -------------------------*/ > +static inline int > +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) > +{ > + return __sync_bool_compare_and_swap(dst, exp, src); > +} > + > +static inline uint64_t > +rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val) > +{ > +#if defined(__clang__) > + return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); > +#else > + return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST); > +#endif > +} > + > +static inline void > +rte_atomic64_init(rte_atomic64_t *v) > +{ > + v->cnt = 0; > +} > + > +static inline int64_t > +rte_atomic64_read(rte_atomic64_t *v) > +{ > + return v->cnt; > +} > + > +static inline void > +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) > +{ > + v->cnt = new_value; > +} > + > +static inline void > +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) > +{ > + __sync_fetch_and_add(&v->cnt, inc); > +} > + > +static inline void > +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) > +{ > + __sync_fetch_and_sub(&v->cnt, dec); > +} > + > +static inline void > +rte_atomic64_inc(rte_atomic64_t *v) > +{ > + rte_atomic64_add(v, 1); > +} > + > +static inline void > +rte_atomic64_dec(rte_atomic64_t *v) > +{ > + rte_atomic64_sub(v, 1); > +} > + > +static inline int64_t > +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) > +{ > + return __sync_add_and_fetch(&v->cnt, inc); > +} > + > +static inline int64_t > +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) > +{ > + return __sync_sub_and_fetch(&v->cnt, dec); > +} > + > +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) > +{ > + return rte_atomic64_add_return(v, 1) == 0; > +} > + > +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) > +{ > + return rte_atomic64_sub_return(v, 1) == 0; > +} > + > +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) > +{ > + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); > +} > + > +static inline void rte_atomic64_clear(rte_atomic64_t *v) > +{ > + rte_atomic64_set(v, 0); > +} > +#endif > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_ATOMIC_LOONGARCH_H_ */ [snip] > diff --git a/lib/eal/loongarch/include/rte_cycles.h > b/lib/eal/loongarch/include/rte_cycles.h > new file mode 100644 > index 0000000000..1f8f957faf > --- /dev/null > +++ b/lib/eal/loongarch/include/rte_cycles.h > @@ -0,0 +1,53 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2022 Loongson Technology Corporation Limited > + */ > + > +#ifndef _RTE_CYCLES_LOONGARCH_H_ > +#define _RTE_CYCLES_LOONGARCH_H_ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include "generic/rte_cycles.h" > + > +static inline uint64_t > +get_cycle_count(void) Same comment as earlier for memory barriers, this name is too generic. Prefer rte_rdtsc() as the name for this helper. > +{ > + uint64_t count; > + > + __asm__ __volatile__ ( > + "rdtime.d %[cycles], $zero\n" > + : [cycles] "=r" (count) > + :: > + ); > + return count; > +} > + > +/** > + * Read the time base register. > + * > + * @return > + * The time base for this lcore. > + */ > +static inline uint64_t > +rte_rdtsc(void) > +{ > + return get_cycle_count(); > +} > + > +static inline uint64_t > +rte_rdtsc_precise(void) > +{ > + rte_mb(); > + return rte_rdtsc(); > +} > + > +static inline uint64_t > +rte_get_tsc_cycles(void) { return rte_rdtsc(); } > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* _RTE_CYCLES_LOONGARCH_H_ */ [snip] > diff --git a/lib/eal/loongarch/rte_cpuflags.c > b/lib/eal/loongarch/rte_cpuflags.c > new file mode 100644 > index 0000000000..4abcd0fdb3 > --- /dev/null > +++ b/lib/eal/loongarch/rte_cpuflags.c > @@ -0,0 +1,94 @@ > +/* > + * SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2022 Loongson Technology Corporation Limited > + */ > + > +#include "rte_cpuflags.h" > + > +#include <elf.h> > +#include <fcntl.h> > +#include <assert.h> > +#include <unistd.h> > +#include <string.h> > + > +/* Symbolic values for the entries in the auxiliary table */ > +#define AT_HWCAP 16 > +#define AT_HWCAP2 26 AT_HWCAP2 is not used. > + > +/* software based registers */ > +enum cpu_register_t { > + REG_NONE = 0, > + REG_HWCAP, > + REG_MAX > +}; > + > +typedef uint32_t hwcap_registers_t[REG_MAX]; > + > +struct feature_entry { > + uint32_t reg; > + uint32_t bit; > +#define CPU_FLAG_NAME_MAX_LEN 64 > + char name[CPU_FLAG_NAME_MAX_LEN]; > +}; > + > +#define FEAT_DEF(name, reg, bit) \ > + [RTE_CPUFLAG_##name] = {reg, bit, #name}, > + > +const struct feature_entry rte_cpu_feature_table[] = { > + FEAT_DEF(CPUCFG, REG_HWCAP, 0) > + FEAT_DEF(LAM, REG_HWCAP, 1) > + FEAT_DEF(UAL, REG_HWCAP, 2) > + FEAT_DEF(FPU, REG_HWCAP, 3) > + FEAT_DEF(LSX, REG_HWCAP, 4) > + FEAT_DEF(LASX, REG_HWCAP, 5) > + FEAT_DEF(CRC32, REG_HWCAP, 6) > + FEAT_DEF(COMPLEX, REG_HWCAP, 7) > + FEAT_DEF(CRYPTO, REG_HWCAP, 8) > + FEAT_DEF(LVZ, REG_HWCAP, 9) > + FEAT_DEF(LBT_X86, REG_HWCAP, 10) > + FEAT_DEF(LBT_ARM, REG_HWCAP, 11) > + FEAT_DEF(LBT_MIPS, REG_HWCAP, 12) > +}; > + > +/* > + * Read AUXV software register and get cpu features for LoongArch > + */ > +static void > +rte_cpu_get_features(hwcap_registers_t out) > +{ > + out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP); > +} > + > +/* > + * Checks if a particular flag is available on current machine. > + */ > +int > +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) > +{ > + const struct feature_entry *feat; > + hwcap_registers_t regs = {0}; > + > + if (feature >= RTE_CPUFLAG_NUMFLAGS) > + return -ENOENT; > + > + feat = &rte_cpu_feature_table[feature]; > + if (feat->reg == REG_NONE) > + return -EFAULT; > + > + rte_cpu_get_features(regs); > + return (regs[feat->reg] >> feat->bit) & 1; > +} > + > +const char * > +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) > +{ > + if (feature >= RTE_CPUFLAG_NUMFLAGS) > + return NULL; > + return rte_cpu_feature_table[feature].name; > +} > + > +void > +rte_cpu_get_intrinsics_support(struct rte_cpu_intrinsics *intrinsics) > +{ > + memset(intrinsics, 0, sizeof(*intrinsics)); > +} [snip] -- David Marchand