Hi David,

I appreciate you giving me these helpful comments.

On Thu, Aug 25, 2022 at 21:56, David Marchand wrote:
This is only a first pass.

On Wed, Aug 24, 2022 at 10:31 AM Min Zhou <zhou...@loongson.cn> wrote:
diff --git a/MAINTAINERS b/MAINTAINERS
index 32ffdd1a61..f00b82b5ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -311,6 +311,12 @@ F: config/riscv/
  F: doc/guides/linux_gsg/cross_build_dpdk_for_riscv.rst
  F: lib/eal/riscv/

+LoongArch
+M: Min Zhou <zhou...@loongson.cn>
+F: config/loongarch/
+F: doc/guides/linux_gsg/cross_build_dpdk_for_loongarch.rst
+F: lib/eal/loongarch/
+
I tried to put entries in MAINTAINERS in a pseudo alphabetical order
(ignoring the vendor name).
We currently have: ARM, Power, RISC-V, X86.

As a consequence, the block for LoongArch should be moved between ARM,
and Power arches.

OK, thanks. I will place the block for LoongArch at the appropriate
location.


  Intel x86
  M: Bruce Richardson <bruce.richard...@intel.com>
  M: Konstantin Ananyev <konstantin.v.anan...@yandex.ru>
[snip]

diff --git a/config/loongarch/meson.build b/config/loongarch/meson.build
new file mode 100644
index 0000000000..e052fbad7f
--- /dev/null
+++ b/config/loongarch/meson.build
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Loongson Technology Corporation Limited
+
+if not dpdk_conf.get('RTE_ARCH_64')
+    error('Only 64-bit compiles are supported for this platform type')
+endif
+dpdk_conf.set('RTE_ARCH', 'loongarch')
+dpdk_conf.set('RTE_ARCH_LOONGARCH', 1)
+dpdk_conf.set('RTE_ARCH_NO_VECTOR', 1)
?
RTE_ARCH_NO_VECTOR is not used anywhere, please remove.

OK, thanks. I will remove it. This macro was applied to the patch set's
earlier iteration. I had already changed a few things to implement no vector
support, but I neglected to get rid of this definition.


+
+machine_args_generic = [
+    ['default', ['-march=loongarch64']],
+]
+
+flags_generic = [
+    ['RTE_MACHINE', '"loongarch64"'],
+    ['RTE_MAX_LCORE', 64],
+    ['RTE_MAX_NUMA_NODES', 16],
+    ['RTE_CACHE_LINE_SIZE', 64]]
+
+impl_generic = ['Generic loongarch', flags_generic, machine_args_generic]
+
+machine = []
+machine_args = []
+
+machine = impl_generic
+impl_pn = 'default'
+
+message('Implementer : ' + machine[0])
+foreach flag: machine[1]
+    if flag.length() > 0
+        dpdk_conf.set(flag[0], flag[1])
+    endif
+endforeach
+
+foreach marg: machine[2]
+    if marg[0] == impl_pn
+        foreach f: marg[1]
+           machine_args += f
+        endforeach
+    endif
+endforeach
+message(machine_args)
[snip]

diff --git a/doc/guides/linux_gsg/index.rst b/doc/guides/linux_gsg/index.rst
index 747552c385..c34966b241 100644
--- a/doc/guides/linux_gsg/index.rst
+++ b/doc/guides/linux_gsg/index.rst
@@ -15,6 +15,7 @@ Getting Started Guide for Linux
      build_dpdk
      cross_build_dpdk_for_arm64
      cross_build_dpdk_for_riscv
+    cross_build_dpdk_for_loongarch
In alphabetical order please.

OK, thanks. I will change it.


      linux_drivers
      build_sample_apps
      linux_eal_parameters
diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst
index 7f6cb914a5..8afa7ef7fd 100644
--- a/doc/guides/nics/features.rst
+++ b/doc/guides/nics/features.rst
@@ -848,6 +848,12 @@ rv64
  Support 64-bit RISC-V architecture.


+LoongArch64
+-----------
+
+Support 64-bit LoongArch architecture.
+
+
Idem.

OK, thanks. I will change it.


  x86-32
  ------

diff --git a/doc/guides/nics/features/default.ini 
b/doc/guides/nics/features/default.ini
index d1db0c256a..8bbc4600bd 100644
--- a/doc/guides/nics/features/default.ini
+++ b/doc/guides/nics/features/default.ini
@@ -73,6 +73,7 @@ ARMv7                =
  ARMv8                =
  Power8               =
  rv64                 =
+LoongArch64          =
Idem.

OK, thanks. I will change it.


  x86-32               =
  x86-64               =
  Usage doc            =
diff --git a/doc/guides/rel_notes/release_22_11.rst 
b/doc/guides/rel_notes/release_22_11.rst
index 8c021cf050..126160d683 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -55,6 +55,12 @@ New Features
       Also, make sure to start the actual text at the margin.
       =======================================================

+* **Added initial LoongArch architecture support.**
+
+  Added EAL implementation for LoongArch architecture. The initial devices
+  the porting was tested on included Loongson 3A5000, Loongson 3C5000 and
+  Loongson 3C5000L. In theory this implementation should work with any target
+  based on ``LoongArch`` ISA.
Sections in the release notes are separated with two empty lines.

OK, thanks. I will put an empty line once more above the subsequent part.

  Removed Items
  -------------
[snip]

diff --git a/lib/eal/loongarch/include/meson.build 
b/lib/eal/loongarch/include/meson.build
new file mode 100644
index 0000000000..d5699c5373
--- /dev/null
+++ b/lib/eal/loongarch/include/meson.build
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2022 Loongson Technology Corporation Limited
+
+arch_headers = files(
+       'rte_atomic.h',
+       'rte_byteorder.h',
+       'rte_cpuflags.h',
+       'rte_cycles.h',
+       'rte_io.h',
+       'rte_mcslock.h',
+       'rte_memcpy.h',
+       'rte_pause.h',
+       'rte_pflock.h',
+       'rte_power_intrinsics.h',
+       'rte_prefetch.h',
+       'rte_rwlock.h',
+       'rte_spinlock.h',
+       'rte_ticketlock.h',
+       'rte_vect.h',
+)
msclock, pflock and ticketlock are now non-arch specific headers.
They can be removed from the loongarch include directory.
See: e5e613f05b8c ("eal: remove unused arch-specific headers for locks")

OK, thanks. I will remove these files.


+install_headers(arch_headers, subdir: get_option('include_subdir_arch'))
diff --git a/lib/eal/loongarch/include/rte_atomic.h 
b/lib/eal/loongarch/include/rte_atomic.h
new file mode 100644
index 0000000000..8e007e7f76
--- /dev/null
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _RTE_ATOMIC_LOONGARCH_H_
+#define _RTE_ATOMIC_LOONGARCH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "generic/rte_atomic.h"
+
+/**
+ * LoongArch Synchronize
+ */
+static inline void synchronize(void)
This name is too generic.
Plus all memory barriers are implemented in the same way.

I suggest defining rte_mb() as this inline helper.

OK, thanks. I will change it as you suggest.


+{
+       __asm__ __volatile__("dbar 0":::"memory");
+}
+
+/**
+ * General memory barrier.
+ *
+ * Guarantees that the LOAD and STORE operations generated before the
+ * barrier occur before the LOAD and STORE operations generated after.
+ * This function is architecture dependent.
Those comments are copied from the generic header which is used for
doxygen, but you don't need them in the arch specific header.
Please remove.

OK, thanks. I will remove those comments.

+ */
+#define rte_mb() synchronize()
+
+/**
+ * Write memory barrier.
+ *
+ * Guarantees that the STORE operations generated before the barrier
+ * occur before the STORE operations generated after.
+ * This function is architecture dependent.
+ */
+#define rte_wmb() synchronize()
+
+/**
+ * Read memory barrier.
+ *
+ * Guarantees that the LOAD operations generated before the barrier
+ * occur before the LOAD operations generated after.
+ * This function is architecture dependent.
+ */
+#define rte_rmb() synchronize()
+
+#define rte_smp_mb() rte_mb()
+
+#define rte_smp_wmb() rte_mb()
+
+#define rte_smp_rmb() rte_mb()
+
+#define rte_io_mb() rte_mb()
+
+#define rte_io_wmb() rte_mb()
+
+#define rte_io_rmb() rte_mb()
+
+static __rte_always_inline void
+rte_atomic_thread_fence(int memorder)
+{
+       __atomic_thread_fence(memorder);
+}
+
+#ifndef RTE_FORCE_INTRINSICS
Unless I missed something, there is no loongarch specific
implementations when RTE_FORCE_INTRINSICS is unset.
What is the point of supporting the case where RTE_FORCE_INTRINSICS is
undefined?

If there is no need, force-set RTE_FORCE_INTRINSICS in config and then
update headers accordingly.

OK, thanks. In fact, there are no implementations specifically for LoongArch
when RTE_FORCE_INTRINSICS is undefined. I will force-set RTE_FORCE_INTRINSICS
in config and include generic implementations for LoongArch.


+/*------------------------- 16 bit atomic operations 
-------------------------*/
+static inline int
+rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
+{
+       return __sync_bool_compare_and_swap(dst, exp, src);
+}
+
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+#if defined(__clang__)
+       return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+       return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+
+static inline void
+rte_atomic16_inc(rte_atomic16_t *v)
+{
+       rte_atomic16_add(v, 1);
+}
+
+static inline void
+rte_atomic16_dec(rte_atomic16_t *v)
+{
+       rte_atomic16_sub(v, 1);
+}
+
+static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
+{
+       return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+
+static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
+{
+       return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+
+static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
+{
+       return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
+}
+
+/*------------------------- 32 bit atomic operations 
-------------------------*/
+static inline int
+rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
+{
+       return __sync_bool_compare_and_swap(dst, exp, src);
+}
+
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+#if defined(__clang__)
+       return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+       return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+
+static inline void
+rte_atomic32_inc(rte_atomic32_t *v)
+{
+       rte_atomic32_add(v, 1);
+}
+
+static inline void
+rte_atomic32_dec(rte_atomic32_t *v)
+{
+       rte_atomic32_sub(v, 1);
+}
+
+static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
+{
+       return __sync_add_and_fetch(&v->cnt, 1) == 0;
+}
+
+static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
+{
+       return __sync_sub_and_fetch(&v->cnt, 1) == 0;
+}
+
+static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
+{
+       return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
+}
+
+/*------------------------- 64 bit atomic operations 
-------------------------*/
+static inline int
+rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
+{
+       return __sync_bool_compare_and_swap(dst, exp, src);
+}
+
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+#if defined(__clang__)
+       return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+       return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+
+static inline void
+rte_atomic64_init(rte_atomic64_t *v)
+{
+       v->cnt = 0;
+}
+
+static inline int64_t
+rte_atomic64_read(rte_atomic64_t *v)
+{
+       return v->cnt;
+}
+
+static inline void
+rte_atomic64_set(rte_atomic64_t *v, int64_t new_value)
+{
+       v->cnt = new_value;
+}
+
+static inline void
+rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
+{
+       __sync_fetch_and_add(&v->cnt, inc);
+}
+
+static inline void
+rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
+{
+       __sync_fetch_and_sub(&v->cnt, dec);
+}
+
+static inline void
+rte_atomic64_inc(rte_atomic64_t *v)
+{
+       rte_atomic64_add(v, 1);
+}
+
+static inline void
+rte_atomic64_dec(rte_atomic64_t *v)
+{
+       rte_atomic64_sub(v, 1);
+}
+
+static inline int64_t
+rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
+{
+       return __sync_add_and_fetch(&v->cnt, inc);
+}
+
+static inline int64_t
+rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
+{
+       return __sync_sub_and_fetch(&v->cnt, dec);
+}
+
+static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v)
+{
+       return rte_atomic64_add_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v)
+{
+       return rte_atomic64_sub_return(v, 1) == 0;
+}
+
+static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
+{
+       return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
+}
+
+static inline void rte_atomic64_clear(rte_atomic64_t *v)
+{
+       rte_atomic64_set(v, 0);
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ATOMIC_LOONGARCH_H_ */
[snip]

diff --git a/lib/eal/loongarch/include/rte_cycles.h 
b/lib/eal/loongarch/include/rte_cycles.h
new file mode 100644
index 0000000000..1f8f957faf
--- /dev/null
+++ b/lib/eal/loongarch/include/rte_cycles.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _RTE_CYCLES_LOONGARCH_H_
+#define _RTE_CYCLES_LOONGARCH_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_cycles.h"
+
+static inline uint64_t
+get_cycle_count(void)
Same comment as earlier for memory barriers, this name is too generic.
Prefer rte_rdtsc() as the name for this helper.

OK, thanks. I will change it as you suggest.


+{
+       uint64_t count;
+
+       __asm__ __volatile__ (
+               "rdtime.d %[cycles], $zero\n"
+               : [cycles] "=r" (count)
+               ::
+               );
+       return count;
+}
+
+/**
+ * Read the time base register.
+ *
+ * @return
+ *   The time base for this lcore.
+ */
+static inline uint64_t
+rte_rdtsc(void)
+{
+       return get_cycle_count();
+}
+
+static inline uint64_t
+rte_rdtsc_precise(void)
+{
+       rte_mb();
+       return rte_rdtsc();
+}
+
+static inline uint64_t
+rte_get_tsc_cycles(void) { return rte_rdtsc(); }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CYCLES_LOONGARCH_H_ */
[snip]

diff --git a/lib/eal/loongarch/rte_cpuflags.c b/lib/eal/loongarch/rte_cpuflags.c
new file mode 100644
index 0000000000..4abcd0fdb3
--- /dev/null
+++ b/lib/eal/loongarch/rte_cpuflags.c
@@ -0,0 +1,94 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Loongson Technology Corporation Limited
+ */
+
+#include "rte_cpuflags.h"
+
+#include <elf.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+
+/* Symbolic values for the entries in the auxiliary table */
+#define AT_HWCAP  16
+#define AT_HWCAP2 26
AT_HWCAP2 is not used.

OK, thanks. I will remove it.


+
+/* software based registers */
+enum cpu_register_t {
+       REG_NONE = 0,
+       REG_HWCAP,
+       REG_MAX
+};
+
+typedef uint32_t hwcap_registers_t[REG_MAX];
+
+struct feature_entry {
+       uint32_t reg;
+       uint32_t bit;
+#define CPU_FLAG_NAME_MAX_LEN 64
+       char name[CPU_FLAG_NAME_MAX_LEN];
+};
+
+#define FEAT_DEF(name, reg, bit) \
+       [RTE_CPUFLAG_##name] = {reg, bit, #name},
+
+const struct feature_entry rte_cpu_feature_table[] = {
+       FEAT_DEF(CPUCFG,             REG_HWCAP,   0)
+       FEAT_DEF(LAM,                REG_HWCAP,   1)
+       FEAT_DEF(UAL,                REG_HWCAP,   2)
+       FEAT_DEF(FPU,                REG_HWCAP,   3)
+       FEAT_DEF(LSX,                REG_HWCAP,   4)
+       FEAT_DEF(LASX,               REG_HWCAP,   5)
+       FEAT_DEF(CRC32,              REG_HWCAP,   6)
+       FEAT_DEF(COMPLEX,            REG_HWCAP,   7)
+       FEAT_DEF(CRYPTO,             REG_HWCAP,   8)
+       FEAT_DEF(LVZ,                REG_HWCAP,   9)
+       FEAT_DEF(LBT_X86,            REG_HWCAP,  10)
+       FEAT_DEF(LBT_ARM,            REG_HWCAP,  11)
+       FEAT_DEF(LBT_MIPS,           REG_HWCAP,  12)
+};
+
+/*
+ * Read AUXV software register and get cpu features for LoongArch
+ */
+static void
+rte_cpu_get_features(hwcap_registers_t out)
+{
+       out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+}
+
+/*
+ * Checks if a particular flag is available on current machine.
+ */
+int
+rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature)
+{
+       const struct feature_entry *feat;
+       hwcap_registers_t regs = {0};
+
+       if (feature >= RTE_CPUFLAG_NUMFLAGS)
+               return -ENOENT;
+
+       feat = &rte_cpu_feature_table[feature];
+       if (feat->reg == REG_NONE)
+               return -EFAULT;
+
+       rte_cpu_get_features(regs);
+       return (regs[feat->reg] >> feat->bit) & 1;
+}
+
+const char *
+rte_cpu_get_flag_name(enum rte_cpu_flag_t feature)
+{
+       if (feature >= RTE_CPUFLAG_NUMFLAGS)
+               return NULL;
+       return rte_cpu_feature_table[feature].name;
+}
+
+void
+rte_cpu_get_intrinsics_support(struct rte_cpu_intrinsics *intrinsics)
+{
+       memset(intrinsics, 0, sizeof(*intrinsics));
+}
[snip]



Thanks,
Min Zhou

Reply via email to