Hi Remy, I have a few questions inline.
? 
Regards,
++Keith Wiles

Intel Corporation







On 9/30/15, 6:04 AM, "dev on behalf of Remy Horton" <dev-bounces at dpdk.org on 
behalf of remy.horton at intel.com> wrote:

>Adds functions for detecting and reporting the live-ness of LCores,
>the primary requirement of which is minimal overheads for the
>core(s) being checked. Core failures are notified via an application
>defined callback.
>
>Signed-off-by: Remy Horton <remy.horton at intel.com>
>---
> lib/librte_eal/bsdapp/eal/Makefile            |   1 +
> lib/librte_eal/common/Makefile                |   2 +-
> lib/librte_eal/common/include/rte_keepalive.h | 140 ++++++++++++++++++++++++++
> lib/librte_eal/common/rte_keepalive.c         | 122 ++++++++++++++++++++++
> lib/librte_eal/linuxapp/eal/Makefile          |   1 +
> 5 files changed, 265 insertions(+), 1 deletion(-)
> create mode 100644 lib/librte_eal/common/include/rte_keepalive.h
> create mode 100644 lib/librte_eal/common/rte_keepalive.c
>
>diff --git a/lib/librte_eal/bsdapp/eal/Makefile 
>b/lib/librte_eal/bsdapp/eal/Makefile
>index a49dcec..65b293f 100644
>--- a/lib/librte_eal/bsdapp/eal/Makefile
>+++ b/lib/librte_eal/bsdapp/eal/Makefile
>@@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
>+SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c
> 
> CFLAGS_eal.o := -D_GNU_SOURCE
> #CFLAGS_eal_thread.o := -D_GNU_SOURCE
>diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
>index 0c43d6a..7f1757a 100644
>--- a/lib/librte_eal/common/Makefile
>+++ b/lib/librte_eal/common/Makefile
>@@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
> INC += rte_eal_memconfig.h rte_malloc_heap.h
> INC += rte_hexdump.h rte_devargs.h rte_dev.h
> INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
>-INC += rte_malloc.h
>+INC += rte_malloc.h rte_keepalive.h
> 
> ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
> INC += rte_warnings.h
>diff --git a/lib/librte_eal/common/include/rte_keepalive.h 
>b/lib/librte_eal/common/include/rte_keepalive.h
>new file mode 100644
>index 0000000..d67bf4b
>--- /dev/null
>+++ b/lib/librte_eal/common/include/rte_keepalive.h
>@@ -0,0 +1,140 @@
>+/*-
>+ *   BSD LICENSE
>+ *
>+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
>+ *
>+ *   Redistribution and use in source and binary forms, with or without
>+ *   modification, are permitted provided that the following conditions
>+ *   are met:
>+ *
>+ *     * Redistributions of source code must retain the above copyright
>+ *       notice, this list of conditions and the following disclaimer.
>+ *     * Redistributions in binary form must reproduce the above copyright
>+ *       notice, this list of conditions and the following disclaimer in
>+ *       the documentation and/or other materials provided with the
>+ *       distribution.
>+ *     * Neither the name of Intel Corporation nor the names of its
>+ *       contributors may be used to endorse or promote products derived
>+ *       from this software without specific prior written permission.
>+ *
>+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+ */
>+
>+/**
>+ * @file keepalive.h
>+ * DPDK RTE LCore Keepalive Monitor.
>+ *
>+ **/
>+
>+#ifndef _KEEPALIVE_H_
>+#define _KEEPALIVE_H_
>+
>+#include <rte_memory.h>
>+
>+#ifndef RTE_KEEPALIVE_MAXCORES
>+/**
>+ * Number of cores to track.
>+ * @note Must be larger than the highest core id. */
>+#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
>+#endif
>+
>+
>+/**
>+ * Keepalive failure callback.
>+ *
>+ *  Receives a data pointer passed to rte_keepalive_create() and the id of the
>+ *  failed core.
>+ */
>+typedef void (*rte_keepalive_failure_callback_t)(
>+      void *data,
>+      const int id_core);
>+
>+
>+/**
>+ * Keepalive state structure.
>+ * @internal
>+ */
>+struct rte_keepalive {
>+      /** Core Liveness. */
>+      uint32_t __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];

Normally I see __rte_cache_aligned at the end of the line, just before the ';'.
Did you have a reason to put it here? If not, I would move it to the end so it
looks the same as the others. I did a quick grep in the code and that is the
normal location.

My next question is why not align the whole structure, which would do the same
thing? I did not check the compiler output, but I was thinking it could possibly
leave gaps in the structure, i.e. bytes we cannot normally use, but maybe that
is not a problem.

Next, it appears state_flags is only ever set to 0-3, which means it does not
need to be a uint32_t, but could be a uint8_t, correct?

Also, the values 0-3 could be turned into an enum with real names, so that
debuggers can print the names instead of these numbers.

>+
>+      /** Last-seen-alive timestamps */
>+      uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
>+
>+      /**
>+       * Cores to check.
>+       * Indexed by core id, non-zero if the core should be checked.
>+       */
>+      uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];
>+
>+      /** Dead core handler. */
>+      rte_keepalive_failure_callback_t callback;
>+
>+      /**
>+       * Dead core handler app data.
>+       * Pointer is passed to dead core handler.
>+       */
>+      void *callback_data;
>+      uint64_t tsc_initial;
>+      uint64_t tsc_mhz;
>+};
>+
>+
>+/**
>+ * Initialise keepalive sub-system.
>+ * @param callback
>+ *   Function called upon detection of a dead core.
>+ * @param data
>+ *   Data pointer to be passed to function callback.
>+ * @return
>+ *   Keepalive structure success, NULL on failure.
>+ */
>+struct rte_keepalive *rte_keepalive_create(
>+      rte_keepalive_failure_callback_t callback,
>+      void *data);
>+
>+
>+/**
>+ * @param *ptr_timer Triggering timer (unused)
>+ * @param *ptr_data  Data pointer (keepalive structure)
>+ */
>+void rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
>+      void *ptr_data);

Normally I do not see the unused attribute on the declaration of a function.
Does this help the compiler at all? I do not believe it does, so I would remove
it from here, as you already have it in the function definition.
>+
>+
>+/**
>+ * Registers a core for keepalive checks.
>+ * @param *keepcfg
>+ *   Keepalive structure pointer
>+ * @param id_core
>+ *   ID number of core to register.
>+ */
>+void rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int 
>id_core);
>+
>+
>+/**
>+ * Per-core keepalive check.
>+ * @param *keepcfg
>+ *   Keepalive structure pointer
>+ *
>+ * This function needs to be called from within the main process loop of
>+ * the LCore to be checked.
>+ */
>+static inline void
>+rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
>+{
>+      keepcfg->state_flags[rte_lcore_id()] = 1;
>+}
>+
>+
>+#endif /* _KEEPALIVE_H_ */
>diff --git a/lib/librte_eal/common/rte_keepalive.c 
>b/lib/librte_eal/common/rte_keepalive.c
>new file mode 100644
>index 0000000..cbdd801
>--- /dev/null
>+++ b/lib/librte_eal/common/rte_keepalive.c
>@@ -0,0 +1,122 @@
>+/*-
>+ *   BSD LICENSE
>+ *
>+ *   Copyright 2015 Intel Shannon Ltd. All rights reserved.
>+ *
>+ *   Redistribution and use in source and binary forms, with or without
>+ *   modification, are permitted provided that the following conditions
>+ *   are met:
>+ *
>+ *     * Redistributions of source code must retain the above copyright
>+ *       notice, this list of conditions and the following disclaimer.
>+ *     * Redistributions in binary form must reproduce the above copyright
>+ *       notice, this list of conditions and the following disclaimer in
>+ *       the documentation and/or other materials provided with the
>+ *       distribution.
>+ *     * Neither the name of Intel Corporation nor the names of its
>+ *       contributors may be used to endorse or promote products derived
>+ *       from this software without specific prior written permission.
>+ *
>+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>+ */
>+
>+#include <inttypes.h>
>+
>+#include <rte_cycles.h>
>+#include <rte_lcore.h>
>+#include <rte_keepalive.h>
>+
>+#ifdef KEEPALIVE_DEBUG_MSGS
>+static void
>+print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
>+{
>+      printf("%sLast seen %" PRId64  "ms ago.\n",
>+              msg,
>+              ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
>+              / rte_get_tsc_hz()
>+            );
>+}
>+#else
>+static void
>+print_trace(__attribute__((unused)) const char *msg,
>+      __attribute__((unused)) struct rte_keepalive *keepcfg,
>+      __attribute__((unused)) int idx_core)
>+{
>+}
>+#endif
>+
>+
>+
>+void
>+rte_keepalive_dispatch_pings(__attribute__((unused)) void *ptr_timer,
>+      void *ptr_data)
>+{
>+      struct rte_keepalive *keepcfg = (struct rte_keepalive *)ptr_data;
>+      int idx_core;
>+
>+      for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) {
>+              if (keepcfg->active_cores[idx_core] == 0)
>+                      continue;
>+              switch (keepcfg->state_flags[idx_core]) {
>+              case 1: /* Alive */
>+                      keepcfg->state_flags[idx_core] = 0;
>+                      keepcfg->last_alive[idx_core] = rte_rdtsc();
>+                      break;
>+              case 0: /* MIA */
>+                      print_trace("Core MIA. ", keepcfg, idx_core);
>+                      keepcfg->state_flags[idx_core] = 2;
>+                      break;
>+              case 2: /* Dead */
>+                      keepcfg->state_flags[idx_core] = 3;
>+                      print_trace("Core died. ", keepcfg, idx_core);
>+                      if (keepcfg->callback)
>+                              keepcfg->callback(
>+                                      keepcfg->callback_data,
>+                                      idx_core
>+                                      );
>+                      break;
>+              case 3: /* Buried */
>+                      break;
>+              }
>+      }
>+}
>+
>+
>+struct rte_keepalive *
>+rte_keepalive_create(rte_keepalive_failure_callback_t callback,
>+      void *data)
>+{
>+      int idx_core;
>+      struct rte_keepalive *keepcfg;
>+
>+      keepcfg = malloc(sizeof(struct rte_keepalive));
>+      if (keepcfg != NULL) {
>+              for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; 
>idx_core++) {
>+                      keepcfg->state_flags[idx_core] = 0;
>+                      keepcfg->active_cores[idx_core] = 0;
>+              }

Could you use calloc() here? Then you would not need the for loop to zero things.
>+              keepcfg->callback = callback;
>+              keepcfg->callback_data = data;
>+              keepcfg->tsc_initial = rte_rdtsc();
>+              keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000;
>+      }
>+      return keepcfg;
>+}
>+
>+
>+void
>+rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
>+{
>+      if (id_core < RTE_KEEPALIVE_MAXCORES)
>+              keepcfg->active_cores[id_core] = 1;
>+}
>diff --git a/lib/librte_eal/linuxapp/eal/Makefile 
>b/lib/librte_eal/linuxapp/eal/Makefile
>index d62196e..05a44d7 100644
>--- a/lib/librte_eal/linuxapp/eal/Makefile
>+++ b/lib/librte_eal/linuxapp/eal/Makefile
>@@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
> SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
>+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c
> 
> CFLAGS_eal.o := -D_GNU_SOURCE
> CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
>-- 
>1.9.3
>

Reply via email to