On 2024-09-19 15:13, Pavan Nikhilesh Bhagavatula wrote:
From: pbhagavat...@marvell.com <pbhagavat...@marvell.com>
Sent: Tuesday, September 17, 2024 3:11 AM
To: jer...@marvell.com; sthot...@marvell.com; Sevincer, Abdullah
<abdullah.sevin...@intel.com>; hemant.agra...@nxp.com;
sachin.sax...@oss.nxp.com; Van Haaren, Harry
<harry.van.haa...@intel.com>;
mattias.ronnb...@ericsson.com; lian...@liangbit.com; Mccarthy, Peter
<peter.mccar...@intel.com>
Cc: dev@dpdk.org; Pavan Nikhilesh <pbhagavat...@marvell.com>
Subject: [PATCH v2 1/3] eventdev: introduce event pre-scheduling

From: Pavan Nikhilesh <pbhagavat...@marvell.com>

Event pre-scheduling improves scheduling performance by assigning events
to event ports in advance when dequeues are issued.
The dequeue operation initiates the pre-schedule operation, which completes
in parallel without affecting the dequeued event flow contexts and dequeue
latency.

Is the prescheduling done to get the event more quickly in the next dequeue?
The first dequeue executes pre-schedule to make events available for the next
dequeue.
Is this how it is supposed to work?


Yes, that is correct.
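
To make the mechanism concrete, a rough illustration (not taken from the patch;
dev_id and port_id are assumed to be set up elsewhere, and the device configured
with RTE_EVENT_DEV_PRESCHEDULE):

    #include <rte_eventdev.h>

    static void
    preschedule_illustration(uint8_t dev_id, uint8_t port_id)
    {
            struct rte_event ev;
            uint16_t nb;

            /* Each dequeue kicks off a pre-schedule for the *next* dequeue.
             * The pre-schedule completes in parallel and does not affect the
             * flow contexts or latency of this call.
             */
            nb = rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0);

            /* This dequeue returns the event(s) pre-scheduled during the
             * previous call and initiates pre-scheduling again.
             */
            nb = rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0);
            (void)nb;
    }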


"improves scheduling performance" may be a bit misleading, in that case. I suggest "reduces scheduling overhead" instead. You can argue it likely reduces scheduling performance, in certain scenarios. "reduces scheduling overhead, at the cost of load balancing performance."

It seems to me that this should be a simple hint-type API, where the hint is used by the event device to decide if pre-scheduling should be used or not (assuming pre-scheduling on/off is even an option). The hint would just be a way for the application to express whether it wants the scheduler to prioritize load balancing agility and port-to-port wall-time latency, or low scheduling overhead - which in turn could be rephrased as the app being throughput- or latency/RT-oriented.
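
Just to make that alternative concrete, a purely hypothetical sketch (none of
these names exist in eventdev or in this patch):

    /* Hypothetical hint, set once at configuration time; the PMD would then
     * decide by itself whether to pre-schedule, and how aggressively.
     */
    enum rte_event_dev_sched_hint {
            /* Prioritize load balancing agility and port-to-port latency. */
            RTE_EVENT_DEV_SCHED_HINT_LATENCY,
            /* Prioritize low scheduling overhead (throughput-oriented). */
            RTE_EVENT_DEV_SCHED_HINT_THROUGHPUT,
    };

    dev_conf.sched_hint = RTE_EVENT_DEV_SCHED_HINT_THROUGHPUT; /* hypothetical field */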

It could also be useful for the event device to know which priority levels are to be considered latency-sensitive, and which are throughput-oriented - maybe in the form of a threshold.

Event devices can indicate pre-scheduling capabilities using
`RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE` and
`RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE_ADAPTIVE` in the `event_dev_cap` field
reported by the event device info function.

Applications can select the pre-schedule type and configure it through
`rte_event_dev_config.preschedule_type` during `rte_event_dev_configure`.

The supported pre-schedule types are:
  * `RTE_EVENT_DEV_PRESCHEDULE_NONE` - No pre-scheduling.
  * `RTE_EVENT_DEV_PRESCHEDULE` - Always issue a pre-schedule on dequeue.
  * `RTE_EVENT_DEV_PRESCHEDULE_ADAPTIVE` - Delay issuing pre-schedule until
    there are no forward progress constraints with the held flow contexts.
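
For reference, a minimal configuration sketch based on the test case in this
patch (error handling and the rest of the configuration are omitted; dev_id
refers to an event device that has not been started yet):

    struct rte_event_dev_config dev_conf;
    struct rte_event_dev_info info;

    rte_event_dev_info_get(dev_id, &info);

    /* Fill in dev_conf (nb_event_queues, nb_event_ports, ...) as usual, then
     * opt in to pre-scheduling if the device supports it.
     */
    if (info.event_dev_cap & RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE)
            dev_conf.preschedule_type = RTE_EVENT_DEV_PRESCHEDULE;
    else
            dev_conf.preschedule_type = RTE_EVENT_DEV_PRESCHEDULE_NONE;

    rte_event_dev_configure(dev_id, &dev_conf);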

Signed-off-by: Pavan Nikhilesh <pbhagavat...@marvell.com>
---
  app/test/test_eventdev.c                    | 63 +++++++++++++++++++++
  doc/guides/prog_guide/eventdev/eventdev.rst | 22 +++++++
  lib/eventdev/rte_eventdev.h                 | 48 ++++++++++++++++
  3 files changed, 133 insertions(+)

diff --git a/app/test/test_eventdev.c b/app/test/test_eventdev.c
index e4e234dc98..cf496ee88d 100644
--- a/app/test/test_eventdev.c
+++ b/app/test/test_eventdev.c
@@ -1250,6 +1250,67 @@ test_eventdev_profile_switch(void)
        return TEST_SUCCESS;
  }

+static int
+preschedule_test(rte_event_dev_preschedule_type_t preschedule_type, const char *preschedule_name)
+{
+#define NB_EVENTS     1024
+       uint64_t start, total;
+       struct rte_event ev;
+       int rc, cnt;
+
+       ev.event_type = RTE_EVENT_TYPE_CPU;
+       ev.queue_id = 0;
+       ev.op = RTE_EVENT_OP_NEW;
+       ev.u64 = 0xBADF00D0;
+
+       for (cnt = 0; cnt < NB_EVENTS; cnt++) {
+               ev.flow_id = cnt;
+               rc = rte_event_enqueue_burst(TEST_DEV_ID, 0, &ev, 1);
+               TEST_ASSERT(rc == 1, "Failed to enqueue event");
+       }
+
+       RTE_SET_USED(preschedule_type);
+       total = 0;
+       while (cnt) {
+               start = rte_rdtsc_precise();
+               rc = rte_event_dequeue_burst(TEST_DEV_ID, 0, &ev, 1, 0);
+               if (rc) {
+                       total += rte_rdtsc_precise() - start;
+                       cnt--;
+               }
+       }
+       printf("Preschedule type : %s, avg cycles %" PRIu64 "\n", preschedule_name,
+              total / NB_EVENTS);
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_eventdev_preschedule_configure(void)
+{
+       struct rte_event_dev_config dev_conf;
+       struct rte_event_dev_info info;
+       int rc;
+
+       rte_event_dev_info_get(TEST_DEV_ID, &info);
+
+       if ((info.event_dev_cap & RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE) == 0)
+               return TEST_SKIPPED;
+
+       devconf_set_default_sane_values(&dev_conf, &info);
+       dev_conf.preschedule_type = RTE_EVENT_DEV_PRESCHEDULE;
+       rc = rte_event_dev_configure(TEST_DEV_ID, &dev_conf);
+       TEST_ASSERT_SUCCESS(rc, "Failed to configure eventdev");
+
+       rc = preschedule_test(RTE_EVENT_DEV_PRESCHEDULE_NONE, "RTE_EVENT_DEV_PRESCHEDULE_NONE");
+       rc |= preschedule_test(RTE_EVENT_DEV_PRESCHEDULE, "RTE_EVENT_DEV_PRESCHEDULE");
+       if (info.event_dev_cap & RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE_ADAPTIVE)
+               rc |= preschedule_test(RTE_EVENT_DEV_PRESCHEDULE_ADAPTIVE,
+                                      "RTE_EVENT_DEV_PRESCHEDULE_ADAPTIVE");
+
+       return rc;
+}
+
  static int
  test_eventdev_close(void)
  {
@@ -1310,6 +1371,8 @@ static struct unit_test_suite eventdev_common_testsuite  = {
                        test_eventdev_start_stop),
                TEST_CASE_ST(eventdev_configure_setup, eventdev_stop_device,
                        test_eventdev_profile_switch),
+               TEST_CASE_ST(eventdev_configure_setup, NULL,
+                       test_eventdev_preschedule_configure),
                TEST_CASE_ST(eventdev_setup_device, eventdev_stop_device,
                        test_eventdev_link),
                TEST_CASE_ST(eventdev_setup_device, eventdev_stop_device,
diff --git a/doc/guides/prog_guide/eventdev/eventdev.rst b/doc/guides/prog_guide/eventdev/eventdev.rst
index fb6dfce102..341b9bb2c6 100644
--- a/doc/guides/prog_guide/eventdev/eventdev.rst
+++ b/doc/guides/prog_guide/eventdev/eventdev.rst
@@ -357,6 +357,28 @@ Worker path:
         // Process the event received.
     }

+Event Pre-scheduling
+~~~~~~~~~~~~~~~~~~~~
+
+Event pre-scheduling improves scheduling performance by assigning events
+to event ports in advance when dequeues are issued.
+The `rte_event_dequeue_burst` operation initiates the pre-schedule operation,
+which completes in parallel without affecting the dequeued event flow contexts
+and dequeue latency.
+On the next dequeue operation, the pre-scheduled events are dequeued
+and pre-schedule is initiated again.
+
+An application can use event pre-scheduling if the event device supports it
+at either the device level or at an individual port level.
+The application can check pre-schedule capability by checking if
+``rte_event_dev_info.event_dev_cap`` has the bit
+``RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE`` set. If present, pre-scheduling can be
+enabled at device configuration time by setting the appropriate pre-schedule
+type in ``rte_event_dev_config.preschedule_type``.
+
+Currently, the following pre-schedule types are supported:
+ * ``RTE_EVENT_DEV_PRESCHEDULE_NONE`` - No pre-scheduling.
+ * ``RTE_EVENT_DEV_PRESCHEDULE`` - Always issue a pre-schedule when dequeue is issued.
+ * ``RTE_EVENT_DEV_PRESCHEDULE_ADAPTIVE`` - Issue pre-schedule when dequeue is issued and there are
+   no forward progress constraints.
+
+
  Starting the EventDev
  ~~~~~~~~~~~~~~~~~~~~~

diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..5ea7f5a07b 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -446,6 +446,30 @@ struct rte_event;
   * @see RTE_SCHED_TYPE_PARALLEL
   */

+#define RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE (1ULL << 16)
+/**< Event device supports event pre-scheduling.
+ *
+ * When this capability is available, the application can enable event
+ * pre-scheduling on the event device to pre-schedule events to an event port
+ * when `rte_event_dequeue_burst()` is issued.
+ * The pre-schedule process starts with the `rte_event_dequeue_burst()` call
+ * and the pre-scheduled events are returned on the next
+ * `rte_event_dequeue_burst()` call.
+ *
+ * @see rte_event_dev_configure()
+ */
+
+#define RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE_ADAPTIVE (1ULL << 17)
+/**< Event device supports adaptive event pre-scheduling.
+ *
+ * When this capability is available, the application can enable adaptive
+ * pre-scheduling on the event device where the events are pre-scheduled when
+ * there are no forward progress constraints with the currently held flow
+ * contexts.
+ * The pre-schedule process starts with the `rte_event_dequeue_burst()` call
+ * and the pre-scheduled events are returned on the next
+ * `rte_event_dequeue_burst()` call.
+ *
+ * @see rte_event_dev_configure()
+ */
+
  /* Event device priority levels */
  #define RTE_EVENT_DEV_PRIORITY_HIGHEST   0
  /**< Highest priority level for events and queues.
@@ -680,6 +704,25 @@ rte_event_dev_attr_get(uint8_t dev_id, uint32_t attr_id,
   *  @see rte_event_dequeue_timeout_ticks(), rte_event_dequeue_burst()
   */

+typedef enum {
+       RTE_EVENT_DEV_PRESCHEDULE_NONE = 0,
+       /* Disable pre-schedule across the event device or on a given event port.
+        * @ref rte_event_dev_config.preschedule_type
+        */
+       RTE_EVENT_DEV_PRESCHEDULE,
+       /* Enable pre-schedule always across the event device or a given event port.
+        * @ref rte_event_dev_config.preschedule_type
+        * @see RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE
+        */
+       RTE_EVENT_DEV_PRESCHEDULE_ADAPTIVE,
+       /* Enable adaptive pre-schedule across the event device or a given event port.
+        * Delay issuing pre-schedule until there are no forward progress constraints with
+        * the held flow contexts.
+        * @ref rte_event_dev_config.preschedule_type
+        * @see RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE_ADAPTIVE
+        */
+} rte_event_dev_preschedule_type_t;
+
  /** Event device configuration structure */
  struct rte_event_dev_config {
        uint32_t dequeue_timeout_ns;
@@ -752,6 +795,11 @@ struct rte_event_dev_config {
         * optimized for single-link usage, this field is a hint for how many
         * to allocate; otherwise, regular event ports and queues will be used.
         */
+       rte_event_dev_preschedule_type_t preschedule_type;
+       /**< Event pre-schedule type to use across the event device, if supported.
+        * @see RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE
+        * @see RTE_EVENT_DEV_CAP_EVENT_PRESCHEDULE_ADAPTIVE
+        */
  };

  /**
--
2.25.1

