[PATCH 4/4] um: add shared memory optimisation for time-travel=ext

Johannes Berg Fri, 10 Nov 2023 10:54:45 -0800

From: Johannes Berg <johannes.b...@intel.com>

With external time travel, a LOT of messages can end up
being exchanged on the socket, taking a significant
amount of time just to do that.


Add a new shared memory optimisation to that, where a
number of changes are made:
 - the controller sends a client ID and a shared memory FD
   (and a logging FD we don't use) in the ACK message to
   the initial START
 - the shared memory holds the current time and the
   free_until value, so that there's no need to exchange
   messages for that
 - if the client that's running has shared memory support,
   any client (the running one included) can request the
   next time it wants to run inside the shared memory,
   rather than sending a message, by also updating the
   free_until value
 - when shared memory is enabled, RUN/WAIT messages no
   longer have an ACK, further cutting down on messages

Together, this can reduce the number of messages very
significantly, and reduce overall test/simulation run time.

Co-developed-by: Mordechay Goodstein <mordechay.goodst...@intel.com>
Signed-off-by: Mordechay Goodstein <mordechay.goodst...@intel.com>
Signed-off-by: Johannes Berg <johannes.b...@intel.com>
---
 arch/um/kernel/time.c              | 130 +++++++++++++++++++--
 include/uapi/linux/um_timetravel.h | 182 ++++++++++++++++++++++++++---
 2 files changed, 285 insertions(+), 27 deletions(-)

diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 1a9069f92600..8ff46bc86d09 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -30,6 +30,7 @@ EXPORT_SYMBOL_GPL(time_travel_mode);
 static bool time_travel_start_set;
 static unsigned long long time_travel_start;
 static unsigned long long time_travel_time;
+static unsigned long long time_travel_shm_offset;
 static LIST_HEAD(time_travel_events);
 static LIST_HEAD(time_travel_irqs);
 static unsigned long long time_travel_timer_interval;
@@ -39,8 +40,11 @@ static int time_travel_ext_fd = -1;
 static unsigned int time_travel_ext_waiting;
 static bool time_travel_ext_prev_request_valid;
 static unsigned long long time_travel_ext_prev_request;
-static bool time_travel_ext_free_until_valid;
-static unsigned long long time_travel_ext_free_until;
+static unsigned long long *time_travel_ext_free_until;
+static unsigned long long _time_travel_ext_free_until;
+static u16 time_travel_shm_id;
+static struct um_timetravel_schedshm *time_travel_shm;
+static union um_timetravel_schedshm_client *time_travel_shm_client;
 
 static void time_travel_set_time(unsigned long long ns)
 {
@@ -57,6 +61,7 @@ enum time_travel_message_handling {
        TTMH_IDLE,
        TTMH_POLL,
        TTMH_READ,
+       TTMH_READ_START_ACK,
 };
 
 static u64 bc_message;
@@ -68,6 +73,40 @@ void _time_travel_print_bc_msg(void)
        printk(KERN_INFO "time-travel: received broadcast 0x%llx\n", 
bc_message);
 }
 
+static void time_travel_setup_shm(int fd, u16 id)
+{
+       u32 len;
+
+       time_travel_shm = os_mmap_rw_shared(fd, sizeof(*time_travel_shm));
+
+       if (!time_travel_shm)
+               goto out;
+
+       len = time_travel_shm->len;
+
+       if (time_travel_shm->version != UM_TIMETRAVEL_SCHEDSHM_VERSION ||
+           len < struct_size(time_travel_shm, clients, id + 1)) {
+               os_munmap(time_travel_shm, sizeof(*time_travel_shm));
+               time_travel_shm = NULL;
+               goto out;
+       }
+
+       time_travel_shm = os_mremap_rw_shared(time_travel_shm,
+                                             sizeof(*time_travel_shm),
+                                             len);
+       if (!time_travel_shm)
+               goto out;
+
+       time_travel_shm_offset = time_travel_shm->current_time;
+       time_travel_shm_client = &time_travel_shm->clients[id];
+       time_travel_shm_client->capa |= UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE;
+       time_travel_shm_id = id;
+       /* always look at that free_until from now on */
+       time_travel_ext_free_until = &time_travel_shm->free_until;
+out:
+       os_close_file(fd);
+}
+
 static void time_travel_handle_message(struct um_timetravel_msg *msg,
                                       enum time_travel_message_handling mode)
 {
@@ -88,7 +127,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
                }
        }
 
-       ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+       if (unlikely(mode == TTMH_READ_START_ACK)) {
+               int fd[UM_TIMETRAVEL_SHARED_MAX_FDS];
+
+               ret = os_rcv_fd_msg(time_travel_ext_fd, fd,
+                                   ARRAY_SIZE(fd), msg, sizeof(*msg));
+               if (ret == sizeof(*msg)) {
+                       time_travel_setup_shm(fd[UM_TIMETRAVEL_SHARED_MEMFD],
+                                             msg->time & 
UM_TIMETRAVEL_START_ACK_ID);
+                       /* we don't use the logging for now */
+                       os_close_file(fd[UM_TIMETRAVEL_SHARED_LOGFD]);
+               }
+       } else {
+               ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
+       }
 
        if (ret == 0)
                panic("time-travel external link is broken\n");
@@ -104,10 +156,20 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
                return;
        case UM_TIMETRAVEL_RUN:
                time_travel_set_time(msg->time);
+               if (time_travel_shm) {
+                       /* no request right now since we're running */
+                       time_travel_shm_client->flags &=
+                               ~UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+                       /* no ack for shared memory RUN */
+                       return;
+               }
                break;
        case UM_TIMETRAVEL_FREE_UNTIL:
-               time_travel_ext_free_until_valid = true;
-               time_travel_ext_free_until = msg->time;
+               /* not supposed to get this with shm, but ignore it */
+               if (time_travel_shm)
+                       break;
+               time_travel_ext_free_until = &_time_travel_ext_free_until;
+               _time_travel_ext_free_until = msg->time;
                break;
        case UM_TIMETRAVEL_BROADCAST:
                bc_message = msg->time;
@@ -148,8 +210,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
        block_signals_hard();
        os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
 
+       /* no ACK expected for WAIT in shared memory mode */
+       if (msg.op == UM_TIMETRAVEL_WAIT && time_travel_shm)
+               goto done;
+
        while (msg.op != UM_TIMETRAVEL_ACK)
-               time_travel_handle_message(&msg, TTMH_READ);
+               time_travel_handle_message(&msg,
+                                          op == UM_TIMETRAVEL_START ?
+                                               TTMH_READ_START_ACK :
+                                               TTMH_READ);
 
        if (msg.seq != mseq)
                panic("time-travel: ACK message has different seqno! op=%d, 
seq=%d != %d time=%lld\n",
@@ -157,6 +226,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
 
        if (op == UM_TIMETRAVEL_GET)
                time_travel_set_time(msg.time);
+done:
        unblock_signals_hard();
 
        return msg.time;
@@ -192,13 +262,33 @@ static void time_travel_ext_update_request(unsigned long long time)
        /*
         * if we're running and are allowed to run past the request
         * then we don't need to update it either
+        *
+        * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+        * to shared memory, and for non-shm the offset is 0.
         */
-       if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
-           time < time_travel_ext_free_until)
+       if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+           time < (*time_travel_ext_free_until - time_travel_shm_offset))
                return;
 
        time_travel_ext_prev_request = time;
        time_travel_ext_prev_request_valid = true;
+
+       if (time_travel_shm) {
+               union um_timetravel_schedshm_client *running;
+
+               running = 
&time_travel_shm->clients[time_travel_shm->running_id];
+
+               if (running->capa & UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE) {
+                       time_travel_shm_client->flags |=
+                               UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN;
+                       time += time_travel_shm_offset;
+                       time_travel_shm_client->req_time = time;
+                       if (time < time_travel_shm->free_until)
+                               time_travel_shm->free_until = time;
+                       return;
+               }
+       }
+
        time_travel_ext_req(UM_TIMETRAVEL_REQUEST, time);
 }
 
@@ -206,6 +296,14 @@ void __time_travel_propagate_time(void)
 {
        static unsigned long long last_propagated;
 
+       if (time_travel_shm) {
+               if (time_travel_shm->running_id != time_travel_shm_id)
+                       panic("time-travel: setting time while not running\n");
+               time_travel_shm->current_time = time_travel_time +
+                                               time_travel_shm_offset;
+               return;
+       }
+
        if (last_propagated == time_travel_time)
                return;
 
@@ -221,9 +319,12 @@ static bool time_travel_ext_request(unsigned long long time)
         * If we received an external sync point ("free until") then we
         * don't have to request/wait for anything until then, unless
         * we're already waiting.
+        *
+        * Note for shm we ignore FREE_UNTIL messages and leave the pointer
+        * to shared memory, and for non-shm the offset is 0.
         */
-       if (!time_travel_ext_waiting && time_travel_ext_free_until_valid &&
-           time < time_travel_ext_free_until)
+       if (!time_travel_ext_waiting && time_travel_ext_free_until &&
+           time < (*time_travel_ext_free_until - time_travel_shm_offset))
                return false;
 
        time_travel_ext_update_request(time);
@@ -237,7 +338,8 @@ static void time_travel_ext_wait(bool idle)
        };
 
        time_travel_ext_prev_request_valid = false;
-       time_travel_ext_free_until_valid = false;
+       if (!time_travel_shm)
+               time_travel_ext_free_until = NULL;
        time_travel_ext_waiting++;
 
        time_travel_ext_req(UM_TIMETRAVEL_WAIT, -1);
@@ -260,7 +362,11 @@ static void time_travel_ext_wait(bool idle)
 
 static void time_travel_ext_get_time(void)
 {
-       time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
+       if (time_travel_shm)
+               time_travel_set_time(time_travel_shm->current_time -
+                                    time_travel_shm_offset);
+       else
+               time_travel_ext_req(UM_TIMETRAVEL_GET, -1);
 }
 
 static void __time_travel_update_time(unsigned long long ns, bool idle)
diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h
index d119ce14f36a..546a690b0346 100644
--- a/include/uapi/linux/um_timetravel.h
+++ b/include/uapi/linux/um_timetravel.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
 /*
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019 - 2023 Intel Corporation
  */
 #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H
 #define _UAPI_LINUX_UM_TIMETRAVEL_H
@@ -50,6 +39,36 @@ struct um_timetravel_msg {
        __u64 time;
 };
 
+/* max number of file descriptors that can be sent/received in a message */
+#define UM_TIMETRAVEL_MAX_FDS 2
+
+/**
+ * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message
+ */
+enum um_timetravel_shared_mem_fds {
+       /**
+        * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file
+        *      descriptor in the control message
+        */
+       UM_TIMETRAVEL_SHARED_MEMFD,
+       /**
+        * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor
+        *      in the control message
+        */
+       UM_TIMETRAVEL_SHARED_LOGFD,
+       UM_TIMETRAVEL_SHARED_MAX_FDS,
+};
+
+/**
+ * enum um_timetravel_start_ack - ack-time mask for start message
+ */
+enum um_timetravel_start_ack {
+       /**
+        * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated.
+        */
+       UM_TIMETRAVEL_START_ACK_ID = 0xffff,
+};
+
 /**
  * enum um_timetravel_ops - Operation codes
  */
@@ -57,7 +76,9 @@ enum um_timetravel_ops {
        /**
         * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message,
         *      this usually doesn't carry any data in the 'time' field
-        *      unless otherwise specified below
+        *      unless otherwise specified below. Note: while using shared
+        *      memory there is no ACK for WAIT and RUN messages; for more
+        *      info see &struct um_timetravel_schedshm.
         */
        UM_TIMETRAVEL_ACK               = 0,
 
@@ -124,7 +145,8 @@ enum um_timetravel_ops {
         */
        UM_TIMETRAVEL_GET_TOD           = 8,
 
-       /** @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast messge
+       /**
+        * @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message.
         *      This message can be used to sync all components in the system
         *      with a single message, if the calender gets the message, the
         *      calender broadcast the message to all components, and if a
@@ -135,4 +157,134 @@ enum um_timetravel_ops {
        UM_TIMETRAVEL_BROADCAST         = 9,
 };
 
+/* version of struct um_timetravel_schedshm */
+#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2
+
+/**
+ * enum um_timetravel_schedshm_cap - time travel capabilities of every client
+ *
+ * These flags must be set immediately after processing the ACK to
+ * the START message, before sending any message to the controller.
+ */
+enum um_timetravel_schedshm_cap {
+       /**
+        * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read the current
+        *      time from shared memory, write its own time request to shared
+        *      memory, and read free_until from it; it sends no ACK on RUN
+        *      and doesn't expect an ACK on WAIT.
+        */
+       UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1,
+};
+
+/**
+ * enum um_timetravel_schedshm_flags - time travel flags of every client
+ */
+enum um_timetravel_schedshm_flags {
+       /**
+        * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run.
+        *      It's set by client when it has a request to run, if (and only
+        *      if) the @running_id points to a client that is able to use
+        *      shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE
+        *      (this includes the client itself). Otherwise, a message must
+        *      be used.
+        */
+       UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1,
+};
+
+/**
+ * DOC: Time travel shared memory overview
+ *
+ * The main purpose of the shared memory is to avoid all time travel messages
+ * that don't need any action, for example current time can be held in shared
+ * memory without the need of any client to send a message UM_TIMETRAVEL_GET
+ * in order to know what's the time.
+ *
+ * Since this is shared memory with all clients and controller and controller
+ * creates the shared memory space, all time values are absolute to controller
+ * time. So first time client connects to shared memory mode it should take the
+ * current_time value in shared memory and keep it internally as a diff to
+ * shared memory times, and once shared memory is initialized, any interaction
+ * with the controller must happen in the controller time domain, including any
+ * messages (for clients that are not using shared memory, the controller will
+ * handle an offset and make the clients think they start at time zero.)
+ *
+ * Along with the shared memory file descriptor, a logging file descriptor is
+ * sent to the client, so that all logs related to shared memory are written
+ * to one place. Note: to have all logs synced into the log file at write time,
+ * the file should be flushed (fflush) after writing to it.
+ *
+ * To avoid memory corruption, we define below for each field who can write to
+ * it at what time, defined in the structure fields.
+ *
+ * To avoid having to pack this struct, all fields in it must be naturally aligned
+ * (i.e. aligned to their size).
+ */
+
+/**
+ * union um_timetravel_schedshm_client - UM time travel client struct
+ *
+ * Every entity using the shared memory including the controller has a place in
+ * the um_timetravel_schedshm clients array, that holds info related to the client
+ * using the shared memory, and can be set only by the client after it gets the
+ * fd memory.
+ *
+ * @capa: bit fields with client capabilities see
+ *     &enum um_timetravel_schedshm_cap, set by client once after getting the
+ *     shared memory file descriptor.
+ * @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc.
+ * @req_time: request time to run, set by client on every request it needs.
+ * @name: unique id sent to the controller by client with START message.
+ */
+union um_timetravel_schedshm_client {
+       struct {
+               __u32 capa;
+               __u32 flags;
+               __u64 req_time;
+               __u64 name;
+       };
+       char reserve[128]; /* reserved for future usage */
+};
+
+/**
+ * struct um_timetravel_schedshm - UM time travel shared memory struct
+ *
+ * @hdr: header fields:
+ * @version: Current version struct UM_TIMETRAVEL_SCHEDSHM_VERSION,
+ *     set by controller once at init, clients must check this after mapping
+ *     and work without shared memory if they cannot handle the indicated
+ *     version.
+ * @len: Length of all the memory including header (@hdr), clients should once
+ *     per connection first mmap the header and take the length (@len) to remap the entire size.
+ *     This is done in order to support dynamic struct size letting number of
+ *     clients be dynamic based on controller support.
+ * @free_until: Stores the next request to run by any client, in order for the
+ *     current client to know how long it can still run. A client needs to (at
+ *     least) reload this value immediately after communicating with any other
+ *     client, since the controller will update this field when a new request
+ *     is made by any client. Clients also must update this value when they
+ *     insert/update an own request into the shared memory while not running
+ *     themselves, and the new request is earlier than the current value.
+ * @current_time: Current time, can only be set by the client in running state
+ *     (indicated by @running_id), though that client may only run until @free_until,
+ *     so it must remain smaller than @free_until.
+ * @running_id: The current client in state running, set before a client is
+ *     notified that it's now running.
+ * @max_clients: size of @clients array, set once at init by the controller.
+ * @clients: clients array see &union um_timetravel_schedshm_client for doc,
+ *     set only by client.
+ */
+struct um_timetravel_schedshm {
+       union {
+               struct {
+                       __u32 version;
+                       __u32 len;
+                       __u64 free_until;
+                       __u64 current_time;
+                       __u16 running_id;
+                       __u16 max_clients;
+               };
+               char hdr[4096]; /* align to 4K page size */
+       };
+       union um_timetravel_schedshm_client clients[];
+};
 #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */
-- 
2.41.0


_______________________________________________
linux-um mailing list
linux-um@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-um

[PATCH 4/4] um: add shared memory optimisation for time-travel=ext

Reply via email to