20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") introduced
a possibility of getting an EBUSY error on lock contention, which seems to
happen very deterministically in test_maps when running 1024 threads on a
low-CPU machine. In the libbpf CI case, it's a 2 CPU VM and it's hitting this
100% of the time. Work around by retrying on EBUSY (and EAGAIN, while we are
at it) after a small sleep. sched_yield() is too aggressive and fails even
after 20 retries, so I went with usleep(1) for backoff.

Also log actual error returned to make it easier to see what's going on.

Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked")
Cc: Song Liu <songliubrav...@fb.com>
Acked-by: Song Liu <songliubrav...@fb.com>
Signed-off-by: Andrii Nakryiko <and...@kernel.org>
---
 tools/testing/selftests/bpf/test_maps.c | 48 +++++++++++++++++++++----
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0ad3e6305ff0..51adc42b2b40 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1312,22 +1312,58 @@ static void test_map_stress(void)
 #define DO_UPDATE 1
 #define DO_DELETE 0
 
+#define MAP_RETRIES 20
+
+static int map_update_retriable(int map_fd, const void *key, const void *value,
+                               int flags, int attempts)
+{
+       while (bpf_map_update_elem(map_fd, key, value, flags)) {
+               if (!attempts || (errno != EAGAIN && errno != EBUSY))
+                       return -errno;
+
+               usleep(1);
+               attempts--;
+       }
+
+       return 0;
+}
+
+static int map_delete_retriable(int map_fd, const void *key, int attempts)
+{
+       while (bpf_map_delete_elem(map_fd, key)) {
+               if (!attempts || (errno != EAGAIN && errno != EBUSY))
+                       return -errno;
+
+               usleep(1);
+               attempts--;
+       }
+
+       return 0;
+}
+
 static void test_update_delete(unsigned int fn, void *data)
 {
        int do_update = ((int *)data)[1];
        int fd = ((int *)data)[0];
-       int i, key, value;
+       int i, key, value, err;
 
        for (i = fn; i < MAP_SIZE; i += TASKS) {
                key = value = i;
 
                if (do_update) {
-                       assert(bpf_map_update_elem(fd, &key, &value,
-                                                  BPF_NOEXIST) == 0);
-                       assert(bpf_map_update_elem(fd, &key, &value,
-                                                  BPF_EXIST) == 0);
+                       err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+                       if (err)
+                               printf("error %d %d\n", err, errno);
+                       assert(err == 0);
+                       err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+                       if (err)
+                               printf("error %d %d\n", err, errno);
+                       assert(err == 0);
                } else {
-                       assert(bpf_map_delete_elem(fd, &key) == 0);
+                       err = map_delete_retriable(fd, &key, MAP_RETRIES);
+                       if (err)
+                               printf("error %d %d\n", err, errno);
+                       assert(err == 0);
                }
        }
 }
-- 
2.24.1

Reply via email to