From: Nicolai Hähnle <nicolai.haeh...@amd.com>

With the futex-based implementation, fences are now 4 bytes instead of 96 bytes (on my 64-bit system).
Signaling a fence is a single atomic operation in the fast case plus a syscall in the slow case. Testing if a fence is signaled is the same as before (a simple comparison), but waiting on a fence is now no more expensive than just testing it in the fast (already signaled) case. v2: - style fixes - use p_atomic_xxx macros with the right barriers --- src/util/futex.h | 5 +++ src/util/u_queue.c | 2 ++ src/util/u_queue.h | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/src/util/futex.h b/src/util/futex.h index 142c3b62f00..fa42cf4cf59 100644 --- a/src/util/futex.h +++ b/src/util/futex.h @@ -36,18 +36,23 @@ static inline long sys_futex(void *addr1, int op, int val1, struct timespec *timeout, void *addr2, int val3) { return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); } static inline int futex_wake(uint32_t *addr) { return sys_futex(addr, FUTEX_WAKE, 1, NULL, NULL, 0); } +static inline int futex_wake_all(uint32_t *addr) +{ + return sys_futex(addr, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); +} + static inline int futex_wait(uint32_t *addr, int32_t value) { return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0); } #endif #endif /* UTIL_FUTEX_H */ diff --git a/src/util/u_queue.c b/src/util/u_queue.c index 2272006042f..8293ec661b0 100644 --- a/src/util/u_queue.c +++ b/src/util/u_queue.c @@ -82,20 +82,21 @@ remove_from_atexit_list(struct util_queue *queue) break; } } mtx_unlock(&exit_mutex); } /**************************************************************************** * util_queue_fence */ +#ifdef UTIL_QUEUE_FENCE_STANDARD void util_queue_fence_signal(struct util_queue_fence *fence) { mtx_lock(&fence->mutex); fence->signalled = true; cnd_broadcast(&fence->cond); mtx_unlock(&fence->mutex); } void @@ -129,20 +130,21 @@ util_queue_fence_destroy(struct util_queue_fence *fence) * by thread A, thread B is allowed to destroy it. 
Since * util_queue_fence_is_signalled does not lock the fence mutex (for * performance reasons), we must do so here. */ mtx_lock(&fence->mutex); mtx_unlock(&fence->mutex); cnd_destroy(&fence->cond); mtx_destroy(&fence->mutex); } +#endif /**************************************************************************** * util_queue implementation */ struct thread_input { struct util_queue *queue; int thread_index; }; diff --git a/src/util/u_queue.h b/src/util/u_queue.h index a3e12260e30..be8e715899a 100644 --- a/src/util/u_queue.h +++ b/src/util/u_queue.h @@ -28,30 +28,121 @@ * * Jobs can be added from any thread. After that, the wait call can be used * to wait for completion of the job. */ #ifndef U_QUEUE_H #define U_QUEUE_H #include <string.h> +#include "util/futex.h" #include "util/list.h" +#include "util/macros.h" +#include "util/u_atomic.h" #include "util/u_thread.h" #ifdef __cplusplus extern "C" { #endif #define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY (1 << 0) #define UTIL_QUEUE_INIT_RESIZE_IF_FULL (1 << 1) +#if defined(__GNUC__) && defined(HAVE_FUTEX) +#define UTIL_QUEUE_FENCE_FUTEX +#else +#define UTIL_QUEUE_FENCE_STANDARD +#endif + +#ifdef UTIL_QUEUE_FENCE_FUTEX +/* Job completion fence. + * Put this into your job structure. 
+ */ +struct util_queue_fence { + /* The fence can be in one of three states: + * 0 - signaled + * 1 - unsignaled + * 2 - unsignaled, may have waiters + */ + uint32_t val; +}; + +static inline void +util_queue_fence_init(struct util_queue_fence *fence) +{ + fence->val = 0; +} + +static inline void +util_queue_fence_destroy(struct util_queue_fence *fence) +{ + assert(fence->val == 0); + /* no-op */ +} + +static inline void +util_queue_fence_wait(struct util_queue_fence *fence) +{ + uint32_t v = fence->val; + + if (likely(v == 0)) + return; + + do { + if (v != 2) { + v = p_atomic_cmpxchg(&fence->val, 1, 2); + if (v == 0) + return; + } + + futex_wait(&fence->val, 2); + v = fence->val; + } while(v != 0); +} + +static inline void +util_queue_fence_signal(struct util_queue_fence *fence) +{ + uint32_t val = p_atomic_xchg(&fence->val, 0); + + assert(val != 0); + + if (val == 2) + futex_wake_all(&fence->val); +} + +/** + * Move \p fence back into unsignalled state. + * + * \warning The caller must ensure that no other thread may currently be + * waiting (or about to wait) on the fence. + */ +static inline void +util_queue_fence_reset(struct util_queue_fence *fence) +{ +#ifdef NDEBUG + fence->val = 1; +#else + uint32_t v = p_atomic_xchg(&fence->val, 1); + assert(v == 0); +#endif +} + +static inline bool +util_queue_fence_is_signalled(struct util_queue_fence *fence) +{ + return fence->val == 0; +} +#endif + +#ifdef UTIL_QUEUE_FENCE_STANDARD /* Job completion fence. * Put this into your job structure. 
*/ struct util_queue_fence { mtx_t mutex; cnd_t cond; int signalled; }; void util_queue_fence_init(struct util_queue_fence *fence); @@ -70,20 +161,21 @@ util_queue_fence_reset(struct util_queue_fence *fence) { assert(fence->signalled); fence->signalled = 0; } static inline bool util_queue_fence_is_signalled(struct util_queue_fence *fence) { return fence->signalled != 0; } +#endif typedef void (*util_queue_execute_func)(void *job, int thread_index); struct util_queue_job { void *job; struct util_queue_fence *fence; util_queue_execute_func execute; util_queue_execute_func cleanup; }; -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev