These changes are somewhat redundant and potentially performance-impacting. The reason is that in the OpenCL API, clEnqueueWrite* commands are only specified to block until the memory provided by the application as origin can be reused safely (i.e. until soft_copy_op()() runs), not necessarily until the transfer to graphics memory has completed (which is what hard_event::wait() will wait for). OTOH, reads and maps as implemented by soft_copy_op and friends are essentially blocking already, so the wait() call is redundant in most cases.
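To illustrate the guarantee the spec actually gives, here is a minimal application-side sketch (not taken from this series; it assumes q and buf have been created elsewhere and that <CL/cl.h>, <vector> and <algorithm> are included):

   std::vector<float> src(1024, 1.0f);

   // A blocking write only promises that src can be reused once the call
   // returns; the copy into graphics memory may still be in flight.
   cl_int err = clEnqueueWriteBuffer(q, buf, CL_TRUE /* blocking */,
                                     0, src.size() * sizeof(float),
                                     src.data(), 0, NULL, NULL);

   // Safe to overwrite the host buffer now, even if the device-side
   // transfer hasn't finished yet.
   std::fill(src.begin(), src.end(), 2.0f);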
The only case where it might be useful to behave differently on blocking transfers is when the application has specified a user event in the event dependency list, which may delay the soft_copy_op() call until the application signals the user event. To handle that, it should only be necessary to wait for the event action to be executed, not for its associated GPU work to complete (a short sketch of that scenario follows the quoted patch below). Last time this issue came up (yeah, it's not the first time) I proposed the patches below to add a mechanism for waiting on the event action only; feel free to include them as PATCH 0.1 and 0.2 of this series (it's been a while, so they may no longer apply cleanly).

Thank you.

Jan Vesely <jan.ves...@rutgers.edu> writes:

> v2: wait in map_buffer and map_image as well
>
> Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu>
> ---
> Hi Aaron,
>
> yes, I think you're right, we should wait in Map* as well.
> If nothing else it's consistent, even if passing the flag to add_map might
> make it unnecessary (haven't actually checked).
>
> thanks,
> Jan
>
>  src/gallium/state_trackers/clover/api/transfer.cpp | 30 ++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp
> index f7046253be..729a34590e 100644
> --- a/src/gallium/state_trackers/clover/api/transfer.cpp
> +++ b/src/gallium/state_trackers/clover/api/transfer.cpp
> @@ -295,6 +295,9 @@ clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>                      &mem, obj_origin, obj_pitch,
>                      region));
>
> +   if (blocking)
> +      hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>
> @@ -325,6 +328,9 @@ clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>                      ptr, {}, obj_pitch,
>                      region));
>
> +   if (blocking)
> +      hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>
> @@ -362,6 +368,9 @@ clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>                      &mem, obj_origin, obj_pitch,
>                      region));
>
> +   if (blocking)
> +      hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>
> @@ -399,6 +408,9 @@ clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>                      ptr, host_origin, host_pitch,
>                      region));
>
> +   if (blocking)
> +      hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>
> @@ -504,6 +516,9 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>                      &img, src_origin, src_pitch,
>                      region));
>
> +   if (blocking)
> +      hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>
> @@ -538,6 +553,9 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>                      ptr, {}, src_pitch,
>                      region));
>
> +   if (blocking)
> +      hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>
> @@ -667,7 +685,11 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>
>     void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);
>
> -   ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps));
> +   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
> +   if (blocking)
> +      hev().wait();
> +
> +   ret_object(rd_ev, hev);
>     ret_error(r_errcode, CL_SUCCESS);
>     return map;
>
> @@ -695,7 +717,11 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
>
>     void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
>
> -   ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps));
> +   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
> +   if (blocking)
> +      hev().wait();
> +
> +   ret_object(rd_ev, hev);
>     ret_error(r_errcode, CL_SUCCESS);
>     return map;
>
> --
> 2.13.3
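To make the user-event case concrete, here is an illustrative application-side sketch (not part of either patch; it assumes ctx, q and buf already exist and that <CL/cl.h>, <thread>, <chrono> and <vector> are included):

   std::vector<char> src(4096, 0);

   cl_int err;
   cl_event user_ev = clCreateUserEvent(ctx, &err);

   // Another thread signals the user event once the host-side data is ready.
   std::thread producer([&] {
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
      clSetUserEventStatus(user_ev, CL_COMPLETE);
   });

   // The copy action cannot run until user_ev completes, so this blocking
   // call has to wait here.  All it must guarantee on return is that src is
   // reusable, i.e. that the copy action has executed -- waiting for the GPU
   // transfer to finish is not required for that.
   err = clEnqueueWriteBuffer(q, buf, CL_TRUE, 0, src.size(), src.data(),
                              1, &user_ev, NULL);

   producer.join();
   clReleaseEvent(user_ev);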
From 162047763a83a57764c5784c95c3f5474da897b1 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <curroje...@riseup.net>
Date: Tue, 9 Jun 2015 22:52:25 +0300
Subject: [PATCH 1/2] clover: Wrap event::wait_count in a method taking care
 of the required locking.

---
 src/gallium/state_trackers/clover/core/event.cpp | 19 ++++++++++++-------
 src/gallium/state_trackers/clover/core/event.hpp |  3 ++-
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp
index e1f9de0..fdd9209 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -27,7 +27,7 @@ using namespace clover;
 
 event::event(clover::context &ctx, const ref_vector<event> &deps,
              action action_ok, action action_fail) :
-   context(ctx), wait_count(1), _status(0),
+   context(ctx), _wait_count(1), _status(0),
    action_ok(action_ok), action_fail(action_fail) {
    for (auto &ev : deps)
       ev.chain(*this);
@@ -41,7 +41,7 @@ event::trigger_self() {
    std::lock_guard<std::mutex> lock(mutex);
    std::vector<intrusive_ref<event>> evs;
 
-   if (!--wait_count)
+   if (!--_wait_count)
       std::swap(_chain, evs);
 
    return evs;
@@ -81,10 +81,15 @@ event::abort(cl_int status) {
       ev.abort(status);
 }
 
+unsigned
+event::wait_count() const {
+   std::lock_guard<std::mutex> lock(mutex);
+   return _wait_count;
+}
+
 bool
 event::signalled() const {
-   std::lock_guard<std::mutex> lock(mutex);
-   return !wait_count;
+   return !wait_count();
 }
 
 cl_int
@@ -99,8 +104,8 @@ event::chain(event &ev) {
    std::unique_lock<std::mutex> lock_ev(ev.mutex, std::defer_lock);
    std::lock(lock, lock_ev);
 
-   if (wait_count) {
-      ev.wait_count++;
+   if (_wait_count) {
+      ev._wait_count++;
       _chain.push_back(ev);
    }
    ev.deps.push_back(*this);
@@ -112,7 +117,7 @@ event::wait() const {
       ev.wait();
 
    std::unique_lock<std::mutex> lock(mutex);
-   cv.wait(lock, [=]{ return !wait_count; });
+   cv.wait(lock, [=]{ return !_wait_count; });
 }
 
 hard_event::hard_event(command_queue &q, cl_command_type command,
diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp
index 6469e48..53dac68 100644
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -85,8 +85,9 @@ namespace clover {
    private:
       std::vector<intrusive_ref<event>> trigger_self();
      std::vector<intrusive_ref<event>> abort_self(cl_int status);
+      unsigned wait_count() const;
 
-      unsigned wait_count;
+      unsigned _wait_count;
       cl_int _status;
       action action_ok;
       action action_fail;
--
2.3.5
From 9789fa01f013b3f451cc8f8bcfa315edad1865a0 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <curroje...@riseup.net>
Date: Tue, 9 Jun 2015 22:59:43 +0300
Subject: [PATCH 2/2] clover: Run the associated action before an event is
 signalled.

And define a method for other threads to wait until the action function
associated with an event has been executed to completion.  For hard events,
this will mean waiting until the corresponding command has been submitted to
the pipe driver, without necessarily flushing the pipe_context and waiting
for the actual command to be processed by the GPU (which is what
hard_event::wait() already does).  This weaker kind of event wait will allow
implementing blocking memory transfers efficiently.
---
 src/gallium/state_trackers/clover/core/event.cpp | 22 +++++++++++-----------
 src/gallium/state_trackers/clover/core/event.hpp |  1 +
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp
index fdd9209..1f0db1b 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -44,19 +44,16 @@ event::trigger_self() {
    if (!--_wait_count)
       std::swap(_chain, evs);
 
+   cv.notify_all();
    return evs;
 }
 
 void
 event::trigger() {
-   auto evs = trigger_self();
-
-   if (signalled()) {
+   if (wait_count() == 1)
       action_ok(*this);
-      cv.notify_all();
-   }
 
-   for (event &ev : evs)
+   for (event &ev : trigger_self())
       ev.trigger();
 }
 
@@ -73,11 +70,9 @@ event::abort_self(cl_int status) {
 
 void
 event::abort(cl_int status) {
-   auto evs = abort_self(status);
-
    action_fail(*this);
 
-   for (event &ev : evs)
+   for (event &ev : abort_self(status))
       ev.abort(status);
 }
 
@@ -112,12 +107,17 @@ event::chain(event &ev) {
 }
 
 void
+event::wait_signalled() const {
+   std::unique_lock<std::mutex> lock(mutex);
+   cv.wait(lock, [=]{ return !_wait_count; });
+}
+
+void
 event::wait() const {
    for (event &ev : deps)
       ev.wait();
 
-   std::unique_lock<std::mutex> lock(mutex);
-   cv.wait(lock, [=]{ return !_wait_count; });
+   wait_signalled();
 }
 
 hard_event::hard_event(command_queue &q, cl_command_type command,
diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp
index 53dac68..03c97bc 100644
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -69,6 +69,7 @@ namespace clover {
       virtual cl_int status() const;
       virtual command_queue *queue() const = 0;
       virtual cl_command_type command() const = 0;
+      void wait_signalled() const;
       virtual void wait() const;
 
       virtual struct pipe_fence_handle *fence() const {
--
2.3.5
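With PATCH 2/2 applied, the blocking paths in Jan's patch could then wait for the event action only. The following is just a sketch of the direction for the clEnqueueWriteBuffer tail in api/transfer.cpp (argument lists elided, not code from either patch):

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, /* destination and source coordinates as before */));

   if (blocking)
      // Wait until soft_copy_op has run and the host pointer can be
      // reused, without flushing and waiting for the GPU transfer.
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;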