These changes are somewhat redundant and potentially
performance-impacting, the reason is that in the OpenCL API,
clEnqueueWrite* commands are specified to block until the memory
provided by the application as origin can be reused safely (i.e. until
soft_copy_op()() runs), not necessarily until the transfer to graphics
memory has completed (which is what hard_event::wait() will wait for).
OTOH reads and maps as implemented by soft_copy_op and friends are
essentially blocking so the wait() call is redundant in most cases.

The only reason why it might be useful to behave differently on blocking
transfers is that the application may have specified a user event in the
event dependency list, which may cause the soft_copy_op() call to be
delayed until the application signals the user event.  In order to fix
that it should really only be necessary to wait for the event action to
be executed, not necessarily its associated GPU work.

Last time this issue came up (yeah it's not the first time) I proposed
the patches below to add a mechanism to wait for the event action only,
feel free to include it as PATCH 0.1 and 0.2 of this series (it's been a
while so they may no longer apply cleanly).

Thank you.

Jan Vesely <jan.ves...@rutgers.edu> writes:

> v2: wait in map_buffer and map_image as well
>
> Signed-off-by: Jan Vesely <jan.ves...@rutgers.edu>
> ---
> Hi Aaron,
>
> yes, I think you're right, we should wait in Map* as well.
> If nothing else it's consistent, even if passing the flag to add_map might 
> make it unnecessary (haven't actually checked).
>
> thanks,
> Jan
>
>  src/gallium/state_trackers/clover/api/transfer.cpp | 30 
> ++++++++++++++++++++--
>  1 file changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
> b/src/gallium/state_trackers/clover/api/transfer.cpp
> index f7046253be..729a34590e 100644
> --- a/src/gallium/state_trackers/clover/api/transfer.cpp
> +++ b/src/gallium/state_trackers/clover/api/transfer.cpp
> @@ -295,6 +295,9 @@ clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, 
> cl_bool blocking,
>                     &mem, obj_origin, obj_pitch,
>                     region));
>  
> +   if (blocking)
> +       hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>  
> @@ -325,6 +328,9 @@ clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, 
> cl_bool blocking,
>                     ptr, {}, obj_pitch,
>                     region));
>  
> +   if (blocking)
> +       hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>  
> @@ -362,6 +368,9 @@ clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem 
> d_mem, cl_bool blocking,
>                     &mem, obj_origin, obj_pitch,
>                     region));
>  
> +   if (blocking)
> +       hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>  
> @@ -399,6 +408,9 @@ clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem 
> d_mem, cl_bool blocking,
>                     ptr, host_origin, host_pitch,
>                     region));
>  
> +   if (blocking)
> +       hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>  
> @@ -504,6 +516,9 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, 
> cl_bool blocking,
>                     &img, src_origin, src_pitch,
>                     region));
>  
> +   if (blocking)
> +       hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>  
> @@ -538,6 +553,9 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, 
> cl_bool blocking,
>                     ptr, {}, src_pitch,
>                     region));
>  
> +   if (blocking)
> +       hev().wait();
> +
>     ret_object(rd_ev, hev);
>     return CL_SUCCESS;
>  
> @@ -667,7 +685,11 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, 
> cl_bool blocking,
>  
>     void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, 
> region);
>  
> -   ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps));
> +   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
> +   if (blocking)
> +       hev().wait();
> +
> +   ret_object(rd_ev, hev);
>     ret_error(r_errcode, CL_SUCCESS);
>     return map;
>  
> @@ -695,7 +717,11 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, 
> cl_bool blocking,
>  
>     void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
>  
> -   ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps));
> +   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
> +   if (blocking)
> +       hev().wait();
> +
> +   ret_object(rd_ev, hev);
>     ret_error(r_errcode, CL_SUCCESS);
>     return map;
>  
> -- 
> 2.13.3

From 162047763a83a57764c5784c95c3f5474da897b1 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <curroje...@riseup.net>
Date: Tue, 9 Jun 2015 22:52:25 +0300
Subject: [PATCH 1/2] clover: Wrap event::wait_count in a method taking care of
 the required locking.

---
 src/gallium/state_trackers/clover/core/event.cpp | 19 ++++++++++++-------
 src/gallium/state_trackers/clover/core/event.hpp |  3 ++-
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp
index e1f9de0..fdd9209 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -27,7 +27,7 @@ using namespace clover;
 
 event::event(clover::context &ctx, const ref_vector<event> &deps,
              action action_ok, action action_fail) :
-   context(ctx), wait_count(1), _status(0),
+   context(ctx), _wait_count(1), _status(0),
    action_ok(action_ok), action_fail(action_fail) {
    for (auto &ev : deps)
       ev.chain(*this);
@@ -41,7 +41,7 @@ event::trigger_self() {
    std::lock_guard<std::mutex> lock(mutex);
    std::vector<intrusive_ref<event>> evs;
 
-   if (!--wait_count)
+   if (!--_wait_count)
       std::swap(_chain, evs);
 
    return evs;
@@ -81,10 +81,15 @@ event::abort(cl_int status) {
       ev.abort(status);
 }
 
+unsigned
+event::wait_count() const {
+   std::lock_guard<std::mutex> lock(mutex);
+   return _wait_count;
+}
+
 bool
 event::signalled() const {
-   std::lock_guard<std::mutex> lock(mutex);
-   return !wait_count;
+   return !wait_count();
 }
 
 cl_int
@@ -99,8 +104,8 @@ event::chain(event &ev) {
    std::unique_lock<std::mutex> lock_ev(ev.mutex, std::defer_lock);
    std::lock(lock, lock_ev);
 
-   if (wait_count) {
-      ev.wait_count++;
+   if (_wait_count) {
+      ev._wait_count++;
       _chain.push_back(ev);
    }
    ev.deps.push_back(*this);
@@ -112,7 +117,7 @@ event::wait() const {
       ev.wait();
 
    std::unique_lock<std::mutex> lock(mutex);
-   cv.wait(lock, [=]{ return !wait_count; });
+   cv.wait(lock, [=]{ return !_wait_count; });
 }
 
 hard_event::hard_event(command_queue &q, cl_command_type command,
diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp
index 6469e48..53dac68 100644
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -85,8 +85,9 @@ namespace clover {
    private:
       std::vector<intrusive_ref<event>> trigger_self();
       std::vector<intrusive_ref<event>> abort_self(cl_int status);
+      unsigned wait_count() const;
 
-      unsigned wait_count;
+      unsigned _wait_count;
       cl_int _status;
       action action_ok;
       action action_fail;
-- 
2.3.5

From 9789fa01f013b3f451cc8f8bcfa315edad1865a0 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <curroje...@riseup.net>
Date: Tue, 9 Jun 2015 22:59:43 +0300
Subject: [PATCH 2/2] clover: Run the associated action before an event is
 signalled.

And define a method for other threads to wait until the action
function associated with an event has been executed to completion.

For hard events, this will mean waiting until the corresponding
command has been submitted to the pipe driver, without necessarily
flushing the pipe_context and waiting for the actual command to be
processed by the GPU (which is what hard_event::wait() already does).

This weaker kind of event wait will allow implementing blocking memory
transfers efficiently.
---
 src/gallium/state_trackers/clover/core/event.cpp | 22 +++++++++++-----------
 src/gallium/state_trackers/clover/core/event.hpp |  1 +
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/event.cpp b/src/gallium/state_trackers/clover/core/event.cpp
index fdd9209..1f0db1b 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -44,19 +44,16 @@ event::trigger_self() {
    if (!--_wait_count)
       std::swap(_chain, evs);
 
+   cv.notify_all();
    return evs;
 }
 
 void
 event::trigger() {
-   auto evs = trigger_self();
-
-   if (signalled()) {
+   if (wait_count() == 1)
       action_ok(*this);
-      cv.notify_all();
-   }
 
-   for (event &ev : evs)
+   for (event &ev : trigger_self())
       ev.trigger();
 }
 
@@ -73,11 +70,9 @@ event::abort_self(cl_int status) {
 
 void
 event::abort(cl_int status) {
-   auto evs = abort_self(status);
-
    action_fail(*this);
 
-   for (event &ev : evs)
+   for (event &ev : abort_self(status))
       ev.abort(status);
 }
 
@@ -112,12 +107,17 @@ event::chain(event &ev) {
 }
 
 void
+event::wait_signalled() const {
+   std::unique_lock<std::mutex> lock(mutex);
+   cv.wait(lock, [=]{ return !_wait_count; });
+}
+
+void
 event::wait() const {
    for (event &ev : deps)
       ev.wait();
 
-   std::unique_lock<std::mutex> lock(mutex);
-   cv.wait(lock, [=]{ return !_wait_count; });
+   wait_signalled();
 }
 
 hard_event::hard_event(command_queue &q, cl_command_type command,
diff --git a/src/gallium/state_trackers/clover/core/event.hpp b/src/gallium/state_trackers/clover/core/event.hpp
index 53dac68..03c97bc 100644
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -69,6 +69,7 @@ namespace clover {
       virtual cl_int status() const;
       virtual command_queue *queue() const = 0;
       virtual cl_command_type command() const = 0;
+      void wait_signalled() const;
       virtual void wait() const;
 
       virtual struct pipe_fence_handle *fence() const {
-- 
2.3.5

Attachment: signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to