For the series:

Reviewed-by: Marek Olšák <[email protected]>
Marek

On Thu, Dec 6, 2018 at 7:57 AM Nicolai Hähnle <[email protected]> wrote:
> From: Nicolai Hähnle <[email protected]>
>
> The following race condition could occur in the no-timeout case:
>
>   API thread                Gallium thread            Watchdog
>   ----------                --------------            --------
>   dd_before_draw
>   u_threaded_context draw
>   dd_after_draw
>     add to dctx->records
>     signal watchdog
>                                                       dump & destroy record
>                             execute draw
>                             dd_after_draw_async
>                               use-after-free!
>
> Alternatively, the same scenario would assert in a debug build when
> destroying the record because record->driver_finished has not signaled.
>
> Fix this and simplify the logic at the same time by
> - handing the record pointers off to the watchdog thread *before* each
>   draw call and
> - waiting on the driver_finished fence in the watchdog thread
> ---
>  .../auxiliary/driver_ddebug/dd_context.c      |   1 -
>  src/gallium/auxiliary/driver_ddebug/dd_draw.c | 103 ++++++++----
>  src/gallium/auxiliary/driver_ddebug/dd_pipe.h |  21 ++--
>  3 files changed, 52 insertions(+), 73 deletions(-)
>
> diff --git a/src/gallium/auxiliary/driver_ddebug/dd_context.c b/src/gallium/auxiliary/driver_ddebug/dd_context.c
> index a9ac6ef14ab..15efeccf879 100644
> --- a/src/gallium/auxiliary/driver_ddebug/dd_context.c
> +++ b/src/gallium/auxiliary/driver_ddebug/dd_context.c
> @@ -589,21 +589,20 @@ static void
>  dd_context_destroy(struct pipe_context *_pipe)
>  {
>     struct dd_context *dctx = dd_context(_pipe);
>     struct pipe_context *pipe = dctx->pipe;
>
>     dd_thread_join(dctx);
>     mtx_destroy(&dctx->mutex);
>     cnd_destroy(&dctx->cond);
>
>     assert(list_empty(&dctx->records));
> -   assert(!dctx->record_pending);
>
>     if (pipe->set_log_context) {
>        pipe->set_log_context(pipe, NULL);
>
>        if (dd_screen(dctx->base.screen)->dump_mode == DD_DUMP_ALL_CALLS) {
>           FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen), 0);
>           if (f) {
>              fprintf(f, "Remainder of driver log:\n\n");
>           }
>
> diff --git a/src/gallium/auxiliary/driver_ddebug/dd_draw.c b/src/gallium/auxiliary/driver_ddebug/dd_draw.c
> index cb5db8ab83b..a930299ebb7 100644
> --- a/src/gallium/auxiliary/driver_ddebug/dd_draw.c
> +++ b/src/gallium/auxiliary/driver_ddebug/dd_draw.c
> @@ -981,80 +981,75 @@ dd_report_hang(struct dd_context *dctx)
>           }
>
>           fclose(f);
>        }
>
>        if (top_not_reached)
>           stop_output = true;
>        encountered_hang = true;
>     }
>
> -   if (num_later || dctx->record_pending) {
> -      fprintf(stderr, "... and %u%s additional draws.\n", num_later,
> -              dctx->record_pending ? "+1 (pending)" : "");
> -   }
> +   if (num_later)
> +      fprintf(stderr, "... and %u additional draws.\n", num_later);
>
>     fprintf(stderr, "\nDone.\n");
>     dd_kill_process();
>  }
>
>  int
>  dd_thread_main(void *input)
>  {
>     struct dd_context *dctx = (struct dd_context *)input;
>     struct dd_screen *dscreen = dd_screen(dctx->base.screen);
>     struct pipe_screen *screen = dscreen->screen;
>
>     mtx_lock(&dctx->mutex);
>
>     for (;;) {
>        struct list_head records;
> -      struct pipe_fence_handle *fence;
> -      struct pipe_fence_handle *fence2 = NULL;
> -
>        list_replace(&dctx->records, &records);
>        list_inithead(&dctx->records);
>        dctx->num_records = 0;
>
>        if (dctx->api_stalled)
>           cnd_signal(&dctx->cond);
>
> -      if (!list_empty(&records)) {
> -         /* Wait for the youngest draw. This means hangs can take a bit longer
> -          * to detect, but it's more efficient this way. */
> -         struct dd_draw_record *youngest =
> -            LIST_ENTRY(struct dd_draw_record, records.prev, list);
> -         fence = youngest->bottom_of_pipe;
> -      } else if (dctx->record_pending) {
> -         /* Wait for pending fences, in case the driver ends up hanging internally. */
> -         fence = dctx->record_pending->prev_bottom_of_pipe;
> -         fence2 = dctx->record_pending->top_of_pipe;
> -      } else if (dctx->kill_thread) {
> -         break;
> -      } else {
> +      if (list_empty(&records)) {
> +         if (dctx->kill_thread)
> +            break;
> +
>           cnd_wait(&dctx->cond, &dctx->mutex);
>           continue;
>        }
> +
>        mtx_unlock(&dctx->mutex);
>
> -      /* Fences can be NULL legitimately when timeout detection is disabled. */
> -      if ((fence &&
> -           !screen->fence_finish(screen, NULL, fence,
> -                                 (uint64_t)dscreen->timeout_ms * 1000*1000)) ||
> -          (fence2 &&
> -           !screen->fence_finish(screen, NULL, fence2,
> -                                 (uint64_t)dscreen->timeout_ms * 1000*1000))) {
> -         mtx_lock(&dctx->mutex);
> -         list_splice(&records, &dctx->records);
> -         dd_report_hang(dctx);
> -         /* we won't actually get here */
> -         mtx_unlock(&dctx->mutex);
> +      /* Wait for the youngest draw. This means hangs can take a bit longer
> +       * to detect, but it's more efficient this way. */
> +      struct dd_draw_record *youngest =
> +         list_last_entry(&records, struct dd_draw_record, list);
> +
> +      if (dscreen->timeout_ms > 0) {
> +         uint64_t abs_timeout = os_time_get_absolute_timeout(
> +                                   (uint64_t)dscreen->timeout_ms * 1000*1000);
> +
> +         if (!util_queue_fence_wait_timeout(&youngest->driver_finished, abs_timeout) ||
> +             !screen->fence_finish(screen, NULL, youngest->bottom_of_pipe,
> +                                   (uint64_t)dscreen->timeout_ms * 1000*1000)) {
> +            mtx_lock(&dctx->mutex);
> +            list_splice(&records, &dctx->records);
> +            dd_report_hang(dctx);
> +            /* we won't actually get here */
> +            mtx_unlock(&dctx->mutex);
> +         }
> +      } else {
> +         util_queue_fence_wait(&youngest->driver_finished);
>        }
>
>        list_for_each_entry_safe(struct dd_draw_record, record, &records, list) {
>           dd_maybe_dump_record(dscreen, record);
>           list_del(&record->list);
>           dd_free_record(screen, record);
>        }
>
>        mtx_lock(&dctx->mutex);
>     }
> @@ -1072,20 +1067,21 @@ dd_create_record(struct dd_context *dctx)
>        return NULL;
>
>     record->dctx = dctx;
>     record->draw_call = dctx->num_draw_calls;
>
>     record->prev_bottom_of_pipe = NULL;
>     record->top_of_pipe = NULL;
>     record->bottom_of_pipe = NULL;
>     record->log_page = NULL;
>     util_queue_fence_init(&record->driver_finished);
> +   util_queue_fence_reset(&record->driver_finished);
>
>     dd_init_copy_of_draw_state(&record->draw_state);
>     dd_copy_draw_state(&record->draw_state.base, &dctx->draw_state);
>
>     return record;
>  }
>
>  static void
>  dd_context_flush(struct pipe_context *_pipe,
>                   struct pipe_fence_handle **fence, unsigned flags)
> @@ -1108,41 +1104,50 @@ dd_before_draw(struct dd_context *dctx, struct dd_draw_record *record)
>     if (dscreen->timeout_ms > 0) {
>        if (dscreen->flush_always &&
>            dctx->num_draw_calls >= dscreen->skip_count) {
>           pipe->flush(pipe, &record->prev_bottom_of_pipe, 0);
>           screen->fence_reference(screen, &record->top_of_pipe,
>                                   record->prev_bottom_of_pipe);
>        } else {
>           pipe->flush(pipe, &record->prev_bottom_of_pipe,
>                       PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE);
>           pipe->flush(pipe, &record->top_of_pipe,
>                       PIPE_FLUSH_DEFERRED | PIPE_FLUSH_TOP_OF_PIPE);
>        }
> +   }
>
> -      mtx_lock(&dctx->mutex);
> -      dctx->record_pending = record;
> -      if (list_empty(&dctx->records))
> -         cnd_signal(&dctx->cond);
> -      mtx_unlock(&dctx->mutex);
> +   mtx_lock(&dctx->mutex);
> +   if (unlikely(dctx->num_records > 10000)) {
> +      dctx->api_stalled = true;
> +      /* Since this is only a heuristic to prevent the API thread from getting
> +       * too far ahead, we don't need a loop here. */
> +      cnd_wait(&dctx->cond, &dctx->mutex);
> +      dctx->api_stalled = false;
>     }
> +
> +   if (list_empty(&dctx->records))
> +      cnd_signal(&dctx->cond);
> +
> +   list_addtail(&record->list, &dctx->records);
> +   dctx->num_records++;
> +   mtx_unlock(&dctx->mutex);
>  }
>
>  static void
>  dd_after_draw_async(void *data)
>  {
>     struct dd_draw_record *record = (struct dd_draw_record *)data;
>     struct dd_context *dctx = record->dctx;
>     struct dd_screen *dscreen = dd_screen(dctx->base.screen);
>
>     record->log_page = u_log_new_page(&dctx->log);
>     record->time_after = os_time_get_nano();
>
> -   if (!util_queue_fence_is_signalled(&record->driver_finished))
> -      util_queue_fence_signal(&record->driver_finished);
> +   util_queue_fence_signal(&record->driver_finished);
>
>     if (dscreen->dump_mode == DD_DUMP_APITRACE_CALL &&
>         dscreen->apitrace_dump_call > dctx->draw_state.apitrace_call_number) {
>        dd_thread_join(dctx);
>        /* No need to continue. */
>        exit(0);
>     }
>  }
>
>  static void
> @@ -1151,48 +1156,28 @@ dd_after_draw(struct dd_context *dctx, struct dd_draw_record *record)
>     struct dd_screen *dscreen = dd_screen(dctx->base.screen);
>     struct pipe_context *pipe = dctx->pipe;
>
>     if (dscreen->timeout_ms > 0) {
>        unsigned flush_flags;
>        if (dscreen->flush_always &&
>            dctx->num_draw_calls >= dscreen->skip_count)
>           flush_flags = 0;
>        else
>           flush_flags = PIPE_FLUSH_DEFERRED | PIPE_FLUSH_BOTTOM_OF_PIPE;
>        pipe->flush(pipe, &record->bottom_of_pipe, flush_flags);
> -
> -      assert(record == dctx->record_pending);
>     }
>
>     if (pipe->callback) {
> -      util_queue_fence_reset(&record->driver_finished);
>        pipe->callback(pipe, dd_after_draw_async, record, true);
>     } else {
>        dd_after_draw_async(record);
>     }
>
> -   mtx_lock(&dctx->mutex);
> -   if (unlikely(dctx->num_records > 10000)) {
> -      dctx->api_stalled = true;
> -      /* Since this is only a heuristic to prevent the API thread from getting
> -       * too far ahead, we don't need a loop here. */
> -      cnd_wait(&dctx->cond, &dctx->mutex);
> -      dctx->api_stalled = false;
> -   }
> -
> -   if (list_empty(&dctx->records))
> -      cnd_signal(&dctx->cond);
> -
> -   list_addtail(&record->list, &dctx->records);
> -   dctx->record_pending = NULL;
> -   dctx->num_records++;
> -   mtx_unlock(&dctx->mutex);
> -
>     ++dctx->num_draw_calls;
>     if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
>        fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
>                dctx->num_draw_calls);
>  }
>
>  static void
>  dd_context_draw_vbo(struct pipe_context *_pipe,
>                      const struct pipe_draw_info *info)
>  {
> diff --git a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h
> index 07c4d55017f..12da8280aa6 100644
> --- a/src/gallium/auxiliary/driver_ddebug/dd_pipe.h
> +++ b/src/gallium/auxiliary/driver_ddebug/dd_pipe.h
> @@ -267,20 +267,21 @@ struct dd_draw_state_copy
>  };
>
>  struct dd_draw_record {
>     struct list_head list;
>     struct dd_context *dctx;
>
>     int64_t time_before;
>     int64_t time_after;
>     unsigned draw_call;
>
> +   /* The fence pointers are guaranteed to be valid once driver_finished is signalled */
>     struct pipe_fence_handle *prev_bottom_of_pipe;
>     struct pipe_fence_handle *top_of_pipe;
>     struct pipe_fence_handle *bottom_of_pipe;
>
>     struct dd_call call;
>     struct dd_draw_state_copy draw_state;
>
>     struct util_queue_fence driver_finished;
>     struct u_log_page *log_page;
>  };
> @@ -290,38 +291,32 @@ struct dd_context
>     struct pipe_context base;
>     struct pipe_context *pipe;
>
>     struct dd_draw_state draw_state;
>     unsigned num_draw_calls;
>
>     struct u_log_context log;
>
>     /* Pipelined hang detection.
>      *
> -    * This is without unnecessary flushes and waits. There is a memory-based
> -    * fence that is incremented by clear_buffer every draw call. Driver fences
> -    * are not used.
> +    * Before each draw call, a new dd_draw_record is created that contains
> +    * a copy of all states. After each draw call, the driver's log is added
> +    * to this record. Additionally, deferred fences are associated to each
> +    * record both before and after the draw.
>      *
> -    * After each draw call, a new dd_draw_record is created that contains
> -    * a copy of all states, the output of pipe_context::dump_debug_state,
> -    * and it has a fence number assigned. That's done without knowing whether
> -    * that draw call is problematic or not. The record is added into the list
> -    * of all records.
> -    *
> -    * An independent, separate thread loops over the list of records and checks
> -    * their fences. Records with signalled fences are freed. On fence timeout,
> -    * the thread dumps the records of in-flight draws.
> +    * The records are handed off to a separate thread which waits on the
> +    * records' fences. Records with signalled fences are freed. When a timeout
> +    * is detected, the thread dumps the records of in-flight draws.
>      */
>     thrd_t thread;
>     mtx_t mutex;
>     cnd_t cond;
> -   struct dd_draw_record *record_pending; /* currently inside the driver */
>     struct list_head records; /* oldest record first */
>     unsigned num_records;
>     bool kill_thread;
>     bool api_stalled;
>  };
>
>
>  struct pipe_context *
>  dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
>
> --
> 2.19.1
>
> _______________________________________________
> mesa-dev mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
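For anyone reading along, here is a rough, self-contained sketch of the ordering this patch
establishes (plain pthreads and a POSIX semaphore, not ddebug code; record_t, before_draw()
and watchdog_main() are invented names): the API thread publishes each record to the watchdog
before the draw runs, and the watchdog only dumps or frees a record after that record's
"driver finished" fence has signalled. The semaphore starts out unsignalled, which is meant to
mirror the util_queue_fence_init() + util_queue_fence_reset() pair in dd_create_record() (as
far as I can tell, init leaves the fence signalled, hence the added reset).

  /* Hypothetical illustration of the hand-off pattern; not Mesa code. */
  #include <pthread.h>
  #include <semaphore.h>
  #include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>

  typedef struct record {
     struct record *next;
     sem_t driver_finished;  /* stands in for util_queue_fence, starts unsignalled */
     unsigned draw_call;
  } record_t;

  static struct {
     pthread_mutex_t lock;
     pthread_cond_t cond;
     record_t *head, *tail;  /* oldest record first, like dctx->records */
     bool kill_thread;
  } ctx = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, NULL, NULL, false };

  /* Watchdog: take the whole list, then wait for each record's fence before
   * touching it. This is the ordering driver_finished enforces in the patch. */
  static void *watchdog_main(void *arg)
  {
     (void)arg;
     pthread_mutex_lock(&ctx.lock);
     for (;;) {
        while (!ctx.head && !ctx.kill_thread)
           pthread_cond_wait(&ctx.cond, &ctx.lock);
        if (!ctx.head)
           break;                      /* kill_thread set and queue drained */

        record_t *list = ctx.head;     /* grab everything, like list_replace() */
        ctx.head = ctx.tail = NULL;
        pthread_mutex_unlock(&ctx.lock);

        while (list) {
           record_t *r = list;
           list = r->next;
           sem_wait(&r->driver_finished);   /* util_queue_fence_wait() equivalent */
           printf("watchdog: draw %u finished, dumping/freeing record\n", r->draw_call);
           sem_destroy(&r->driver_finished);
           free(r);
        }
        pthread_mutex_lock(&ctx.lock);
     }
     pthread_mutex_unlock(&ctx.lock);
     return NULL;
  }

  /* API-thread side: hand the record to the watchdog *before* the draw runs. */
  static record_t *before_draw(unsigned draw_call)
  {
     record_t *r = calloc(1, sizeof(*r));
     sem_init(&r->driver_finished, 0, 0);  /* unsignalled, like init + reset */
     r->draw_call = draw_call;

     pthread_mutex_lock(&ctx.lock);
     if (!ctx.head)
        pthread_cond_signal(&ctx.cond);    /* wake the watchdog on empty -> non-empty */
     if (ctx.tail)
        ctx.tail->next = r;
     else
        ctx.head = r;
     ctx.tail = r;
     pthread_mutex_unlock(&ctx.lock);
     return r;
  }

  int main(void)
  {
     pthread_t watchdog;
     pthread_create(&watchdog, NULL, watchdog_main, NULL);

     for (unsigned i = 0; i < 4; i++) {
        record_t *r = before_draw(i);
        /* ... the driver would execute the draw here ... */
        sem_post(&r->driver_finished);     /* dd_after_draw_async(): signal the fence */
     }

     pthread_mutex_lock(&ctx.lock);
     ctx.kill_thread = true;
     pthread_cond_signal(&ctx.cond);
     pthread_mutex_unlock(&ctx.lock);
     pthread_join(watchdog, NULL);
     return 0;
  }

The real watchdog additionally distinguishes the timeout case, where it combines
util_queue_fence_wait_timeout() with screen->fence_finish() on bottom_of_pipe, from the plain
util_queue_fence_wait() in the no-timeout case; the sketch only models the latter.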
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
