From: Roland Scheidegger <srol...@vmware.com> Handle PIPE_QUERY_GPU_FINISHED and PIPE_QUERY_TIMESTAMP_DISJOINT, and also fill in the ps_invocations and c_primitives fields of PIPE_QUERY_PIPELINE_STATISTICS (the other fields should already be handled). Note that ps_invocations isn't pixel-exact, only exact to 16 pixels (one 4x4 block), but I guess it's better than nothing. It doesn't really seem to work correctly, but there are probably bugs elsewhere. Also use a 64-bit counter for occlusion queries. --- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 11 ++++++++--- src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- src/gallium/drivers/llvmpipe/lp_jit.h | 2 +- src/gallium/drivers/llvmpipe/lp_query.c | 23 ++++++++++++++++++++--- src/gallium/drivers/llvmpipe/lp_rast.c | 19 ++++++++++++++++--- src/gallium/drivers/llvmpipe/lp_rast_priv.h | 6 +++++- src/gallium/drivers/llvmpipe/lp_setup.c | 4 ++-- src/gallium/drivers/llvmpipe/lp_setup_line.c | 13 ++++++++----- src/gallium/drivers/llvmpipe/lp_setup_point.c | 10 +++++++++- src/gallium/drivers/llvmpipe/lp_setup_tri.c | 8 ++++++++ 10 files changed, 78 insertions(+), 20 deletions(-)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index edb59cc..79891cf 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -429,7 +429,7 @@ get_s_shift_and_mask(const struct util_format_description *format_desc, * Test the depth mask. Add the number of channel which has none zero mask * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. * The counter will add 4. - * TODO: could get that out of the loop, and need to use 64bit counter. + * TODO: could get that out of the fs loop. * * \param type holds element type of the mask vector. * \param maskvalue is the depth test mask. @@ -458,6 +458,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, LLVMInt32TypeInContext(context), bits); count = lp_build_intrinsic_unary(builder, popcntintr, LLVMInt32TypeInContext(context), bits); + count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); } else if(util_cpu_caps.has_avx && type.length == 8) { const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; @@ -468,6 +469,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, LLVMInt32TypeInContext(context), bits); count = lp_build_intrinsic_unary(builder, popcntintr, LLVMInt32TypeInContext(context), bits); + count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); } else { unsigned i; @@ -510,8 +512,11 @@ lp_build_occlusion_count(struct gallivm_state *gallivm, } count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); - if (type.length > 4) { - count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), ""); + if (type.length > 8) { + count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), ""); + } + else if (type.length < 8) { + count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); } } newcount = LLVMBuildLoad(builder, counter, "origcount"); diff --git 
a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index f517b67..fa0f128 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -195,7 +195,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT]; LLVMTypeRef thread_data_type; - elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt32TypeInContext(lc); + elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc); thread_data_type = LLVMStructTypeInContext(lc, elem_types, Elements(elem_types), 0); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 2ecfde7..30cfaae 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -164,7 +164,7 @@ enum { struct lp_jit_thread_data { - uint32_t vis_counter; + uint64_t vis_counter; }; diff --git a/src/gallium/drivers/llvmpipe/lp_query.c b/src/gallium/drivers/llvmpipe/lp_query.c index 922913d..7fbf5f7 100644 --- a/src/gallium/drivers/llvmpipe/lp_query.c +++ b/src/gallium/drivers/llvmpipe/lp_query.c @@ -40,6 +40,7 @@ #include "lp_query.h" #include "lp_screen.h" #include "lp_state.h" +#include "lp_rast.h" static struct llvmpipe_query *llvmpipe_query( struct pipe_query *p ) @@ -128,7 +129,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, case PIPE_QUERY_OCCLUSION_PREDICATE: for (i = 0; i < num_threads; i++) { /* safer (still not guaranteed) when there's an overflow */ - *result = *result || pq->count[i]; + vresult->b = vresult->b || pq->count[i]; } break; case PIPE_QUERY_TIMESTAMP: @@ -140,6 +141,17 @@ llvmpipe_get_query_result(struct pipe_context *pipe, *result = os_time_get_nano(); } break; + case PIPE_QUERY_TIMESTAMP_DISJOINT: { + struct pipe_query_data_timestamp_disjoint *td = + (struct pipe_query_data_timestamp_disjoint *)vresult; + /* os_get_time_nano return nanoseconds, but we sum all threads */ + td->frequency = UINT64_C(1000000000) * num_threads; + 
td->disjoint = FALSE; + } + break; + case PIPE_QUERY_GPU_FINISHED: + vresult->b = TRUE; + break; case PIPE_QUERY_PRIMITIVES_GENERATED: *result = pq->num_primitives_generated; break; @@ -147,7 +159,7 @@ llvmpipe_get_query_result(struct pipe_context *pipe, *result = pq->num_primitives_written; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - *result = pq->so_has_overflown; + vresult->b = pq->so_has_overflown; break; case PIPE_QUERY_SO_STATISTICS: { struct pipe_query_data_so_statistics *stats = @@ -159,8 +171,13 @@ llvmpipe_get_query_result(struct pipe_context *pipe, case PIPE_QUERY_PIPELINE_STATISTICS: { struct pipe_query_data_pipeline_statistics *stats = (struct pipe_query_data_pipeline_statistics *)vresult; + /* only ps_invocations come from binned query */ + for (i = 0; i < num_threads; i++) { + pq->stats.ps_invocations += pq->count[i]; + } + pq->stats.ps_invocations *= LP_RASTER_BLOCK_SIZE * LP_RASTER_BLOCK_SIZE; *stats = pq->stats; - } + } break; default: assert(0); diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d802d53..62a82e3 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -455,6 +455,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, * allocated 4x4 blocks hence need to filter them out here. 
*/ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { + if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) { + /* not very accurate would need a popcount on the mask */ + task->ps_invocations++; + } /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_EDGE_TEST](&state->jit_context, @@ -493,11 +497,14 @@ lp_rast_begin_query(struct lp_rasterizer_task *task, case PIPE_QUERY_OCCLUSION_PREDICATE: task->thread_data.vis_counter = 0; break; + case PIPE_QUERY_PIPELINE_STATISTICS: + task->ps_invocations = 0; + break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_PIPELINE_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_TIMESTAMP_DISJOINT: break; default: assert(0); @@ -518,7 +525,9 @@ lp_rast_end_query(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { struct llvmpipe_query *pq = arg.query_obj; - assert(task->query[pq->type] == pq || pq->type == PIPE_QUERY_TIMESTAMP); + assert(task->query[pq->type] == pq || + pq->type == PIPE_QUERY_TIMESTAMP || + pq->type == PIPE_QUERY_GPU_FINISHED); switch (pq->type) { case PIPE_QUERY_OCCLUSION_COUNTER: @@ -528,11 +537,15 @@ lp_rast_end_query(struct lp_rasterizer_task *task, case PIPE_QUERY_TIMESTAMP: pq->count[task->thread_index] = os_time_get_nano(); break; + case PIPE_QUERY_PIPELINE_STATISTICS: + pq->count[task->thread_index] += task->ps_invocations; + break; case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_SO_STATISTICS: - case PIPE_QUERY_PIPELINE_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + case PIPE_QUERY_TIMESTAMP_DISJOINT: + case PIPE_QUERY_GPU_FINISHED: break; default: assert(0); diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h index 6f03023..3048940 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h @@ -99,8 
+99,8 @@ struct lp_rasterizer_task /* occlude counter for visible pixels */ struct lp_jit_thread_data thread_data; - uint64_t query_start; struct llvmpipe_query *query[PIPE_QUERY_TYPES]; + uint64_t ps_invocations; pipe_semaphore work_ready; pipe_semaphore work_done; @@ -307,6 +307,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task, * allocated 4x4 blocks hence need to filter them out here. */ if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { + if (task->query[PIPE_QUERY_PIPELINE_STATISTICS]) { + /* not very accurate would need a popcount on the mask */ + task->ps_invocations++; + } /* run shader on 4x4 block */ BEGIN_JIT_CALL(state, task); variant->jit_function[RAST_WHOLE]( &state->jit_context, diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index e2fb257..6b64446 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -1215,7 +1215,7 @@ lp_setup_begin_query(struct lp_setup_context *setup, assert(setup->active_query[pq->type] == NULL); set_scene_state(setup, SETUP_ACTIVE, "begin_query"); - + setup->active_query[pq->type] = pq; /* XXX: It is possible that a query is created before the scene @@ -1249,7 +1249,7 @@ lp_setup_end_query(struct lp_setup_context *setup, struct llvmpipe_query *pq) { set_scene_state(setup, SETUP_ACTIVE, "end_query"); - if (pq->type != PIPE_QUERY_TIMESTAMP) { + if (pq->type != PIPE_QUERY_TIMESTAMP && pq->type != PIPE_QUERY_GPU_FINISHED) { assert(setup->active_query[pq->type] == pq); setup->active_query[pq->type] = NULL; } diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index a9809a0..4b59bf3 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -36,6 +36,7 @@ #include "lp_rast.h" #include "lp_state_fs.h" #include "lp_state_setup.h" +#include "lp_context.h" #define NUM_CHANNELS 4 @@ -541,11 +542,6 @@ 
try_setup_line( struct lp_setup_context *setup, y[3] = subpixel_snap(v1[0][1] + y_offset - setup->pixel_offset); } - - - LP_COUNT(nr_tris); - - /* Bounding rectangle (in pixels) */ { /* Yes this is necessary to accurately calculate bounding boxes @@ -598,6 +594,13 @@ try_setup_line( struct lp_setup_context *setup, line->v[1][1] = v2[0][1]; #endif + LP_COUNT(nr_tris); + + if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) { + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + lp_context->pipeline_statistics.c_primitives++; + } + /* calculate the deltas */ plane = GET_PLANES(line); plane[0].dcdy = x[0] - x[1]; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c index 789caa8..7fe7bc5 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_point.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c @@ -29,13 +29,14 @@ * Binning code for points */ -#include "lp_setup_context.h" #include "util/u_math.h" #include "util/u_memory.h" +#include "lp_setup_context.h" #include "lp_perf.h" #include "lp_rast.h" #include "lp_state_fs.h" #include "lp_state_setup.h" +#include "lp_context.h" #include "tgsi/tgsi_scan.h" #define NUM_CHANNELS 4 @@ -376,6 +377,13 @@ try_setup_point( struct lp_setup_context *setup, point->v[0][1] = v0[0][1]; #endif + LP_COUNT(nr_tris); + + if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) { + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + lp_context->pipeline_statistics.c_primitives++; + } + info.v0 = v0; info.dx01 = 0; info.dx12 = fixed_width; diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 2ca47bc..bedd16b 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -38,6 +38,7 @@ #include "lp_rast.h" #include "lp_state_fs.h" #include "lp_state_setup.h" +#include "lp_context.h" #define NUM_CHANNELS 4 @@ -333,6 +334,11 @@ 
do_triangle_ccw(struct lp_setup_context *setup, LP_COUNT(nr_tris); + if (setup->active_query[PIPE_QUERY_PIPELINE_STATISTICS]) { + struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe; + lp_context->pipeline_statistics.c_primitives++; + } + /* Setup parameter interpolants: */ setup->setup.variant->jit_function( v0, @@ -883,6 +889,8 @@ typedef void (*triangle_func_t)(struct lp_setup_context *setup, /** * Subdivide this triangle by bisecting edge (v0, v1). * \param pv the provoking vertex (must = v0 or v1 or v2) + * TODO: should probably think about non-overflowing arithmetic elsewhere. + * This will definitely screw with pipeline counters for instance. */ static void subdiv_tri(struct lp_setup_context *setup, -- 1.7.9.5 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev