From: Ian Romanick <ian.d.roman...@intel.com> On many CPU-limited applications, this is *the* hot path. The idea is to generate per-API versions of brw_draw_prims that elide some checks. This patch removes render-mode and "is everything in VBOs" checks from core-profile contexts.
On my IVB laptop (which may have experienced thermal throttling): Gl32Batch7: 3.70955% +/- 1.11344% OglBatch7: 1.04398% +/- 0.772788% These are the same benchmark, but Gl32Batch7 uses an OpenGL 3.2 Core Profile context. v2: Reorder parameters to brw_try_draw_prims to reduce data shuffling. v3: Pass a gl_api into draw_prims instead of a must-be-core-profile flag. This will make it easier to expand to other profiles later. v4: Make brw_draw_prims_generic be a dispatcher. This way we always use the correct per-API version. This should reduce cache pollution when brw_draw_prims_core is used, but it didn't seem to affect performance one way or the other on my IVB. Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> --- src/mesa/drivers/dri/i965/brw_draw.c | 148 +++++++++++++++++----- src/mesa/drivers/dri/i965/brw_draw.h | 54 ++++++-- src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 6 +- src/mesa/drivers/dri/i965/brw_primitive_restart.c | 4 +- 4 files changed, 164 insertions(+), 48 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a23e9c0..bfd113f 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -416,10 +416,10 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) */ static void brw_try_draw_prims(struct gl_context *ctx, - const struct gl_client_array *arrays[], const struct _mesa_prim *prims, GLuint nr_prims, const struct _mesa_index_buffer *ib, + const struct gl_client_array *arrays[], GLuint min_index, GLuint max_index, struct gl_buffer_object *indirect) @@ -572,23 +572,26 @@ retry: return; } -void -brw_draw_prims(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index, - struct gl_transform_feedback_object *unused_tfb_object, - unsigned stream, - struct gl_buffer_object *indirect) +/** + * \warning + * This function 
must be static, inline, and always_inline. This is the only + * thing that allows the compiler to optimize the tests of \c + * API away. + */ +static inline __attribute__((always_inline)) void +draw_prims(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_buffer_object *indirect, + gl_api API) { struct brw_context *brw = brw_context(ctx); const struct gl_client_array **arrays = ctx->Array._DrawArrays; - assert(unused_tfb_object == NULL); - if (!brw_check_conditional_render(brw)) return; @@ -598,38 +601,108 @@ brw_draw_prims(struct gl_context *ctx, return; } - /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it - * won't support all the extensions we support. + /* Core profile removed GL_SELECT and GL_FEEDBACK. + * + * FINISHME: OpenGL ES (all versions) also remove GL_SELECT and + * FINISHME: GL_FEEDBACK. We could take advantage of this easily. */ - if (ctx->RenderMode != GL_RENDER) { - perf_debug("%s render mode not supported in hardware\n", - _mesa_enum_to_string(ctx->RenderMode)); - _swsetup_Wakeup(ctx); - _tnl_wakeup(ctx); - _tnl_draw_prims(ctx, prims, nr_prims, ib, - index_bounds_valid, min_index, max_index, NULL, 0, NULL); - return; + if (API == API_OPENGL_COMPAT) { + /* Do GL_SELECT and GL_FEEDBACK rendering using swrast, even though it + * won't support all the extensions we support. + */ + if (ctx->RenderMode != GL_RENDER) { + perf_debug("%s render mode not supported in hardware\n", + _mesa_enum_to_string(ctx->RenderMode)); + _swsetup_Wakeup(ctx); + _tnl_wakeup(ctx); + _tnl_draw_prims(ctx, prims, nr_prims, ib, + index_bounds_valid, min_index, max_index, NULL, 0, + NULL); + return; + } } - /* If we're going to have to upload any of the user's vertex arrays, then - * get the minimum and maximum of their index buffer so we know what range - * to upload. 
+ /* Core profile requires that all vertex data be stored in VBOs, so there + * is no need to check whether or not all the data is in VBOs. */ - if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) { - perf_debug("Scanning index buffer to compute index buffer bounds. " - "Use glDrawRangeElements() to avoid this.\n"); - vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims); + if (API != API_OPENGL_CORE) { + /* If we're going to have to upload any of the user's vertex arrays, then + * get the minimum and maximum of their index buffer so we know what range + * to upload. + */ + if (!index_bounds_valid && !vbo_all_varyings_in_vbos(arrays)) { + perf_debug("Scanning index buffer to compute index buffer bounds. " + "Use glDrawRangeElements() to avoid this.\n"); + vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims); + } } /* Try drawing with the hardware, but don't do anything else if we can't * manage it. swrast doesn't support our featureset, so we can't fall back * to it. 
*/ - brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, + brw_try_draw_prims(ctx, prims, nr_prims, ib, arrays, min_index, max_index, indirect); } void +brw_draw_prims_core(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, + struct gl_buffer_object *indirect) +{ + (void) index_bounds_valid; + (void) unused_tfb_object; + (void) stream; + + assert(unused_tfb_object == NULL); + + draw_prims(ctx, + prims, + nr_prims, + ib, + true, /* index_bounds_valid */ + min_index, + max_index, + indirect, + API_OPENGL_CORE); +} + +void +brw_draw_prims_compat(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, + struct gl_buffer_object *indirect) +{ + (void) unused_tfb_object; + (void) stream; + + assert(unused_tfb_object == NULL); + + draw_prims(ctx, + prims, + nr_prims, + ib, + index_bounds_valid, + min_index, + max_index, + indirect, + API_OPENGL_COMPAT); +} + +void brw_draw_init(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -637,7 +710,16 @@ brw_draw_init(struct brw_context *brw) /* Register our drawing function: */ - vbo->draw_prims = brw_draw_prims; + switch (ctx->API) { + case API_OPENGL_COMPAT: + case API_OPENGLES: + case API_OPENGLES2: + vbo->draw_prims = brw_draw_prims_compat; + break; + case API_OPENGL_CORE: + vbo->draw_prims = brw_draw_prims_core; + break; + } for (int i = 0; i < VERT_ATTRIB_MAX; i++) brw->vb.inputs[i].buffer = -1; diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h index f994726..4ae32e1 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.h +++ 
b/src/mesa/drivers/dri/i965/brw_draw.h @@ -30,20 +30,54 @@ #include "main/mtypes.h" /* for struct gl_context... */ #include "vbo/vbo.h" +#include "vbo/vbo_context.h" struct brw_context; -void brw_draw_prims(struct gl_context *ctx, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index, - struct gl_transform_feedback_object *unused_tfb_object, - unsigned stream, - struct gl_buffer_object *indirect ); +/* This is the draw_prims to call in paths where the API is not known at + * compile time. + */ +static inline void +brw_draw_prims_generic(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, + struct gl_buffer_object *indirect) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims(ctx, prims, nr_prims, ib, index_bounds_valid, + min_index, max_index, unused_tfb_object, stream, + indirect); +} + +void brw_draw_prims_compat(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, + struct gl_buffer_object *indirect); + +void brw_draw_prims_core(struct gl_context *ctx, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index, + struct gl_transform_feedback_object *unused_tfb_object, + unsigned stream, + struct gl_buffer_object *indirect); void brw_draw_init( struct brw_context *brw ); void brw_draw_destroy( struct brw_context *brw ); diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c 
b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index f5ecbb5..d4b09c0 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -198,9 +198,9 @@ brw_draw_rectlist(struct gl_context *ctx, struct rect *rect, int num_instances) /* Make sure our internal prim value doesn't clash with a valid GL value. */ assert(!_mesa_is_valid_prim_mode(ctx, prim.mode)); - brw_draw_prims(ctx, &prim, 1, NULL, - GL_TRUE, start, start + count - 1, - NULL, 0, NULL); + brw_draw_prims_generic(ctx, &prim, 1, NULL, + GL_TRUE, start, start + count - 1, + NULL, 0, NULL); } static void diff --git a/src/mesa/drivers/dri/i965/brw_primitive_restart.c b/src/mesa/drivers/dri/i965/brw_primitive_restart.c index 6ed79d7..9a83dac 100644 --- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c +++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c @@ -161,8 +161,8 @@ brw_handle_primitive_restart(struct gl_context *ctx, /* Cut index should work for primitive restart, so use it */ brw->prim_restart.enable_cut_index = true; - brw_draw_prims(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, 0, - indirect); + brw_draw_prims_generic(ctx, prims, nr_prims, ib, GL_FALSE, -1, -1, NULL, + 0, indirect); brw->prim_restart.enable_cut_index = false; } else { /* Not all the primitive draw modes are supported by the cut index, -- 2.1.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev