From: "J Karanje, Kedar" <kedar.j.kara...@intel.com> The feature is enabled by default at build time (make); however, to enable it at runtime the following option must be added to drirc: <option name="enable_bounding_box_culling" value="true"/>
vbo: Main algorithm & code to check for MVP & vertex position location. Build Files: Flags to enable BBOX code and check AVX version. compiler: Code to recognize a simple shader (gl_Position is a simple function of the MVP and the vertex). i965 & util: dri query to check if the feature is enabled. vbo: Implements a bounding box algorithm for Mesa. We hook into the default drawelements and drawrangeelements paths and obtain the MVP and vertex position locations along with the corresponding program. Using this data we re-create the frustum planes, create a box around the object, and use the 8 box vertices to check whether the box is within the frustum. We drop the draw calls that are completely outside the view frustum and go for sub-boxes for objects that intersect the frustum planes. The current patch has been verified on KBL + Ubuntu 16.04: we noticed 8~10% improvement in GFXBench T-Rex offscreen and ~2% for Manhattan offscreen. Platforms where AVX2 is not supported shall still see ~6-8% improvement; the other KPIs were not impacted. Based on empirical data we have set the minimum vertex count to 999 and the sub-box size to 198, which provides the best results. We have also implemented some level of caching for the box coordinates and frustum plane coordinates, and optimized some algorithms to use AVX2 when the target supports it. The shader classification code is currently in HIR, and we have received review comments to move it to NIR.
Signed-off-by: Aravindan Muthukumar <aravindan.muthuku...@intel.com> Signed-off-by: Yogesh Marathe <yogesh.mara...@intel.com> --- Android.common.mk | 19 + configure.ac | 34 +- src/compiler/glsl/ast_to_hir.cpp | 168 +++- src/compiler/glsl/glsl_parser_extras.cpp | 10 + src/compiler/glsl/glsl_parser_extras.h | 7 + src/compiler/glsl/linker.cpp | 18 + src/intel/common/gen_debug.c | 7 + src/mesa/Makefile.sources | 11 + src/mesa/drivers/dri/i965/brw_context.c | 17 + src/mesa/drivers/dri/i965/intel_screen.c | 4 + src/mesa/main/bufferobj.c | 19 + src/mesa/main/mtypes.h | 51 + src/mesa/program/Android.mk | 1 + src/mesa/program/program.c | 3 + src/mesa/vbo/vbo_bbox.c | 1538 ++++++++++++++++++++++++++++++ src/mesa/vbo/vbo_bbox.h | 383 ++++++++ src/mesa/vbo/vbo_bbox_cache.c | 195 ++++ src/mesa/vbo/vbo_context.c | 11 +- src/mesa/vbo/vbo_exec_array.c | 37 +- src/util/00-mesa-defaults.conf | 4 + src/util/xmlpool/t_options.h | 5 + 21 files changed, 2535 insertions(+), 7 deletions(-) mode change 100644 => 100755 src/compiler/glsl/ast_to_hir.cpp create mode 100644 src/mesa/vbo/vbo_bbox.c create mode 100644 src/mesa/vbo/vbo_bbox.h create mode 100644 src/mesa/vbo/vbo_bbox_cache.c diff --git a/Android.common.mk b/Android.common.mk index aa1b266..efd6792 100644 --- a/Android.common.mk +++ b/Android.common.mk @@ -21,6 +21,8 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. 
+MESA_BBOX_ENABLE=true + ifeq ($(LOCAL_IS_HOST_MODULE),true) LOCAL_CFLAGS += -D_GNU_SOURCE endif @@ -80,6 +82,10 @@ LOCAL_CFLAGS += \ -fno-trapping-math \ -Wno-sign-compare +ifeq ($(MESA_BBOX_ENABLE),true) +LOCAL_CFLAGS += -DMESA_BBOX_OPT +endif + LOCAL_CPPFLAGS += \ -D__STDC_CONSTANT_MACROS \ -D__STDC_FORMAT_MACROS \ @@ -87,6 +93,10 @@ LOCAL_CPPFLAGS += \ -Wno-error=non-virtual-dtor \ -Wno-non-virtual-dtor +ifeq ($(MESA_BBOX_ENABLE),true) +LOCAL_CPPFLAGS += -DMESA_BBOX_OPT +endif + # mesa requires at least c99 compiler LOCAL_CONLYFLAGS += \ -std=c99 @@ -98,6 +108,15 @@ ifeq ($(filter 5 6 7 8 9, $(MESA_ANDROID_MAJOR_VERSION)),) LOCAL_CFLAGS += -DHAVE_TIMESPEC_GET endif +ifeq ($(MESA_BBOX_ENABLE),true) +#if defined(CONFIG_AS_AVX) +LOCAL_CONLYFLAGS += -mavx +#elif +LOCAL_CONLYFLAGS += -msse4.1 +#endif +endif + + ifeq ($(strip $(MESA_ENABLE_ASM)),true) ifeq ($(TARGET_ARCH),x86) LOCAL_CFLAGS += \ diff --git a/configure.ac b/configure.ac index 4d9d9e5..dcdbcf3 100644 --- a/configure.ac +++ b/configure.ac @@ -278,7 +278,8 @@ _SAVE_LDFLAGS="$LDFLAGS" _SAVE_CPPFLAGS="$CPPFLAGS" dnl Compiler macros -DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS" +DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -DMESA_BBOX_OPT" +dnl DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS" AC_SUBST([DEFINES]) android=no case "$host_os" in @@ -295,10 +296,38 @@ esac AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes) + +dnl Conditional parameters for enabling BBOX file compilation in Makefile +dnl bbox=yes +dnl MESA_BBOX_ENABLE=true +dnl AM_CONDITIONAL([MESA_BBOX_ENABLE], [test x$bbox= xyes]) + dnl dnl Check compiler flags dnl -AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall"]) + +AC_ARG_WITH([swr-archs], + [AS_HELP_STRING([--with-swr-archs@<:@=DIRS...@:>@], + [comma delimited swr architectures list, e.g. 
+ "avx,avx2,sse4.1,sse4.2" @<:@default="sse4.1,sse4.2"@:>@])], + [with_swr_archs="$withval"], + [with_swr_archs="sse4.1,sse4.2"]) + +swr_archs=`IFS=', '; echo $with_swr_archs` +if test "$swr_archs" = "avx"; then +AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx"]) +AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx"]) +elif test "$swr_archs" = "avx2"; then +AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1 -mavx -mavx2"]) +AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -mavx -mavx2"]) +elif test "$swr_archs" = "sse4.1"; then +AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1"]) +AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1"]) +elif test "$swr_archs" = "sse4.2"; then +AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS -Wall -msse4.1 -msse4.2"]) +AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall -msse4.1 -msse4.2"]) +fi + AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"]) AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes], [CFLAGS="$CFLAGS -Werror=missing-prototypes"]) AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes], [CFLAGS="$CFLAGS -Wmissing-prototypes"]) @@ -313,7 +342,6 @@ dnl dnl Check C++ compiler flags dnl AC_LANG_PUSH([C++]) -AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall"]) AX_CHECK_COMPILE_FLAG([-fno-math-errno], [CXXFLAGS="$CXXFLAGS -fno-math-errno"]) AX_CHECK_COMPILE_FLAG([-fno-trapping-math], [CXXFLAGS="$CXXFLAGS -fno-trapping-math"]) AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [VISIBILITY_CXXFLAGS="-fvisibility=hidden"]) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp old mode 100644 new mode 100755 index 5d3f10b..f4e8dea --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -62,7 +62,6 @@ #include "builtin_functions.h" using namespace ir_builder; - static void detect_conflicting_assignments(struct 
_mesa_glsl_parse_state *state, exec_list *instructions); @@ -1325,6 +1324,124 @@ ast_expression::set_is_lhs(bool new_value) this->subexpressions[0]->set_is_lhs(new_value); } +#ifdef MESA_BBOX_OPT +static bool +is_simple_shader(exec_list *instructions, ast_expression *simple_ast_root, + struct _mesa_glsl_parse_state *state) +{ + ast_expression * subex0 = simple_ast_root->subexpressions[0]; + ast_expression * subex1 = simple_ast_root->subexpressions[1]; + + char temp_identifier[100]; + + subex0->set_is_lhs(true); + + if (subex1->oper == ast_mul) + { + ir_rvalue *rhsParts[3]; + rhsParts[0] = subex1->subexpressions[0]->hir(instructions, state); + rhsParts[1] = subex1->subexpressions[1]->hir(instructions, state); + + if (rhsParts[0]->type->gl_type == GL_FLOAT_MAT4 && + rhsParts[1]->type->gl_type == GL_FLOAT_VEC4) { + foreach_list_typed (ast_node, ast, link, + &subex1->subexpressions[1]->expressions) { + if (((ast_expression *)ast)->oper != ast_identifier && + (((ast_expression *)ast)->oper != ast_int_constant) && + (((ast_expression *)ast)->oper != ast_float_constant)) { + return false; + } + if (((ast_expression *)ast)->oper == ast_identifier && + ((ast_expression *)ast)->primary_expression.identifier) { + strncpy((char *)state->stateVertPosition, + ((ast_expression *)ast)->primary_expression.identifier, + strlen(((ast_expression *)ast)->primary_expression.identifier)); + } + } + } + if (subex0->oper == ast_identifier) { + if (!strcmp(subex0->primary_expression.identifier,"gl_Position")) { + if(subex0 && subex0->primary_expression.identifier) { + strncpy((char *)temp_identifier, + subex0->primary_expression.identifier, + strlen(subex0->primary_expression.identifier)); + } else { + return false; + } + if(subex1 && subex1->subexpressions[0] && + subex1->subexpressions[0]->primary_expression.identifier) { + strncpy((char *)state->stateMVP, + subex1->subexpressions[0]->primary_expression.identifier, + strlen(subex1->subexpressions[0]->primary_expression.identifier)); + } 
else { + return false; + } + + return true; + } else { + if(subex0 && subex0->primary_expression.identifier) { + strncpy((char *)temp_identifier, + subex0->primary_expression.identifier, + strlen(subex0->primary_expression.identifier)); + } else { + return true; + } + + if (subex1->subexpressions[0]->oper == ast_identifier) { + if(subex1 && subex1->subexpressions[0] && + subex1->subexpressions[0]->primary_expression.identifier) { + strncpy((char *)state->stateMVP, + subex1->subexpressions[0]->primary_expression.identifier, + strlen(subex1->subexpressions[0]->primary_expression.identifier)); + } else { + return true; + } + } + return false; //Return false to trigger further parsing. + } + } else { + return false; + } + } else { + if (subex0->primary_expression.identifier != NULL) + if (!strcmp(subex0->primary_expression.identifier,"gl_Position") && + (strlen(temp_identifier) > 0)) { //gl_position = temp; + if (subex1->oper == ast_identifier) { + if (!strcmp((char *)temp_identifier, + subex1->primary_expression.identifier)) { + return true; + } + } else { + return false; + } + } + } + return false; +} +/* + * Function to check if LHS of assign is an input variable Eg: in_position + */ + +static bool +is_attribute(struct _mesa_glsl_parse_state *state) +{ + ir_variable *const var = state->symbols->get_variable(state->stateVertPosition); + ir_rvalue *result = NULL; + void *ctx = state; + if (var != NULL) + { + result = new(ctx) ir_dereference_variable(var); + (void)result; + if (var->data.mode == ir_var_shader_in) + return true; + + } + + return false; + +} +#endif //MESA_BBOX_OPT + ir_rvalue * ast_expression::do_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state, @@ -1400,6 +1517,55 @@ ast_expression::do_hir(exec_list *instructions, unreachable("ast_aggregate: Should never get here."); case ast_assign: { +#ifdef MESA_BBOX_OPT + if (state->stage == MESA_SHADER_VERTEX && + !state->state_bbox_simple_shader && + is_simple_shader(instructions,this,state) && + 
!state->state_shader_analysis_complete) + { + state->state_bbox_simple_shader = true; + } + + if (state->state_bbox_simple_shader && + !state->state_shader_analysis_complete) { + if (!is_attribute(state)) { + if ((ir_dereference_variable *)op[0] != NULL && + ((ir_dereference_variable *)op[0])->var->name != NULL && + strlen(state->stateVertPosition) != 0) + if (!strcmp(((ir_dereference_variable *)op[0])->var->name, + state->stateVertPosition) ) { + if (((ir_instruction *)op[0])->ir_type == ir_type_variable || + ((ir_instruction *)op[0])->ir_type == + ir_type_dereference_variable && + (ir_dereference_variable *)op[1] != NULL && + ((ir_dereference_variable *)op[1])->ir_type == + ir_type_dereference_variable) { + if ((((ir_instruction *)op[1])->ir_type == ir_type_variable || + ((ir_instruction *)op[1])->ir_type == + ir_type_dereference_variable) && + ((ir_dereference_variable *)op[1])->var->name) { + strncpy((char *)state->stateVertPosition, + (char *)((ir_dereference_variable *)op[1])->var->name, + strlen(((ir_dereference_variable *)op[1])->var->name)); + state->state_shader_analysis_complete = true; + } + } + else { + state->state_bbox_simple_shader = false; + state->state_shader_analysis_complete = true; + } + } + else { + state->state_bbox_simple_shader = false; + state->state_shader_analysis_complete = true; + + } + } + else { + state->state_shader_analysis_complete = true; + } + } +#endif //MESA_BBOX_OPT this->subexpressions[0]->set_is_lhs(true); op[0] = this->subexpressions[0]->hir(instructions, state); op[1] = this->subexpressions[1]->hir(instructions, state); diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 42ba88f..c540b3a 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -2121,6 +2121,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, if (!state->error && !state->translation_unit.is_empty()) _mesa_ast_to_hir(shader->ir, 
state); +#ifdef MESA_BBOX_OPT + shader->shader_bbox_simple_shader = state->state_bbox_simple_shader; + if (shader->shader_bbox_simple_shader) + { + strncpy((char *)shader->shaderMVP,state->stateMVP, + strlen(state->stateMVP)); + strncpy((char *)shader->shaderVertPosition,state->stateVertPosition, + strlen(state->stateVertPosition)); + } +#endif //MESA_BBOX_OPT if (!state->error) { validate_ir_tree(shader->ir); diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index da44d37..b951a66 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -887,6 +887,13 @@ struct _mesa_glsl_parse_state { * so we can check totals aren't too large. */ unsigned clip_dist_size, cull_dist_size; + +#ifdef MESA_BBOX_OPT + bool state_shader_analysis_complete; + bool state_bbox_simple_shader; + char stateMVP[20] = {'\0'}; + char stateVertPosition[100] = {'\0'}; +#endif }; # define YYLLOC_DEFAULT(Current, Rhs, N) \ diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 3ce78fe..210f37f 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -4876,6 +4876,24 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) switch (stage) { case MESA_SHADER_VERTEX: +#ifdef MESA_BBOX_OPT + if (shader_list[MESA_SHADER_VERTEX][0] != NULL) { + sh->linked_bbox_simple_shader = + shader_list[MESA_SHADER_VERTEX][0]->shader_bbox_simple_shader; + /*TBD: How do we handle multiple Vertex Shaders + * being linked ?? 
+ * MVP Name copied to get MVP in VBO + */ + strncpy((char *)sh->linkedshaderMVP, + shader_list[MESA_SHADER_VERTEX][0]->shaderMVP, + strlen(shader_list[MESA_SHADER_VERTEX][0]->shaderMVP)); + /* Vertex Position attribute name */ + strncpy((char *)sh->linkedshaderVertPosition, + shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition, + strlen( + shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition)); + } +#endif validate_vertex_shader_executable(prog, sh, ctx); break; case MESA_SHADER_TESS_CTRL: diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c index a978f2f..c677208 100644 --- a/src/intel/common/gen_debug.c +++ b/src/intel/common/gen_debug.c @@ -106,6 +106,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage) static void brw_process_intel_debug_variable_once(void) { +#if defined(__ANDROID__) || defined(ANDROID) + setenv("MESA_GLSL_CACHE_DISABLE","true",1); + setenv("MESA_BBOX_MIN_VERTEX_CNT", "999", 1); + setenv("MESA_BBOX_OPT_ENABLE", "3", 1); + setenv("MESA_OPT_SPLIT_SIZE", "198", 1); +#endif + INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control); } diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 0d3c277..8148622 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -1,4 +1,5 @@ ### Lists of source files, included by Makefiles +MESA_BBOX_ENABLE = false # this is part of MAIN_FILES MAIN_ES_FILES = \ @@ -400,6 +401,7 @@ TNL_FILES = \ tnl/t_vp_build.c \ tnl/t_vp_build.h + VBO_FILES = \ vbo/vbo_attrib.h \ vbo/vbo_attrib_tmp.h \ @@ -422,6 +424,15 @@ VBO_FILES = \ vbo/vbo_save.h \ vbo/vbo_save_loopback.c +#ifeq($(MESA_BBOX_ENABLE), true) + +VBO_FILES += \ + vbo/vbo_bbox_cache.c \ + vbo/vbo_bbox.c \ + vbo/vbo_bbox.h + +#endif + STATETRACKER_FILES = \ state_tracker/st_atifs_to_tgsi.c \ state_tracker/st_atifs_to_tgsi.h \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 6ba64e4..03cf861 100644 --- 
a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -48,6 +48,10 @@ #include "vbo/vbo.h" +#ifdef MESA_BBOX_OPT +#include "vbo/vbo_bbox.h" +#endif + #include "drivers/common/driverfuncs.h" #include "drivers/common/meta.h" #include "utils.h" @@ -890,6 +894,11 @@ brw_process_driconf_options(struct brw_context *brw) ctx->Const.AllowGLSLCrossStageInterpolationMismatch = driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch"); +#ifdef MESA_BBOX_OPT + ctx->Const.EnableBoundingBoxCulling = + driQueryOptionb(options, "enable_bounding_box_culling"); +#endif //MESA_BBOX_OPT + ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20); driComputeOptionsSha1(&brw->screen->optionCache, ctx->Const.dri_config_options_sha1); @@ -1001,6 +1010,14 @@ brwCreateContext(gl_api api, brw_process_driconf_options(brw); +#ifdef MESA_BBOX_OPT + if (ctx->Const.EnableBoundingBoxCulling) { + MESA_BBOX("EnableBoundingBoxCulling True\n"); + vbo_bbox_init(ctx); + } else + MESA_BBOX("EnableBoundingBoxCulling False\n"); +#endif //MESA_BBOX_OPT + if (INTEL_DEBUG & DEBUG_PERF) brw->perf_debug = true; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index eaf5a3b..35c7624 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -88,6 +88,10 @@ DRI_CONF_BEGIN DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false") DRI_CONF_FORCE_GLSL_ABS_SQRT("false") +#ifdef MESA_BBOX_OPT + DRI_CONF_ENABLE_BOUNDING_BOX_CULLING("true") +#endif + DRI_CONF_OPT_BEGIN_B(shader_precompile, "true") DRI_CONF_DESC(en, "Perform code generation at shader link time.") DRI_CONF_OPT_END diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 1d1e51b..67a369b 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -48,6 +48,9 @@ #include "varray.h" #include "util/u_atomic.h" +#ifdef MESA_BBOX_OPT +#include "vbo/vbo_bbox.h" +#endif /* Debug flags 
*/ /*#define VBO_DEBUG*/ @@ -2254,6 +2257,10 @@ buffer_sub_data(GLenum target, GLuint buffer, GLintptr offset, if (no_error || validate_buffer_sub_data(ctx, bufObj, offset, size, func)) _mesa_buffer_sub_data(ctx, bufObj, offset, size, data); + +#ifdef MESA_BBOX_OPT + vbo_bbox_element_buffer_update(ctx,bufObj,data,offset,size); +#endif } @@ -2589,9 +2596,17 @@ validate_and_unmap_buffer(struct gl_context *ctx, #endif #ifdef VBO_DEBUG +#ifdef MESA_BBOX_OPT + if (bufObj->StorageFlags & GL_MAP_WRITE_BIT) { +#else if (bufObj->AccessFlags & GL_MAP_WRITE_BIT) { +#endif GLuint i, unchanged = 0; +#ifdef MESA_BBOX_OPT + GLubyte *b = (GLubyte *) bufObj->Data; +#else GLubyte *b = (GLubyte *) bufObj->Pointer; +#endif GLint pos = -1; /* check which bytes changed */ for (i = 0; i < bufObj->Size - 1; i++) { @@ -3154,7 +3169,11 @@ map_buffer_range(struct gl_context *ctx, struct gl_buffer_object *bufObj, /* Access must be write only */ if ((access & GL_MAP_WRITE_BIT) && (!(access & ~GL_MAP_WRITE_BIT))) { GLuint i; +#ifdef MESA_BBOX_OPT + GLubyte *b = (GLubyte *) bufObj->Data; +#else GLubyte *b = (GLubyte *) bufObj->Pointer; +#endif for (i = 0; i < bufObj->Size; i++) b[i] = i & 0xff; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 9fd577d..c262173 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1359,6 +1359,9 @@ struct gl_buffer_object bool MinMaxCacheDirty; bool HandleAllocated; /**< GL_ARB_bindless_texture */ +#ifdef MESA_BBOX_OPT + int data_change_counter; //TBD: Same as RefCount to check +#endif }; @@ -2535,6 +2538,12 @@ struct gl_linked_shader * stores information that is also needed during linking. 
*/ struct gl_shader_spirv_data *spirv_data; + +#ifdef MESA_BBOX_OPT + bool linked_bbox_simple_shader; + char linkedshaderMVP[20]; + char linkedshaderVertPosition[100]; +#endif }; @@ -2631,6 +2640,11 @@ struct gl_shader /* ARB_gl_spirv related data */ struct gl_shader_spirv_data *spirv_data; +#ifdef MESA_BBOX_OPT + bool shader_bbox_simple_shader; + char shaderMVP[20]; + char shaderVertPosition[100]; +#endif }; @@ -2902,6 +2916,9 @@ struct gl_shader_program_data * ARB_gl_spirv extension. */ bool spirv; +#ifdef MESA_BBOX_OPT + GLuint vbo_bbox_mvp_location; +#endif }; /** @@ -3108,6 +3125,10 @@ struct gl_pipeline_shader_state /** Pipeline objects */ struct _mesa_HashTable *Objects; +#ifdef MESA_BBOX_OPT + /* Bounding box draw optimization control structure */ + struct mesa_bbox_opt *BboxOpt; +#endif }; /** @@ -4076,6 +4097,12 @@ struct gl_constants /** GL_ARB_gl_spirv */ struct spirv_supported_capabilities SpirVCapabilities; + +#ifdef MESA_BBOX_OPT + /** MESA_BBOX_OPT Runtime enable_bounding_box_culling*/ + bool EnableBoundingBoxCulling; +#endif + }; @@ -4720,6 +4747,21 @@ struct gl_semaphore_object GLuint Name; /**< hash table ID/name */ }; +#ifdef MESA_BBOX_OPT +/** + * Bounding volume classification types + */ +typedef enum +{ + BOUNDING_VOLUME_AABB = 0, + BOUNDING_VOLUME_OBB = 1, + BOUNDING_VOLUME_SPHERE = 2, + BOUNDING_VOLUME_DOP = 3, + BOUNDING_VOULME_MIXED = 4, + BOUNDING_VOLUME_MAX = 5, +} bounding_volume_type; +#endif //MESA_BBOX_OPT + /** * Mesa rendering context. 
* @@ -5096,6 +5138,15 @@ struct gl_context struct hash_table_u64 *ResidentTextureHandles; struct hash_table_u64 *ResidentImageHandles; /*@}*/ + +#ifdef MESA_BBOX_OPT + /** + * Bounding volume type + * + */ + bounding_volume_type volume_type; +#endif //MESA_BBOX_OPT + }; /** diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index c6470e6..6489d3e 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -75,6 +75,7 @@ $(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program_lexer.l LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa \ + $(MESA_TOP)/src/mesa/vbo \ $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index 6ab1bf5..e0b3563 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -43,6 +43,9 @@ #include "util/ralloc.h" #include "util/u_atomic.h" +#ifdef MESA_BBOX_OPT +#include "vbo/vbo_bbox.h" +#endif /** * A pointer to this dummy program is put into the hash table when diff --git a/src/mesa/vbo/vbo_bbox.c b/src/mesa/vbo/vbo_bbox.c new file mode 100644 index 0000000..f1e153d --- /dev/null +++ b/src/mesa/vbo/vbo_bbox.c @@ -0,0 +1,1538 @@ +/* + * Copyright � 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * \brief VBO BBOX module implementation + * \author Kedar Karanje + */ + +#pragma GCC optimize (0) +#include "vbo_bbox.h" +#include <fcntl.h> +#define BBOX_MAX_FRAMES_TO_DYNAMICALLY_UPDATE (5) +#define BBOX_MIN_EFFECTIVE_DRAWS_TO_DYNAMICALLY_UPDATE (5) +#define BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (-100) +#define BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (100) +#define BBOX_MIN_GPU_HEAD_ROOM_TO_PROCESS_ELEMENT_BUFFER (100) + +/** + * Min part of split draw that we want to drop + */ +#define BBOX_MIN_SPLITTED_DRAW_TO_DROP (1) + +#ifdef __AVX__ +static __m256 fullMin; +static __m256 fullMax; +#else +static __m128 fullMin; +static __m128 fullMax; +#endif + +/* Segment functions */ + +static inline GLboolean +intersect(const struct gl_segment *s_src,const struct gl_segment *s_tar) +{ + return (s_src->Left < s_tar->Right && s_src->Right > s_tar->Left); +} + + +static inline GLboolean +subsegment(const struct gl_segment *s_src,const struct gl_segment *s_tar) +{ + return (s_src->Left <= s_tar->Left && s_src->Right >= s_tar->Right); +} + + +static inline GLboolean +superset(const struct gl_segment *s_src,const struct gl_segment *s_tar) +{ + return subsegment(s_tar,s_src); +} + + +static void +normalize(vbo_bbox_frustum_plane *fr_plane) +{ + GLfloat a,b,c; + a = fr_plane->a; + b = fr_plane->b; + c = fr_plane->c; + + GLfloat norm = 1.0f/sqrt(a*a + b*b + c*c); + + fr_plane->a *= norm; + fr_plane->b *= norm; + fr_plane->c *= norm; + fr_plane->d *= norm; +}; + +static inline 
+int vbo_bbox_get_delay(struct mesa_bbox_opt *opt) +{ + if (mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_RECALC) { + if (opt->calc_delay > BBOX_CALC_MAX_DELAY) { + opt->calc_delay = BBOX_CALC_MIN_DELAY; + } + return opt->calc_delay++; + } + else { + return 0; + } +} + +static inline +bool vbo_init_sub_bbox_array(int bbox_count, struct vbo_bbox_cache_data* data) +{ + bool allocate_bbox = true; + if(data->sub_bbox_array != NULL) { + if(bbox_count == data->sub_bbox_cnt) { + allocate_bbox = false; + } + else { + free(data->sub_bbox_array); + data->sub_bbox_array = NULL; + } + } + if (allocate_bbox) + data->sub_bbox_array = (struct bounding_info*) malloc( + bbox_count * sizeof (struct bounding_info)); + + if (!data->sub_bbox_array) + return false; + + data->sub_bbox_cnt = bbox_count; + return true; +} + +static inline GLint +vbo_bbox_get_mvp(struct gl_context *ctx) +{ + struct gl_linked_shader * linked_shader = + ctx->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]; + + return _mesa_GetUniformLocation(ctx->Shader.ActiveProgram->Name, + linked_shader->linkedshaderMVP); +} + +/* + * Gets the currently linked shaders flag for simple shader + * + */ +static inline int +vbo_is_simple_shader(struct gl_context *ctx) +{ + struct gl_linked_shader * linked_shader = + ctx->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]; + + return linked_shader->linked_bbox_simple_shader; +} + +/** + * Get current VAO + */ +static inline struct gl_vertex_array_object* +vbo_get_current_vao(struct gl_context *const gc) +{ + assert(gc); + struct gl_vertex_array_object* vao = gc->Array.VAO; + assert(vao); + + return vao; +} + +/** + * Returns the location of the "position" in the attributes of the + * currently active program + */ +static inline +int vbo_get_simple_vs_position_attr_location( + struct gl_context *const gc) +{ + assert(gc); + struct gl_linked_shader * linked_shader = + gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]; + GLint 
vertexPosLocation = _mesa_GetAttribLocation( + gc->Shader.ActiveProgram->Name, + linked_shader->linkedshaderVertPosition); + if (vertexPosLocation >= 0) + return vertexPosLocation; + else + return -1; +} + +/** + * Get element-buffer handle of the current VAO + */ +static inline struct gl_buffer_object* +vbo_get_current_element_buffer(struct gl_context *const gc) +{ + assert(gc); + struct gl_buffer_object* element_buffer = + vbo_get_current_vao(gc)->IndexBufferObj; + assert(element_buffer); + return element_buffer; +} + +/** + * Get vertex-binding of position from the current VAO + */ +static inline struct gl_vertex_buffer_binding* +vbo_get_current_vertex_buffer_binding_of_position(struct gl_context *const gc) +{ + assert(gc); + struct gl_vertex_array_object* vao = vbo_get_current_vao(gc); + + GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask; + const struct gl_array_attributes *attrib_array = + &vao->VertexAttrib[ffs(mask) - 1]; + struct gl_vertex_buffer_binding *buffer_binding = + &vao->BufferBinding[attrib_array->BufferBindingIndex]; + + return buffer_binding; +} + +/** + * Get vertex-buffer handle of the current VAO + */ +static inline struct gl_buffer_object* +vbo_get_current_vertex_buffer(struct gl_context *const gc) +{ + assert(gc); + struct gl_buffer_object* pVertexBuffer = + vbo_get_current_vertex_buffer_binding_of_position(gc)->BufferObj; + assert(pVertexBuffer); + return pVertexBuffer; +} + + +/** + * Condition to enter bounding box optimization + */ +static inline bool +vbo_bbox_check_supported_draw_call(struct gl_context *const gc, + GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + + assert(gc); + int shader_scenario; + struct gl_linked_shader *_LinkedShaders; + + /* Check if the minimum vertex count is met. */ + if (count < (GLsizei) mesa_bbox_env_variables.bbox_min_vrtx_count) { + /* Count is most common cause to bail out form optimization + * so should be first. 
+ */ + MESA_BBOX("Aborting MESA_BBOX :%d: Vertex count too small, minimum count = %d\n", + count,mesa_bbox_env_variables.bbox_min_vrtx_count); + return false; + } + + if (mode != GL_TRIANGLES) { + MESA_BBOX("Aborting MESA_BBOX :%d: Primitive mode is not GL_TRIANGLES, \ + mode = %d\n", count, mode); + return false; + } + + /* Examine current shader */ + if (!gc->_Shader->ActiveProgram) { + MESA_BBOX("Aborting MESA_BBOX:%d: No active GLSL program.\n", count); + return false; + } + + /* BASIC Shader scenario is when we have just VS & FS */ + if (gc->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL && + gc->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] != NULL && + gc->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL] == NULL && + gc->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] == NULL && + gc->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] == NULL) { + shader_scenario = 0; + } + else + shader_scenario = 1; + + if (shader_scenario) { + MESA_BBOX("Aborting MESA_BBOX:%d: GLSL program must contain only vertex and \ + fragment shaders, shader scenario = \n", count ); + return false; + } + + if (!vbo_is_simple_shader(gc)) { + MESA_BBOX("Aborting MESA_BBOX:%d: GLSL vertex shader does not have simple \ + position calculation \n", count); + return false; + } + if (gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]) { + _LinkedShaders = + gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]; + + MESA_BBOX("MVP:%s, VertPos:%s\n",_LinkedShaders->linkedshaderMVP, + _LinkedShaders->linkedshaderVertPosition); + } + + /* Examine element buffer */ + struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc); + if ((!element_buffer) || element_buffer->Name == 0) { + MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer name is 0\n", count); + return false; + } + + if (!(element_buffer->StorageFlags & + (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){ + MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer not resident: %#x\n", count, + 
element_buffer->StorageFlags); + return false; + } + + /* Get VertexPosLocation */ + int vertexPosLocation = 0; + if (gc->Shader.ActiveProgram) + vertexPosLocation = vbo_get_simple_vs_position_attr_location(gc); + if (vertexPosLocation < 0) + { + MESA_BBOX("Aborting MESA_BBOX:%d: VertexPosition Location is inValid:\n", count); + return false; + } + + struct gl_vertex_array_object* vao = vbo_get_current_vao(gc); + int posAttribMapMode = + _mesa_vao_attribute_map[vao->_AttributeMapMode][vertexPosLocation]; + + if (((vao->_Enabled >> posAttribMapMode) & 0x1) != 1) + { + MESA_BBOX("Aborting MESA_BBOX:%d: Vertex data does not come from VBO , GL-API:%d\n", count,gc->API); +//#if !defined(__ANDROID__) || !defined(ANDROID) +//This is not specific to Android but to the GLES API + if (gc->API != API_OPENGLES && gc->API != API_OPENGLES2) + return false; +//#endif + } + + struct gl_buffer_object* vertexattrib_buffer = + vbo_get_current_vertex_buffer(gc); + if ((!vertexattrib_buffer) || vertexattrib_buffer->Name == 0) { + MESA_BBOX("Aborting MESA_BBOX:%d: Vertex buffer %p name is %d\n", count, + vertexattrib_buffer,vertexattrib_buffer->Name); +#if !defined(__ANDROID__) || !defined(ANDROID) + return false; +#endif + } + + if (!(vertexattrib_buffer->StorageFlags & + (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))){ + MESA_BBOX("Aborting MESA_BBOX:%d:Vertex buffer not resident %#x \n", count, + vertexattrib_buffer->StorageFlags); + MESA_BBOX("Aborting MESA_BBOX:VAO AttributeMode is %d\n", vao->_AttributeMapMode); +#if !defined(__ANDROID__) || !defined(ANDROID) + return false; +#endif + } + + /* Examine vertex position attribute configuration */ + if (vao->VertexAttrib[posAttribMapMode].Enabled) { + if (vao->VertexAttrib[posAttribMapMode].Size != 3) + { + MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib size :%d, only 3 supported\n", + count, vao->VertexAttrib[VERT_ATTRIB_POS].Size); +#if !defined(__ANDROID__) || !defined(ANDROID) + return false; +#endif + } + if 
(vao->VertexAttrib[posAttribMapMode].Type != GL_FLOAT) + { + MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib type is %d, only GL_FLOAT \ + supported\n", count, vao->VertexAttrib[VERT_ATTRIB_POS].Type); + return false; + } + } + + if (type != GL_UNSIGNED_SHORT) { + MESA_BBOX("Aborting MESA_BBOX:%d: type is %d, only GL_UNSIGNED_SHORT \ + supported\n", count, type); + return false; + } + if (basevertex != 0) { + MESA_BBOX("Aborting MESA_BBOX:%d: basevertex is 0 \n", count); + return false; + } + + /* If size ==3 and type == GL_FLOAT, then element stride must be 12. */ + assert(vao->VertexAttrib[VERT_ATTRIB_POS].StrideB == 12); + + /* When transform feedback is capturing we cannot do early clipping since + * xfb must write unclipped vertices + * Note - we could check for IsCapturing() but that would require + * more elaborate checking for VBO modifications. + */ + if (gc->TransformFeedback.CurrentObject->Active) { + MESA_BBOX("MESA_BBOX:%d: Transform feedback is active, \ + cannot clip\n", count); + return false; + } + return true; +} + + +/** + * Check condition to enter bounding box optimization and if draw call + * is suitable prepare key describing given geometry. 
+ */
+static inline
+void vbo_bbox_prepare_key(struct gl_context *const gc, GLenum mode,
+                          GLsizei count, GLenum type, GLuint type_size,
+                          const GLvoid *indices, GLint basevertex,
+                          vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   /* Look up the objects that identify the geometry of this draw call. */
+   struct gl_buffer_object *index_bo = vbo_get_current_element_buffer(gc);
+   struct gl_buffer_object *vertex_bo = vbo_get_current_vertex_buffer(gc);
+   struct gl_vertex_buffer_binding *pos_binding =
+      vbo_get_current_vertex_buffer_binding_of_position(gc);
+
+   /* Clear every byte of the key before filling it in.  The key is handed
+    * to the cache together with its size, so presumably it is hashed or
+    * compared as raw bytes -- TODO confirm against the cache code.
+    */
+   memset(key, 0, sizeof(*key));
+
+   /* Where the data lives */
+   key->element_buf_name = index_bo->Name;
+   key->vertex_buf_name = vertex_bo->Name;
+   key->offset = (GLint)pos_binding->Offset;
+   key->stride = pos_binding->Stride;
+
+   /* What is drawn from it */
+   key->mode = mode;
+   key->count = count;
+   key->indices_type = type;
+   key->type_size = type_size;
+   key->indices = (GLint) (uintptr_t) indices;
+   key->basevertex = basevertex;
+}
+
+
+/**
+ * Create a bounding box descriptor in a form of 8 correctly
+ * ordered vertex coordinates. The order of coordinates is significant.
+ */ +static +void vbo_bbox_create_bounding_box(float* const minVec3f, float* const maxVec3F, + vbo_vec4f* vertices4) +{ + assert(minVec3f); + assert(maxVec3F); + assert(vertices4); + + float Xmin = minVec3f[0]; + float Ymin = minVec3f[1]; + float Zmin = minVec3f[2]; + float Wmin = 1.0f; + + float Xmax = maxVec3F[0]; + float Ymax = maxVec3F[1]; + float Zmax = maxVec3F[2]; + float Wmax = 1.0f; + + float* v = (float*)vertices4; + int i = 0; + v[i+0] = Xmin; v[i+1] = Ymin; v[i+2] = Zmin;v[i+3]=Wmin; i+=4; + v[i+0] = Xmax; v[i+1] = Ymin; v[i+2] = Zmin;v[i+3]=Wmin; i+=4; + v[i+0] = Xmin; v[i+1] = Ymax; v[i+2] = Zmin;v[i+3]=Wmin; i+=4; + v[i+0] = Xmax; v[i+1] = Ymax; v[i+2] = Zmin;v[i+3]=Wmin; i+=4; + + v[i+0] = Xmin; v[i+1] = Ymin; v[i+2] = Zmax;v[i+3]=Wmax; i+=4; + v[i+0] = Xmax; v[i+1] = Ymin; v[i+2] = Zmax;v[i+3]=Wmax; i+=4; + v[i+0] = Xmin; v[i+1] = Ymax; v[i+2] = Zmax;v[i+3]=Wmax; i+=4; + v[i+0] = Xmax; v[i+1] = Ymax; v[i+2] = Zmax;v[i+3]=Wmax; i+=4; +} + +#ifdef __AVX__ +/* Calculate bbox Subbox Coordinates */ +static void +vbo_bbox_calc_subbox_coordinates(unsigned int vertSubBox,unsigned int vertCount, + unsigned int first_idx,unsigned int second_idx, + unsigned short* indices,float* vertices, + unsigned int stride, + struct vbo_bbox_cache_data *data) +{ + + /* Retrieving the starting offset of the first and second subbox */ + unsigned int first = first_idx * vertSubBox; + unsigned int second = second_idx * vertSubBox; + + float tmpVertexBuf[8] = {0.0}; + float tmp_buf_min[8] = {0.0}; + float tmp_buf_max[8] = {0.0}; + + __m256 subMin = _mm256_set1_ps(FLT_MAX); + __m256 subMax = _mm256_set1_ps(-FLT_MAX); + + /* Run both the subboxes for vertex count */ + for(unsigned int iter = 0; iter < vertCount; iter++){ + /* Calculate the vertex offset of first subbox */ + unsigned short index1 = indices[first+iter]; + /* Fetching the vertices for the first subbox */ + float* vertex1 = (float *)((char *)(vertices) + stride*index1); + memcpy(tmpVertexBuf,vertex1, 3 * 
sizeof(float)); + + /* Calculate the vertex offset of second subbox */ + unsigned short index2 = indices[second+iter]; + + /* Fetching the vertices for the second subbox */ + float* vertex2 = (float *)((char *)(vertices) + stride*index2); + memcpy(tmpVertexBuf+4,vertex2, 3 * sizeof(float)); + + __m256 tmp = _mm256_loadu_ps(tmpVertexBuf); + subMin = _mm256_min_ps(subMin, tmp); + subMax = _mm256_max_ps(subMax, tmp); + } + + /* compare full box values */ + fullMin = _mm256_min_ps(fullMin, subMin); + fullMax = _mm256_max_ps(fullMax, subMax); + + /* store results */ + _mm256_storeu_ps(tmp_buf_min, subMin); + _mm256_storeu_ps(tmp_buf_max, subMax); + + + /* Update the min and max values in sub box coordinates for both + * the subboxes + */ + vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max, + &(data->sub_bbox_array[first_idx].bounding_volume.vert_vec4[0])); + vbo_bbox_create_bounding_box(tmp_buf_min+4, tmp_buf_max+4, + &(data->sub_bbox_array[second_idx].bounding_volume.vert_vec4[0])); +} +#else +static void +vbo_bbox_calc_subbox_coordinates( + unsigned int vertSubBox, + unsigned int vertCount, + unsigned int idx, + unsigned short *indices, + float *vertices, + unsigned int stride, + struct vbo_bbox_cache_data *data) +{ + /* Retrieving the starting offset of the first and second subbox */ + unsigned int first = idx * vertSubBox; + + float tmpVertexBuf[4] = {0.0}; + float tmp_buf_min[4] = {0.0}; + float tmp_buf_max[4] = {0.0}; + + __m128 subMin = _mm_set1_ps(FLT_MAX); + __m128 subMax = _mm_set1_ps(-FLT_MAX); + + /* Run both the subboxes for vertex count */ + for(unsigned int iter = 0; iter < vertCount; iter++){ + + /* Calculate the vertex offset of first subbox */ + unsigned short index = indices[first+iter]; + /* Fetching the vertices for the first subbox */ + float* vertex = (float *)((char *)(vertices) + stride*index); + memcpy(tmpVertexBuf,vertex, 3 * sizeof(float)); + + __m128 tmp = _mm_loadu_ps(tmpVertexBuf); + subMin = _mm_min_ps(subMin, tmp); + subMax = 
_mm_max_ps(subMax, tmp); + } + + /* compare full box values */ + fullMin = _mm_min_ps(fullMin, subMin); + fullMax = _mm_max_ps(fullMax, subMax); + + /* store results */ + _mm_storeu_ps(tmp_buf_min, subMin); + _mm_storeu_ps(tmp_buf_max, subMax); + + /* Update the min and max values in sub box coordinates for both + * the subboxes + */ + vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max, + &(data->sub_bbox_array[idx].bounding_volume.vert_vec4[0])); +} +#endif + +/** + * Get pointer to VBO data. + * Pointer should be suitable for fast data reading, not data change. + */ +static +bool vbo_bbox_get_vbo_ptr(struct gl_context* gc, struct gl_buffer_object* vbo, + int offset, void** data, int* dataSize) +{ + assert(gc); + assert(vbo); + GLubyte* vboDataPtr = NULL; + + if (offset >= vbo->Size) { + return false; + } + vboDataPtr = _mesa_MapNamedBuffer(vbo->Name,GL_WRITE_ONLY_ARB); + if (vboDataPtr == NULL) { + return false; + } + *data = vboDataPtr + offset; + *dataSize = vbo->Size - offset; + + return true; +} + +/** + * Unlock VBO + */ +static inline +void vbo_bbox_release_vbo_ptr(struct gl_context* gc, + struct gl_buffer_object* vbo) +{ + assert(gc); + assert(vbo); + _mesa_UnmapNamedBuffer_no_error(vbo->Name); +} + +/** + * Check if given range of indices contains only degenerate triangles. + */ +static +bool vbo_bbox_is_degenerate(GLvoid *indices, GLuint count_in) +{ + assert(indices); + assert(count_in % 3 == 0); + + GLuint triangle_count = count_in / 3; + GLuint input_idx = 0; + + GLushort* ptr = (GLushort*)indices; + for (GLuint i = 0; i < triangle_count; i++) { + GLushort a = ptr[input_idx++]; + GLushort b = ptr[input_idx++]; + GLushort c = ptr[input_idx++]; + if (!(a == b || a == c || b == c)) { + return false; + } + } + return true; +} + + +/** + * Calculate bounding boxes for given geometry. 
+ */ +static +bool vbo_bbox_calculate_bounding_boxes_with_indices(struct gl_context *const gc, + const vbo_bbox_cache_key *key, + struct vbo_bbox_cache_data *data, + void* indexData, + int indexDataSize) +{ + assert(gc); + + void* vertex_data = NULL; + int vertex_datasize = 0; + int vert_per_subBbox = mesa_bbox_env_variables.bbox_split_size; + int sub_bbox_cnt = (key->count + vert_per_subBbox -1)/vert_per_subBbox; + int *subbox_array; + int idx =0; + int non_degen_count = 0; + + assert(sub_bbox_cnt); + + struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc, + key->element_buf_name); + + struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc, + key->vertex_buf_name); + + if (element_buffer == NULL || vertexattrib_buffer == NULL) { + return false; + } + + if (!vbo_bbox_get_vbo_ptr(gc, vertexattrib_buffer,(int) key->offset, + &vertex_data, &vertex_datasize)) { + return false; + } + + assert(vertex_data); + assert(vertex_datasize > 0); + assert(indexData); + assert(indexDataSize > 0); + assert(key->indices_type == GL_UNSIGNED_SHORT); + assert(indexDataSize > key->count * (int)key->type_size); + + /* Allocate memory for bounding boxes */ + if (!vbo_init_sub_bbox_array(sub_bbox_cnt,data)) { + vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer); + return false; + } + /* Initialize size of bounding boxes */ + for (int i = 0; i < sub_bbox_cnt; i++) { + data->sub_bbox_array[i].vert_count = (i==sub_bbox_cnt-1)? 
+ (key->count - i*vert_per_subBbox):vert_per_subBbox; + data->sub_bbox_array[i].start_offset = i*vert_per_subBbox * key->type_size; + } + + subbox_array = malloc(sub_bbox_cnt * sizeof(int)); + + /* Check if all triangles withing bbox are degenerate (i.e triangles with + zero area) */ + for (int i = 0; i < sub_bbox_cnt; i++) { + GLubyte* ptr = (GLubyte *)indexData; + data->sub_bbox_array[i].is_degenerate = + vbo_bbox_is_degenerate((ptr + data->sub_bbox_array[i].start_offset), + data->sub_bbox_array[i].vert_count); + + if(!data->sub_bbox_array[i].is_degenerate) + { + subbox_array[idx++] = i; + non_degen_count++; + } + } + + float tmp_buf_min[8] = {0.0}; + float tmp_buf_max[8] = {0.0}; + +#ifdef __AVX__ + int odd = non_degen_count % 2; + int num_iter = non_degen_count/2; + int iter; + + fullMin = _mm256_set1_ps(FLT_MAX); + fullMax = _mm256_set1_ps(-FLT_MAX); + idx = 0; + for(iter = 0; iter < num_iter;iter++){ + idx = 2*iter; + if(data->sub_bbox_array[subbox_array[idx]].vert_count == + data->sub_bbox_array[subbox_array[idx+1]].vert_count) + { + /* call the algorithm with the count */ + vbo_bbox_calc_subbox_coordinates( + vert_per_subBbox, + data->sub_bbox_array[subbox_array[idx]].vert_count, + subbox_array[idx], + subbox_array[idx+1], + (GLushort*)indexData, + (GLfloat*)vertex_data, + key->stride, + data + ); + } + else + { + /* call the first one separately */ + vbo_bbox_calc_subbox_coordinates(vert_per_subBbox, + data->sub_bbox_array[subbox_array[idx]].vert_count, + subbox_array[idx], + subbox_array[idx], + (GLushort*)indexData, + (GLfloat*)vertex_data, + key->stride, + data); + + /* call the second one separately */ + vbo_bbox_calc_subbox_coordinates(vert_per_subBbox, + data->sub_bbox_array[subbox_array[idx+1]].vert_count, + subbox_array[idx+1], + subbox_array[idx+1], + (GLushort*)indexData, + (GLfloat*)vertex_data, + key->stride, + data); + + } + + } + + if(odd) + { + idx = 2*iter; + /* call the last one separately */ + 
vbo_bbox_calc_subbox_coordinates(vert_per_subBbox, + data->sub_bbox_array[subbox_array[idx]].vert_count, + subbox_array[idx], + subbox_array[idx], + (GLushort*)indexData, + (GLfloat*)vertex_data, + key->stride, + data); + } + + /* Finding the minimum from the full box 256 */ + __m128 firstlane = _mm256_extractf128_ps(fullMin,0); + __m128 secondlane = _mm256_extractf128_ps(fullMin,1); + firstlane = _mm_min_ps(firstlane,secondlane); + _mm_storeu_ps(tmp_buf_min,firstlane); + + /* Finding the maximum from the full box 256 */ + firstlane = _mm256_extractf128_ps(fullMax,0); + secondlane = _mm256_extractf128_ps(fullMax,1); + firstlane = _mm_max_ps(firstlane,secondlane); + _mm_storeu_ps(tmp_buf_max,firstlane); + +#else + fullMin = _mm_set1_ps(FLT_MAX); + fullMax = _mm_set1_ps(-FLT_MAX); + + for(unsigned int i=0; i< non_degen_count; i++){ + //call the algorithm with the count + vbo_bbox_calc_subbox_coordinates(vert_per_subBbox, + data->sub_bbox_array[subbox_array[i]].vert_count, + subbox_array[i], + (GLushort*)indexData, + (GLfloat*)vertex_data, + key->stride, + data); + } + _mm_storeu_ps(tmp_buf_min, fullMin); + _mm_storeu_ps(tmp_buf_max, fullMax); +#endif + + /* Set up bounding box as 8 vertices and store in bbox data */ + vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max, + &(data->full_box.bounding_volume.vert_vec4[0])); + + if(subbox_array) + free(subbox_array); + vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer); + data->valid = true; + + return true; +} + + +/** + * Calculate bounding boxes for given geometry. 
+ */ +static inline +bool vbo_bbox_calculate_bounding_boxes(struct gl_context *const gc, + const vbo_bbox_cache_key *key, + struct vbo_bbox_cache_data* data) +{ + assert(gc); + assert(key->indices_type == GL_UNSIGNED_SHORT); + + void* pIndexData = NULL; + int indexDataSize = 0; + + struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc, + key->element_buf_name); + if (element_buffer == NULL) { + return false; + } + if (!vbo_bbox_get_vbo_ptr(gc, element_buffer, (int) key->indices, + &pIndexData, &indexDataSize)) { + return false; + } + + bool ret = vbo_bbox_calculate_bounding_boxes_with_indices(gc, key, data, + pIndexData, indexDataSize); + + vbo_bbox_release_vbo_ptr(gc, element_buffer); + + return ret; +} + + +/** + * Create new bounding box cache entry + */ +static +struct vbo_bbox_cache_data* vbo_bbox_create_data(struct gl_context *const gc, + const vbo_bbox_cache_key *key) +{ + assert(gc); + + struct gl_buffer_object* element_buffer = vbo_get_current_element_buffer(gc); + struct gl_buffer_object* vertexattrib_buffer = + vbo_get_current_vertex_buffer(gc); + + if ((vertexattrib_buffer == NULL) || + (element_buffer == NULL)){ + return NULL; + } + + mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt; + assert(BboxOpt); + + struct vbo_bbox_cache_data* data = (struct vbo_bbox_cache_data *) malloc( + sizeof (struct vbo_bbox_cache_data)); + data->full_box.is_degenerate = false; + + if (data == NULL){ + return NULL; + } + /* Initialize the cache data and variables in cache data */ + data->valid = false; + data->need_new_calculation = true; + data->init_delaycnt = 0; + data->init_delaylimit = 0; + data->vertpos_vbo_changecnt = 0; + data->indices_vbo_changecnt = 0; + data->sub_bbox_cnt = 0; + data->sub_bbox_array = NULL; + data->drawcnt_bbox_helped = 0; + data->last_use_frame = 0; + data->vertpos_vbo_changecnt = vertexattrib_buffer->data_change_counter; + data->indices_vbo_changecnt = element_buffer->data_change_counter; + + /* This defines for how many cache hits 
we wait before actually creating the + * data */ + data->init_delaylimit = vbo_bbox_get_delay(BboxOpt); + + /* At this point data is not valid yet */ + _mesa_bbox_cache_insert(gc,gc->Pipeline.BboxOpt->cache, + key,sizeof(vbo_bbox_cache_key),data); + + return data; +} + + +/** + * Check if contents of the VBO buffers have changed since data entry + * was created. + */ +static inline +bool vbo_bbox_validate_data( + struct gl_context *const gc, + const vbo_bbox_cache_key *key, + struct vbo_bbox_cache_data* data) +{ + assert(gc); + + struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc, + key->element_buf_name); + + struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc, + key->vertex_buf_name); + + if (element_buffer == NULL || vertexattrib_buffer == NULL) { + return false; + } + + if ((element_buffer->data_change_counter != data->indices_vbo_changecnt) || + (vertexattrib_buffer->data_change_counter != data->vertpos_vbo_changecnt) + ) { + return false; + } + return true; +} + +/** + * Retrieve bounding box data from cache. 
+ */ +static inline +struct vbo_bbox_cache_data* vbo_bbox_get_bounding_boxes( + struct gl_context *const gc, + const vbo_bbox_cache_key *key) + { + + assert(gc); + mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt; + assert(BboxOpt); + struct vbo_bbox_cache_data* data = _mesa_search_bbox_cache(BboxOpt->cache, + key, sizeof(vbo_bbox_cache_key)); + if (data) { + if (data->need_new_calculation == false) + { + /* Data is initialized and valid */ + if (data->valid) { + if (vbo_bbox_validate_data(gc, key, data)) { + data->mvp_valid = true; + return data; + } + } + else { + data->valid = false; + return NULL; + } + } + } + else { + /* Data does not exist, create it */ + data = vbo_bbox_create_data(gc, key); + if (data == NULL) + { + return NULL; + } + } + if ((data->need_new_calculation) && + (data->init_delaycnt++ >= data->init_delaylimit)) { + data->valid = false; + data->need_new_calculation = false; + data->mvp_valid = false; + + if (!vbo_bbox_validate_data(gc, key, data)) { + struct gl_buffer_object * element_buffer = + _mesa_lookup_bufferobj(gc,key->element_buf_name); + + struct gl_buffer_object * vertexattrib_buffer = + _mesa_lookup_bufferobj(gc,key->vertex_buf_name); + + if ((vertexattrib_buffer == NULL) || + (element_buffer == NULL)) { + return NULL; + } + data->vertpos_vbo_changecnt = vertexattrib_buffer->data_change_counter; + data->indices_vbo_changecnt = element_buffer->data_change_counter; + } + if (gc->volume_type == BOUNDING_VOLUME_AABB) { + /* Calculate bounding boxes */ + if (vbo_bbox_calculate_bounding_boxes(gc, key, data)) { + return data; + } + } + } + return NULL; +} + +/** + * This function is called when we updating the element buffer. 
because the + * element-buffer has changed we have to update the relevant bbox data: + */ +void vbo_bbox_element_buffer_update(struct gl_context *const gc, + struct gl_buffer_object *buffer, + const void* data, + int offset, + int size) +{ + mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt; + + if (BboxOpt) { + struct gl_segment updateSegment; + updateSegment.Left = offset; + updateSegment.Right = offset+size; + + mesa_bbox_cache * buffer_map = BboxOpt->cache; + if (buffer_map) { + struct vbo_bbox_cache_data *c; + GLuint i = 0; + for (i = 0; i < buffer_map->size; i++) + for (c = buffer_map->items[i]; c != NULL ; c = c->next) { + struct vbo_bbox_cache_data * bbox_data = buffer_map->items[i]; + struct gl_buffer_object *buffObj = _mesa_lookup_bufferobj(gc, + bbox_data->key->element_buf_name); + + if(buffObj != buffer) + continue; + + if (!bbox_data) { + + assert(bbox_data); + return; + } + + bbox_data->indices_vbo_changecnt = buffer->data_change_counter; + + struct gl_segment element_bufferSegment; + element_bufferSegment.Left = bbox_data->key->indices; + element_bufferSegment.Right = bbox_data->key->indices + + (bbox_data->key->count * bbox_data->key->type_size); + + if (bbox_data->valid && + intersect(&element_bufferSegment, &updateSegment)) { + bbox_data->valid = false; + bbox_data->need_new_calculation = true; + + if (superset(&element_bufferSegment,&updateSegment)) + { + int offset_in_newdata = bbox_data->key->indices - offset; + int newsize = size - offset_in_newdata; + + assert(offset_in_newdata >= 0); + assert(newsize > 0); + + GLchar * start_data = (GLchar *)data + offset_in_newdata; + + if (vbo_bbox_calculate_bounding_boxes_with_indices(gc, + bbox_data->key, bbox_data, start_data, newsize)) { + bbox_data->need_new_calculation = false; + bbox_data->mvp_valid = false; + } + } + } + } + } + } +} + +/** + * Generate 6 clip planes from MVP. 
+ */ +static +void vbo_bbox_get_frustum_from_mvp(vbo_bbox_frustum *frustum, GLmatrix* mvpin) +{ + + GLfloat in_mat[4][4] = {0}; + { + #define M(row,col) m[col*4+row] + in_mat[0][0] = mvpin->M(0,0); + in_mat[0][1] = mvpin->M(0,1); + in_mat[0][2] = mvpin->M(0,2); + in_mat[0][3] = mvpin->M(0,3); + + in_mat[1][0] = mvpin->M(1,0); + in_mat[1][1] = mvpin->M(1,1); + in_mat[1][2] = mvpin->M(1,2); + in_mat[1][3] = mvpin->M(1,3); + + in_mat[2][0] = mvpin->M(2,0); + in_mat[2][1] = mvpin->M(2,1); + in_mat[2][2] = mvpin->M(2,2); + in_mat[2][3] = mvpin->M(2,3); + + in_mat[3][0] = mvpin->M(3,0); + in_mat[3][1] = mvpin->M(3,1); + in_mat[3][2] = mvpin->M(3,2); + in_mat[3][3] = mvpin->M(3,3); + #undef M + } + + /* Frustum plane calculation */ + + /* Left plane */ + frustum->plane[0].a = in_mat[3][0] + in_mat[0][0]; + frustum->plane[0].b = in_mat[3][1] + in_mat[0][1]; + frustum->plane[0].c = in_mat[3][2] + in_mat[0][2]; + frustum->plane[0].d = in_mat[3][3] + in_mat[0][3]; + + /* Right plane */ + frustum->plane[1].a = in_mat[3][0] - in_mat[0][0]; + frustum->plane[1].b = in_mat[3][1] - in_mat[0][1]; + frustum->plane[1].c = in_mat[3][2] - in_mat[0][2]; + frustum->plane[1].d = in_mat[3][3] - in_mat[0][3]; + + /* Top plane */ + frustum->plane[2].a = in_mat[3][0] - in_mat[1][0]; + frustum->plane[2].b = in_mat[3][1] - in_mat[1][1]; + frustum->plane[2].c = in_mat[3][2] - in_mat[1][2]; + frustum->plane[2].d = in_mat[3][3] - in_mat[1][3]; + + /* Bottom plane */ + frustum->plane[3].a = in_mat[3][0] + in_mat[1][0]; + frustum->plane[3].b = in_mat[3][1] + in_mat[1][1]; + frustum->plane[3].c = in_mat[3][2] + in_mat[1][2]; + frustum->plane[3].d = in_mat[3][3] + in_mat[1][3]; + + /* Far plane */ + frustum->plane[4].a = in_mat[3][0] - in_mat[2][0]; + frustum->plane[4].b = in_mat[3][1] - in_mat[2][1]; + frustum->plane[4].c = in_mat[3][2] - in_mat[2][2]; + frustum->plane[4].d = in_mat[3][3] - in_mat[2][3]; + + /* Near plane */ + frustum->plane[5].a = in_mat[3][0] + in_mat[2][0]; + frustum->plane[5].b = 
in_mat[3][1] + in_mat[2][1]; + frustum->plane[5].c = in_mat[3][2] + in_mat[2][2]; + frustum->plane[5].d = in_mat[3][3] + in_mat[2][3]; + + /* Calculate octants */ + for(int n = 0; n < 6; ++n) { + frustum->octant[n] = (frustum->plane[n].a >=0 ? 1 : 0) | + (frustum->plane[n].b >=0 ? 2 : 0) | + (frustum->plane[n].c >=0 ? 4 : 0); + normalize(&(frustum->plane[n])); + } +} + + +/** + * Calculate distance form a point to place + * + */ +static inline +float vbo_bbox_dist_from_point_to_plane( + const vbo_bbox_frustum_plane *plane, + const vbo_vec4f *point) +{ + return (plane->a * point->x + plane->b * point->y + plane->c * + point->z + plane->d); +} + + +/** + * Description: + * Bounding box clipping algorthm + * BBOX_CLIP_INSIDE - bounding box is fully inside frustum + * BBOX_CLIP_OUTSIDE - bounding box is fully outside frustum + * BBOX_CLIP_INTERSECT - bounding box intersects with frustum + */ +static +enum vbo_bbox_clip_result vbo_bbox_fast_clipping_test(bounding_info* bbox, + const vbo_bbox_frustum *frustum) +{ + assert(bbox); + vbo_vec4f *aabb = bbox->bounding_volume.vert_vec4; + + enum vbo_bbox_clip_result result = BBOX_CLIP_INSIDE; + for (int i = 0; i < 6; ++i) + { + unsigned char normalOctant = frustum->octant[i]; + + /* Test near and far vertices of AABB according to plane normal. + * Plane equation can be normalized to save some divisions. + */ + float farDistance = vbo_bbox_dist_from_point_to_plane( + &(frustum->plane[i]), + &(aabb[normalOctant])); + if (farDistance < 0.0f) { + return BBOX_CLIP_OUTSIDE; + } + + float nearDistance = vbo_bbox_dist_from_point_to_plane( + &(frustum->plane[i]), + &(aabb[normalOctant ^ 7])); + if (nearDistance < 0.0f) { + result = BBOX_CLIP_INTERSECT; + } + } + + return result; +} + +/** + * Wrapper for clip algorithm. 
+ */ +static inline +enum vbo_bbox_clip_result vbo_bbox_clip(bounding_info* bbox, + const vbo_bbox_frustum *frustum) +{ + assert(bbox); + if (bbox->is_degenerate) { + return BBOX_CLIP_DEGEN; + } + return vbo_bbox_fast_clipping_test(bbox, frustum); +} + +/** + * Bounding box drawelements implementation + */ +void +vbo_bbox_drawelements(struct gl_context *ctx, GLenum mode, + GLboolean index_bounds_valid, GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid * indices, + GLint basevertex, GLuint numInstances, + GLuint baseInstance) +{ + assert(ctx); + GLuint type_size; + /* BOUNDING VOLUME: Checks would remain same I guess */ + bool draw_call_supported = vbo_bbox_check_supported_draw_call(ctx, + mode, + count, + type, + indices, + basevertex); + if (!draw_call_supported || + (mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_CLIPPING)) { + MESA_BBOX("Aborting MESA_BBOX : BBOX Is not ENABLED !!! \n"); + vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, + end, count, type, indices, basevertex, numInstances, baseInstance); + return; + } + + type_size = sizeof(GLushort); + vbo_bbox_cache_key key;/* Need not initialize key its done in prepare key */ + vbo_bbox_prepare_key(ctx, mode, count, type, type_size, indices, basevertex, + &key); + /*BOUNDING VOLUME: Call bounding volume creation based on bounding volume + * type */ + + struct vbo_bbox_cache_data* cached_bbox = vbo_bbox_get_bounding_boxes(ctx, + &key); + + if (cached_bbox == NULL) { + MESA_BBOX("Aborting MESA_BBOX : New Object not in Cache!!! 
\n"); + vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, end, + count, type, indices, basevertex, + numInstances, baseInstance); + return; + } + + GLint loc = vbo_bbox_get_mvp(ctx); + if (loc < 0) { + MESA_BBOX("MVP Location Error\n"); + /* TBD: Free cache here */ + vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, + end, count, type, indices, basevertex, + numInstances, baseInstance); + return; + } + + GLfloat *mvp_ptr = (GLfloat *) + ctx->_Shader->ActiveProgram->data->UniformStorage[loc].storage; + vbo_bbox_frustum frustum; + bool recalculate_subbox_clip = false; + + if(cached_bbox->mvp_valid == false || + memcmp(cached_bbox->mvp, mvp_ptr,16*sizeof(GLfloat))) { + memcpy(&(cached_bbox->mvp), mvp_ptr, 16*sizeof(GLfloat)); + cached_bbox->mvpin.m = cached_bbox->mvp; + vbo_bbox_get_frustum_from_mvp(&frustum,&(cached_bbox->mvpin)); + + /* BOUNDING VOLUME: Call specific function to calculate clip results */ + + if (ctx->volume_type == BOUNDING_VOLUME_AABB) { + cached_bbox->full_box.clip_result = + vbo_bbox_clip(&(cached_bbox->full_box), &frustum); + } + recalculate_subbox_clip = true; + } + + /* Calculate frustum planes */ + MESA_BBOX("MESA_BBOX: Full Box ClipResult:%d \n", + cached_bbox->full_box.clip_result); + switch (cached_bbox->full_box.clip_result) { + case BBOX_CLIP_OUTSIDE: + /* Geometry outside view frustum, dont draw it */ + return; + case BBOX_CLIP_INSIDE: + vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, + end, count, type, indices, basevertex, + numInstances, baseInstance); + return; + /* case BBOX_CLIP_INTERSECT: */ + default: + MESA_BBOX("MESA_BBOX: Vertices INTERSECTING with the frustum, going" + " for Sub Bboxes: \n"); + break; + } + + GLsizei count_to_send = 0; + GLsizei count_to_drop = 0; + GLvoid* offset_to_send = NULL; + bool clipped = false; + unsigned potential_clipped = 0; + + for (int i = 0; i < cached_bbox->sub_bbox_cnt; i++) { + int new_count = 
cached_bbox->sub_bbox_array[i].vert_count; + + if(recalculate_subbox_clip) { + if (ctx->volume_type == BOUNDING_VOLUME_AABB) { + cached_bbox->sub_bbox_array[i].clip_result = + vbo_bbox_clip(&(cached_bbox->sub_bbox_array[i]), &frustum); + } + } + + switch (cached_bbox->sub_bbox_array[i].clip_result) + { + case BBOX_CLIP_OUTSIDE: + count_to_drop += new_count; + potential_clipped += new_count; + + break; + case BBOX_CLIP_DEGEN: + count_to_drop += new_count; + potential_clipped += new_count; + break; + default: + /* Sub bounding box intersects with view, draw/save it + * for later draw + */ + if (count_to_send == 0) { + /* Starting new batch */ + count_to_send = new_count; + offset_to_send = (char*)indices + + cached_bbox->sub_bbox_array[i].start_offset; + } + else { + if (count_to_drop >= + (int)(mesa_bbox_env_variables.bbox_split_size * + BBOX_MIN_SPLITTED_DRAW_TO_DROP)) { + + /* Draw accumulated geometry */ + vbo_validated_drawrangeelements(ctx, mode, + index_bounds_valid, start, end, count_to_send, type, + offset_to_send, basevertex, numInstances, baseInstance); + + /* Reset accumulated draws */ + count_to_send = 0; + offset_to_send = (char*)indices + + cached_bbox->sub_bbox_array[i].start_offset; + + clipped = true; + } + else + { + count_to_send += count_to_drop; + } + + /* append to current batch of sent primitives */ + count_to_send += new_count; + } + + count_to_drop = 0; + break; + } + } + + if (count_to_send > 0) + { + vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start, + end, count_to_send, type, + offset_to_send, basevertex, + numInstances, baseInstance); + } + + + clipped |= (count_to_drop >= (int)(mesa_bbox_env_variables.bbox_split_size * + BBOX_MIN_SPLITTED_DRAW_TO_DROP)); + + if (clipped) + { + cached_bbox->drawcnt_bbox_helped = + MIN(cached_bbox->drawcnt_bbox_helped + 1, + BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER); + } + else + { + if (potential_clipped == 0) + { + cached_bbox->drawcnt_bbox_helped = + 
MAX(cached_bbox->drawcnt_bbox_helped - 1, + BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER); + } + } +} + +/** + * Initialization for bounding box optimization + */ +void vbo_bbox_init(struct gl_context* const gc) +{ + assert(gc); + + const char * mesa_bbox_opt = getenv("MESA_BBOX_OPT_ENABLE"); + if (mesa_bbox_opt !=NULL) + mesa_bbox_env_variables.bbox_enable = atoi(mesa_bbox_opt); + + mesa_bbox_opt = getenv("MESA_OPT_SPLIT_SIZE"); + if (mesa_bbox_opt !=NULL) + mesa_bbox_env_variables.bbox_split_size = atoi(mesa_bbox_opt); + + mesa_bbox_opt = getenv("MESA_BBOX_MIN_VERTEX_CNT"); + if (mesa_bbox_opt != NULL) + mesa_bbox_env_variables.bbox_min_vrtx_count = atoi(mesa_bbox_opt); + + if (!gc->Const.EnableBoundingBoxCulling) { + mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF; + mesa_bbox_env_variables.bbox_split_size = 0x7fffffff; + mesa_bbox_env_variables.bbox_min_vrtx_count = 0; + } + + mesa_bbox_opt = getenv("MESA_OPT_TRACE_LEVEL"); + if (mesa_bbox_opt !=NULL) + mesa_bbox_env_variables.bbox_trace_level = atoi(mesa_bbox_opt); + + if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_AUTO) { + /* Android/Linux: enable of Gen7.5, Gen8 and Gen9 */ + #if (IGFX_GEN == IGFX_GEN9) + mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_SMART; + #else + mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF; + #endif + if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_SMART && 1) + { + mesa_bbox_env_variables.bbox_enable = + MESA_BBOX_ENABLE_FORCE_CLIPPING; + } + } + /* BOUNDING VOLUME: Add initializations based on bounding volume here */ + switch (gc->volume_type) { + case BOUNDING_VOLUME_AABB: + if (mesa_bbox_env_variables.bbox_enable) { + mesa_bbox_env_variables.bbox_split_size = + (mesa_bbox_env_variables.bbox_split_size / 3) * 3; + mesa_bbox_env_variables.bbox_min_vrtx_count = + (mesa_bbox_env_variables.bbox_min_vrtx_count / 3) * 3; + + assert(gc->Pipeline.BboxOpt == NULL); + + gc->Pipeline.BboxOpt = malloc(sizeof(mesa_bbox_opt)); + if 
(!gc->Pipeline.BboxOpt) { + /* No memory, disable bbox optimization */ + mesa_bbox_env_variables.bbox_enable = + MESA_BBOX_ENABLE_OFF; + mesa_bbox_env_variables.bbox_split_size = 0x7fffffff; + mesa_bbox_env_variables.bbox_min_vrtx_count = 0; + return; + } + else { + gc->Pipeline.BboxOpt->calc_delay = BBOX_CALC_MIN_DELAY; + gc->Pipeline.BboxOpt->cache = _mesa_new_bbox_cache(); + + if (!gc->Pipeline.BboxOpt->cache) { + free(gc->Pipeline.BboxOpt); + MESA_BBOX("MESA_BBOX: Cache creation failed\n"); + return; + } + } + if (mesa_bbox_env_variables.bbox_trace_level > 0) { + MESA_BBOX("\nMESA BBOX OPT config: \ + bboxOptEnable = %d, bboxOptMinVertexCount = %d,\ + bboxOptSplitSize = %d\n", + mesa_bbox_env_variables.bbox_enable, + mesa_bbox_env_variables.bbox_min_vrtx_count, + mesa_bbox_env_variables.bbox_split_size); + } + } + /* Initialize some function pointers so that we dont have to + * check bounding volume type for every draw call */ + break; + case BOUNDING_VOLUME_OBB: + /* Init for OBB bounding volume */ + break; + case BOUNDING_VOLUME_SPHERE: + /* Init for SPHERE bounding volume */ + break; + case BOUNDING_VOLUME_DOP: + /* Init for DOP bounding volume */ + break; + default: + MESA_BBOX("BOUNDING VOLUME TYPE IS INCORRECT\n"); + break; + } +} + + +/** + * Free resources associated with bounding box optimization. 
+ * To be called when context is destroyed
+ */
+void vbo_bbox_free(struct gl_context* const gc)
+{
+   assert(gc);
+
+   /* Nothing was allocated for this context */
+   if (!gc->Pipeline.BboxOpt)
+      return;
+
+   /* Release the cache first, it is owned by the BboxOpt object */
+   if (gc->Pipeline.BboxOpt->cache) {
+      _mesa_delete_bbox_cache(gc, gc->Pipeline.BboxOpt->cache);
+      gc->Pipeline.BboxOpt->cache = NULL;
+   }
+
+   free(gc->Pipeline.BboxOpt);
+   gc->Pipeline.BboxOpt = NULL;
+}
diff --git a/src/mesa/vbo/vbo_bbox.h b/src/mesa/vbo/vbo_bbox.h
new file mode 100644
index 0000000..1dd98f2
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#ifndef _VBO_BBOX_H_
+#define _VBO_BBOX_H_
+
+#include <stdio.h>
+#include "main/arrayobj.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/varray.h"
+#include "main/bufferobj.h"
+#include "main/arrayobj.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/transformfeedback.h"
+#include "main/mtypes.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
+#include "sys/param.h"
+#include "program/prog_cache.h"
+/* For Intrinsic functions */
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+#include <immintrin.h>
+
+#if defined(__ANDROID__) || defined(ANDROID)
+#include <cutils/log.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FRUSTUM_PLANE_COUNT 6
+#define BBOX_CALC_MIN_DELAY 2
+#define BBOX_CALC_MAX_DELAY 5
+
+#undef LOG_TAG
+#define LOG_TAG "MESA_BBOX_LOG"
+
+/* Values for bbox_env.bbox_enable.
+ * NOTE(review): bbox_enable is a GLuint, so MESA_BBOX_ENABLE_AUTO (-1) is
+ * only ever compared after conversion to unsigned.
+ */
+/* Default, driver will select mode for given GPU and OS */
+#define MESA_BBOX_ENABLE_AUTO -1
+/* Disable BBOX */
+#define MESA_BBOX_ENABLE_OFF 0
+/* Enable BBOX. NOTE(review): the original comment read "bot clipping";
+ * presumably clipping depends on GPU utilization here (contrast with
+ * FORCE_CLIPPING below) - confirm.
+ */
+#define MESA_BBOX_ENABLE_SMART 1
+/* Enable BBOX, clipping will be done regardless of GPU utilization */
+#define MESA_BBOX_ENABLE_FORCE_CLIPPING 2
+/* Enable BBOX, force immediate bbox recalculation and clipping */
+#define MESA_BBOX_ENABLE_FORCE_RECALC 3
+
+/**
+ * MESA BBOX PRINTS
+ * Debug level: 0 compiles both macros out, 1 enables MESA_BBOX_PRINT only,
+ * 2 enables MESA_BBOX_PRINT and MESA_BBOX. Output goes to ALOGE on Android
+ * and to printf elsewhere.
+ */
+#define MESA_BBOX_DEBUG 0
+
+#if defined(__ANDROID__) || defined (ANDROID)
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...) ALOGE(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#else //ANDROID
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...) printf(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#endif //ANDROID
+
+/**
+ * MESA Bbox options environment variables
+ *
+ * NOTE(review): env_opt_val, env_opt and mesa_bbox_env_variables below are
+ * object definitions, not extern declarations, and this header is included
+ * from several .c files. That only links when the toolchain merges common
+ * symbols (-fcommon); they should become "extern" here with one definition
+ * in a single .c file.
+ */
+int env_opt_val;
+const char *env_opt;
+
+/* Tunables filled in by vbo_bbox_init() */
+typedef struct vbo_bbox_env_variable {
+   GLuint bbox_min_vrtx_count;
+   GLuint bbox_enable;
+   GLuint bbox_split_size;
+   GLuint bbox_trace_level;
+} bbox_env;
+bbox_env mesa_bbox_env_variables;
+
+void
+vbo_bbox_init(struct gl_context *const gc);
+
+void
+vbo_bbox_free(struct gl_context *const gc);
+
+
+void
+vbo_bbox_element_buffer_update(struct gl_context *const gc,
+                               struct gl_buffer_object *buffer,
+                               const void* data,
+                               int offset,
+                               int size);
+
+void
+vbo_validated_drawrangeelements(struct gl_context *ctx,
+                                GLenum mode,
+                                GLboolean index_bounds_valid,
+                                GLuint start,
+                                GLuint end,
+                                GLsizei count,
+                                GLenum type,
+                                const GLvoid * indices,
+                                GLint basevertex,
+                                GLuint numInstances,
+                                GLuint baseInstance);
+
+/* Same parameter list as vbo_validated_drawrangeelements() */
+void
+vbo_bbox_drawelements(struct gl_context *ctx,
+                      GLenum mode,
+                      GLboolean index_bounds_valid,
+                      GLuint start,
+                      GLuint end,
+                      GLsizei count,
+                      GLenum type,
+                      const GLvoid * indices,
+                      GLint basevertex,
+                      GLuint numInstances,
+                      GLuint baseInstance);
+
+/**
+ * Segment described by its Left and Right bounds
+ */
+typedef struct gl_segment
+{
+   GLint Left;
+   GLint Right;
+
+}segment;
+
+/**
+ * Clip algorithm result
+ */
+enum vbo_bbox_clip_result
+{
+   BBOX_CLIP_OUTSIDE=0,
+   BBOX_CLIP_INTERSECT=1,
+   BBOX_CLIP_INSIDE=2,
+   BBOX_CLIP_DEGEN = 3,
+   BBOX_CLIP_ERROR = 4,
+};
+
+/* 4x4 float matrix plus a type tag */
+typedef struct gl_matrixRec
+{
+   GLfloat melem[4][4];
+   GLenum matrixType;
+} gl_matrix;
+
+/**
+ * Structure to describe plane
+ */
+typedef struct vbo_bbox_frustum_plane
+{
+   GLfloat a, b, c, d;
+} vbo_bbox_frustum_plane;
+
+
+/**
+ * Planes and octants with their normals
+ *
+ * NOTE(review): planeCount is a const member, so it can only be set through
+ * an initializer and the struct cannot be assigned as a whole.
+ */
+typedef struct vbo_bbox_frustum
+{
+   const unsigned char planeCount;
+   struct vbo_bbox_frustum_plane plane[FRUSTUM_PLANE_COUNT];
+   unsigned char octant[FRUSTUM_PLANE_COUNT];
+} vbo_bbox_frustum;
+
+/*
+ * Axis Aligned Bounding Box: 4-component float vector, addressable either
+ * as data[0..3] or as x/y/z/w
+ */
+typedef union vbo_vec4f {
+   GLfloat data[4];
+   struct {
+      GLfloat x, y, z, w;
+   };
+} vbo_vec4f;
+
+/*
+ * Oriented Bounding Box
+ */
+typedef struct oriented_bounding_box {
+   GLfloat x,y,z,w;
+} oriented_box;
+
+/*
+ * Spherical Bounding volume
+ */
+typedef struct spherical_bounding_volume {
+   GLfloat x,y,z,r; // x^2+y^2+z^2 = r^2
+} spherical_volume;
+
+/*
+ * 8-Discrete oriented polytopes
+ */
+typedef struct dop_bounding_volume {
+   GLfloat x,y,z,r,a,b;//TBD Not sure of the representation for 8-DOP yet!
+} dop_volume;
+
+/*
+ * Bounding volumes for AABB, OBB, SPHERE, DOP etc
+ */
+typedef union bounding_volume_info {
+   vbo_vec4f vert_vec4[8]; /* Bbox coordinates (presumably min/max) */
+   oriented_box obb[8];
+   spherical_volume sphere;
+   dop_volume dop[8];
+} bounding_volume_info;
+
+
+/* Key identifying one cached draw call */
+typedef struct vbo_bbox_cache_key
+{
+   /* From API call */
+   GLenum mode; /* GL_TRIANGLES is the only mode supported currently */
+   GLsizei count; /* Number of indices in draw call */
+   GLenum indices_type; /* must be GL_UNSIGNED_SHORT for now */
+   GLuint type_size;
+   GLuint type_sizeShift;
+   GLint indices; /* Offset to index VBO */
+   GLint basevertex; /* Only 0 supported for now. */
+
+   /* VBO objects names */
+   GLuint element_buf_name;
+   GLuint vertex_buf_name;
+
+   /* Vertex position attribute configuration */
+   GLint offset;
+   GLint size; /* Size of attribute, must be 3 for now */
+   GLenum vertDataType; /* Must be GL_FLOAT */
+   GLsizei stride; /* Any */
+   GLuint elementStride;
+} vbo_bbox_cache_key;
+
+typedef struct bounding_info
+{
+   int vert_count; /* Number of vertices this bbox covers */
+   int start_offset; /* Start offset for this bbox */
+   bool is_degenerate; /* Triangle can not be formed */
+   enum vbo_bbox_clip_result clip_result;
+
+   bounding_volume_info bounding_volume; /* Bbox coordinates (presumably min/max) */
+
+} bounding_info;
+
+/**
+ * Cached information about (multiple) bounding boxes
+ */
+struct vbo_bbox_cache_data
+{
+   /* Valid data indicator, will be set to false if VBO have been
+    * modified by application
+    */
+   bool valid;
+
+   /* NOTE(review): the original comment said "indicates if bounding boxes
+    * were calculated"; the name suggests it is set while a (re)calculation
+    * is still pending - confirm against vbo_bbox.c.
+    */
+   bool need_new_calculation;
+
+   /* Controls delay with which bounding boxes are calculated */
+   int init_delaycnt;
+   int init_delaylimit;
+
+   /* Data change indicator for VBOs */
+   int vertpos_vbo_changecnt;
+   int indices_vbo_changecnt;
+
+   /* How many bounding boxes are stored for this geometry */
+   int sub_bbox_cnt;
+
+   /* How many draw calls the bbox was effective for */
+   int drawcnt_bbox_helped;
+
+   int last_use_frame;
+
+   /* Hash of the key, set by _mesa_bbox_cache_insert() */
+   GLuint hash;
+
+   /* Size in bytes of the key copy below */
+   unsigned keysize;
+
+   /* Heap copy of the key, allocated by _mesa_bbox_cache_insert() */
+   vbo_bbox_cache_key *key;
+
+   bool mvp_valid;
+   GLmatrix mvpin;
+   GLfloat mvp[16];
+   vbo_bbox_frustum frustum;
+
+   /* Pointer to array of bboxes */
+   bounding_info* sub_bbox_array;
+
+   /* Bounding box that covers whole geometry */
+   bounding_info full_box;
+
+   /* Next entry in the same hash bucket */
+   struct vbo_bbox_cache_data *next;
+};
+
+/* Chained hash table of vbo_bbox_cache_data entries */
+typedef struct mesa_bbox_cache {
+   struct vbo_bbox_cache_data **items;
+   struct vbo_bbox_cache_data *last;
+   GLuint size, n_items;
+} mesa_bbox_cache;
+
+/* Per-context state, stored in gc->Pipeline.BboxOpt */
+typedef struct mesa_bbox_opt
+{
+   mesa_bbox_cache * cache;
+   GLuint calc_delay;
+} mesa_bbox_opt;
+
+/**
+ * VBO BBox Cache functions, replica of program cache functions
+ *
+ */
+struct mesa_bbox_cache *
+_mesa_new_bbox_cache(void);
+
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx,
+                        struct mesa_bbox_cache *cache);
+
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(struct mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize);
+
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,
+                        struct mesa_bbox_cache *cache,
+                        const void *key,
+                        GLuint keysize,
+                        struct vbo_bbox_cache_data *CachedData);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif //_VBO_BBOX_H_
diff --git a/src/mesa/vbo/vbo_bbox_cache.c b/src/mesa/vbo/vbo_bbox_cache.c
new file mode 100644
index 0000000..09bd1dd
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox_cache.c
@@ -0,0 +1,195 @@
+
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "vbo/vbo_bbox.h"
+
+#define CACHE_SIZE 17
+
+/**
+ * Compute hash index from state key.
+ *
+ * The key is consumed as an array of GLuint words; trailing bytes that do
+ * not fill a whole word are ignored.
+ *
+ * \param key       pointer to the key bytes
+ * \param key_size  size of the key in bytes, at least 4
+ * \return the hash value (not yet reduced modulo the table size)
+ */
+static GLuint
+bbox_hash_key(const void *key, GLuint key_size)
+{
+   const GLuint *ikey = (const GLuint *) key;
+   GLuint hash = 0, i;
+
+   assert(key_size >= 4);
+
+   /* Make a slightly better attempt at a hash function:
+    */
+   for (i = 0; i < key_size / sizeof(*ikey); i++)
+   {
+      hash += ikey[i];
+      hash += (hash << 10);
+      hash ^= (hash >> 6);
+   }
+
+   return hash;
+}
+
+
+/**
+ * Tell whether a cache entry was stored under exactly this key.
+ *
+ * The whole key is compared. Checking only mode/count/indices lets two draw
+ * calls that use different buffers or attribute layouts share one entry.
+ */
+static bool
+bbox_key_matches(const struct vbo_bbox_cache_data *c,
+                 const void *key, GLuint keysize)
+{
+   return c->key != NULL &&
+          c->keysize == keysize &&
+          memcmp(c->key, key, keysize) == 0;
+}
+
+
+/**
+ * Rebuild/expand the hash table to accommodate more entries
+ *
+ * The table grows to three times its size. If the new bucket array cannot
+ * be allocated the old table is kept untouched.
+ */
+static void
+bbox_rehash(struct mesa_bbox_cache *cache)
+{
+   struct vbo_bbox_cache_data **items;
+   struct vbo_bbox_cache_data *c, *next;
+   GLuint size, i;
+
+   cache->last = NULL;
+
+   size = cache->size * 3;
+   items = calloc(size, sizeof(*items));
+   if (!items)
+      return;
+
+   /* Move every entry to its bucket in the new table */
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+
+   free(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Free every entry (with its key and sub-box array) and empty all buckets.
+ * The bucket array itself is kept.
+ */
+static void
+bbox_clear_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   struct vbo_bbox_cache_data *c, *next;
+   GLuint i;
+
+   cache->last = NULL;
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         free(c->key);
+         free(c->sub_bbox_array);
+         free(c);
+      }
+      cache->items[i] = NULL;
+   }
+   cache->n_items = 0;
+}
+
+
+
+/**
+ * Create an empty cache with CACHE_SIZE buckets.
+ *
+ * \return the new cache, or NULL if memory could not be allocated
+ */
+mesa_bbox_cache *
+_mesa_new_bbox_cache(void)
+{
+   mesa_bbox_cache *cache = CALLOC_STRUCT(mesa_bbox_cache);
+   if (cache) {
+      cache->size = CACHE_SIZE;
+      /* items is an array of bucket head pointers, not of entries */
+      cache->items = calloc(cache->size, sizeof(*cache->items));
+      if (!cache->items) {
+         MESA_BBOX("Func:%s cache-size=%u "
+                   "Cannot allocate items freeing cache\n",
+                   __func__,cache->size);
+         free(cache);
+         return NULL;
+      }
+      MESA_BBOX("Func:%s cache:%p cache->size=%u \n",
+                __func__,(void *) cache,cache->size);
+      return cache;
+   }
+   else {
+      MESA_BBOX("cache is Null in Func:%s\n",__func__);
+      return cache;
+   }
+}
+
+
+/**
+ * Destroy a cache and everything stored in it. A NULL cache is ignored.
+ */
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   if (!cache)
+      return;
+
+   bbox_clear_cache(ctx, cache);
+   free(cache->items);
+   free(cache);
+}
+
+/**
+ * Look up the entry stored under \p key.
+ *
+ * The most recently found entry is tried first, then the hash bucket.
+ *
+ * \return the matching entry, or NULL if there is none
+ */
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize)
+{
+   MESA_BBOX("Func:%s cache:%p \n",__func__,(void *) cache);
+   if (cache->last && bbox_key_matches(cache->last, key, keysize)) {
+      return cache->last;
+   }
+   else {
+      const GLuint hash = bbox_hash_key(key, keysize);
+      struct vbo_bbox_cache_data *c;
+      MESA_BBOX("cache:%p,hash:%u,cache->size:%u\n",
+                (void *) cache,hash,cache->size);
+      for (c = cache->items[hash % cache->size]; c; c = c->next) {
+         if (c->hash == hash && bbox_key_matches(c, key, keysize)) {
+            cache->last = c;
+            return c;
+         }
+      }
+      return NULL;
+   }
+}
+
+
+/**
+ * Insert \p CachedData under a private copy of \p key.
+ *
+ * When the table is over-full it is grown, or emptied once it has reached
+ * 1000 buckets. If the key copy cannot be allocated the entry is NOT
+ * inserted and CachedData->key is left NULL so the caller can detect it;
+ * ownership of CachedData then stays with the caller.
+ */
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,struct mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize,
+                        struct vbo_bbox_cache_data *CachedData)
+{
+   const GLuint hash = bbox_hash_key(key, keysize);
+
+   CachedData->hash = hash;
+
+   CachedData->key = calloc(1, keysize);
+   if (!CachedData->key) {
+      /* An entry without a key can never be matched; do not link it in */
+      CachedData->keysize = 0;
+      CachedData->next = NULL;
+      return;
+   }
+   memcpy(CachedData->key, key, keysize);
+   CachedData->keysize = keysize;
+
+   if (cache->n_items > cache->size * 1.5) {
+      if (cache->size < 1000)
+         bbox_rehash(cache);
+      else
+         bbox_clear_cache(ctx, cache);
+   }
+
+   /* cache->size may have changed above, so pick the bucket only now */
+   cache->n_items++;
+   CachedData->next = cache->items[hash % cache->size];
+   cache->items[hash % cache->size] = CachedData;
+}
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index cf9405d..8c608c2 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@
-36,6 +36,9 @@ #include "vbo.h" #include "vbo_private.h" +#ifdef MESA_BBOX_OPT +#include "vbo_bbox.h" +#endif static GLuint check_size(const GLfloat *attr) @@ -199,6 +202,10 @@ _vbo_CreateContext(struct gl_context *ctx) if (ctx->API == API_OPENGL_COMPAT) vbo_save_init(ctx); +#ifdef MESA_BBOX_OPT //Hard coded to AABB for now + ctx->volume_type = BOUNDING_VOLUME_AABB; +#endif + vbo->VAO = _mesa_new_vao(ctx, ~((GLuint)0)); /* The exec VAO assumes to have all arributes bound to binding 0 */ for (unsigned i = 0; i < VERT_ATTRIB_MAX; ++i) @@ -219,7 +226,9 @@ _vbo_DestroyContext(struct gl_context *ctx) _ae_destroy_context(ctx); ctx->aelt_context = NULL; } - +#ifdef MESA_BBOX_OPT + vbo_bbox_free(ctx); +#endif if (vbo) { _mesa_reference_buffer_object(ctx, &vbo->binding.BufferObj, NULL); diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 51c000e..3321a21 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -39,6 +39,10 @@ #include "main/macros.h" #include "main/transformfeedback.h" +#ifdef MESA_BBOX_OPT +#include "vbo_bbox.h" +#endif + typedef struct { GLuint count; GLuint primCount; @@ -784,6 +788,16 @@ skip_draw_elements(struct gl_context *ctx, GLsizei count, * Do the rendering for a glDrawElements or glDrawRangeElements call after * we've validated buffer bounds, etc. 
*/ +#ifdef MESA_BBOX_OPT +void +vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, + GLboolean index_bounds_valid, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid * indices, + GLint basevertex, GLuint numInstances, + GLuint baseInstance) +#else static void vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, GLboolean index_bounds_valid, @@ -792,6 +806,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, const GLvoid * indices, GLint basevertex, GLuint numInstances, GLuint baseInstance) +#endif { struct _mesa_index_buffer ib; struct _mesa_prim prim; @@ -997,6 +1012,11 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n", _mesa_enum_to_string(mode), count, _mesa_enum_to_string(type), indices); +#ifdef MESA_BBOX_OPT + MESA_BBOX_PRINT("glDrawElements(%s, %u, %s, %p)\n", + _mesa_enum_to_string(mode), count, + _mesa_enum_to_string(type), indices); +#endif FLUSH_FOR_DRAW(ctx); @@ -1011,9 +1031,13 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices)) return; } - +#ifdef MESA_BBOX_OPT + vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0, + count, type, indices, 0, 1, 0); +#else vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0, count, type, indices, 0, 1, 0); +#endif } @@ -1045,8 +1069,13 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, return; } +#ifdef MESA_BBOX_OPT + vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0, + count, type, indices, basevertex, 1, 0); +#else vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0, count, type, indices, basevertex, 1, 0); +#endif } @@ -1078,9 +1107,13 @@ vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, indices, numInstances)) return; } - +#ifdef MESA_BBOX_OPT + vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0, + count, type, indices, 0, numInstances, 
0); +#else vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0, count, type, indices, 0, numInstances, 0); +#endif } diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index ad59efb..15f064b 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -197,6 +197,10 @@ TODO: document the other workarounds. <option name="force_glsl_extensions_warn" value="true" /> </application> + <application name="GLBenchmark" executable="GLBenchmark"> + <option name="enable_bounding_box_culling" value="true"/> + </application> + <!-- The GL thread whitelist is below, workarounds are above. Keep it that way. --> diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h index ecf495a..c3f8928 100644 --- a/src/util/xmlpool/t_options.h +++ b/src/util/xmlpool/t_options.h @@ -135,6 +135,11 @@ DRI_CONF_OPT_BEGIN_B(allow_glsl_cross_stage_interpolation_mismatch, def) \ DRI_CONF_DESC(en,gettext("Allow interpolation qualifier mismatch across shader stages")) \ DRI_CONF_OPT_END +#define DRI_CONF_ENABLE_BOUNDING_BOX_CULLING(def) \ +DRI_CONF_OPT_BEGIN_B(enable_bounding_box_culling, def) \ + DRI_CONF_DESC(en,gettext("Enable bounding box culling in CPU")) \ +DRI_CONF_OPT_END + /** * \brief Image quality-related options */ -- 2.7.4
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev