Hi,
On 31.08.2018 08:25, kedar.j.kara...@intel.com wrote:
From: "J Karanje, Kedar" <kedar.j.kara...@intel.com>
The feature is enabled by default at build time; however, the following must
be added to drirc to enable it at runtime:
<option name="enable_bounding_box_culling" value="true"/>
vbo: Main algorithm & code to check for MVP & vertex position location
Build Files: Flags to enable BBOX Code and check AVX version
compiler: Code to recognize simple shader
(gl_position is a simple function of mvp and vertex)
i965 & util: dri query to check if feature is enabled
vbo: Implements a bounding box algorithm for Mesa. We hook into the default
drawelements and drawrangeelements paths and obtain the locations of the MVP
and the vertex position together with the corresponding program. Using this
data we re-create the frustum planes, build a box around the object, and check
whether the box's 8 vertices lie within the frustum. Draw calls that are
completely outside the view frustum are dropped, and objects that intersect
the frustum planes are subdivided into sub-boxes.
The current patch has been verified on KBL + Ubuntu 16.04. We noticed an
8~10% improvement in GFXBench T-Rex offscreen and ~2% in Manhattan offscreen.
Platforms where AVX2 is not supported should still see a ~6-8% improvement.
The other KPIs were not impacted.
I've tried this with Egypt and T-Rex on KBL GT3e, and I'm not seeing
a performance difference. What am I missing? What exactly should I
do to get it enabled, to verify that it's enabled, and to see the perf
difference?
Based on empirical data we have set the minimum vertex count to 999 and the
sub-box size to 198, as this provides the best results. We have also
implemented some level of caching for the box coordinates and the frustum
plane coordinates.
We have also optimized some algorithms to use AVX2 when the target supports it.
Shader classification code is currently in HIR, and we have received review
comments asking us to move it to NIR.
Signed-off-by: Aravindan Muthukumar <aravindan.muthuku...@intel.com>
Signed-off-by: Yogesh Marathe <yogesh.mara...@intel.com>
---
Android.common.mk | 19 +
configure.ac | 34 +-
src/compiler/glsl/ast_to_hir.cpp | 168 +++-
src/compiler/glsl/glsl_parser_extras.cpp | 10 +
src/compiler/glsl/glsl_parser_extras.h | 7 +
src/compiler/glsl/linker.cpp | 18 +
src/intel/common/gen_debug.c | 7 +
src/mesa/Makefile.sources | 11 +
src/mesa/drivers/dri/i965/brw_context.c | 17 +
src/mesa/drivers/dri/i965/intel_screen.c | 4 +
src/mesa/main/bufferobj.c | 19 +
src/mesa/main/mtypes.h | 51 +
src/mesa/program/Android.mk | 1 +
src/mesa/program/program.c | 3 +
src/mesa/vbo/vbo_bbox.c | 1538 ++++++++++++++++++++++++++++++
src/mesa/vbo/vbo_bbox.h | 383 ++++++++
src/mesa/vbo/vbo_bbox_cache.c | 195 ++++
src/mesa/vbo/vbo_context.c | 11 +-
src/mesa/vbo/vbo_exec_array.c | 37 +-
src/util/00-mesa-defaults.conf | 4 +
src/util/xmlpool/t_options.h | 5 +
21 files changed, 2535 insertions(+), 7 deletions(-)
mode change 100644 => 100755 src/compiler/glsl/ast_to_hir.cpp
create mode 100644 src/mesa/vbo/vbo_bbox.c
create mode 100644 src/mesa/vbo/vbo_bbox.h
create mode 100644 src/mesa/vbo/vbo_bbox_cache.c
diff --git a/Android.common.mk b/Android.common.mk
index aa1b266..efd6792 100644
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -21,6 +21,8 @@
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
+MESA_BBOX_ENABLE=true
+
ifeq ($(LOCAL_IS_HOST_MODULE),true)
LOCAL_CFLAGS += -D_GNU_SOURCE
endif
@@ -80,6 +82,10 @@ LOCAL_CFLAGS += \
-fno-trapping-math \
-Wno-sign-compare
+ifeq ($(MESA_BBOX_ENABLE),true)
+LOCAL_CFLAGS += -DMESA_BBOX_OPT
+endif
+
LOCAL_CPPFLAGS += \
-D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS \
@@ -87,6 +93,10 @@ LOCAL_CPPFLAGS += \
-Wno-error=non-virtual-dtor \
-Wno-non-virtual-dtor
+ifeq ($(MESA_BBOX_ENABLE),true)
+LOCAL_CPPFLAGS += -DMESA_BBOX_OPT
+endif
+
# mesa requires at least c99 compiler
LOCAL_CONLYFLAGS += \
-std=c99
@@ -98,6 +108,15 @@ ifeq ($(filter 5 6 7 8 9, $(MESA_ANDROID_MAJOR_VERSION)),)
LOCAL_CFLAGS += -DHAVE_TIMESPEC_GET
endif
+ifeq ($(MESA_BBOX_ENABLE),true)
+# Select the SIMD level used to build the bounding-box code.  These have to
+# be real make conditionals: a line starting with '#' (such as
+# "#if defined(...)") is a make comment, so with C-preprocessor style guards
+# both -mavx and -msse4.1 end up being appended unconditionally.
+ifdef CONFIG_AS_AVX
+LOCAL_CONLYFLAGS += -mavx
+else
+LOCAL_CONLYFLAGS += -msse4.1
+endif
+endif
+
+
ifeq ($(strip $(MESA_ENABLE_ASM)),true)
ifeq ($(TARGET_ARCH),x86)
LOCAL_CFLAGS += \
diff --git a/configure.ac b/configure.ac
index 4d9d9e5..dcdbcf3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -278,7 +278,8 @@ _SAVE_LDFLAGS="$LDFLAGS"
_SAVE_CPPFLAGS="$CPPFLAGS"
dnl Compiler macros
-DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
+DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
-DMESA_BBOX_OPT"
+dnl DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS"
AC_SUBST([DEFINES])
android=no
case "$host_os" in
@@ -295,10 +296,38 @@ esac
AM_CONDITIONAL(HAVE_ANDROID, test "x$android" = xyes)
+
+dnl Conditional parameters for enabling BBOX file compilation in Makefile
+dnl bbox=yes
+dnl MESA_BBOX_ENABLE=true
+dnl AM_CONDITIONAL([MESA_BBOX_ENABLE], [test x$bbox= xyes])
+
dnl
dnl Check compiler flags
dnl
-AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS
-Wall"])
+
+AC_ARG_WITH([swr-archs],
+ [AS_HELP_STRING([--with-swr-archs@<:@=DIRS...@:>@],
+ [comma delimited swr architectures list, e.g.
+ "avx,avx2,sse4.1,sse4.2" @<:@default="sse4.1,sse4.2"@:>@])],
+ [with_swr_archs="$withval"],
+ [with_swr_archs="sse4.1,sse4.2"])
+
+swr_archs=`IFS=', '; echo $with_swr_archs`
+if test "$swr_archs" = "avx"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS
-Wall -msse4.1 -mavx"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS
-Wall -msse4.1 -mavx"])
+elif test "$swr_archs" = "avx2"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS
-Wall -msse4.1 -mavx -mavx2"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS
-Wall -msse4.1 -mavx -mavx2"])
+elif test "$swr_archs" = "sse4.1"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS
-Wall -msse4.1"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS
-Wall -msse4.1"])
+elif test "$swr_archs" = "sse4.2"; then
+AX_CHECK_COMPILE_FLAG([-Wall], [CFLAGS="$CFLAGS
-Wall -msse4.1 -msse4.2"])
+AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS
-Wall -msse4.1 -msse4.2"])
+fi
+
AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS
-Werror=implicit-function-declaration"])
AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes], [CFLAGS="$CFLAGS
-Werror=missing-prototypes"])
AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes], [CFLAGS="$CFLAGS
-Wmissing-prototypes"])
@@ -313,7 +342,6 @@ dnl
dnl Check C++ compiler flags
dnl
AC_LANG_PUSH([C++])
-AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS
-Wall"])
Why removed?
AX_CHECK_COMPILE_FLAG([-fno-math-errno],
[CXXFLAGS="$CXXFLAGS -fno-math-errno"])
AX_CHECK_COMPILE_FLAG([-fno-trapping-math],
[CXXFLAGS="$CXXFLAGS -fno-trapping-math"])
AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],
[VISIBILITY_CXXFLAGS="-fvisibility=hidden"])
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
old mode 100644
new mode 100755
index 5d3f10b..f4e8dea
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -62,7 +62,6 @@
#include "builtin_functions.h"
using namespace ir_builder;
-
static void
detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
exec_list *instructions);
@@ -1325,6 +1324,124 @@ ast_expression::set_is_lhs(bool new_value)
this->subexpressions[0]->set_is_lhs(new_value);
}
+#ifdef MESA_BBOX_OPT
+/**
+ * Copy the NUL-terminated identifier src into the dst_size-byte buffer dst.
+ *
+ * Returns false, leaving dst untouched, when src is NULL or too long to fit
+ * together with its terminator.  On success strncpy() zero-fills the rest of
+ * dst, so the result is always terminated and a short name never keeps the
+ * tail of a longer one stored earlier.
+ */
+static bool
+bbox_copy_identifier(char *dst, size_t dst_size, const char *src)
+{
+   if (src == NULL || strlen(src) >= dst_size)
+      return false;
+
+   strncpy(dst, src, dst_size);
+   return true;
+}
+
+/**
+ * Decide whether the assignment simple_ast_root computes gl_Position in the
+ * form the bounding-box optimization understands: gl_Position = A * B.
+ *
+ * Both operands of the multiplication are lowered with hir(), which appends
+ * to instructions.  If A is a mat4 and B a vec4, every entry of B's
+ * expression list must be an identifier or an int/float constant, and each
+ * identifier is recorded in state->stateVertPosition.  When the left-hand
+ * side is gl_Position, A's name is recorded in state->stateMVP and true is
+ * returned.  For any other left-hand side A's name is still recorded as an
+ * MVP candidate, but false is returned so that later assignments keep being
+ * examined.
+ *
+ * A name that does not fit its fixed-size state buffer makes the shader
+ * "not simple" instead of overrunning the buffer.
+ *
+ * NOTE(review): the two-statement form "temp = A * B; gl_Position = temp;"
+ * is reported as not simple.  Recognizing it means remembering the name of
+ * temp between two calls of this function; a local buffer cannot do that
+ * (reading it on the second call reads uninitialized memory), so the name
+ * would have to be stored in _mesa_glsl_parse_state.
+ *
+ * NOTE(review): the gl_Position result does not depend on the mat4 * vec4
+ * type test; confirm whether other operand types should really qualify.
+ *
+ * \return true if the assignment marks the shader as simple.
+ */
+static bool
+is_simple_shader(exec_list *instructions, ast_expression *simple_ast_root,
+                 struct _mesa_glsl_parse_state *state)
+{
+   ast_expression *subex0 = simple_ast_root->subexpressions[0];
+   ast_expression *subex1 = simple_ast_root->subexpressions[1];
+
+   subex0->set_is_lhs(true);
+
+   /* Only "lhs = A * B" can be classified; see the NOTE(review) above. */
+   if (subex1->oper != ast_mul)
+      return false;
+
+   ast_expression *mvp_ast = subex1->subexpressions[0];
+   ast_expression *pos_ast = subex1->subexpressions[1];
+   ir_rvalue *mvp_ir = mvp_ast->hir(instructions, state);
+   ir_rvalue *pos_ir = pos_ast->hir(instructions, state);
+
+   if (mvp_ir->type->gl_type == GL_FLOAT_MAT4 &&
+       pos_ir->type->gl_type == GL_FLOAT_VEC4) {
+      foreach_list_typed (ast_node, ast, link, &pos_ast->expressions) {
+         ast_expression *arg = (ast_expression *)ast;
+
+         if (arg->oper != ast_identifier &&
+             arg->oper != ast_int_constant &&
+             arg->oper != ast_float_constant)
+            return false;
+
+         if (arg->oper == ast_identifier &&
+             arg->primary_expression.identifier != NULL &&
+             !bbox_copy_identifier(state->stateVertPosition,
+                                   sizeof(state->stateVertPosition),
+                                   arg->primary_expression.identifier))
+            return false;
+      }
+   }
+
+   if (subex0->oper != ast_identifier)
+      return false;
+
+   /* Check for NULL before strcmp() rather than after it. */
+   const char *lhs_name = subex0->primary_expression.identifier;
+   if (lhs_name == NULL)
+      return false;
+
+   if (!strcmp(lhs_name, "gl_Position")) {
+      /* Simple only if the MVP name is known and fits its buffer. */
+      return bbox_copy_identifier(state->stateMVP, sizeof(state->stateMVP),
+                                  mvp_ast->primary_expression.identifier);
+   }
+
+   /* Some other variable is assigned: remember the MVP candidate and
+    * return false to trigger further parsing.
+    */
+   if (mvp_ast->oper == ast_identifier)
+      (void)bbox_copy_identifier(state->stateMVP, sizeof(state->stateMVP),
+                                 mvp_ast->primary_expression.identifier);
+
+   return false;
+}
+/*
+ * Check whether the variable named by state->stateVertPosition is a shader
+ * input (mode ir_var_shader_in), e.g. in_position.
+ *
+ * Returns false when the symbol table has no variable of that name.  Only
+ * the variable's mode is inspected, so no dereference node is allocated.
+ */
+
+static bool
+is_attribute(struct _mesa_glsl_parse_state *state)
+{
+   ir_variable *const var =
+      state->symbols->get_variable(state->stateVertPosition);
+
+   return var != NULL && var->data.mode == ir_var_shader_in;
+}
+#endif //MESA_BBOX_OPT
+
ir_rvalue *
ast_expression::do_hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state,
@@ -1400,6 +1517,55 @@ ast_expression::do_hir(exec_list *instructions,
unreachable("ast_aggregate: Should never get here.");
case ast_assign: {
+#ifdef MESA_BBOX_OPT
+ if (state->stage == MESA_SHADER_VERTEX &&
+ !state->state_bbox_simple_shader &&
+ is_simple_shader(instructions,this,state) &&
+ !state->state_shader_analysis_complete)
+ {
+ state->state_bbox_simple_shader = true;
+ }
+
+ if (state->state_bbox_simple_shader &&
+ !state->state_shader_analysis_complete) {
+ if (!is_attribute(state)) {
+ if ((ir_dereference_variable *)op[0] != NULL &&
+ ((ir_dereference_variable *)op[0])->var->name != NULL &&
+ strlen(state->stateVertPosition) != 0)
+ if (!strcmp(((ir_dereference_variable *)op[0])->var->name,
+ state->stateVertPosition) ) {
+ if (((ir_instruction *)op[0])->ir_type == ir_type_variable ||
+ ((ir_instruction *)op[0])->ir_type ==
+ ir_type_dereference_variable &&
+ (ir_dereference_variable *)op[1] != NULL &&
+ ((ir_dereference_variable *)op[1])->ir_type ==
+ ir_type_dereference_variable) {
+ if ((((ir_instruction *)op[1])->ir_type == ir_type_variable
||
+ ((ir_instruction *)op[1])->ir_type ==
+ ir_type_dereference_variable) &&
+ ((ir_dereference_variable *)op[1])->var->name) {
+ strncpy((char *)state->stateVertPosition,
+ (char *)((ir_dereference_variable
*)op[1])->var->name,
+ strlen(((ir_dereference_variable *)op[1])->var->name));
+ state->state_shader_analysis_complete = true;
+ }
+ }
+ else {
+ state->state_bbox_simple_shader = false;
+ state->state_shader_analysis_complete = true;
+ }
+ }
+ else {
+ state->state_bbox_simple_shader = false;
+ state->state_shader_analysis_complete = true;
+
+ }
+ }
+ else {
+ state->state_shader_analysis_complete = true;
+ }
+ }
+#endif //MESA_BBOX_OPT
this->subexpressions[0]->set_is_lhs(true);
op[0] = this->subexpressions[0]->hir(instructions, state);
op[1] = this->subexpressions[1]->hir(instructions, state);
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp
b/src/compiler/glsl/glsl_parser_extras.cpp
index 42ba88f..c540b3a 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -2121,6 +2121,16 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct
gl_shader *shader,
if (!state->error && !state->translation_unit.is_empty())
_mesa_ast_to_hir(shader->ir, state);
+#ifdef MESA_BBOX_OPT
+ shader->shader_bbox_simple_shader = state->state_bbox_simple_shader;
+ if (shader->shader_bbox_simple_shader)
+ {
+ strncpy((char *)shader->shaderMVP,state->stateMVP,
+ strlen(state->stateMVP));
+ strncpy((char *)shader->shaderVertPosition,state->stateVertPosition,
+ strlen(state->stateVertPosition));
+ }
+#endif //MESA_BBOX_OPT
if (!state->error) {
validate_ir_tree(shader->ir);
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index da44d37..b951a66 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -887,6 +887,13 @@ struct _mesa_glsl_parse_state {
* so we can check totals aren't too large.
*/
unsigned clip_dist_size, cull_dist_size;
+
+#ifdef MESA_BBOX_OPT
+ bool state_shader_analysis_complete;
+ bool state_bbox_simple_shader;
+ char stateMVP[20] = {'\0'};
+ char stateVertPosition[100] = {'\0'};
+#endif
};
This causes a lot of compiler warnings:
-------------------------------------------
In file included from ../../../src/mesa/main/uniform_query.cpp:37:0:
../../../src/compiler/glsl/glsl_parser_extras.h:901:29: warning:
non-static data member initializers only available with -std=c++11 or
-std=gnu++11
char stateMVP[20] = {'\0'};
^
../../../src/compiler/glsl/glsl_parser_extras.h:902:39: warning:
non-static data member initializers only available with -std=c++11 or
-std=gnu++11
char stateVertPosition[100] = {'\0'};
^
../../../src/compiler/glsl/glsl_parser_extras.h:901:29: warning:
extended initializer lists only available with -std=c++11 or -std=gnu++11
char stateMVP[20] = {'\0'};
^
../../../src/compiler/glsl/glsl_parser_extras.h:902:39: warning:
extended initializer lists only available with -std=c++11 or -std=gnu++11
char stateVertPosition[100] = {'\0'};
-------------------------------------------
(Rest of struct isn't initialized like that, so IMHO those members
shouldn't either.)
# define YYLLOC_DEFAULT(Current, Rhs, N) \
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index 3ce78fe..210f37f 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4876,6 +4876,24 @@ link_shaders(struct gl_context *ctx, struct
gl_shader_program *prog)
switch (stage) {
case MESA_SHADER_VERTEX:
+#ifdef MESA_BBOX_OPT
+ if (shader_list[MESA_SHADER_VERTEX][0] != NULL) {
+ sh->linked_bbox_simple_shader =
+
shader_list[MESA_SHADER_VERTEX][0]->shader_bbox_simple_shader;
+ /*TBD: How do we handle multiple Vertex Shaders
+ * being linked ??
+ * MVP Name copied to get MVP in VBO
+ */
+ strncpy((char *)sh->linkedshaderMVP,
+ shader_list[MESA_SHADER_VERTEX][0]->shaderMVP,
+ strlen(shader_list[MESA_SHADER_VERTEX][0]->shaderMVP));
+ /* Vertex Position attribute name */
+ strncpy((char *)sh->linkedshaderVertPosition,
+ shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition,
+ strlen(
+ shader_list[MESA_SHADER_VERTEX][0]->shaderVertPosition));
+ }
+#endif
validate_vertex_shader_executable(prog, sh, ctx);
break;
case MESA_SHADER_TESS_CTRL:
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f..c677208 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -106,6 +106,13 @@ intel_debug_flag_for_shader_stage(gl_shader_stage stage)
static void
brw_process_intel_debug_variable_once(void)
{
+#if defined(__ANDROID__) || defined(ANDROID)
+ setenv("MESA_GLSL_CACHE_DISABLE","true",1);
+ setenv("MESA_BBOX_MIN_VERTEX_CNT", "999", 1);
+ setenv("MESA_BBOX_OPT_ENABLE", "3", 1);
+ setenv("MESA_OPT_SPLIT_SIZE", "198", 1);
+#endif
+
INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
}
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 0d3c277..8148622 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -1,4 +1,5 @@
### Lists of source files, included by Makefiles
+MESA_BBOX_ENABLE = false
# this is part of MAIN_FILES
MAIN_ES_FILES = \
@@ -400,6 +401,7 @@ TNL_FILES = \
tnl/t_vp_build.c \
tnl/t_vp_build.h
+
VBO_FILES = \
vbo/vbo_attrib.h \
vbo/vbo_attrib_tmp.h \
@@ -422,6 +424,15 @@ VBO_FILES = \
vbo/vbo_save.h \
vbo/vbo_save_loopback.c
+#ifeq($(MESA_BBOX_ENABLE), true)
+
+VBO_FILES += \
+ vbo/vbo_bbox_cache.c \
+ vbo/vbo_bbox.c \
+ vbo/vbo_bbox.h
+
+#endif
+
STATETRACKER_FILES = \
state_tracker/st_atifs_to_tgsi.c \
state_tracker/st_atifs_to_tgsi.h \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c
b/src/mesa/drivers/dri/i965/brw_context.c
index 6ba64e4..03cf861 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -48,6 +48,10 @@
#include "vbo/vbo.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
+
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"
@@ -890,6 +894,11 @@ brw_process_driconf_options(struct brw_context *brw)
ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
driQueryOptionb(options,
"allow_glsl_cross_stage_interpolation_mismatch");
+#ifdef MESA_BBOX_OPT
+ ctx->Const.EnableBoundingBoxCulling =
+ driQueryOptionb(options, "enable_bounding_box_culling");
+#endif //MESA_BBOX_OPT
+
ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
driComputeOptionsSha1(&brw->screen->optionCache,
ctx->Const.dri_config_options_sha1);
@@ -1001,6 +1010,14 @@ brwCreateContext(gl_api api,
brw_process_driconf_options(brw);
+#ifdef MESA_BBOX_OPT
+ if (ctx->Const.EnableBoundingBoxCulling) {
+ MESA_BBOX("EnableBoundingBoxCulling True\n");
+ vbo_bbox_init(ctx);
+ } else
+ MESA_BBOX("EnableBoundingBoxCulling False\n");
+#endif //MESA_BBOX_OPT
+
if (INTEL_DEBUG & DEBUG_PERF)
brw->perf_debug = true;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index eaf5a3b..35c7624 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -88,6 +88,10 @@ DRI_CONF_BEGIN
DRI_CONF_ALLOW_HIGHER_COMPAT_VERSION("false")
DRI_CONF_FORCE_GLSL_ABS_SQRT("false")
+#ifdef MESA_BBOX_OPT
+ DRI_CONF_ENABLE_BOUNDING_BOX_CULLING("true")
+#endif
+
DRI_CONF_OPT_BEGIN_B(shader_precompile, "true")
DRI_CONF_DESC(en, "Perform code generation at shader link time.")
DRI_CONF_OPT_END
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 1d1e51b..67a369b 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -48,6 +48,9 @@
#include "varray.h"
#include "util/u_atomic.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
/* Debug flags */
/*#define VBO_DEBUG*/
@@ -2254,6 +2257,10 @@ buffer_sub_data(GLenum target, GLuint buffer, GLintptr
offset,
if (no_error || validate_buffer_sub_data(ctx, bufObj, offset, size, func))
_mesa_buffer_sub_data(ctx, bufObj, offset, size, data);
+
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_element_buffer_update(ctx,bufObj,data,offset,size);
+#endif
}
@@ -2589,9 +2596,17 @@ validate_and_unmap_buffer(struct gl_context *ctx,
#endif
#ifdef VBO_DEBUG
+#ifdef MESA_BBOX_OPT
+ if (bufObj->StorageFlags & GL_MAP_WRITE_BIT) {
+#else
if (bufObj->AccessFlags & GL_MAP_WRITE_BIT) {
+#endif
GLuint i, unchanged = 0;
+#ifdef MESA_BBOX_OPT
+ GLubyte *b = (GLubyte *) bufObj->Data;
+#else
GLubyte *b = (GLubyte *) bufObj->Pointer;
+#endif
GLint pos = -1;
/* check which bytes changed */
for (i = 0; i < bufObj->Size - 1; i++) {
@@ -3154,7 +3169,11 @@ map_buffer_range(struct gl_context *ctx, struct
gl_buffer_object *bufObj,
/* Access must be write only */
if ((access & GL_MAP_WRITE_BIT) && (!(access & ~GL_MAP_WRITE_BIT))) {
GLuint i;
+#ifdef MESA_BBOX_OPT
+ GLubyte *b = (GLubyte *) bufObj->Data;
+#else
GLubyte *b = (GLubyte *) bufObj->Pointer;
+#endif
for (i = 0; i < bufObj->Size; i++)
b[i] = i & 0xff;
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 9fd577d..c262173 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1359,6 +1359,9 @@ struct gl_buffer_object
bool MinMaxCacheDirty;
bool HandleAllocated; /**< GL_ARB_bindless_texture */
+#ifdef MESA_BBOX_OPT
+ int data_change_counter; //TBD: Same as RefCount to check
+#endif
};
@@ -2535,6 +2538,12 @@ struct gl_linked_shader
* stores information that is also needed during linking.
*/
struct gl_shader_spirv_data *spirv_data;
+
+#ifdef MESA_BBOX_OPT
+ bool linked_bbox_simple_shader;
+ char linkedshaderMVP[20];
+ char linkedshaderVertPosition[100];
+#endif
};
@@ -2631,6 +2640,11 @@ struct gl_shader
/* ARB_gl_spirv related data */
struct gl_shader_spirv_data *spirv_data;
+#ifdef MESA_BBOX_OPT
+ bool shader_bbox_simple_shader;
+ char shaderMVP[20];
+ char shaderVertPosition[100];
+#endif
};
@@ -2902,6 +2916,9 @@ struct gl_shader_program_data
* ARB_gl_spirv extension.
*/
bool spirv;
+#ifdef MESA_BBOX_OPT
+ GLuint vbo_bbox_mvp_location;
+#endif
};
/**
@@ -3108,6 +3125,10 @@ struct gl_pipeline_shader_state
/** Pipeline objects */
struct _mesa_HashTable *Objects;
+#ifdef MESA_BBOX_OPT
+ /* Bounding box draw optimization control structure */
+ struct mesa_bbox_opt *BboxOpt;
+#endif
};
/**
@@ -4076,6 +4097,12 @@ struct gl_constants
/** GL_ARB_gl_spirv */
struct spirv_supported_capabilities SpirVCapabilities;
+
+#ifdef MESA_BBOX_OPT
+ /** MESA_BBOX_OPT Runtime enable_bounding_box_culling*/
+ bool EnableBoundingBoxCulling;
+#endif
+
};
@@ -4720,6 +4747,21 @@ struct gl_semaphore_object
GLuint Name; /**< hash table ID/name */
};
+#ifdef MESA_BBOX_OPT
+/**
+ * Bounding volume classification types
+ *
+ * NOTE(review): the abbreviations presumably stand for axis-aligned bounding
+ * box (AABB), oriented bounding box (OBB) and discrete oriented polytope
+ * (DOP) - confirm with the author.
+ */
+typedef enum
+{
+   BOUNDING_VOLUME_AABB = 0,
+   BOUNDING_VOLUME_OBB = 1,
+   BOUNDING_VOLUME_SPHERE = 2,
+   BOUNDING_VOLUME_DOP = 3,
+   BOUNDING_VOULME_MIXED = 4, /* NOTE(review): "VOULME" looks like a typo for "VOLUME" */
+   BOUNDING_VOLUME_MAX = 5, /* presumably the number of volume types - confirm */
+} bounding_volume_type;
+#endif //MESA_BBOX_OPT
+
/**
* Mesa rendering context.
*
@@ -5096,6 +5138,15 @@ struct gl_context
struct hash_table_u64 *ResidentTextureHandles;
struct hash_table_u64 *ResidentImageHandles;
/*@}*/
+
+#ifdef MESA_BBOX_OPT
+ /**
+ * Bounding volume type
+ *
+ */
+ bounding_volume_type volume_type;
+#endif //MESA_BBOX_OPT
+
};
/**
diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk
index c6470e6..6489d3e 100644
--- a/src/mesa/program/Android.mk
+++ b/src/mesa/program/Android.mk
@@ -75,6 +75,7 @@ $(intermediates)/program/lex.yy.c:
$(LOCAL_PATH)/program_lexer.l
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/mapi \
$(MESA_TOP)/src/mesa \
+ $(MESA_TOP)/src/mesa/vbo \
$(MESA_TOP)/src/compiler/nir \
$(MESA_TOP)/src/gallium/auxiliary \
$(MESA_TOP)/src/gallium/include
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 6ab1bf5..e0b3563 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -43,6 +43,9 @@
#include "util/ralloc.h"
#include "util/u_atomic.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo/vbo_bbox.h"
+#endif
/**
* A pointer to this dummy program is put into the hash table when
diff --git a/src/mesa/vbo/vbo_bbox.c b/src/mesa/vbo/vbo_bbox.c
new file mode 100644
index 0000000..f1e153d
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.c
@@ -0,0 +1,1538 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ *
+ * NOTE(review): the "#pragma GCC optimize (0)" that follows asks GCC to
+ * compile the functions defined after it without optimization - confirm
+ * whether that is intended for a performance feature or is a debugging
+ * leftover.
+ */
+
+#pragma GCC optimize (0)
+#include "vbo_bbox.h"
+#include <fcntl.h>
+#define BBOX_MAX_FRAMES_TO_DYNAMICALLY_UPDATE (5)
+#define BBOX_MIN_EFFECTIVE_DRAWS_TO_DYNAMICALLY_UPDATE (5)
+#define BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (-100)
+#define BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER (100)
+#define BBOX_MIN_GPU_HEAD_ROOM_TO_PROCESS_ELEMENT_BUFFER (100)
+
+/**
+ * Min part of split draw that we want to drop
+ */
+#define BBOX_MIN_SPLITTED_DRAW_TO_DROP (1)
+
+#ifdef __AVX__
+static __m256 fullMin;
+static __m256 fullMax;
+#else
+static __m128 fullMin;
+static __m128 fullMax;
+#endif
+
+/* Segment functions */
+
+/**
+ * Report whether two segments overlap.
+ *
+ * Both comparisons are strict, so segments that only touch at an end point
+ * are not reported as intersecting.
+ */
+static inline GLboolean
+intersect(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   const GLboolean src_starts_before_tar_ends = s_src->Left < s_tar->Right;
+   const GLboolean src_ends_after_tar_starts = s_src->Right > s_tar->Left;
+
+   return (src_starts_before_tar_ends && src_ends_after_tar_starts);
+}
+
+
+/**
+ * Report whether s_tar lies entirely within s_src (end points included).
+ */
+static inline GLboolean
+subsegment(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   /* s_src has to start at or before s_tar ... */
+   if (!(s_src->Left <= s_tar->Left))
+      return 0;
+
+   /* ... and end at or after it. */
+   return (s_src->Right >= s_tar->Right);
+}
+
+
+/**
+ * Report whether s_src lies entirely within s_tar (end points included);
+ * this is subsegment() with the two arguments swapped.
+ */
+static inline GLboolean
+superset(const struct gl_segment *s_src,const struct gl_segment *s_tar)
+{
+   const struct gl_segment *outer = s_tar;
+   const struct gl_segment *inner = s_src;
+
+   return subsegment(outer, inner);
+}
+
+
+/**
+ * Scale the coefficients of a frustum plane so that its normal (a, b, c)
+ * has unit length; d is multiplied by the same factor.
+ *
+ * A plane whose normal has zero length is left untouched, because dividing
+ * by that length would fill all four coefficients with inf/NaN.
+ */
+static void
+normalize(vbo_bbox_frustum_plane *fr_plane)
+{
+   const GLfloat a = fr_plane->a;
+   const GLfloat b = fr_plane->b;
+   const GLfloat c = fr_plane->c;
+   const GLfloat length_sq = a*a + b*b + c*c;
+
+   /* Nothing sensible can be done with a degenerate normal. */
+   if (length_sq <= 0.0f)
+      return;
+
+   const GLfloat norm = 1.0f/sqrt(length_sq);
+
+   fr_plane->a *= norm;
+   fr_plane->b *= norm;
+   fr_plane->c *= norm;
+   fr_plane->d *= norm;
+}
+
+/**
+ * Return the current calculation delay and advance the stored value by one.
+ *
+ * When mesa_bbox_env_variables.bbox_enable is not below
+ * MESA_BBOX_ENABLE_FORCE_RECALC no delay applies and 0 is returned.
+ * Otherwise a stored delay above BBOX_CALC_MAX_DELAY is first reset to
+ * BBOX_CALC_MIN_DELAY; the (possibly reset) value is returned and the stored
+ * value is then incremented.
+ */
+static inline
+int vbo_bbox_get_delay(struct mesa_bbox_opt *opt)
+{
+   if (!(mesa_bbox_env_variables.bbox_enable < MESA_BBOX_ENABLE_FORCE_RECALC))
+      return 0;
+
+   if (opt->calc_delay > BBOX_CALC_MAX_DELAY)
+      opt->calc_delay = BBOX_CALC_MIN_DELAY;
+
+   return opt->calc_delay++;
+}
+
+/**
+ * Make sure data->sub_bbox_array has room for bbox_count entries.
+ *
+ * An existing array is kept when data->sub_bbox_cnt already equals
+ * bbox_count; otherwise it is freed and a new one is allocated with malloc().
+ *
+ * Returns false when no array is available after the call (allocation
+ * failed); in that case data->sub_bbox_cnt is not updated.  Returns true and
+ * stores bbox_count in data->sub_bbox_cnt otherwise.
+ */
+static inline
+bool vbo_init_sub_bbox_array(int bbox_count, struct vbo_bbox_cache_data* data)
+{
+   /* Drop an array of the wrong size. */
+   if (data->sub_bbox_array != NULL && bbox_count != data->sub_bbox_cnt) {
+      free(data->sub_bbox_array);
+      data->sub_bbox_array = NULL;
+   }
+
+   if (data->sub_bbox_array == NULL) {
+      data->sub_bbox_array = (struct bounding_info*) malloc(
+                                bbox_count * sizeof (struct bounding_info));
+   }
+
+   if (data->sub_bbox_array == NULL)
+      return false;
+
+   data->sub_bbox_cnt = bbox_count;
+   return true;
+}
+
+/**
+ * Look up the uniform location of the MVP matrix in the active program.
+ *
+ * The uniform name is the one stored in linkedshaderMVP of the program's
+ * linked vertex shader.
+ *
+ * \return the value of _mesa_GetUniformLocation() for that name, or -1 when
+ *         there is no active program or it has no linked vertex shader.
+ */
+static inline GLint
+vbo_bbox_get_mvp(struct gl_context *ctx)
+{
+   if (!ctx->Shader.ActiveProgram)
+      return -1;
+
+   struct gl_linked_shader * linked_shader =
+      ctx->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!linked_shader)
+      return -1;
+
+   return _mesa_GetUniformLocation(ctx->Shader.ActiveProgram->Name,
+                                   linked_shader->linkedshaderMVP);
+}
+
+/*
+ * Gets the "simple shader" flag of the vertex shader linked into the
+ * currently active program.
+ *
+ * Returns 0 (not simple) when there is no active program or the program has
+ * no linked vertex shader, instead of dereferencing a NULL pointer.
+ */
+static inline int
+vbo_is_simple_shader(struct gl_context *ctx)
+{
+   if (!ctx->Shader.ActiveProgram)
+      return 0;
+
+   struct gl_linked_shader * linked_shader =
+      ctx->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!linked_shader)
+      return 0;
+
+   return linked_shader->linked_bbox_simple_shader;
+}
+
+/**
+ * Return the vertex array object stored in gc->Array.VAO.
+ *
+ * Both the context and the VAO are asserted to be non-NULL.
+ */
+static inline struct gl_vertex_array_object*
+vbo_get_current_vao(struct gl_context *const gc)
+{
+   struct gl_vertex_array_object *vao;
+
+   assert(gc);
+   vao = gc->Array.VAO;
+   assert(vao);
+
+   return vao;
+}
+
+/**
+ * Returns the location of the "position" in the attributes of the
+ * currently active program, or -1 when it cannot be determined.
+ *
+ * The attribute name is the one stored in linkedshaderVertPosition of the
+ * program's linked vertex shader.  -1 is also returned when there is no
+ * active program or it has no linked vertex shader, and any negative result
+ * of _mesa_GetAttribLocation() is reported as -1.
+ */
+static inline
+int vbo_get_simple_vs_position_attr_location(
+                                    struct gl_context *const gc)
+{
+   assert(gc);
+
+   if (!gc->Shader.ActiveProgram)
+      return -1;
+
+   struct gl_linked_shader * linked_shader =
+      gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];
+   if (!linked_shader)
+      return -1;
+
+   GLint vertexPosLocation = _mesa_GetAttribLocation(
+                                    gc->Shader.ActiveProgram->Name,
+                                    linked_shader->linkedshaderVertPosition);
+
+   return vertexPosLocation >= 0 ? vertexPosLocation : -1;
+}
+
+/**
+ * Return the IndexBufferObj of the current VAO.
+ *
+ * The context and the resulting buffer object are asserted to be non-NULL.
+ */
+static inline struct gl_buffer_object*
+vbo_get_current_element_buffer(struct gl_context *const gc)
+{
+   struct gl_vertex_array_object *vao;
+   struct gl_buffer_object *element_buffer;
+
+   assert(gc);
+   vao = vbo_get_current_vao(gc);
+   element_buffer = vao->IndexBufferObj;
+   assert(element_buffer);
+
+   return element_buffer;
+}
+
+/**
+ * Get vertex-binding of position from the current VAO
+ *
+ * Takes the lowest set bit of (_Enabled & VertexAttribBufferMask) via ffs()
+ * and returns the buffer binding referenced by that attribute array.
+ *
+ * NOTE(review): the attribute is chosen by bit position only; the position
+ * attribute location is not consulted here, so this presumably relies on
+ * position being the lowest enabled buffer-backed attribute - confirm.
+ * NOTE(review): ffs() returns 0 for a zero mask, which makes the index -1;
+ * callers apparently have to guarantee at least one enabled buffer-backed
+ * attribute - confirm.
+ */
+static inline struct gl_vertex_buffer_binding*
+vbo_get_current_vertex_buffer_binding_of_position(struct gl_context *const gc)
+{
+   assert(gc);
+   struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);
+
+   GLbitfield mask = vao->_Enabled & vao->VertexAttribBufferMask;
+   const struct gl_array_attributes *attrib_array =
+            &vao->VertexAttrib[ffs(mask) - 1];
+   struct gl_vertex_buffer_binding *buffer_binding =
+            &vao->BufferBinding[attrib_array->BufferBindingIndex];
+
+   return buffer_binding;
+}
+
+/**
+ * Return the BufferObj of the binding that
+ * vbo_get_current_vertex_buffer_binding_of_position() selects in the current
+ * VAO.
+ *
+ * The context and the resulting buffer object are asserted to be non-NULL.
+ */
+static inline struct gl_buffer_object*
+vbo_get_current_vertex_buffer(struct gl_context *const gc)
+{
+   struct gl_vertex_buffer_binding *binding;
+   struct gl_buffer_object *pVertexBuffer;
+
+   assert(gc);
+   binding = vbo_get_current_vertex_buffer_binding_of_position(gc);
+   pVertexBuffer = binding->BufferObj;
+   assert(pVertexBuffer);
+
+   return pVertexBuffer;
+}
+
+
+/**
+ * Decide whether a draw call may take the bounding box optimization path.
+ *
+ * The draw call is accepted only when all of the following hold:
+ *  - at least bbox_min_vrtx_count indices, primitive mode GL_TRIANGLES;
+ *  - a GLSL program with vertex and fragment stages only, whose vertex
+ *    shader passes vbo_is_simple_shader();
+ *  - named element and vertex buffers whose StorageFlags contain
+ *    GL_CLIENT_STORAGE_BIT or GL_DYNAMIC_STORAGE_BIT;
+ *  - a position attribute of 3 x GL_FLOAT (when enabled in the VAO);
+ *  - GL_UNSIGNED_SHORT indices, basevertex == 0;
+ *  - transform feedback not active.
+ * Some of the vertex buffer checks are relaxed on Android builds and for
+ * GLES contexts (see the individual checks).
+ *
+ * \param gc          current GL context, must not be NULL
+ * \param mode        primitive mode of the draw call
+ * \param count       number of indices of the draw call
+ * \param type        index type of the draw call
+ * \param indices     index offset/pointer of the draw call (not inspected)
+ * \param basevertex  base vertex of the draw call
+ * \return true when the draw call is supported, false otherwise
+ */
+static inline bool
+vbo_bbox_check_supported_draw_call(struct gl_context *const gc,
+                                   GLenum mode, GLsizei count, GLenum type,
+                                   const GLvoid *indices, GLint basevertex)
+{
+   assert(gc);
+
+   /* The vertex count is the most common reason to bail out of the
+    * optimization, so it is tested first.
+    */
+   if (count < (GLsizei) mesa_bbox_env_variables.bbox_min_vrtx_count) {
+      MESA_BBOX("Aborting MESA_BBOX :%d: Vertex count too small, minimum count = %d\n",
+                count, mesa_bbox_env_variables.bbox_min_vrtx_count);
+      return false;
+   }
+
+   if (mode != GL_TRIANGLES) {
+      MESA_BBOX("Aborting MESA_BBOX :%d: Primitive mode is not GL_TRIANGLES, \
+ mode = %d\n", count, mode);
+      return false;
+   }
+
+   /* Examine current shader */
+   if (!gc->_Shader->ActiveProgram) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: No active GLSL program.\n", count);
+      return false;
+   }
+
+   /* Only the basic scenario (vertex + fragment stage, nothing else) is
+    * supported.
+    */
+   if (gc->_Shader->CurrentProgram[MESA_SHADER_VERTEX] == NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT] == NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL] != NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] != NULL ||
+       gc->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] != NULL) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: GLSL program must contain only vertex and \
+ fragment shaders, shader scenario = \n", count );
+      return false;
+   }
+
+   if (!vbo_is_simple_shader(gc)) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: GLSL vertex shader does not have simple \
+ position calculation \n", count);
+      return false;
+   }
+
+   /* Debug output only. gc->Shader.ActiveProgram is a different object from
+    * gc->_Shader->ActiveProgram tested above, so it needs its own NULL test.
+    */
+   if (gc->Shader.ActiveProgram &&
+       gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX]) {
+      struct gl_linked_shader *vs =
+         gc->Shader.ActiveProgram->_LinkedShaders[MESA_SHADER_VERTEX];
+
+      MESA_BBOX("MVP:%s, VertPos:%s\n", vs->linkedshaderMVP,
+                vs->linkedshaderVertPosition);
+   }
+
+   /* Examine element buffer */
+   struct gl_buffer_object* element_buffer =
+      vbo_get_current_element_buffer(gc);
+   if ((!element_buffer) || element_buffer->Name == 0) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer name is 0\n", count);
+      return false;
+   }
+
+   if (!(element_buffer->StorageFlags &
+         (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: Element buffer not resident: %#x\n",
+                count, element_buffer->StorageFlags);
+      return false;
+   }
+
+   /* Get VertexPosLocation */
+   int vertexPosLocation = 0;
+   if (gc->Shader.ActiveProgram)
+      vertexPosLocation = vbo_get_simple_vs_position_attr_location(gc);
+   if (vertexPosLocation < 0) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: VertexPosition Location is inValid:\n",
+                count);
+      return false;
+   }
+
+   struct gl_vertex_array_object* vao = vbo_get_current_vao(gc);
+   int posAttribMapMode =
+      _mesa_vao_attribute_map[vao->_AttributeMapMode][vertexPosLocation];
+
+   if (((vao->_Enabled >> posAttribMapMode) & 0x1) != 1) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: Vertex data does not come from VBO , GL-API:%d\n",
+                count, gc->API);
+      /* Tolerated for the GLES APIs only (this is an API property, not an
+       * Android property).
+       */
+      if (gc->API != API_OPENGLES && gc->API != API_OPENGLES2)
+         return false;
+   }
+
+   struct gl_buffer_object* vertexattrib_buffer =
+      vbo_get_current_vertex_buffer(gc);
+   if (!vertexattrib_buffer) {
+      /* Must bail out on every platform: the buffer is dereferenced below. */
+      MESA_BBOX("Aborting MESA_BBOX:%d: No vertex buffer bound\n", count);
+      return false;
+   }
+   if (vertexattrib_buffer->Name == 0) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: Vertex buffer %p name is %d\n", count,
+                (void *) vertexattrib_buffer, vertexattrib_buffer->Name);
+#if !defined(__ANDROID__) && !defined(ANDROID)
+      return false;
+#endif
+   }
+
+   if (!(vertexattrib_buffer->StorageFlags &
+         (GL_CLIENT_STORAGE_BIT | GL_DYNAMIC_STORAGE_BIT))) {
+      MESA_BBOX("Aborting MESA_BBOX:%d:Vertex buffer not resident %#x \n",
+                count, vertexattrib_buffer->StorageFlags);
+      MESA_BBOX("Aborting MESA_BBOX:VAO AttributeMode is %d\n",
+                vao->_AttributeMapMode);
+#if !defined(__ANDROID__) && !defined(ANDROID)
+      return false;
+#endif
+   }
+
+   /* Examine vertex position attribute configuration */
+   if (vao->VertexAttrib[posAttribMapMode].Enabled) {
+      if (vao->VertexAttrib[posAttribMapMode].Size != 3) {
+         MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib size :%d, only 3 supported\n",
+                   count, vao->VertexAttrib[posAttribMapMode].Size);
+#if !defined(__ANDROID__) && !defined(ANDROID)
+         return false;
+#endif
+      }
+      if (vao->VertexAttrib[posAttribMapMode].Type != GL_FLOAT) {
+         MESA_BBOX("Aborting MESA_BBOX:%d: Vertex attrib type is %d, only GL_FLOAT \
+ supported\n", count, vao->VertexAttrib[posAttribMapMode].Type);
+         return false;
+      }
+   }
+
+   if (type != GL_UNSIGNED_SHORT) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: type is %d, only GL_UNSIGNED_SHORT \
+ supported\n", count, type);
+      return false;
+   }
+   if (basevertex != 0) {
+      MESA_BBOX("Aborting MESA_BBOX:%d: basevertex is not 0 \n", count);
+      return false;
+   }
+
+   /* If size ==3 and type == GL_FLOAT, then element stride must be 12.
+    * NOTE(review): this inspects VERT_ATTRIB_POS while the checks above
+    * inspect posAttribMapMode, and an interleaved array would have a larger
+    * stride - confirm whether this assertion is really intended.
+    */
+   assert(vao->VertexAttrib[VERT_ATTRIB_POS].StrideB == 12);
+
+   /* When transform feedback is capturing we cannot do early clipping since
+    * xfb must write unclipped vertices
+    * Note - we could check for IsCapturing() but that would require
+    * more elaborate checking for VBO modifications.
+    */
+   if (gc->TransformFeedback.CurrentObject->Active) {
+      MESA_BBOX("MESA_BBOX:%d: Transform feedback is active, \
+ cannot clip\n", count);
+      return false;
+   }
+   return true;
+}
+
+
+/**
+ * Fill \p key with the values that identify the geometry of a draw call:
+ * the draw parameters, the names of the current element and vertex buffer
+ * objects and the offset/stride of the position vertex buffer binding.
+ *
+ * No eligibility checks are made here; the buffers returned for \p gc are
+ * dereferenced without a NULL test.
+ *
+ * \param key  output; fully overwritten
+ */
+static inline
+void vbo_bbox_prepare_key(struct gl_context *const gc, GLenum mode,
+                          GLsizei count, GLenum type, GLuint type_size,
+                          const GLvoid *indices, GLint basevertex,
+                          vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   struct gl_buffer_object *const ebo = vbo_get_current_element_buffer(gc);
+   struct gl_buffer_object *const vbo = vbo_get_current_vertex_buffer(gc);
+   struct gl_vertex_buffer_binding *const pos_binding =
+      vbo_get_current_vertex_buffer_binding_of_position(gc);
+
+   /* Start from an all-zero key; presumably the cache compares keys as raw
+    * bytes (it is handed sizeof(vbo_bbox_cache_key)) - TODO confirm.
+    */
+   memset(key, 0, sizeof(*key));
+
+   /* Buffer identity and layout */
+   key->element_buf_name = ebo->Name;
+   key->vertex_buf_name = vbo->Name;
+   key->offset = (GLint)pos_binding->Offset;
+   key->stride = pos_binding->Stride;
+
+   /* Draw call parameters */
+   key->mode = mode;
+   key->count = count;
+   key->indices_type = type;
+   key->type_size = type_size;
+   key->indices = (GLint) (uintptr_t) indices;
+   key->basevertex = basevertex;
+}
+
+
+/**
+ * Expand a min/max pair into the 8 corner vertices of an axis aligned box.
+ *
+ * Corner number n (0..7) takes its x from the maximum when bit 0 of n is
+ * set, its y from the maximum when bit 1 is set and its z from the maximum
+ * when bit 2 is set; the minimum is used otherwise. w is always 1.0.
+ * The corner order is significant.
+ *
+ * \param minVec3f   3 floats: minimum x, y, z
+ * \param maxVec3F   3 floats: maximum x, y, z
+ * \param vertices4  output, written as 8 * 4 consecutive floats
+ */
+static
+void vbo_bbox_create_bounding_box(float* const minVec3f, float* const maxVec3F,
+                                  vbo_vec4f* vertices4)
+{
+   assert(minVec3f);
+   assert(maxVec3F);
+   assert(vertices4);
+
+   const float lo[3] = { minVec3f[0], minVec3f[1], minVec3f[2] };
+   const float hi[3] = { maxVec3F[0], maxVec3F[1], maxVec3F[2] };
+   float *out = (float *) vertices4;
+
+   for (unsigned corner = 0; corner < 8; corner++) {
+      out[0] = (corner & 1) ? hi[0] : lo[0];
+      out[1] = (corner & 2) ? hi[1] : lo[1];
+      out[2] = (corner & 4) ? hi[2] : lo[2];
+      out[3] = 1.0f;
+      out += 4;
+   }
+}
+
+#ifdef __AVX__
+/**
+ * Compute the bounding boxes of two sub-boxes in one pass (AVX variant).
+ *
+ * The lower four lanes of the 256-bit accumulators track sub-box
+ * \p first_idx, the upper four lanes track sub-box \p second_idx. Both
+ * sub-boxes are walked for \p vertCount indices. The results are merged into
+ * the file-level fullMin/fullMax accumulators and stored as 8 corner
+ * vertices in data->sub_bbox_array[].
+ *
+ * \param vertSubBox  number of indices per sub-box (used to locate the
+ *                    first index of a sub-box)
+ * \param vertCount   number of indices to process in each sub-box
+ * \param first_idx   number of the first sub-box
+ * \param second_idx  number of the second sub-box (may equal first_idx)
+ * \param indices     16-bit index data
+ * \param vertices    start of the vertex position data
+ * \param stride      distance in bytes between consecutive vertices
+ * \param data        cache entry receiving the sub-box corners
+ */
+static void
+vbo_bbox_calc_subbox_coordinates(unsigned int vertSubBox,
+                                 unsigned int vertCount,
+                                 unsigned int first_idx,
+                                 unsigned int second_idx,
+                                 unsigned short* indices, float* vertices,
+                                 unsigned int stride,
+                                 struct vbo_bbox_cache_data *data)
+{
+   /* Index of the first element of each sub-box inside the index data */
+   const unsigned int base_a = first_idx * vertSubBox;
+   const unsigned int base_b = second_idx * vertSubBox;
+   const char *const vertex_bytes = (const char *) vertices;
+
+   /* Lanes 3 and 7 are never written and stay 0.0 */
+   float lanes[8] = {0.0};
+   float mins[8] = {0.0};
+   float maxs[8] = {0.0};
+
+   __m256 lo = _mm256_set1_ps(FLT_MAX);
+   __m256 hi = _mm256_set1_ps(-FLT_MAX);
+   unsigned int v;
+
+   for (v = 0; v < vertCount; v++) {
+      const unsigned short ia = indices[base_a + v];
+      const unsigned short ib = indices[base_b + v];
+
+      /* x, y, z of the vertex of each sub-box go to lanes 0-2 and 4-6 */
+      memcpy(&lanes[0], vertex_bytes + stride * ia, 3 * sizeof(float));
+      memcpy(&lanes[4], vertex_bytes + stride * ib, 3 * sizeof(float));
+
+      const __m256 packed = _mm256_loadu_ps(lanes);
+      lo = _mm256_min_ps(lo, packed);
+      hi = _mm256_max_ps(hi, packed);
+   }
+
+   /* Merge into the accumulators of the full box */
+   fullMin = _mm256_min_ps(fullMin, lo);
+   fullMax = _mm256_max_ps(fullMax, hi);
+
+   _mm256_storeu_ps(mins, lo);
+   _mm256_storeu_ps(maxs, hi);
+
+   /* Lower half belongs to the first sub-box, upper half to the second */
+   vbo_bbox_create_bounding_box(&mins[0], &maxs[0],
+      &(data->sub_bbox_array[first_idx].bounding_volume.vert_vec4[0]));
+   vbo_bbox_create_bounding_box(&mins[4], &maxs[4],
+      &(data->sub_bbox_array[second_idx].bounding_volume.vert_vec4[0]));
+}
+#else
+/**
+ * Compute the bounding box of one sub-box (128-bit variant).
+ *
+ * Sub-box \p idx is walked for \p vertCount indices. The result is merged
+ * into the file-level fullMin/fullMax accumulators and stored as 8 corner
+ * vertices in data->sub_bbox_array[idx].
+ *
+ * \param vertSubBox  number of indices per sub-box (used to locate the
+ *                    first index of the sub-box)
+ * \param vertCount   number of indices to process
+ * \param idx         number of the sub-box
+ * \param indices     16-bit index data
+ * \param vertices    start of the vertex position data
+ * \param stride      distance in bytes between consecutive vertices
+ * \param data        cache entry receiving the sub-box corners
+ */
+static void
+vbo_bbox_calc_subbox_coordinates(
+                        unsigned int vertSubBox,
+                        unsigned int vertCount,
+                        unsigned int idx,
+                        unsigned short *indices,
+                        float *vertices,
+                        unsigned int stride,
+                        struct vbo_bbox_cache_data *data)
+{
+   /* Index of the first element of the sub-box inside the index data */
+   const unsigned int base = idx * vertSubBox;
+   const char *const vertex_bytes = (const char *) vertices;
+
+   /* Lane 3 is never written and stays 0.0 */
+   float lanes[4] = {0.0};
+   float mins[4] = {0.0};
+   float maxs[4] = {0.0};
+
+   __m128 lo = _mm_set1_ps(FLT_MAX);
+   __m128 hi = _mm_set1_ps(-FLT_MAX);
+   unsigned int v;
+
+   for (v = 0; v < vertCount; v++) {
+      const unsigned short index = indices[base + v];
+
+      /* x, y, z of the vertex go to lanes 0-2 */
+      memcpy(lanes, vertex_bytes + stride * index, 3 * sizeof(float));
+
+      const __m128 packed = _mm_loadu_ps(lanes);
+      lo = _mm_min_ps(lo, packed);
+      hi = _mm_max_ps(hi, packed);
+   }
+
+   /* Merge into the accumulators of the full box */
+   fullMin = _mm_min_ps(fullMin, lo);
+   fullMax = _mm_max_ps(fullMax, hi);
+
+   _mm_storeu_ps(mins, lo);
+   _mm_storeu_ps(maxs, hi);
+
+   vbo_bbox_create_bounding_box(mins, maxs,
+      &(data->sub_bbox_array[idx].bounding_volume.vert_vec4[0]));
+}
+#endif
+
+/**
+ * Get pointer to VBO data.
+ * Pointer should be suitable for fast data reading, not data change.
+ *
+ * The buffer is mapped for reading through _mesa_MapNamedBuffer() and must
+ * be released again with vbo_bbox_release_vbo_ptr().
+ *
+ * \param gc        current GL context, must not be NULL
+ * \param vbo       buffer object to map, must not be NULL
+ * \param offset    byte offset into the buffer; must lie inside the buffer
+ * \param data      output: address of the byte at \p offset
+ * \param dataSize  output: number of bytes from \p offset to the buffer end
+ * \return false when \p offset is out of range or the mapping failed; the
+ *         outputs are left untouched in that case
+ */
+static
+bool vbo_bbox_get_vbo_ptr(struct gl_context* gc, struct gl_buffer_object* vbo,
+                          int offset, void** data, int* dataSize)
+{
+   assert(gc);
+   assert(vbo);
+   GLubyte* vboDataPtr = NULL;
+
+   /* A negative offset would yield a pointer in front of the mapping. */
+   if (offset < 0 || offset >= vbo->Size) {
+      return false;
+   }
+
+   /* The callers only read through the returned pointer, so request a read
+    * mapping; reading through a write-only mapping is not allowed by GL.
+    * NOTE(review): if the application already has this buffer mapped the
+    * call presumably fails with a GL error - confirm this is acceptable.
+    */
+   vboDataPtr = _mesa_MapNamedBuffer(vbo->Name, GL_READ_ONLY_ARB);
+   if (vboDataPtr == NULL) {
+      return false;
+   }
+   *data = vboDataPtr + offset;
+   *dataSize = vbo->Size - offset;
+
+   return true;
+}
+
+/**
+ * Unmap a buffer object previously mapped by vbo_bbox_get_vbo_ptr().
+ * The result of the unmap call is ignored.
+ */
+static inline
+void vbo_bbox_release_vbo_ptr(struct gl_context* gc,
+                              struct gl_buffer_object* vbo)
+{
+   assert(gc);
+   assert(vbo);
+
+   (void) _mesa_UnmapNamedBuffer_no_error(vbo->Name);
+}
+
+/**
+ * Check if given range of indices contains only degenerate triangles.
+ *
+ * A triangle counts as degenerate when at least two of its three indices
+ * are equal.
+ *
+ * \param indices   16-bit index data, three indices per triangle
+ * \param count_in  number of indices; asserted to be a multiple of 3
+ * \return true when every triangle is degenerate (also for zero triangles)
+ */
+static
+bool vbo_bbox_is_degenerate(GLvoid *indices, GLuint count_in)
+{
+   assert(indices);
+   assert(count_in % 3 == 0);
+
+   const GLushort *tri = (const GLushort *) indices;
+   GLuint remaining = count_in / 3;
+
+   for (; remaining > 0; remaining--, tri += 3) {
+      /* Three pairwise distinct indices: a real triangle */
+      if (tri[0] != tri[1] && tri[0] != tri[2] && tri[1] != tri[2]) {
+         return false;
+      }
+   }
+   return true;
+}
+
+
+/**
+ * Calculate bounding boxes for given geometry.
+ *
+ * The index range described by \p key is cut into sub-boxes of
+ * bbox_split_size indices each. For every sub-box that is not entirely
+ * degenerate an axis aligned box is computed from the vertex positions, and
+ * the union of those boxes is stored as the full box.
+ *
+ * \param gc             current GL context, must not be NULL
+ * \param key            describes the geometry (buffers, count, stride, ...)
+ * \param data           cache entry that receives the boxes
+ * \param indexData      pointer to the first index of the draw call
+ * \param indexDataSize  number of bytes available at \p indexData
+ * \return true on success (data->valid is set); false when a buffer cannot
+ *         be found or mapped, memory is exhausted or the split size is not
+ *         positive
+ *
+ * NOTE(review): index values are not checked against the size of the mapped
+ * vertex buffer - confirm out-of-range indices cannot reach this point.
+ */
+static
+bool vbo_bbox_calculate_bounding_boxes_with_indices(struct gl_context *const gc,
+                                       const vbo_bbox_cache_key *key,
+                                       struct vbo_bbox_cache_data *data,
+                                       void* indexData,
+                                       int indexDataSize)
+{
+   assert(gc);
+
+   void* vertex_data = NULL;
+   int vertex_datasize = 0;
+   const int vert_per_subBbox = mesa_bbox_env_variables.bbox_split_size;
+   int sub_bbox_cnt;
+   int *subbox_array;
+   int non_degen_count = 0;
+
+   /* The split size is a divisor below; it can be 0 when configured badly. */
+   if (vert_per_subBbox <= 0) {
+      return false;
+   }
+   sub_bbox_cnt = (key->count + vert_per_subBbox - 1) / vert_per_subBbox;
+   assert(sub_bbox_cnt);
+
+   struct gl_buffer_object* element_buffer = _mesa_lookup_bufferobj(gc,
+      key->element_buf_name);
+
+   struct gl_buffer_object * vertexattrib_buffer = _mesa_lookup_bufferobj(gc,
+      key->vertex_buf_name);
+
+   if (element_buffer == NULL || vertexattrib_buffer == NULL) {
+      return false;
+   }
+
+   if (!vbo_bbox_get_vbo_ptr(gc, vertexattrib_buffer,(int) key->offset,
+                             &vertex_data, &vertex_datasize)) {
+      return false;
+   }
+
+   assert(vertex_data);
+   assert(vertex_datasize > 0);
+   assert(indexData);
+   assert(indexDataSize > 0);
+   assert(key->indices_type == GL_UNSIGNED_SHORT);
+   /* The indices may fill the available data exactly. */
+   assert(indexDataSize >= key->count * (int)key->type_size);
+
+   /* Allocate memory for bounding boxes */
+   if (!vbo_init_sub_bbox_array(sub_bbox_cnt,data)) {
+      vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+      return false;
+   }
+
+   /* Initialize size of bounding boxes; the last one takes the remainder */
+   for (int i = 0; i < sub_bbox_cnt; i++) {
+      data->sub_bbox_array[i].vert_count = (i == sub_bbox_cnt - 1) ?
+         (key->count - i * vert_per_subBbox) : vert_per_subBbox;
+      data->sub_bbox_array[i].start_offset =
+         i * vert_per_subBbox * key->type_size;
+   }
+
+   /* Numbers of the sub-boxes that need a box calculation */
+   subbox_array = malloc(sub_bbox_cnt * sizeof(int));
+   if (subbox_array == NULL) {
+      vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+      return false;
+   }
+
+   /* Check if all triangles within a sub-box are degenerate (i.e. triangles
+    * with zero area); such sub-boxes are skipped.
+    */
+   for (int i = 0; i < sub_bbox_cnt; i++) {
+      GLubyte* ptr = (GLubyte *)indexData;
+      data->sub_bbox_array[i].is_degenerate =
+         vbo_bbox_is_degenerate((ptr + data->sub_bbox_array[i].start_offset),
+                                data->sub_bbox_array[i].vert_count);
+
+      if (!data->sub_bbox_array[i].is_degenerate) {
+         subbox_array[non_degen_count++] = i;
+      }
+   }
+
+   float tmp_buf_min[8] = {0.0};
+   float tmp_buf_max[8] = {0.0};
+
+#ifdef __AVX__
+   fullMin = _mm256_set1_ps(FLT_MAX);
+   fullMax = _mm256_set1_ps(-FLT_MAX);
+
+   /* Two sub-boxes are processed per call when they have the same number of
+    * indices; otherwise each one is processed alone by passing its number
+    * for both lanes.
+    */
+   int pos = 0;
+   while (pos + 1 < non_degen_count) {
+      const int a = subbox_array[pos];
+      const int b = subbox_array[pos + 1];
+
+      if (data->sub_bbox_array[a].vert_count ==
+          data->sub_bbox_array[b].vert_count) {
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                                          data->sub_bbox_array[a].vert_count,
+                                          a, b,
+                                          (GLushort*)indexData,
+                                          (GLfloat*)vertex_data,
+                                          key->stride,
+                                          data);
+      }
+      else {
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                                          data->sub_bbox_array[a].vert_count,
+                                          a, a,
+                                          (GLushort*)indexData,
+                                          (GLfloat*)vertex_data,
+                                          key->stride,
+                                          data);
+         vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                                          data->sub_bbox_array[b].vert_count,
+                                          b, b,
+                                          (GLushort*)indexData,
+                                          (GLfloat*)vertex_data,
+                                          key->stride,
+                                          data);
+      }
+      pos += 2;
+   }
+
+   /* Odd number of sub-boxes: the last one is processed alone */
+   if (pos < non_degen_count) {
+      const int last = subbox_array[pos];
+
+      vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                                       data->sub_bbox_array[last].vert_count,
+                                       last, last,
+                                       (GLushort*)indexData,
+                                       (GLfloat*)vertex_data,
+                                       key->stride,
+                                       data);
+   }
+
+   /* Finding the minimum from the full box 256 */
+   __m128 firstlane = _mm256_extractf128_ps(fullMin,0);
+   __m128 secondlane = _mm256_extractf128_ps(fullMin,1);
+   firstlane = _mm_min_ps(firstlane,secondlane);
+   _mm_storeu_ps(tmp_buf_min,firstlane);
+
+   /* Finding the maximum from the full box 256 */
+   firstlane = _mm256_extractf128_ps(fullMax,0);
+   secondlane = _mm256_extractf128_ps(fullMax,1);
+   firstlane = _mm_max_ps(firstlane,secondlane);
+   _mm_storeu_ps(tmp_buf_max,firstlane);
+
+#else
+   fullMin = _mm_set1_ps(FLT_MAX);
+   fullMax = _mm_set1_ps(-FLT_MAX);
+
+   for (int i = 0; i < non_degen_count; i++) {
+      vbo_bbox_calc_subbox_coordinates(vert_per_subBbox,
+                         data->sub_bbox_array[subbox_array[i]].vert_count,
+                         subbox_array[i],
+                         (GLushort*)indexData,
+                         (GLfloat*)vertex_data,
+                         key->stride,
+                         data);
+   }
+   _mm_storeu_ps(tmp_buf_min, fullMin);
+   _mm_storeu_ps(tmp_buf_max, fullMax);
+#endif
+
+   /* Set up bounding box as 8 vertices and store in bbox data */
+   vbo_bbox_create_bounding_box(tmp_buf_min, tmp_buf_max,
+      &(data->full_box.bounding_volume.vert_vec4[0]));
+
+   free(subbox_array);
+   vbo_bbox_release_vbo_ptr(gc, vertexattrib_buffer);
+   data->valid = true;
+
+   return true;
+}
+
+
+/**
+ * Calculate bounding boxes for the geometry described by \p key, reading
+ * the indices from the element buffer named in the key.
+ *
+ * The element buffer is mapped at byte offset key->indices for the duration
+ * of the calculation and unmapped again afterwards.
+ *
+ * \return the result of vbo_bbox_calculate_bounding_boxes_with_indices(), or
+ *         false when the element buffer cannot be found or mapped
+ */
+static inline
+bool vbo_bbox_calculate_bounding_boxes(struct gl_context *const gc,
+                                       const vbo_bbox_cache_key *key,
+                                       struct vbo_bbox_cache_data* data)
+{
+   assert(gc);
+   assert(key->indices_type == GL_UNSIGNED_SHORT);
+
+   void *index_ptr = NULL;
+   int index_bytes = 0;
+   bool ok;
+
+   struct gl_buffer_object *const ebo =
+      _mesa_lookup_bufferobj(gc, key->element_buf_name);
+   if (!ebo) {
+      return false;
+   }
+
+   if (!vbo_bbox_get_vbo_ptr(gc, ebo, (int) key->indices,
+                             &index_ptr, &index_bytes)) {
+      return false;
+   }
+
+   ok = vbo_bbox_calculate_bounding_boxes_with_indices(gc, key, data,
+                                                       index_ptr,
+                                                       index_bytes);
+   vbo_bbox_release_vbo_ptr(gc, ebo);
+
+   return ok;
+}
+
+
+/**
+ * Create new bounding box cache entry
+ *
+ * The entry is allocated, initialized as "not valid, needs calculation",
+ * stamped with the current change counters of the element and vertex buffer
+ * and inserted into the cache of \p gc under \p key. No boxes are computed
+ * here.
+ *
+ * \return the new entry, or NULL when a buffer is missing or memory is
+ *         exhausted
+ */
+static
+struct vbo_bbox_cache_data* vbo_bbox_create_data(struct gl_context *const gc,
+                                             const vbo_bbox_cache_key *key)
+{
+   assert(gc);
+
+   struct gl_buffer_object* element_buffer =
+      vbo_get_current_element_buffer(gc);
+   struct gl_buffer_object* vertexattrib_buffer =
+      vbo_get_current_vertex_buffer(gc);
+
+   if ((vertexattrib_buffer == NULL) ||
+       (element_buffer == NULL)) {
+      return NULL;
+   }
+
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   assert(BboxOpt);
+
+   struct vbo_bbox_cache_data* data = (struct vbo_bbox_cache_data *) malloc(
+      sizeof (struct vbo_bbox_cache_data));
+
+   /* Test the allocation before the first write through the pointer. */
+   if (data == NULL) {
+      return NULL;
+   }
+
+   /* Initialize the cache data and variables in cache data */
+   data->full_box.is_degenerate = false;
+   data->valid = false;
+   data->need_new_calculation = true;
+   data->init_delaycnt = 0;
+   data->sub_bbox_cnt = 0;
+   data->sub_bbox_array = NULL;
+   data->drawcnt_bbox_helped = 0;
+   data->last_use_frame = 0;
+   data->vertpos_vbo_changecnt = vertexattrib_buffer->data_change_counter;
+   data->indices_vbo_changecnt = element_buffer->data_change_counter;
+
+   /* This defines for how many cache hits we wait before actually creating
+    * the data
+    */
+   data->init_delaylimit = vbo_bbox_get_delay(BboxOpt);
+
+   /* At this point data is not valid yet */
+   _mesa_bbox_cache_insert(gc,gc->Pipeline.BboxOpt->cache,
+                           key,sizeof(vbo_bbox_cache_key),data);
+
+   return data;
+}
+
+
+/**
+ * Check whether a cache entry still matches the buffers it was built from.
+ *
+ * \return true when both buffers named in \p key exist and their
+ *         data_change_counter values equal the counters recorded in
+ *         \p data; false otherwise
+ */
+static inline
+bool vbo_bbox_validate_data(
+                        struct gl_context *const gc,
+                        const vbo_bbox_cache_key *key,
+                        struct vbo_bbox_cache_data* data)
+{
+   assert(gc);
+
+   struct gl_buffer_object *const ebo =
+      _mesa_lookup_bufferobj(gc, key->element_buf_name);
+   struct gl_buffer_object *const vbo =
+      _mesa_lookup_bufferobj(gc, key->vertex_buf_name);
+
+   if (!ebo || !vbo) {
+      return false;
+   }
+
+   return ebo->data_change_counter == data->indices_vbo_changecnt &&
+          vbo->data_change_counter == data->vertpos_vbo_changecnt;
+}
+
+/**
+ * Retrieve bounding box data from cache.
+ *
+ * - No entry for \p key: a new entry is created.
+ * - Entry already calculated and valid: returned when the buffers are
+ *   unchanged (mvp_valid is set to true); NULL otherwise.
+ * - Entry waiting for calculation: NULL until the entry has been requested
+ *   more than init_delaylimit times, then the boxes are calculated and the
+ *   entry is returned with mvp_valid == false.
+ *
+ * NOTE(review): mvp_valid is forced to true on every hit of a valid entry;
+ * confirm this cannot hide an mvp_valid == false set by a recalculation
+ * triggered elsewhere.
+ *
+ * \return the cache entry when usable boxes are available, NULL otherwise
+ */
+static inline
+struct vbo_bbox_cache_data* vbo_bbox_get_bounding_boxes(
+                                   struct gl_context *const gc,
+                                   const vbo_bbox_cache_key *key)
+{
+   assert(gc);
+   mesa_bbox_opt *opt = gc->Pipeline.BboxOpt;
+   assert(opt);
+
+   struct vbo_bbox_cache_data *entry = _mesa_search_bbox_cache(opt->cache,
+      key, sizeof(vbo_bbox_cache_key));
+
+   if (entry == NULL) {
+      /* Data does not exist, create it */
+      entry = vbo_bbox_create_data(gc, key);
+      if (entry == NULL) {
+         return NULL;
+      }
+   }
+   else if (entry->need_new_calculation == false) {
+      if (!entry->valid) {
+         entry->valid = false;
+         return NULL;
+      }
+      /* Data is initialized and valid */
+      if (vbo_bbox_validate_data(gc, key, entry)) {
+         entry->mvp_valid = true;
+         return entry;
+      }
+      /* Buffers changed: nothing is recalculated here, NULL is returned
+       * below because need_new_calculation is false.
+       */
+   }
+
+   if (!entry->need_new_calculation) {
+      return NULL;
+   }
+   /* Count this request; calculate only once the delay has elapsed */
+   if (entry->init_delaycnt++ < entry->init_delaylimit) {
+      return NULL;
+   }
+
+   entry->valid = false;
+   entry->need_new_calculation = false;
+   entry->mvp_valid = false;
+
+   if (!vbo_bbox_validate_data(gc, key, entry)) {
+      /* Refresh the recorded change counters before calculating */
+      struct gl_buffer_object *ebo =
+         _mesa_lookup_bufferobj(gc, key->element_buf_name);
+      struct gl_buffer_object *vbo =
+         _mesa_lookup_bufferobj(gc, key->vertex_buf_name);
+
+      if (vbo == NULL || ebo == NULL) {
+         return NULL;
+      }
+      entry->vertpos_vbo_changecnt = vbo->data_change_counter;
+      entry->indices_vbo_changecnt = ebo->data_change_counter;
+   }
+
+   /* Calculate bounding boxes */
+   if (gc->volume_type == BOUNDING_VOLUME_AABB &&
+       vbo_bbox_calculate_bounding_boxes(gc, key, entry)) {
+      return entry;
+   }
+   return NULL;
+}
+
+/**
+ * This function is called when the element buffer is being updated. Because
+ * the element buffer has changed, the relevant bbox data has to be updated.
+ *
+ * Every cache entry whose key names \p buffer as element buffer gets the
+ * current change counter of the buffer. When the updated byte range
+ * intersects the index range of a valid entry, the entry is invalidated;
+ * when superset() reports true for the two ranges, its boxes are
+ * recalculated right away from the new data.
+ *
+ * \param gc      current GL context
+ * \param buffer  element buffer being updated
+ * \param data    new contents, starting at byte \p offset of the buffer
+ * \param offset  first updated byte
+ * \param size    number of updated bytes
+ */
+void vbo_bbox_element_buffer_update(struct gl_context *const gc,
+                                    struct gl_buffer_object *buffer,
+                                    const void* data,
+                                    int offset,
+                                    int size)
+{
+   mesa_bbox_opt * BboxOpt = gc->Pipeline.BboxOpt;
+   if (!BboxOpt) {
+      return;
+   }
+
+   mesa_bbox_cache * buffer_map = BboxOpt->cache;
+   if (!buffer_map) {
+      return;
+   }
+
+   struct gl_segment updateSegment;
+   updateSegment.Left = offset;
+   updateSegment.Right = offset+size;
+
+   for (GLuint i = 0; i < buffer_map->size; i++) {
+      /* Visit every entry of the bucket, not only its first one. */
+      for (struct vbo_bbox_cache_data *bbox_data = buffer_map->items[i];
+           bbox_data != NULL; bbox_data = bbox_data->next) {
+         struct gl_buffer_object *buffObj = _mesa_lookup_bufferobj(gc,
+            bbox_data->key->element_buf_name);
+
+         if (buffObj != buffer)
+            continue;
+
+         bbox_data->indices_vbo_changecnt = buffer->data_change_counter;
+
+         /* Byte range of the element buffer used by this entry */
+         struct gl_segment element_bufferSegment;
+         element_bufferSegment.Left = bbox_data->key->indices;
+         element_bufferSegment.Right = bbox_data->key->indices +
+            (bbox_data->key->count * bbox_data->key->type_size);
+
+         if (!bbox_data->valid ||
+             !intersect(&element_bufferSegment, &updateSegment)) {
+            continue;
+         }
+
+         bbox_data->valid = false;
+         bbox_data->need_new_calculation = true;
+
+         if (superset(&element_bufferSegment,&updateSegment)) {
+            /* Position of this entry's first index inside the new data */
+            int offset_in_newdata = bbox_data->key->indices - offset;
+            int newsize = size - offset_in_newdata;
+
+            assert(offset_in_newdata >= 0);
+            assert(newsize > 0);
+
+            GLchar * start_data = (GLchar *)data + offset_in_newdata;
+
+            if (vbo_bbox_calculate_bounding_boxes_with_indices(gc,
+                   bbox_data->key, bbox_data, start_data, newsize)) {
+               bbox_data->need_new_calculation = false;
+               bbox_data->mvp_valid = false;
+            }
+         }
+      }
+   }
+}
+
+/**
+ * Generate 6 clip planes from MVP.
+ *
+ * With M(row,col) = mvpin->m[col*4+row], every plane is row 3 of the matrix
+ * plus or minus one of the rows 0..2:
+ *   plane 0 (left)   = row3 + row0     plane 1 (right)  = row3 - row0
+ *   plane 2 (top)    = row3 - row1     plane 3 (bottom) = row3 + row1
+ *   plane 4 (far)    = row3 - row2     plane 5 (near)   = row3 + row2
+ * For every plane the octant (bit 0/1/2 set when a/b/c >= 0) is recorded
+ * and the plane is then passed to normalize().
+ *
+ * \param frustum  output: planes and octants
+ * \param mvpin    input matrix
+ */
+static
+void vbo_bbox_get_frustum_from_mvp(vbo_bbox_frustum *frustum, GLmatrix* mvpin)
+{
+   /* Row combined with row 3 for each plane, and whether it is subtracted */
+   static const int axis_row[6] = { 0, 0, 1, 1, 2, 2 };
+   static const bool subtract[6] = { false, true, true, false, true, false };
+
+   const GLfloat *const m = mvpin->m;
+   int n;
+
+   /* Frustum plane calculation */
+   for (n = 0; n < 6; ++n) {
+      const int r = axis_row[n];
+      vbo_bbox_frustum_plane *const p = &(frustum->plane[n]);
+
+      if (subtract[n]) {
+         p->a = m[0 * 4 + 3] - m[0 * 4 + r];
+         p->b = m[1 * 4 + 3] - m[1 * 4 + r];
+         p->c = m[2 * 4 + 3] - m[2 * 4 + r];
+         p->d = m[3 * 4 + 3] - m[3 * 4 + r];
+      }
+      else {
+         p->a = m[0 * 4 + 3] + m[0 * 4 + r];
+         p->b = m[1 * 4 + 3] + m[1 * 4 + r];
+         p->c = m[2 * 4 + 3] + m[2 * 4 + r];
+         p->d = m[3 * 4 + 3] + m[3 * 4 + r];
+      }
+   }
+
+   /* Calculate octants; the signs are taken before normalization */
+   for (n = 0; n < 6; ++n) {
+      frustum->octant[n] = (frustum->plane[n].a >=0 ? 1 : 0) |
+                           (frustum->plane[n].b >=0 ? 2 : 0) |
+                           (frustum->plane[n].c >=0 ? 4 : 0);
+      normalize(&(frustum->plane[n]));
+   }
+}
+
+
+/**
+ * Evaluate the plane equation a*x + b*y + c*z + d for a point.
+ *
+ * The w component of the point is not used. The result is a signed
+ * distance; its scale depends on whether the plane has been normalized.
+ */
+static inline
+float vbo_bbox_dist_from_point_to_plane(
+                                 const vbo_bbox_frustum_plane *plane,
+                                 const vbo_vec4f *point)
+{
+   return (plane->a * point->x +
+           plane->b * point->y +
+           plane->c * point->z +
+           plane->d);
+}
+
+
+/**
+ * Classify an axis aligned box against the six frustum planes.
+ *
+ * For every plane only two corners are tested: the corner selected by the
+ * octant of the plane and the opposite corner (octant ^ 7).
+ *
+ * \return BBOX_CLIP_OUTSIDE   as soon as the octant corner lies behind a
+ *                             plane (box fully outside frustum)
+ *         BBOX_CLIP_INTERSECT when the opposite corner lies behind at least
+ *                             one plane (box intersects with frustum)
+ *         BBOX_CLIP_INSIDE    otherwise (box fully inside frustum)
+ */
+static
+enum vbo_bbox_clip_result vbo_bbox_fast_clipping_test(bounding_info* bbox,
+                                         const vbo_bbox_frustum *frustum)
+{
+   assert(bbox);
+
+   vbo_vec4f *const corners = bbox->bounding_volume.vert_vec4;
+   bool fully_inside = true;
+   int p;
+
+   for (p = 0; p < 6; ++p) {
+      const vbo_bbox_frustum_plane *const plane = &(frustum->plane[p]);
+      const unsigned char octant = frustum->octant[p];
+
+      /* Corner selected by the plane's octant: if even this one is behind
+       * the plane, the whole box is.
+       */
+      if (vbo_bbox_dist_from_point_to_plane(plane, &(corners[octant])) <
+          0.0f) {
+         return BBOX_CLIP_OUTSIDE;
+      }
+
+      /* Opposite corner behind the plane: the box straddles the plane */
+      if (vbo_bbox_dist_from_point_to_plane(plane, &(corners[octant ^ 7])) <
+          0.0f) {
+         fully_inside = false;
+      }
+   }
+
+   return fully_inside ? BBOX_CLIP_INSIDE : BBOX_CLIP_INTERSECT;
+}
+
+/**
+ * Wrapper for clip algorithm.
+ *
+ * \return BBOX_CLIP_DEGEN for a box flagged as degenerate, otherwise the
+ *         result of vbo_bbox_fast_clipping_test()
+ */
+static inline
+enum vbo_bbox_clip_result vbo_bbox_clip(bounding_info* bbox,
+                                        const vbo_bbox_frustum *frustum)
+{
+   assert(bbox);
+
+   return bbox->is_degenerate ? BBOX_CLIP_DEGEN
+                              : vbo_bbox_fast_clipping_test(bbox, frustum);
+}
+
+/**
+ * Bounding box drawelements implementation
+ *
+ * The draw call is forwarded unchanged to vbo_validated_drawrangeelements()
+ * when it is not supported, the optimization is not enabled, no usable
+ * cache entry exists or the MVP uniform cannot be located.
+ *
+ * Otherwise the full box of the geometry is clipped against the frustum
+ * derived from the MVP uniform (clip results are reused while the MVP is
+ * unchanged):
+ *  - fully outside: nothing is drawn;
+ *  - fully inside: the draw call is forwarded unchanged;
+ *  - otherwise the sub-boxes are clipped one by one and runs of visible
+ *    sub-boxes are drawn in batches. Dropped sub-boxes between two visible
+ *    ones are drawn anyway unless at least
+ *    bbox_split_size * BBOX_MIN_SPLITTED_DRAW_TO_DROP indices can be skipped.
+ * Finally the drawcnt_bbox_helped counter of the entry is adjusted.
+ */
+void
+vbo_bbox_drawelements(struct gl_context *ctx, GLenum mode,
+                      GLboolean index_bounds_valid, GLuint start, GLuint end,
+                      GLsizei count, GLenum type, const GLvoid * indices,
+                      GLint basevertex, GLuint numInstances,
+                      GLuint baseInstance)
+{
+   assert(ctx);
+
+   /* BOUNDING VOLUME: Checks would remain same I guess */
+   const bool eligible = vbo_bbox_check_supported_draw_call(ctx, mode, count,
+                                                            type, indices,
+                                                            basevertex);
+   if (!eligible ||
+       (mesa_bbox_env_variables.bbox_enable <
+        MESA_BBOX_ENABLE_FORCE_CLIPPING)) {
+      MESA_BBOX("Aborting MESA_BBOX : BBOX Is not ENABLED !!! \n");
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                      end, count, type, indices, basevertex,
+                                      numInstances, baseInstance);
+      return;
+   }
+
+   /* The key is fully initialized by vbo_bbox_prepare_key() */
+   vbo_bbox_cache_key key;
+   vbo_bbox_prepare_key(ctx, mode, count, type, sizeof(GLushort), indices,
+                        basevertex, &key);
+
+   /* BOUNDING VOLUME: Call bounding volume creation based on bounding volume
+    * type
+    */
+   struct vbo_bbox_cache_data *const entry =
+      vbo_bbox_get_bounding_boxes(ctx, &key);
+   if (entry == NULL) {
+      MESA_BBOX("Aborting MESA_BBOX : New Object not in Cache!!! \n");
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                      end, count, type, indices, basevertex,
+                                      numInstances, baseInstance);
+      return;
+   }
+
+   const GLint loc = vbo_bbox_get_mvp(ctx);
+   if (loc < 0) {
+      MESA_BBOX("MVP Location Error\n");
+      /* TBD: Free cache here */
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                      end, count, type, indices, basevertex,
+                                      numInstances, baseInstance);
+      return;
+   }
+
+   GLfloat *mvp_ptr = (GLfloat *)
+      ctx->_Shader->ActiveProgram->data->UniformStorage[loc].storage;
+   /* Only initialized (and only read) when the clip results are rebuilt */
+   vbo_bbox_frustum frustum;
+   bool recalculate_subbox_clip = false;
+
+   if (!entry->mvp_valid ||
+       memcmp(entry->mvp, mvp_ptr, 16*sizeof(GLfloat)) != 0) {
+      /* New MVP: remember it and calculate frustum planes */
+      memcpy(&(entry->mvp), mvp_ptr, 16*sizeof(GLfloat));
+      entry->mvpin.m = entry->mvp;
+      vbo_bbox_get_frustum_from_mvp(&frustum, &(entry->mvpin));
+
+      /* BOUNDING VOLUME: Call specific function to calculate clip results */
+      if (ctx->volume_type == BOUNDING_VOLUME_AABB) {
+         entry->full_box.clip_result =
+            vbo_bbox_clip(&(entry->full_box), &frustum);
+      }
+      recalculate_subbox_clip = true;
+   }
+
+   MESA_BBOX("MESA_BBOX: Full Box ClipResult:%d \n",
+             entry->full_box.clip_result);
+   switch (entry->full_box.clip_result) {
+   case BBOX_CLIP_OUTSIDE:
+      /* Geometry outside view frustum, dont draw it */
+      return;
+   case BBOX_CLIP_INSIDE:
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                      end, count, type, indices, basevertex,
+                                      numInstances, baseInstance);
+      return;
+   default:
+      /* BBOX_CLIP_INTERSECT and anything else */
+      MESA_BBOX("MESA_BBOX: Vertices INTERSECTING with the frustum, going"
+                " for Sub Bboxes: \n");
+      break;
+   }
+
+   GLsizei count_to_send = 0;    /* indices in the batch being collected */
+   GLsizei count_to_drop = 0;    /* indices skipped since the last visible
+                                  * sub-box */
+   GLvoid* offset_to_send = NULL;
+   bool clipped = false;
+   unsigned potential_clipped = 0;
+
+   for (int i = 0; i < entry->sub_bbox_cnt; i++) {
+      const int new_count = entry->sub_bbox_array[i].vert_count;
+
+      if (recalculate_subbox_clip &&
+          ctx->volume_type == BOUNDING_VOLUME_AABB) {
+         entry->sub_bbox_array[i].clip_result =
+            vbo_bbox_clip(&(entry->sub_bbox_array[i]), &frustum);
+      }
+
+      switch (entry->sub_bbox_array[i].clip_result) {
+      case BBOX_CLIP_OUTSIDE:
+      case BBOX_CLIP_DEGEN:
+         /* Nothing visible in this sub-box */
+         count_to_drop += new_count;
+         potential_clipped += new_count;
+         break;
+      default:
+         /* Sub bounding box intersects with view, draw/save it
+          * for later draw
+          */
+         if (count_to_send == 0) {
+            /* Starting new batch */
+            count_to_send = new_count;
+            offset_to_send = (char*)indices +
+               entry->sub_bbox_array[i].start_offset;
+         }
+         else {
+            if (count_to_drop >=
+                (int)(mesa_bbox_env_variables.bbox_split_size *
+                      BBOX_MIN_SPLITTED_DRAW_TO_DROP)) {
+               /* Enough was skipped: draw accumulated geometry and start
+                * over at this sub-box
+                */
+               vbo_validated_drawrangeelements(ctx, mode,
+                  index_bounds_valid, start, end, count_to_send, type,
+                  offset_to_send, basevertex, numInstances, baseInstance);
+
+               count_to_send = 0;
+               offset_to_send = (char*)indices +
+                  entry->sub_bbox_array[i].start_offset;
+
+               clipped = true;
+            }
+            else {
+               /* Too little to skip: keep the gap inside the batch */
+               count_to_send += count_to_drop;
+            }
+
+            /* append to current batch of sent primitives */
+            count_to_send += new_count;
+         }
+
+         count_to_drop = 0;
+         break;
+      }
+   }
+
+   /* Draw what is left in the last batch */
+   if (count_to_send > 0) {
+      vbo_validated_drawrangeelements(ctx, mode, index_bounds_valid, start,
+                                      end, count_to_send, type,
+                                      offset_to_send, basevertex,
+                                      numInstances, baseInstance);
+   }
+
+   /* Indices skipped after the last visible sub-box count as well */
+   clipped |= (count_to_drop >=
+               (int)(mesa_bbox_env_variables.bbox_split_size *
+                     BBOX_MIN_SPLITTED_DRAW_TO_DROP));
+
+   if (clipped) {
+      entry->drawcnt_bbox_helped =
+         MIN(entry->drawcnt_bbox_helped + 1,
+             BBOX_MAX_VAL_FOR_EFFECTIVE_DRAWS_COUNTER);
+   }
+   else if (potential_clipped == 0) {
+      entry->drawcnt_bbox_helped =
+         MAX(entry->drawcnt_bbox_helped - 1,
+             BBOX_MIN_VAL_FOR_EFFECTIVE_DRAWS_COUNTER);
+   }
+}
+
+/**
+ * Initialization for bounding box optimization
+ *
+ * Reads the configuration from the environment
+ * (MESA_BBOX_OPT_ENABLE, MESA_OPT_SPLIT_SIZE, MESA_BBOX_MIN_VERTEX_CNT,
+ * MESA_OPT_TRACE_LEVEL), forces the optimization off when
+ * gc->Const.EnableBoundingBoxCulling is not set, resolves the AUTO enable
+ * mode and, for the AABB volume type, allocates the per-context state
+ * (gc->Pipeline.BboxOpt and its cache).
+ *
+ * When an allocation fails the optimization is switched off and
+ * gc->Pipeline.BboxOpt is left NULL.
+ */
+void vbo_bbox_init(struct gl_context* const gc)
+{
+   assert(gc);
+
+   /* Not named mesa_bbox_opt: that would hide the type of the same name
+    * for the rest of the function.
+    */
+   const char *env = getenv("MESA_BBOX_OPT_ENABLE");
+   if (env != NULL)
+      mesa_bbox_env_variables.bbox_enable = atoi(env);
+
+   env = getenv("MESA_OPT_SPLIT_SIZE");
+   if (env != NULL)
+      mesa_bbox_env_variables.bbox_split_size = atoi(env);
+
+   env = getenv("MESA_BBOX_MIN_VERTEX_CNT");
+   if (env != NULL)
+      mesa_bbox_env_variables.bbox_min_vrtx_count = atoi(env);
+
+   if (!gc->Const.EnableBoundingBoxCulling) {
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+      mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+      mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+   }
+
+   env = getenv("MESA_OPT_TRACE_LEVEL");
+   if (env != NULL)
+      mesa_bbox_env_variables.bbox_trace_level = atoi(env);
+
+   if (mesa_bbox_env_variables.bbox_enable == MESA_BBOX_ENABLE_AUTO) {
+      /* Android/Linux: AUTO resolves to forced clipping when the
+       * IGFX_GEN == IGFX_GEN9 test holds, and to off otherwise.
+       */
+      #if (IGFX_GEN == IGFX_GEN9)
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_FORCE_CLIPPING;
+      #else
+      mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+      #endif
+   }
+
+   /* BOUNDING VOLUME: Add initializations based on bounding volume here */
+   switch (gc->volume_type) {
+   case BOUNDING_VOLUME_AABB:
+      if (mesa_bbox_env_variables.bbox_enable) {
+         /* Both sizes must describe whole triangles */
+         mesa_bbox_env_variables.bbox_split_size =
+            (mesa_bbox_env_variables.bbox_split_size / 3) * 3;
+         /* A split size of 0 would later be used as a divisor */
+         if (mesa_bbox_env_variables.bbox_split_size < 3)
+            mesa_bbox_env_variables.bbox_split_size = 3;
+         mesa_bbox_env_variables.bbox_min_vrtx_count =
+            (mesa_bbox_env_variables.bbox_min_vrtx_count / 3) * 3;
+
+         assert(gc->Pipeline.BboxOpt == NULL);
+
+         /* Size taken from the pointee so that it always matches the
+          * object that is actually stored.
+          */
+         gc->Pipeline.BboxOpt = malloc(sizeof(*gc->Pipeline.BboxOpt));
+         if (!gc->Pipeline.BboxOpt) {
+            /* No memory, disable bbox optimization */
+            mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+            mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+            mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+            return;
+         }
+
+         gc->Pipeline.BboxOpt->calc_delay = BBOX_CALC_MIN_DELAY;
+         gc->Pipeline.BboxOpt->cache = _mesa_new_bbox_cache();
+
+         if (!gc->Pipeline.BboxOpt->cache) {
+            /* No cache: release the state and disable bbox optimization so
+             * that nobody uses the freed pointer.
+             */
+            free(gc->Pipeline.BboxOpt);
+            gc->Pipeline.BboxOpt = NULL;
+            mesa_bbox_env_variables.bbox_enable = MESA_BBOX_ENABLE_OFF;
+            mesa_bbox_env_variables.bbox_split_size = 0x7fffffff;
+            mesa_bbox_env_variables.bbox_min_vrtx_count = 0;
+            MESA_BBOX("MESA_BBOX: Cache creation failed\n");
+            return;
+         }
+
+         if (mesa_bbox_env_variables.bbox_trace_level > 0) {
+            MESA_BBOX("\nMESA BBOX OPT config: \
+ bboxOptEnable = %d, bboxOptMinVertexCount = %d,\
+ bboxOptSplitSize = %d\n",
+                      mesa_bbox_env_variables.bbox_enable,
+                      mesa_bbox_env_variables.bbox_min_vrtx_count,
+                      mesa_bbox_env_variables.bbox_split_size);
+         }
+      }
+      /* Initialize some function pointers so that we dont have to
+       * check bounding volume type for every draw call */
+      break;
+   case BOUNDING_VOLUME_OBB:
+      /* Init for OBB bounding volume */
+      break;
+   case BOUNDING_VOLUME_SPHERE:
+      /* Init for SPHERE bounding volume */
+      break;
+   case BOUNDING_VOLUME_DOP:
+      /* Init for DOP bounding volume */
+      break;
+   default:
+      MESA_BBOX("BOUNDING VOLUME TYPE IS INCORRECT\n");
+      break;
+   }
+}
+
+
+/**
+ * Release the bounding box state of a context: the cache (when present) and
+ * the state object itself. gc->Pipeline.BboxOpt is NULL afterwards.
+ * To be called when context is destroyed
+ */
+void vbo_bbox_free(struct gl_context* const gc)
+{
+   assert(gc);
+
+   if (gc->Pipeline.BboxOpt == NULL) {
+      return;
+   }
+
+   if (gc->Pipeline.BboxOpt->cache != NULL) {
+      _mesa_delete_bbox_cache(gc, gc->Pipeline.BboxOpt->cache);
+      gc->Pipeline.BboxOpt->cache = NULL;
+   }
+
+   free(gc->Pipeline.BboxOpt);
+   gc->Pipeline.BboxOpt = NULL;
+}
diff --git a/src/mesa/vbo/vbo_bbox.h b/src/mesa/vbo/vbo_bbox.h
new file mode 100644
index 0000000..1dd98f2
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox.h
This header is included in several other files that are not part of the BBOX
implementation, but some of the things declared here lack a common prefix
(e.g. bbox). Have you considered moving some of these things elsewhere, or
adding the prefix?
Is all the debug stuff here still needed?
- Eero
@@ -0,0 +1,383 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#ifndef _VBO_BBOX_H_
+#define _VBO_BBOX_H_
+
+#include <stdio.h>
+#include "main/arrayobj.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/varray.h"
+#include "main/bufferobj.h"
+#include "main/arrayobj.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/transformfeedback.h"
+#include "main/mtypes.h"
+#include "compiler/glsl/ir_uniform.h"
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
+#include "sys/param.h"
+#include "program/prog_cache.h"
+/* For Intrinsic functions */
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <mmintrin.h>
+#include <immintrin.h>
+
+#if defined(__ANDROID__) || defined(ANDROID)
+#include <cutils/log.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FRUSTUM_PLANE_COUNT 6 /* Length of plane[] and octant[] in vbo_bbox_frustum */
+#define BBOX_CALC_MIN_DELAY 2 /* Value BboxOpt->calc_delay is initialized to */
+#define BBOX_CALC_MAX_DELAY 5
+
+#undef LOG_TAG
+#define LOG_TAG "MESA_BBOX_LOG"
+
+/* Default, driver will select mode for given GPU and OS */
+#define MESA_BBOX_ENABLE_AUTO -1
+/* Disable BBOX */
+#define MESA_BBOX_ENABLE_OFF 0
+/* Enable BBOX. NOTE(review): the original comment read "bot clipping";
+ * presumably clipping is applied selectively rather than forced - confirm */
+#define MESA_BBOX_ENABLE_SMART 1
+/* Enable BBOX, clipping will be done regardless of GPU utilization */
+#define MESA_BBOX_ENABLE_FORCE_CLIPPING 2
+/* Enable BBOX, force immediate bbox recalculation and clipping */
+#define MESA_BBOX_ENABLE_FORCE_RECALC 3
+
+/**
+ * MESA BBOX PRINTS
+ * Compile-time debug log level:
+ *   2     - both MESA_BBOX_PRINT() and MESA_BBOX() produce output
+ *   1     - only MESA_BBOX_PRINT() produces output
+ *   other - (including the default 0) both macros expand to nothing
+ * Output is routed through ALOGE() on Android and printf() elsewhere.
+ */
+#define MESA_BBOX_DEBUG 0
+
+#if defined(__ANDROID__) || defined (ANDROID)
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...) ALOGE(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) ALOGE(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#else //ANDROID
+
+#if MESA_BBOX_DEBUG == 2
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...) printf(__VA_ARGS__)
+
+#elif MESA_BBOX_DEBUG == 1
+#define MESA_BBOX_PRINT(...) printf(__VA_ARGS__)
+#define MESA_BBOX(...)
+
+#else
+#define MESA_BBOX_PRINT(...)
+#define MESA_BBOX(...)
+#endif //MESA_BBOX_DEBUG
+
+#endif //ANDROID
+
+/**
+ * MESA Bbox options environment variables
+ *
+ * NOTE(review): env_opt_val, env_opt and mesa_bbox_env_variables are
+ * defined here (no "extern"), and this header is included by several .c
+ * files. Each includer therefore emits its own tentative definition, which
+ * only links when the toolchain merges common symbols (-fcommon). Consider
+ * extern declarations here plus a single definition in one .c file.
+ */
+int env_opt_val;
+const char *env_opt;
+
+typedef struct vbo_bbox_env_variable {
+ GLuint bbox_min_vrtx_count; /* Traced as bboxOptMinVertexCount */
+ GLuint bbox_enable; /* Traced as bboxOptEnable; presumably one of MESA_BBOX_ENABLE_* - confirm */
+ GLuint bbox_split_size; /* Traced as bboxOptSplitSize */
+ GLuint bbox_trace_level; /* Values > 0 make init print the configuration */
+} bbox_env;
+bbox_env mesa_bbox_env_variables;
+
+void
+vbo_bbox_init(struct gl_context *const gc);
+
+void
+vbo_bbox_free(struct gl_context *const gc); /* Deletes the bbox cache and frees gc->Pipeline.BboxOpt */
+
+
+void
+vbo_bbox_element_buffer_update(struct gl_context *const gc,
+ struct gl_buffer_object *buffer,
+ const void* data,
+ int offset,
+ int size);
+
+void
+vbo_validated_drawrangeelements(struct gl_context *ctx,
+ GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start,
+ GLuint end,
+ GLsizei count,
+ GLenum type,
+ const GLvoid * indices,
+ GLint basevertex,
+ GLuint numInstances,
+ GLuint baseInstance); /* Defined in vbo_exec_array.c; non-static only when MESA_BBOX_OPT is defined */
+
+void
+vbo_bbox_drawelements(struct gl_context *ctx,
+ GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start,
+ GLuint end,
+ GLsizei count,
+ GLenum type,
+ const GLvoid * indices,
+ GLint basevertex,
+ GLuint numInstances,
+ GLuint baseInstance); /* Called by the vbo_exec_DrawElements* entry points instead of vbo_validated_drawrangeelements when MESA_BBOX_OPT is defined; same parameter list */
+
+/**
+ * Segment: a pair of integer bounds (Left, Right).
+ * NOTE(review): whether the bounds are inclusive is not visible here.
+ */
+typedef struct gl_segment
+{
+ GLint Left;
+ GLint Right;
+
+}segment;
+
+/**
+ * Clip algorithm result
+ * (stored per bounding box in bounding_info.clip_result)
+ */
+enum vbo_bbox_clip_result
+{
+ BBOX_CLIP_OUTSIDE=0,
+ BBOX_CLIP_INTERSECT=1,
+ BBOX_CLIP_INSIDE=2,
+ BBOX_CLIP_DEGEN = 3,
+ BBOX_CLIP_ERROR = 4,
+};
+
+typedef struct gl_matrixRec
+{
+ GLfloat melem[4][4]; /* 4x4 matrix elements; NOTE(review): row/column order not visible here */
+ GLenum matrixType;
+} gl_matrix;
+
+/**
+ * Structure to describe plane
+ * (four float coefficients a, b, c, d; presumably the plane equation
+ * a*x + b*y + c*z + d = 0 - confirm against the clip code)
+ */
+typedef struct vbo_bbox_frustum_plane
+{
+ GLfloat a, b, c, d;
+} vbo_bbox_frustum_plane;
+
+
+/**
+ * Planes and octants with their normals
+ *
+ * Both arrays hold FRUSTUM_PLANE_COUNT entries. planeCount is a const
+ * member, so an object of this type cannot be assigned to as a whole once
+ * it has been created.
+ */
+typedef struct vbo_bbox_frustum
+{
+ const unsigned char planeCount;
+ struct vbo_bbox_frustum_plane plane[FRUSTUM_PLANE_COUNT];
+ unsigned char octant[FRUSTUM_PLANE_COUNT];
+} vbo_bbox_frustum;
+
+/*
+ * Four-component float vector, addressable either as data[0..3] or as the
+ * named members x, y, z, w. Used as the vertex type of the Axis Aligned
+ * Bounding Box in bounding_volume_info.
+ */
+typedef union vbo_vec4f {
+ GLfloat data[4];
+ struct {
+ GLfloat x, y, z, w;
+ };
+} vbo_vec4f;
+
+/*
+ * Oriented Bounding Box
+ */
+typedef struct oriented_bounding_box {
+ GLfloat x,y,z,w;
+} oriented_box;
+
+/*
+ * Spherical Bounding volume
+ */
+typedef struct spherical_bounding_volume {
+ GLfloat x,y,z,r; // x^2+y^2+z^2 = r^2
+} spherical_volume;
+
+/*
+ * 8-Discrete oriented polytopes
+ */
+typedef struct dop_bounding_volume {
+ GLfloat x,y,z,r,a,b;//TBD Not sure of the representation for 8-DOP yet!
+} dop_volume;
+
+/*
+ * Bounding volumes for AABB, OBB, SPHERE, DOP etc
+ * Only one member of the union is meaningful at a time.
+ */
+typedef union bounding_volume_info {
+ vbo_vec4f vert_vec4[8]; /* Bbox min/max coordinates */
+ oriented_box obb[8];
+ spherical_volume sphere;
+ dop_volume dop[8];
+} bounding_volume_info;
+
+
+typedef struct vbo_bbox_cache_key /* Lookup key for the bbox cache; the cache hashes the key bytes as GLuint words */
+{
+ /* From API call */
+ GLenum mode; /* GL_TRIANGLES is the only mode supported currently */
+ GLsizei count; /* Number of indices in draw call */
+ GLenum indices_type; /* must be GL_UNSIGNED_SHORT for now */
+ GLuint type_size;
+ GLuint type_sizeShift;
+ GLint indices; /* Offset to index VBO */
+ GLint basevertex; /* Only 0 supported for now. */
+
+ /* VBO objects names */
+ GLuint element_buf_name;
+ GLuint vertex_buf_name;
+
+ /* Vertex position attribute configuration */
+ GLint offset;
+ GLint size; /* Size of attribute, must be 3 for now */
+ GLenum vertDataType; /* Must be GL_FLOAT */
+ GLsizei stride; /* Any */
+ GLuint elementStride;
+} vbo_bbox_cache_key;
+
+typedef struct bounding_info /* One bounding volume together with the index range it covers */
+{
+ int vert_count; /* Number of vertices this bbox covers */
+ int start_offset; /* Start offset for this bbox */
+ bool is_degenerate; /* Triangle can not be formed */
+ enum vbo_bbox_clip_result clip_result;
+
+ bounding_volume_info bounding_volume; /* Bbox min/max coordinates */
+
+} bounding_info;
+
+/**
+ * Cached information about (multiple) bounding boxes
+ *
+ * One instance is one entry of mesa_bbox_cache. The cache code owns and
+ * frees key, sub_bbox_array and the entry itself.
+ */
+struct vbo_bbox_cache_data
+{
+ /* Valid data indicator, will be set to false if VBO have been
+ * modified by application
+ */
+ bool valid;
+
+ /* Indicates if bounding boxes were calculated for this geometry */
+ bool need_new_calculation;
+
+ /* Controls delay with which bounding boxes are calculated */
+ int init_delaycnt;
+ int init_delaylimit;
+
+ /* Data change indicator for VBOs */
+ int vertpos_vbo_changecnt;
+ int indices_vbo_changecnt;
+
+ /* How many bounding boxes are stored for this geometry */
+ int sub_bbox_cnt;
+
+ /* Number of draw calls for which the bbox was effective */
+ int drawcnt_bbox_helped;
+
+ int last_use_frame;
+
+ GLuint hash; /* Hash of the key, filled in by _mesa_bbox_cache_insert() */
+
+ unsigned keysize; /* Size in bytes of the buffer that key points to */
+
+ vbo_bbox_cache_key *key; /* Heap copy of the key made on insert; freed when the cache is cleared */
+
+ bool mvp_valid;
+ GLmatrix mvpin;
+ GLfloat mvp[16];
+ vbo_bbox_frustum frustum;
+
+ /* Pointer to array of bboxes; freed when the cache is cleared */
+ bounding_info* sub_bbox_array;
+
+ /* Bounding box that covers whole geometry */
+ bounding_info full_box;
+
+ struct vbo_bbox_cache_data *next; /* Next entry in the same hash bucket */
+};
+
+typedef struct mesa_bbox_cache { /* Chained hash table of vbo_bbox_cache_data entries */
+ struct vbo_bbox_cache_data **items; /* Bucket array with "size" list heads */
+ struct vbo_bbox_cache_data *last; /* Entry returned by the most recent successful search, or NULL */
+ GLuint size, n_items; /* Number of buckets / number of stored entries */
+} mesa_bbox_cache;
+
+typedef struct mesa_bbox_opt
+{
+ mesa_bbox_cache * cache; /* Created with _mesa_new_bbox_cache() during init */
+ GLuint calc_delay; /* Initialized to BBOX_CALC_MIN_DELAY */
+} mesa_bbox_opt;
+
+/**
+ * VBO BBox Cache functions, replica of program cache functions
+ *
+ * _mesa_new_bbox_cache()     allocates an empty cache; returns NULL on
+ *                            allocation failure.
+ * _mesa_delete_bbox_cache()  frees every entry, the bucket array and the
+ *                            cache itself.
+ * _mesa_search_bbox_cache()  returns the entry matching key, or NULL.
+ * _mesa_bbox_cache_insert()  stores CachedData under a private copy of key;
+ *                            the table may be grown or cleared first.
+ */
+struct mesa_bbox_cache *
+_mesa_new_bbox_cache(void);
+
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx,
+ struct mesa_bbox_cache *cache);
+
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(struct mesa_bbox_cache *cache,
+ const void *key, GLuint keysize);
+
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,
+ struct mesa_bbox_cache *cache,
+ const void *key,
+ GLuint keysize,
+ struct vbo_bbox_cache_data *CachedData);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif //_VBO_BBOX_H_
diff --git a/src/mesa/vbo/vbo_bbox_cache.c b/src/mesa/vbo/vbo_bbox_cache.c
new file mode 100644
index 0000000..09bd1dd
--- /dev/null
+++ b/src/mesa/vbo/vbo_bbox_cache.c
@@ -0,0 +1,195 @@
+
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \brief VBO BBOX module implementation
+ * \author Kedar Karanje
+ */
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "vbo/vbo_bbox.h"
+
+#define CACHE_SIZE 17
+
+/**
+ * Fold a state key into a 32-bit hash value.
+ *
+ * The key is read as an array of GLuint words; any trailing bytes that do
+ * not fill a whole word are ignored. Each word is added to the running
+ * value, which is then mixed with a left shift by 10 and a right shift
+ * by 6.
+ *
+ * \param key       start of the key bytes
+ * \param key_size  key length in bytes, at least 4
+ * \return the hash of the key
+ */
+static GLuint
+bbox_hash_key(const void *key, GLuint key_size)
+{
+   const GLuint *word = (const GLuint *) key;
+   const GLuint *const end = word + key_size / sizeof(*word);
+   GLuint h = 0;
+
+   assert(key_size >= 4);
+
+   while (word != end) {
+      h += *word++;
+      h += h << 10;
+      h ^= h >> 6;
+   }
+
+   return h;
+}
+
+
+/**
+ * Rebuild/expand the hash table to accommodate more entries
+ *
+ * The bucket array grows to three times its current size and every entry
+ * is relinked into the bucket chosen by its stored hash. cache->last is
+ * reset. If the larger bucket array cannot be allocated the existing table
+ * is kept unchanged, so the cache stays usable with longer chains.
+ */
+static void
+bbox_rehash(struct mesa_bbox_cache *cache)
+{
+   struct vbo_bbox_cache_data **items;
+   struct vbo_bbox_cache_data *c, *next;
+   GLuint size, i;
+
+   cache->last = NULL;
+
+   size = cache->size * 3;
+   items = calloc(size, sizeof(*items));
+   if (!items) {
+      /* Out of memory: the old, smaller table is still consistent. */
+      return;
+   }
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+         /* Save the successor first: c->next is overwritten below. */
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+   }
+
+   free(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Drop every entry from the cache while keeping the bucket array.
+ *
+ * For each entry the key copy, the sub-bbox array and the entry itself are
+ * freed. Afterwards all buckets are empty, n_items is 0 and cache->last is
+ * NULL. The ctx argument is not used.
+ */
+static void
+bbox_clear_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   GLuint bucket;
+
+   cache->last = NULL;
+
+   for (bucket = 0; bucket < cache->size; bucket++) {
+      struct vbo_bbox_cache_data *entry = cache->items[bucket];
+
+      while (entry) {
+         struct vbo_bbox_cache_data *const following = entry->next;
+
+         free(entry->key);
+         free(entry->sub_bbox_array);
+         free(entry);
+         entry = following;
+      }
+      cache->items[bucket] = NULL;
+   }
+
+   cache->n_items = 0;
+}
+
+
+
+/**
+ * Allocate an empty bounding box cache with CACHE_SIZE buckets.
+ *
+ * \return the new cache, or NULL if either the cache structure or its
+ *         bucket array could not be allocated.
+ */
+mesa_bbox_cache *
+_mesa_new_bbox_cache(void)
+{
+   mesa_bbox_cache *cache = CALLOC_STRUCT(mesa_bbox_cache);
+
+   if (!cache) {
+      MESA_BBOX("cache is Null in Func:%s\n", __func__);
+      return NULL;
+   }
+
+   cache->size = CACHE_SIZE;
+   /* items is an array of bucket head pointers, so size it by the pointer
+    * type rather than by the (much larger) entry structure.
+    */
+   cache->items = calloc(cache->size, sizeof(*cache->items));
+   if (!cache->items) {
+      MESA_BBOX("Func:%s cache-size=%u "
+                "Cannot allocate items freeing cache\n",
+                __func__, cache->size);
+      free(cache);
+      return NULL;
+   }
+
+   /* Pointers are printed with %p and GLuint with %u so the arguments
+    * match the format when logging is compiled in.
+    */
+   MESA_BBOX("Func:%s cache:%p cache->size=%u \n",
+             __func__, (void *) cache, cache->size);
+   return cache;
+}
+
+
+/**
+ * Destroy a bounding box cache.
+ *
+ * Frees every cached entry, then the bucket array, then the cache itself.
+ * Passing a NULL cache is a no-op, mirroring free().
+ *
+ * \param ctx    GL context, forwarded to the clear helper
+ * \param cache  cache to destroy, may be NULL
+ */
+void
+_mesa_delete_bbox_cache(struct gl_context *ctx, mesa_bbox_cache *cache)
+{
+   if (!cache)
+      return;
+
+   bbox_clear_cache(ctx, cache);
+   free(cache->items);
+   free(cache);
+}
+
+/**
+ * Look up the cache entry stored under the given key.
+ *
+ * The most recently found entry (cache->last) is tried first, then the
+ * hash bucket for the key is walked. An entry matches only when its key
+ * has the same size and identical bytes. Comparing just a few fields would
+ * let draws that use different buffers or attribute layouts share one
+ * entry.
+ *
+ * \param cache    cache to search
+ * \param key      key bytes, same layout as passed to the insert function
+ * \param keysize  size of key in bytes
+ * \return the matching entry (also remembered in cache->last), or NULL
+ */
+struct vbo_bbox_cache_data *
+_mesa_search_bbox_cache(mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize)
+{
+   struct vbo_bbox_cache_data *c;
+   GLuint hash;
+
+   MESA_BBOX("Func:%s cache:%p \n", __func__, (void *) cache);
+
+   /* Fast path: repeated lookups of the same key skip the hashing. */
+   if (cache->last &&
+       cache->last->keysize == keysize &&
+       memcmp(cache->last->key, key, keysize) == 0) {
+      return cache->last;
+   }
+
+   hash = bbox_hash_key(key, keysize);
+   MESA_BBOX("cache:%p,hash:%u,cache->size:%u\n",
+             (void *) cache, hash, cache->size);
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      if (c->hash == hash &&
+          c->keysize == keysize &&
+          memcmp(c->key, key, keysize) == 0) {
+         cache->last = c;
+         return c;
+      }
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Add an entry to the cache under a private copy of the key.
+ *
+ * Before linking the entry, the table is grown when it holds more than
+ * 1.5 entries per bucket and has fewer than 1000 buckets; at 1000 buckets
+ * or more it is emptied instead.
+ *
+ * If the key copy cannot be allocated the entry is NOT inserted: its key
+ * is left NULL with keysize 0 and the caller keeps ownership of
+ * CachedData.
+ *
+ * \param ctx         GL context, forwarded to the clear helper
+ * \param cache       cache to insert into
+ * \param key         key bytes to copy
+ * \param keysize     size of key in bytes
+ * \param CachedData  entry to link into the cache
+ */
+void
+_mesa_bbox_cache_insert(struct gl_context *ctx,struct mesa_bbox_cache *cache,
+                        const void *key, GLuint keysize,
+                        struct vbo_bbox_cache_data *CachedData)
+{
+   const GLuint hash = bbox_hash_key(key, keysize);
+
+   CachedData->hash = hash;
+
+   CachedData->key = calloc(1, keysize);
+   if (!CachedData->key) {
+      /* A keyless entry would crash later lookups, so do not link it. */
+      CachedData->keysize = 0;
+      MESA_BBOX("Func:%s cannot allocate key copy\n", __func__);
+      return;
+   }
+   memcpy(CachedData->key, key, keysize);
+   CachedData->keysize = keysize;
+
+   if (cache->n_items > cache->size * 1.5) {
+      if (cache->size < 1000)
+         bbox_rehash(cache);
+      else
+         bbox_clear_cache(ctx, cache);
+   }
+
+   /* Push onto the head of the bucket list (size may have just changed). */
+   cache->n_items++;
+   CachedData->next = cache->items[hash % cache->size];
+   cache->items[hash % cache->size] = CachedData;
+}
diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c
index cf9405d..8c608c2 100644
--- a/src/mesa/vbo/vbo_context.c
+++ b/src/mesa/vbo/vbo_context.c
@@ -36,6 +36,9 @@
#include "vbo.h"
#include "vbo_private.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo_bbox.h"
+#endif
static GLuint
check_size(const GLfloat *attr)
@@ -199,6 +202,10 @@ _vbo_CreateContext(struct gl_context *ctx)
if (ctx->API == API_OPENGL_COMPAT)
vbo_save_init(ctx);
+#ifdef MESA_BBOX_OPT //Hard coded to AABB for now
+ ctx->volume_type = BOUNDING_VOLUME_AABB;
+#endif
+
vbo->VAO = _mesa_new_vao(ctx, ~((GLuint)0));
/* The exec VAO assumes to have all arributes bound to binding 0 */
for (unsigned i = 0; i < VERT_ATTRIB_MAX; ++i)
@@ -219,7 +226,9 @@ _vbo_DestroyContext(struct gl_context *ctx)
_ae_destroy_context(ctx);
ctx->aelt_context = NULL;
}
-
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_free(ctx);
+#endif
if (vbo) {
_mesa_reference_buffer_object(ctx, &vbo->binding.BufferObj, NULL);
diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c
index 51c000e..3321a21 100644
--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -39,6 +39,10 @@
#include "main/macros.h"
#include "main/transformfeedback.h"
+#ifdef MESA_BBOX_OPT
+#include "vbo_bbox.h"
+#endif
+
typedef struct {
GLuint count;
GLuint primCount;
@@ -784,6 +788,16 @@ skip_draw_elements(struct gl_context *ctx, GLsizei count,
* Do the rendering for a glDrawElements or glDrawRangeElements call after
* we've validated buffer bounds, etc.
*/
+#ifdef MESA_BBOX_OPT
+void
+vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start, GLuint end,
+ GLsizei count, GLenum type,
+ const GLvoid * indices,
+ GLint basevertex, GLuint numInstances,
+ GLuint baseInstance)
+#else
static void
vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
GLboolean index_bounds_valid,
@@ -792,6 +806,7 @@ vbo_validated_drawrangeelements(struct gl_context *ctx,
GLenum mode,
const GLvoid * indices,
GLint basevertex, GLuint numInstances,
GLuint baseInstance)
+#endif
{
struct _mesa_index_buffer ib;
struct _mesa_prim prim;
@@ -997,6 +1012,11 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum
type,
_mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
_mesa_enum_to_string(mode), count,
_mesa_enum_to_string(type), indices);
+#ifdef MESA_BBOX_OPT
+ MESA_BBOX_PRINT("glDrawElements(%s, %u, %s, %p)\n",
+ _mesa_enum_to_string(mode), count,
+ _mesa_enum_to_string(type), indices);
+#endif
FLUSH_FOR_DRAW(ctx);
@@ -1011,9 +1031,13 @@ vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
if (!_mesa_validate_DrawElements(ctx, mode, count, type, indices))
return;
}
-
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+ count, type, indices, 0, 1, 0);
+#else
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
count, type, indices, 0, 1, 0);
+#endif
}
@@ -1045,8 +1069,13 @@ vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
return;
}
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+ count, type, indices, basevertex, 1, 0);
+#else
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
count, type, indices, basevertex, 1, 0);
+#endif
}
@@ -1078,9 +1107,13 @@ vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
indices, numInstances))
return;
}
-
+#ifdef MESA_BBOX_OPT
+ vbo_bbox_drawelements(ctx, mode, GL_FALSE, 0, ~0,
+ count, type, indices, 0, numInstances, 0);
+#else
vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, 0, ~0,
count, type, indices, 0, numInstances, 0);
+#endif
}
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index ad59efb..15f064b 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -197,6 +197,10 @@ TODO: document the other workarounds.
<option name="force_glsl_extensions_warn" value="true" />
</application>
+ <application name="GLBenchmark" executable="GLBenchmark">
+ <option name="enable_bounding_box_culling" value="true"/>
+ </application>
+
<!-- The GL thread whitelist is below, workarounds are above.
Keep it that way. -->
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index ecf495a..c3f8928 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -135,6 +135,11 @@
DRI_CONF_OPT_BEGIN_B(allow_glsl_cross_stage_interpolation_mismatch, def) \
DRI_CONF_DESC(en,gettext("Allow interpolation qualifier mismatch across
shader stages")) \
DRI_CONF_OPT_END
+#define DRI_CONF_ENABLE_BOUNDING_BOX_CULLING(def) \
+DRI_CONF_OPT_BEGIN_B(enable_bounding_box_culling, def) \
+ DRI_CONF_DESC(en,gettext("Enable bounding box culling in CPU")) \
+DRI_CONF_OPT_END
+
/**
* \brief Image quality-related options
*/
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev