.gitignore | 1 configure.ac | 202 + docs/index.html | 6 docs/relnotes.html | 1 docs/relnotes/10.4.3.html | 145 + include/D3D9/d3d9.h | 10 include/D3D9/d3d9types.h | 13 scons/gallium.py | 3 src/Makefile.am | 17 src/egl/drivers/dri2/platform_wayland.c | 2 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h | 4 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 4 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 31 src/gallium/drivers/freedreno/a4xx/fd4_gmem.c | 61 src/gallium/drivers/freedreno/adreno_common.xml.h | 4 src/gallium/drivers/freedreno/adreno_pm4.xml.h | 16 src/gallium/drivers/r600/evergreen_state.c | 5 src/gallium/drivers/r600/r600_hw_context.c | 2 src/gallium/drivers/r600/r600_shader.c | 31 src/gallium/drivers/r600/r600_shader.h | 4 src/gallium/drivers/r600/r600_state.c | 5 src/gallium/drivers/r600/r600_state_common.c | 12 src/gallium/drivers/radeon/r600_pipe_common.c | 4 src/gallium/drivers/radeon/r600_pipe_common.h | 8 src/gallium/drivers/radeon/radeon_elf_util.c | 65 src/gallium/drivers/radeon/radeon_elf_util.h | 15 src/gallium/drivers/radeon/radeon_llvm_emit.c | 4 src/gallium/drivers/radeonsi/si_compute.c | 46 src/gallium/drivers/radeonsi/si_pipe.h | 1 src/gallium/drivers/radeonsi/si_shader.c | 4 src/gallium/drivers/radeonsi/si_state_shaders.c | 20 src/gallium/drivers/vc4/vc4_cl_dump.c | 151 + src/gallium/drivers/vc4/vc4_context.c | 7 src/gallium/drivers/vc4/vc4_opt_cse.c | 2 src/gallium/drivers/vc4/vc4_opt_small_immediates.c | 3 src/gallium/drivers/vc4/vc4_program.c | 29 src/gallium/drivers/vc4/vc4_qir.c | 22 src/gallium/drivers/vc4/vc4_qir.h | 1 src/gallium/drivers/vc4/vc4_qpu.c | 83 src/gallium/drivers/vc4/vc4_register_allocate.c | 22 src/gallium/state_trackers/clover/llvm/invocation.cpp | 51 src/gallium/state_trackers/nine/adapter9.c | 7 src/gallium/state_trackers/nine/basetexture9.c | 66 src/gallium/state_trackers/nine/cubetexture9.c | 16 src/gallium/state_trackers/nine/device9.c | 121 src/gallium/state_trackers/nine/device9.h | 6 
src/gallium/state_trackers/nine/nine_ff.c | 37 src/gallium/state_trackers/nine/nine_pipe.h | 5 src/gallium/state_trackers/nine/nine_shader.c | 667 +++- src/gallium/state_trackers/nine/nine_state.c | 94 src/gallium/state_trackers/nine/nine_state.h | 3 src/gallium/state_trackers/nine/pixelshader9.c | 10 src/gallium/state_trackers/nine/pixelshader9.h | 2 src/gallium/state_trackers/nine/query9.c | 1 src/gallium/state_trackers/nine/stateblock9.c | 4 src/gallium/state_trackers/nine/surface9.c | 29 src/gallium/state_trackers/nine/swapchain9.c | 2 src/gallium/state_trackers/nine/texture9.c | 9 src/gallium/state_trackers/nine/volumetexture9.c | 14 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 src/gbm/backends/dri/gbm_dri.c | 7 src/gbm/main/gbm.c | 1 src/glsl/Android.mk | 1 src/glsl/Makefile.am | 79 src/glsl/Makefile.sources | 302 +- src/glsl/ast_to_hir.cpp | 16 src/glsl/blob.c | 323 ++ src/glsl/blob.h | 289 ++ src/glsl/glcpp/glcpp-parse.y | 4 src/glsl/glsl_parser_extras.cpp | 2 src/glsl/glsl_parser_extras.h | 4 src/glsl/glsl_types.cpp | 111 src/glsl/glsl_types.h | 9 src/glsl/ir_function_can_inline.cpp | 9 src/glsl/list.h | 25 src/glsl/nir/.gitignore | 4 src/glsl/nir/README | 118 src/glsl/nir/glsl_to_nir.cpp | 1784 +++++++++++++ src/glsl/nir/glsl_to_nir.h | 40 src/glsl/nir/nir.c | 2047 +++++++++++++++ src/glsl/nir/nir.h | 1563 +++++++++++ src/glsl/nir/nir_algebraic.py | 257 + src/glsl/nir/nir_constant_expressions.h | 31 src/glsl/nir/nir_constant_expressions.py | 352 ++ src/glsl/nir/nir_dominance.c | 297 ++ src/glsl/nir/nir_from_ssa.c | 881 ++++++ src/glsl/nir/nir_intrinsics.c | 49 src/glsl/nir/nir_intrinsics.h | 140 + src/glsl/nir/nir_live_variables.c | 296 ++ src/glsl/nir/nir_lower_alu_to_scalar.c | 186 + src/glsl/nir/nir_lower_atomics.c | 148 + src/glsl/nir/nir_lower_global_vars_to_local.c | 106 src/glsl/nir/nir_lower_io.c | 315 ++ src/glsl/nir/nir_lower_locals_to_regs.c | 284 ++ src/glsl/nir/nir_lower_samplers.cpp | 188 + src/glsl/nir/nir_lower_system_values.c | 
116 src/glsl/nir/nir_lower_to_source_mods.c | 181 + src/glsl/nir/nir_lower_var_copies.c | 222 + src/glsl/nir/nir_lower_vars_to_ssa.c | 1102 ++++++++ src/glsl/nir/nir_lower_vec_to_movs.c | 93 src/glsl/nir/nir_metadata.c | 54 src/glsl/nir/nir_opcodes.py | 591 ++++ src/glsl/nir/nir_opcodes_c.py | 55 src/glsl/nir/nir_opcodes_h.py | 47 src/glsl/nir/nir_opt_algebraic.py | 119 src/glsl/nir/nir_opt_constant_folding.c | 201 + src/glsl/nir/nir_opt_copy_propagate.c | 317 ++ src/glsl/nir/nir_opt_cse.c | 297 ++ src/glsl/nir/nir_opt_dce.c | 179 + src/glsl/nir/nir_opt_global_to_local.c | 103 src/glsl/nir/nir_opt_peephole_select.c | 208 + src/glsl/nir/nir_print.c | 888 ++++++ src/glsl/nir/nir_remove_dead_variables.c | 136 src/glsl/nir/nir_search.c | 321 ++ src/glsl/nir/nir_search.h | 80 src/glsl/nir/nir_split_var_copies.c | 279 ++ src/glsl/nir/nir_to_ssa.c | 532 +++ src/glsl/nir/nir_types.cpp | 143 + src/glsl/nir/nir_types.h | 75 src/glsl/nir/nir_validate.c | 881 ++++++ src/glsl/nir/nir_worklist.c | 144 + src/glsl/nir/nir_worklist.h | 91 src/glsl/tests/.gitignore | 1 src/glsl/tests/blob_test.c | 320 ++ src/loader/Makefile.am | 2 src/mapi/Makefile.am | 46 src/mapi/Makefile.sources | 52 src/mapi/glapi/Makefile.sources | 22 src/mapi/glapi/gen/es_EXT.xml | 9 src/mapi/glapi/gen/gl_API.xml | 6 src/mapi/glapi/gen/glapi_gen.mk | 40 src/mesa/Android.libmesa_dricore.mk | 2 src/mesa/Android.libmesa_st_mesa.mk | 2 src/mesa/Makefile.am | 33 src/mesa/Makefile.sources | 1106 ++++---- src/mesa/drivers/common/meta.c | 10 src/mesa/drivers/common/meta.h | 24 src/mesa/drivers/common/meta_tex_subimage.c | 357 ++ src/mesa/drivers/dri/i965/Makefile.am | 8 src/mesa/drivers/dri/i965/Makefile.sources | 4 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 2 src/mesa/drivers/dri/i965/brw_cfg.h | 6 src/mesa/drivers/dri/i965/brw_context.h | 24 src/mesa/drivers/dri/i965/brw_curbe.c | 13 src/mesa/drivers/dri/i965/brw_device_info.c | 4 src/mesa/drivers/dri/i965/brw_draw_upload.c | 13 
src/mesa/drivers/dri/i965/brw_eu.h | 3 src/mesa/drivers/dri/i965/brw_eu_emit.c | 11 src/mesa/drivers/dri/i965/brw_fs.cpp | 179 - src/mesa/drivers/dri/i965/brw_fs.h | 69 src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp | 134 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 4 src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 3 src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 11 src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 76 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 47 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 1803 +++++++++++++ src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 2 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 250 + src/mesa/drivers/dri/i965/brw_reg.h | 16 src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 12 src/mesa/drivers/dri/i965/brw_shader.cpp | 51 src/mesa/drivers/dri/i965/brw_shader.h | 2 src/mesa/drivers/dri/i965/brw_state_upload.c | 19 src/mesa/drivers/dri/i965/brw_vec4.cpp | 26 src/mesa/drivers/dri/i965/brw_vec4.h | 6 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 6 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 3 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 23 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 3 src/mesa/drivers/dri/i965/gen6_sol.c | 2 src/mesa/drivers/dri/i965/gen6_surface_state.c | 3 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 3 src/mesa/drivers/dri/i965/gen8_draw_upload.c | 2 src/mesa/drivers/dri/i965/gen8_surface_state.c | 5 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 7 src/mesa/drivers/dri/i965/intel_fbo.c | 10 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 32 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 1 src/mesa/drivers/dri/i965/intel_pixel_draw.c | 2 src/mesa/drivers/dri/i965/intel_pixel_read.c | 201 - src/mesa/drivers/dri/i965/intel_tex.c | 62 src/mesa/drivers/dri/i965/intel_tex.h | 9 src/mesa/drivers/dri/i965/intel_tex_image.c | 280 -- src/mesa/drivers/dri/i965/intel_tex_subimage.c | 512 --- 
src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 722 +++++ src/mesa/drivers/dri/i965/intel_tiled_memcpy.h | 61 src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp | 417 +++ src/mesa/main/api_validate.c | 113 src/mesa/main/bitset.h | 1 src/mesa/main/bufferobj.c | 18 src/mesa/main/buffers.c | 8 src/mesa/main/clear.c | 2 src/mesa/main/context.c | 78 src/mesa/main/dd.h | 16 src/mesa/main/dlist.c | 2 src/mesa/main/errors.c | 8 src/mesa/main/extensions.c | 2 src/mesa/main/fbobject.c | 15 src/mesa/main/feedback.c | 2 src/mesa/main/ffvertex_prog.c | 4 src/mesa/main/format_info.py | 2 src/mesa/main/format_parser.py | 7 src/mesa/main/format_utils.c | 22 src/mesa/main/format_utils.h | 25 src/mesa/main/formatquery.c | 16 src/mesa/main/formats.c | 70 src/mesa/main/formats.csv | 1 src/mesa/main/formats.h | 1 src/mesa/main/framebuffer.c | 2 src/mesa/main/glformats.c | 4 src/mesa/main/hash.c | 2 src/mesa/main/matrix.c | 6 src/mesa/main/mipmap.c | 2 src/mesa/main/mtypes.h | 1 src/mesa/main/querymatrix.c | 55 src/mesa/main/rastpos.c | 2 src/mesa/main/readpix.c | 6 src/mesa/main/renderbuffer.c | 20 src/mesa/main/set.c | 346 -- src/mesa/main/set.h | 94 src/mesa/main/shared.c | 5 src/mesa/main/syncobj.c | 14 src/mesa/main/texgetimage.c | 10 src/mesa/main/varray.c | 2 src/mesa/main/vdpau.c | 19 src/mesa/program/Android.mk | 2 src/mesa/program/hash_table.h | 34 src/mesa/program/ir_to_mesa.cpp | 1 src/mesa/program/prog_execute.c | 3 src/mesa/program/prog_optimize.c | 4 src/mesa/program/prog_print.c | 4 src/mesa/program/prog_statevars.c | 2 src/mesa/state_tracker/st_atom_blend.c | 2 src/mesa/state_tracker/st_atom_scissor.c | 2 src/mesa/state_tracker/st_atom_shader.c | 7 src/mesa/state_tracker/st_atom_viewport.c | 2 src/mesa/state_tracker/st_cb_fbo.c | 6 src/mesa/state_tracker/st_cb_texture.c | 4 src/mesa/state_tracker/st_draw.c | 2 src/mesa/state_tracker/st_extensions.c | 5 src/mesa/state_tracker/st_format.c | 3 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 
src/mesa/state_tracker/st_manager.c | 2 src/mesa/state_tracker/st_mesa_to_tgsi.c | 4 src/mesa/state_tracker/st_program.c | 2 src/mesa/swrast/s_drawpix.c | 2 src/mesa/swrast/s_texfetch.c | 2 src/mesa/vbo/vbo_exec_array.c | 3 src/util/Makefile.am | 7 src/util/Makefile.sources | 6 src/util/SConscript | 5 src/util/hash_table.c | 23 src/util/hash_table.h | 23 src/util/macros.h | 54 src/util/mesa-sha1.c | 316 ++ src/util/mesa-sha1.h | 53 src/util/set.c | 382 ++ src/util/set.h | 100 src/util/tests/hash_table/collision.c | 8 261 files changed, 26073 insertions(+), 3143 deletions(-)
New commits: commit d7d340fb2f68c46bd5a0008ecf53c6693e29c916 Author: Jason Ekstrand <jason.ekstr...@intel.com> Date: Mon Jan 26 14:21:15 2015 -0800 nir/opcodes: Use fpclassify() instead of isnormal() for ldexp Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88806 Reviewed-by: Ian Romanick <ian.d.roman...@intel.com> diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index f54a017..3f21539 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -480,7 +480,7 @@ else opcode("ldexp", 0, tunsigned, [0, 0], [tfloat, tint], "", """ dst = ldexp(src0, src1); /* flush denormals to zero. */ -if (!isnormal(dst)) +if (fpclassify(dst) != FP_NORMAL) dst = copysign(0.0f, src0); """) commit d6eb572905e39c36168b8f5da240af961f9dde0a Author: Jason Ekstrand <jason.ekstr...@intel.com> Date: Mon Jan 26 14:19:30 2015 -0800 util: Move the alternate fpclassify implementation to util Reviewed-by: Ian Romanick <ian.d.roman...@intel.com> diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index ef85175..d2d643b 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -17,6 +17,7 @@ #include "glheader.h" #include "querymatrix.h" #include "main/get.h" +#include "util/macros.h" /** @@ -37,56 +38,6 @@ #define INT_TO_FIXED(x) ((GLfixed) ((x) << 16)) #define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0)) -#if defined(fpclassify) -/* ISO C99 says that fpclassify is a macro. Assume that any implementation - * of fpclassify, whether it's in a C99 compiler or not, will be a macro. - */ -#elif defined(_MSC_VER) -/* Not required on VS2013 and above. */ -/* Oddly, the fpclassify() function doesn't exist in such a form - * on MSVC. This is an implementation using slightly different - * lower-level Windows functions. 
- */ -#include <float.h> - -enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} -fpclassify(double x) -{ - switch(_fpclass(x)) { - case _FPCLASS_SNAN: /* signaling NaN */ - case _FPCLASS_QNAN: /* quiet NaN */ - return FP_NAN; - case _FPCLASS_NINF: /* negative infinity */ - case _FPCLASS_PINF: /* positive infinity */ - return FP_INFINITE; - case _FPCLASS_NN: /* negative normal */ - case _FPCLASS_PN: /* positive normal */ - return FP_NORMAL; - case _FPCLASS_ND: /* negative denormalized */ - case _FPCLASS_PD: /* positive denormalized */ - return FP_SUBNORMAL; - case _FPCLASS_NZ: /* negative zero */ - case _FPCLASS_PZ: /* positive zero */ - return FP_ZERO; - default: - /* Should never get here; but if we do, this will guarantee - * that the pattern is not treated like a number. - */ - return FP_NAN; - } -} - -#else - -enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} -fpclassify(double x) -{ - /* XXX do something better someday */ - return FP_NORMAL; -} - -#endif - GLbitfield GLAPIENTRY _mesa_QueryMatrixxOES(GLfixed mantissa[16], GLint exponent[16]) { GLfloat matrix[16]; diff --git a/src/util/macros.h b/src/util/macros.h index eec8b93..180f2f6 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -24,6 +24,8 @@ #ifndef UTIL_MACROS_H #define UTIL_MACROS_H +#include <math.h> + /* Compute the size of an array */ #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) @@ -156,4 +158,54 @@ do { \ # endif #endif +#if defined(fpclassify) +/* ISO C99 says that fpclassify is a macro. Assume that any implementation + * of fpclassify, whether it's in a C99 compiler or not, will be a macro. + */ +#elif defined(_MSC_VER) +/* Not required on VS2013 and above. */ +/* Oddly, the fpclassify() function doesn't exist in such a form + * on MSVC. This is an implementation using slightly different + * lower-level Windows functions. 
+ */ +#include <float.h> + +static inline enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} +fpclassify(double x) +{ + switch(_fpclass(x)) { + case _FPCLASS_SNAN: /* signaling NaN */ + case _FPCLASS_QNAN: /* quiet NaN */ + return FP_NAN; + case _FPCLASS_NINF: /* negative infinity */ + case _FPCLASS_PINF: /* positive infinity */ + return FP_INFINITE; + case _FPCLASS_NN: /* negative normal */ + case _FPCLASS_PN: /* positive normal */ + return FP_NORMAL; + case _FPCLASS_ND: /* negative denormalized */ + case _FPCLASS_PD: /* positive denormalized */ + return FP_SUBNORMAL; + case _FPCLASS_NZ: /* negative zero */ + case _FPCLASS_PZ: /* positive zero */ + return FP_ZERO; + default: + /* Should never get here; but if we do, this will guarantee + * that the pattern is not treated like a number. + */ + return FP_NAN; + } +} + +#else + +enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL} +fpclassify(double x) +{ + /* XXX do something better someday */ + return FP_NORMAL; +} + +#endif + #endif /* UTIL_MACROS_H */ commit 5e8468e6dadb06da061ad51e2c40e85bc1c6c3e4 Author: Jason Ekstrand <jason.ekstr...@intel.com> Date: Tue Jan 27 14:18:57 2015 -0800 i965/tex: Don't create read-write textures with non-renderable formats I haven't actually seen this bug in the wild, but it's possible that someone could ask to do a S3TC PBO download or something. This protects us from accidentally creating a render target with a compressed or otherwise non-renderable format. 
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c index 184702f..2d3009a 100644 --- a/src/mesa/drivers/dri/i965/intel_tex.c +++ b/src/mesa/drivers/dri/i965/intel_tex.c @@ -323,6 +323,11 @@ intel_set_texture_storage_for_buffer_object(struct gl_context *ctx, perf_debug("Bad PBO alignment; fallback to CPU mapping\n"); return false; } + + if (!brw->format_supported_as_render_target[image->TexFormat]) { + perf_debug("Non-renderable PBO format; fallback to CPU mapping\n"); + return false; + } } assert(intel_texobj->mt == NULL); commit 34723c0861f9fa955dab4902c80afb6a6e1153c3 Author: Jason Ekstrand <jason.ekstr...@intel.com> Date: Tue Jan 27 14:06:44 2015 -0800 i965/gen8: Include the buffer offset when emitting renderbuffer relocs Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88792 Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 45c35db..639382f 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -450,7 +450,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, drm_intel_bo_emit_reloc(brw->batch.bo, brw->wm.base.surf_offset[surf_index] + 8 * 4, mt->bo, - 0, + mt->offset, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } commit 291d7ef84dcd83962515cbd7ce91ce1ff70930ed Author: Tapani Pälli <tapani.pa...@intel.com> Date: Mon Jan 26 12:35:23 2015 +0200 mesa: improve error messaging for format CSV parser The patch adds 2 error messages that point the user directly to the misspelled or impossible swizzle field of a format so it can be fixed. 
Signed-off-by: Tapani Pälli <tapani.pa...@intel.com> Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py index fe2063d..3bae57e 100644 --- a/src/mesa/main/format_info.py +++ b/src/mesa/main/format_info.py @@ -58,7 +58,7 @@ def get_gl_base_format(fmat): elif fmat.has_channel('i') and fmat.num_channels() == 1: return 'GL_INTENSITY' else: - assert False + sys.exit("error, could not determine base format for {0}, check swizzle".format(fmat.name)); def get_gl_data_type(fmat): if fmat.is_compressed(): diff --git a/src/mesa/main/format_parser.py b/src/mesa/main/format_parser.py index 522bce3..11184f7 100755 --- a/src/mesa/main/format_parser.py +++ b/src/mesa/main/format_parser.py @@ -24,6 +24,8 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import sys + VOID = 'x' UNSIGNED = 'u' SIGNED = 's' @@ -562,7 +564,10 @@ def parse(filename): block_height = int(fields[3]) colorspace = fields[9] - swizzle = Swizzle(fields[8]) + try: + swizzle = Swizzle(fields[8]) + except: + sys.exit("error parsing swizzle for format " + name) channels = _parse_channels(fields[4:8], layout, colorspace, swizzle) yield Format(name, layout, block_width, block_height, channels, swizzle, colorspace) commit 6ee5effac1ac8858c96820ece352be57c5c53cf6 Author: EdB <edb+m...@sigluy.net> Date: Wed Jan 28 02:20:38 2015 +0200 clover/llvm: Dump the OpenCL C code earlier. [ Francisco Jerez: As discussed on the mailing list, this is intended to produce more useful debug output in cases where the compilation terminates unexpectedly. 
] Reviewed-by: Francisco Jerez <curroje...@riseup.net> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 085bb6a..0794e61 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -717,6 +717,9 @@ clover::compile_program_llvm(const compat::string &source, llvm_ctx.setDiagnosticHandler(diagnostic_handler, &r_log); #endif + if (get_debug_flags() & DBG_CLC) + debug_log(source, ".cl"); + // The input file name must have the .cl extension in order for the // CompilerInvocation class to recognize it as an OpenCL source file. llvm::Module *mod = compile_llvm(llvm_ctx, source, headers, "input.cl", @@ -727,9 +730,6 @@ clover::compile_program_llvm(const compat::string &source, optimize(mod, optimization_level, kernels); - if (get_debug_flags() & DBG_CLC) - debug_log(source, ".cl"); - if (get_debug_flags() & DBG_LLVM) { std::string log; llvm::raw_string_ostream s_log(log); commit 13d23a9a176b62453d36035b5d793082a638a686 Author: EdB <edb+m...@sigluy.net> Date: Sun Dec 14 11:31:21 2014 +0100 clover/llvm: Move CLOVER_DEBUG stuff into anonymous namespace. [ Francisco Jerez: As we're at it make debug_options[] local to its only user and remove temporary. 
] Reviewed-by: Francisco Jerez <curroje...@riseup.net> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index edeed56..085bb6a 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -672,19 +672,27 @@ namespace { targets_initialized = true; } } -} // End anonymous namespace #define DBG_CLC (1 << 0) #define DBG_LLVM (1 << 1) #define DBG_ASM (1 << 2) -static const struct debug_named_value debug_options[] = { - {"clc", DBG_CLC, "Dump the OpenCL C code for all kernels."}, - {"llvm", DBG_LLVM, "Dump the generated LLVM IR for all kernels."}, - {"asm", DBG_ASM, "Dump kernel assembly code for targets specifying " - "PIPE_SHADER_IR_NATIVE"}, - DEBUG_NAMED_VALUE_END // must be last -}; + unsigned + get_debug_flags() { + static const struct debug_named_value debug_options[] = { + {"clc", DBG_CLC, "Dump the OpenCL C code for all kernels."}, + {"llvm", DBG_LLVM, "Dump the generated LLVM IR for all kernels."}, + {"asm", DBG_ASM, "Dump kernel assembly code for targets specifying " + "PIPE_SHADER_IR_NATIVE"}, + DEBUG_NAMED_VALUE_END // must be last + }; + static const unsigned debug_flags = + debug_get_flags_option("CLOVER_DEBUG", debug_options, 0); + + return debug_flags; + } + +} // End anonymous namespace module clover::compile_program_llvm(const compat::string &source, @@ -695,8 +703,6 @@ clover::compile_program_llvm(const compat::string &source, compat::string &r_log) { init_targets(); - static unsigned debug_flags = debug_get_flags_option("CLOVER_DEBUG", - debug_options, 0); std::vector<llvm::Function *> kernels; size_t processor_str_len = std::string(target).find_first_of("-"); @@ -721,10 +727,10 @@ clover::compile_program_llvm(const compat::string &source, optimize(mod, optimization_level, kernels); - if (debug_flags & DBG_CLC) + if (get_debug_flags() & DBG_CLC) debug_log(source, ".cl"); - if (debug_flags & DBG_LLVM) { + if 
(get_debug_flags() & DBG_LLVM) { std::string log; llvm::raw_string_ostream s_log(log); mod->print(s_log, NULL); @@ -745,7 +751,8 @@ clover::compile_program_llvm(const compat::string &source, break; case PIPE_SHADER_IR_NATIVE: { std::vector<char> code = compile_native(mod, triple, processor, - debug_flags & DBG_ASM, r_log); + get_debug_flags() & DBG_ASM, + r_log); m = build_module_native(code, mod, kernels, address_spaces, r_log); break; } commit 349df23eb0d119e3f22ff6149824497414f07505 Author: Dave Airlie <airl...@redhat.com> Date: Tue Jan 27 13:39:51 2015 +1000 r600g: add support for primitive id without geom shader (v2) GLSL 1.50 specifies a fragment shader may have a primitive id input without a geometry shader present. On r600 hw there is a special GS scenario for this, you have to enable GS_SCENARIO_A and pass the primitive id through the vertex shader which operates in GS_A mode. This is a first pass attempt at this, and passes the piglit tests that test for this. v1.1: clean up debug print + no need to assign key value to setup output. 
v2: add r600 support Reviewed-by: Glenn Kennard <glenn.kenn...@gmail.com> Signed-off-by: Dave Airlie <airl...@redhat.com> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 36b86aa..ea58aea 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2111,6 +2111,11 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_ uint32_t v = 0, v2 = 0, primid = 0; + if (rctx->vs_shader->current->shader.vs_as_gs_a) { + v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_A); + primid = 1; + } + if (state->geom_enable) { uint32_t cut_val; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index ccc5a8b..cd57eed 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -315,9 +315,9 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->stencil_ref.atom.dirty = true; ctx->vertex_fetch_shader.atom.dirty = true; ctx->export_shader.atom.dirty = true; + ctx->shader_stages.atom.dirty = true; if (ctx->gs_shader) { ctx->geometry_shader.atom.dirty = true; - ctx->shader_stages.atom.dirty = true; ctx->gs_rings.atom.dirty = true; } ctx->vertex_shader.atom.dirty = true; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 471df91..16e820e 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -596,6 +596,20 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back return 0; } +static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) +{ + int i; + i = ctx->shader->noutput++; + ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; + ctx->shader->output[i].sid = 0; + ctx->shader->output[i].gpr = 0; + ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; + ctx->shader->output[i].write_mask = 0x4; + ctx->shader->output[i].spi_sid = 
prim_id_sid; + + return 0; +} + static int tgsi_declaration(struct r600_shader_ctx *ctx) { struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; @@ -626,6 +640,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_SEMANTIC_POSITION: ctx->fragcoord_input = i; break; + case TGSI_SEMANTIC_PRIMID: + /* set this for now */ + ctx->shader->gs_prim_id_input = true; + ctx->shader->ps_prim_id_input = i; + break; } if (ctx->bc->chip_class >= EVERGREEN) { if ((r = evergreen_interp_input(ctx, i))) @@ -1800,6 +1819,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.shader = shader; ctx.native_integers = true; + shader->vs_as_gs_a = key.vs_as_gs_a; shader->vs_as_es = key.vs_as_es; r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, @@ -1938,6 +1958,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.nliterals = 0; ctx.literals = NULL; shader->fs_write_all = FALSE; + + if (shader->vs_as_gs_a) + vs_add_primid_output(&ctx, key.vs_prim_id_out); + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { tgsi_parse_token(&ctx.parse); switch (ctx.parse.FullToken.Token.Type) { @@ -2335,7 +2359,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, output[j].swizzle_z = 4; /* 0 */ output[j].swizzle_w = 5; /* 1 */ break; + case TGSI_SEMANTIC_PRIMID: + output[j].swizzle_x = 2; + output[j].swizzle_y = 4; /* 0 */ + output[j].swizzle_z = 4; /* 0 */ + output[j].swizzle_w = 4; /* 0 */ + break; } + break; case TGSI_PROCESSOR_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index ab67013..b2559e9 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -84,6 +84,8 @@ struct r600_shader { unsigned max_arrays; unsigned num_arrays; unsigned vs_as_es; + unsigned vs_as_gs_a; + unsigned ps_prim_id_input; struct r600_shader_array * arrays; }; @@ -92,6 +94,8 @@ 
struct r600_shader_key { unsigned alpha_to_one:1; unsigned nr_cbufs:4; unsigned vs_as_es:1; + unsigned vs_as_gs_a:1; + unsigned vs_prim_id_out:8; }; struct r600_shader_array { diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 9a4b972..3c2fdfa 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1943,6 +1943,11 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom uint32_t v2 = 0, primid = 0; + if (rctx->vs_shader->current->shader.vs_as_gs_a) { + v2 = S_028A40_MODE(V_028A40_GS_SCENARIO_A); + primid = 1; + } + if (state->geom_enable) { uint32_t cut_val; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 1030620..b498d00 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -707,6 +707,10 @@ static INLINE struct r600_shader_key r600_shader_selector_key(struct pipe_contex key.nr_cbufs = 2; } else if (sel->type == PIPE_SHADER_VERTEX) { key.vs_as_es = (rctx->gs_shader != NULL); + if (rctx->ps_shader->current->shader.gs_prim_id_input && !rctx->gs_shader) { + key.vs_as_gs_a = true; + key.vs_prim_id_out = rctx->ps_shader->current->shader.input[rctx->ps_shader->current->shader.ps_prim_id_input].spi_sid; + } } return key; } @@ -1265,6 +1269,7 @@ static bool r600_update_derived_state(struct r600_context *rctx) r600_update_ps_state(ctx, rctx->ps_shader->current); } + rctx->shader_stages.atom.dirty = true; update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current); } commit cc2fc095bfae1c0494240e4f06931e2ca90e182c Author: Dave Airlie <airl...@redhat.com> Date: Tue Jan 27 13:34:50 2015 +1000 r600g: move selecting the pixel shader earlier. 
In order to detect that a pixel shader has a prim id input when we have no geometry shader we need to reorder the shader selection so the pixel shader is selected first, then the vertex shader key can take into account the primitive id input requirement and lack of geom shader. Reviewed-by: Glenn Kennard <glenn.kenn...@gmail.com> Signed-off-by: Dave Airlie <airl...@redhat.com> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 09d8952..1030620 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1170,6 +1170,10 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } + r600_shader_select(ctx, rctx->ps_shader, &ps_dirty); + if (unlikely(!rctx->ps_shader->current)) + return false; + update_gs_block_state(rctx, rctx->gs_shader != NULL); if (rctx->gs_shader) { @@ -1232,9 +1236,6 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } - r600_shader_select(ctx, rctx->ps_shader, &ps_dirty); - if (unlikely(!rctx->ps_shader->current)) - return false; if (unlikely(ps_dirty || rctx->pixel_shader.shader != rctx->ps_shader->current || rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable || commit 5c83a0d2ce41fa4b7b39d13c69d39990f7c926f5 Author: Michel Dänzer <michel.daen...@amd.com> Date: Thu Jan 22 12:30:24 2015 +0900 st/clover: Pass target instead of target.begin() to std::string() Fixes reading beyond allocated memory: ==1936== Invalid read of size 1 ==1936== at 0x4C2C1B4: strlen (vg_replace_strmem.c:412) ==1936== by 0x9E00C30: std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, std::allocator<char> const&) (in /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.20) ==1936== by 0x5B44FAE: clover::compile_program_llvm(clover::compat::string const&, clover::compat::vector<clover::compat::pair<clover::compat::string, clover::compat::string> > const&, 
pipe_shader_ir, clover::compat::string const&, clover::compat::string const&, clover::compat::string&) (invocation.cpp:698) ==1936== by 0x5B39A20: clover::program::build(clover::ref_vector<clover::device> const&, char const*, clover::compat::vector<clover::compat::pair<clover::compat::string, clover::compat::string> > const&) (program.cpp:63) ==1936== by 0x5B20152: clBuildProgram (program.cpp:182) ==1936== by 0x400F41: main (hello_world.c:109) ==1936== Address 0x56fee1f is 0 bytes after a block of size 15 alloc'd ==1936== at 0x4C28C20: malloc (vg_replace_malloc.c:296) ==1936== by 0x5B398F0: alloc (compat.hpp:59) ==1936== by 0x5B398F0: vector<std::basic_string<char> > (compat.hpp:98) ==1936== by 0x5B398F0: string<std::basic_string<char> > (compat.hpp:327) ==1936== by 0x5B398F0: clover::program::build(clover::ref_vector<clover::device> const&, char const*, clover::compat::vector<clover::compat::pair<clover::compat::string, clover::compat::string> > const&) (program.cpp:63) ==1936== by 0x5B20152: clBuildProgram (program.cpp:182) ==1936== by 0x400F41: main (hello_world.c:109) Reviewed-by: Francisco Jerez <curroje...@riseup.net> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 7a0be53..edeed56 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -699,9 +699,9 @@ clover::compile_program_llvm(const compat::string &source, debug_options, 0); std::vector<llvm::Function *> kernels; - size_t processor_str_len = std::string(target.begin()).find_first_of("-"); - std::string processor(target.begin(), 0, processor_str_len); - std::string triple(target.begin(), processor_str_len + 1, + size_t processor_str_len = std::string(target).find_first_of("-"); + std::string processor(target, 0, processor_str_len); + std::string triple(target, processor_str_len + 1, target.size() - processor_str_len - 1); clang::LangAS::Map 
address_spaces; llvm::LLVMContext llvm_ctx; commit ee31c8d7067ec5a563cdce5a12d8e077db0a7f67 Author: Michel Dänzer <michel.daen...@amd.com> Date: Thu Jan 22 12:36:13 2015 +0900 r600g,radeonsi: Fix calculation of IR target cap string buffer size Fixes writing beyond the allocated buffer: ==31855== Invalid write of size 1 ==31855== at 0x50AB2A9: vsprintf (iovsprintf.c:43) ==31855== by 0x508F6F6: sprintf (sprintf.c:32) ==31855== by 0xB59C7EC: r600_get_compute_param (r600_pipe_common.c:526) ==31855== by 0x5B2B7DE: get_compute_param<char> (device.cpp:37) ==31855== by 0x5B2B7DE: clover::device::ir_target() const (device.cpp:201) ==31855== by 0x5B398E0: clover::program::build(clover::ref_vector<clover::device> const&, char const*, clover::compat::vector<clover::compat::pair<clover::compat::string, clover::compat::string> > const&) (program.cpp:63) ==31855== by 0x5B20152: clBuildProgram (program.cpp:182) ==31855== by 0x400F41: main (hello_world.c:109) ==31855== Address 0x56fed5f is 0 bytes after a block of size 15 alloc'd ==31855== at 0x4C29180: operator new(unsigned long) (vg_replace_malloc.c:324) ==31855== by 0x5B2B7C2: allocate (new_allocator.h:104) ==31855== by 0x5B2B7C2: allocate (alloc_traits.h:357) ==31855== by 0x5B2B7C2: _M_allocate (stl_vector.h:170) ==31855== by 0x5B2B7C2: _M_create_storage (stl_vector.h:185) ==31855== by 0x5B2B7C2: _Vector_base (stl_vector.h:136) ==31855== by 0x5B2B7C2: vector (stl_vector.h:278) ==31855== by 0x5B2B7C2: get_compute_param<char> (device.cpp:35) ==31855== by 0x5B2B7C2: clover::device::ir_target() const (device.cpp:201) ==31855== by 0x5B398E0: clover::program::build(clover::ref_vector<clover::device> const&, char const*, clover::compat::vector<clover::compat::pair<clover::compat::string, clover::compat::string> > const&) (program.cpp:63) ==31855== by 0x5B20152: clBuildProgram (program.cpp:182) ==31855== by 0x400F41: main (hello_world.c:109) Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Tom Stellard 
<thomas.stell...@amd.com> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index f91772e..ddb4142 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -524,9 +524,9 @@ static int r600_get_compute_param(struct pipe_screen *screen, } if (ret) { sprintf(ret, "%s-%s", gpu, triple); - } - return (strlen(triple) + strlen(gpu)) * sizeof(char); + /* +2 for dash and terminating NIL byte */ + return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); } case PIPE_COMPUTE_CAP_GRID_DIMENSION: if (ret) { commit f1a9252defb77f3986305c6a1fc9592093b5a4e5 Author: Connor Abbott <cwabbo...@gmail.com> Date: Sun Jan 25 11:47:53 2015 -0500 nir: fix a bug with constant folding non-per-component instructions Before, we were only copying the first N channels, where N is the size of the SSA destination, which is fine for per-component instructions, but non-per-component instructions like fdot3 can have more source components than destination components. Fix this using the helper function introduced in the last patch. v2: use new helper name Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> Signed-off-by: Connor Abbott <cwabbo...@gmail.com> diff --git a/src/glsl/nir/nir_opt_constant_folding.c b/src/glsl/nir/nir_opt_constant_folding.c index f727453..85c09fc 100644 -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: https://lists.debian.org/e1ygutl-0008rc...@moszumanska.debian.org