VERSION | 2 bin/.cherry-ignore | 4 debian/changelog | 6 docs/relnotes/11.1.1.html | 3 docs/relnotes/11.1.2.html | 181 +++++++++++++++ src/egl/drivers/dri2/egl_dri2.c | 2 src/gallium/auxiliary/pipe-loader/SConscript | 9 src/gallium/auxiliary/pipe-loader/pipe_loader.c | 5 src/gallium/auxiliary/util/u_cpu_detect.c | 2 src/gallium/auxiliary/util/u_pstipple.c | 1 src/gallium/auxiliary/vl/vl_zscan.c | 7 src/gallium/auxiliary/vl/vl_zscan.h | 1 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 3 src/gallium/drivers/nouveau/nv50/nv50_surface.c | 120 ++++++++- src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 179 ++++++++++++-- src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 16 + src/gallium/drivers/r600/r600_pipe.c | 6 src/gallium/drivers/radeonsi/si_descriptors.c | 33 +- src/gallium/drivers/radeonsi/si_shader.c | 3 src/gallium/drivers/radeonsi/si_state_shaders.c | 85 ++++--- src/gallium/drivers/vc4/vc4_job.c | 11 src/gallium/state_trackers/omx/vid_dec_h264.c | 7 src/gallium/targets/dri/Android.mk | 4 src/glsl/ast_to_hir.cpp | 48 +++ src/glsl/ir.h | 7 src/glsl/link_varyings.cpp | 43 ++- src/glsl/linker.cpp | 2 src/glsl/lower_subroutine.cpp | 24 + src/mesa/drivers/common/meta.c | 12 src/mesa/drivers/common/meta.h | 2 src/mesa/drivers/common/meta_blit.c | 35 +- src/mesa/drivers/common/meta_generate_mipmap.c | 26 +- src/mesa/drivers/dri/i915/intel_buffer_objects.c | 2 src/mesa/drivers/dri/i915/intel_context.c | 2 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 2 src/mesa/drivers/dri/i965/brw_device_info.c | 2 src/mesa/drivers/dri/i965/brw_fs.cpp | 6 src/mesa/drivers/dri/i965/brw_fs.h | 4 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 16 + src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 6 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 2 src/mesa/drivers/dri/radeon/radeon_buffer_objects.c | 2 src/mesa/main/bufferobj.c 
| 2 src/mesa/main/bufferobj.h | 4 src/mesa/main/shaderapi.c | 10 src/mesa/state_tracker/st_cb_bufferobjects.c | 4 src/mesa/state_tracker/st_cb_texture.c | 10 src/mesa/state_tracker/st_gen_mipmap.c | 8 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 52 files changed, 789 insertions(+), 193 deletions(-)
New commits: commit 487e085acd106f1c208cdf5eb1ed28f15c3fd083 Author: Timo Aaltonen <tjaal...@debian.org> Date: Thu Feb 11 13:25:27 2016 +0200 release to unstable diff --git a/debian/changelog b/debian/changelog index 0ec2a1e..c9c87e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +mesa (11.1.2-1) unstable; urgency=medium + + * New upstream release. + + -- Timo Aaltonen <tjaal...@debian.org> Thu, 11 Feb 2016 13:14:08 +0200 + mesa (11.1.1-2) unstable; urgency=medium * Limit the symbols glXGetDriverConfig and glXGetScreenDriver to commit 7bcd827806b0816d61122ba3d37dd40178d96d98 Author: Emil Velikov <emil.veli...@collabora.com> Date: Thu Feb 11 00:03:22 2016 +0000 docs: add release notes for 11.1.2 Signed-off-by: Emil Velikov <emil.l.veli...@gmail.com> diff --git a/docs/relnotes/11.1.2.html b/docs/relnotes/11.1.2.html new file mode 100644 index 0000000..947dcbf --- /dev/null +++ b/docs/relnotes/11.1.2.html @@ -0,0 +1,181 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.1.2 Release Notes / February 10, 2016</h1> + +<p> +Mesa 11.1.2 is a bug fix release which fixes bugs found since the 11.1.1 release. +</p> +<p> +Mesa 11.1.2 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. 
+</p> + + +<h2>SHA256 checksums</h2> +<pre> +TBD +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93628">Bug 93628</a> - Exception: attempt to use unavailable module DRM when building MesaGL 11.1.0 on windows</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93648">Bug 93648</a> - Random lines being rendered when playing Dolphin (geometry shaders related, w/ apitrace)</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93650">Bug 93650</a> - GL_ARB_separate_shader_objects is buggy (PCSX2)</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93717">Bug 93717</a> - Meta mipmap generation can corrupt texture state</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93722">Bug 93722</a> - Segfault when compiling shader with a subroutine that takes a parameter</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93731">Bug 93731</a> - glUniformSubroutinesuiv segfaults when subroutine uniform is bound to a specific location</li> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93761">Bug 93761</a> - A conditional discard in a fragment shader causes no depth writing at all</li> + +</ul> + + +<h2>Changes</h2> + +<p>Ben Widawsky (1):</p> +<ul> + <li>i965/bxt: Fix conservative wm thread counts.</li> +</ul> + +<p>Dave Airlie (1):</p> +<ul> + <li>glsl: fix subroutine lowering reusing actual parmaters</li> +</ul> + +<p>Emil Velikov (6):</p> +<ul> + <li>docs: add sha256 checksums for 11.1.1</li> + <li>cherry-ignore: drop the i965/kbl .num_slices patch</li> + <li>i915: correctly parse/set the context flags</li> + <li>targets/dri: android: use WHOLE static libraries</li> + <li>egl/dri2: expose srgb configs when 
KHR_gl_colorspace is available</li> + <li>Update version to 11.1.2</li> +</ul> + +<p>Eric Anholt (2):</p> +<ul> + <li>vc4: Don't record the seqno of a failed job submit.</li> + <li>vc4: Throttle outstanding rendering after submission.</li> +</ul> + +<p>François Tigeot (1):</p> +<ul> + <li>gallium: Add DragonFly support</li> +</ul> + +<p>Grazvydas Ignotas (1):</p> +<ul> + <li>r600g: don't leak driver const buffers</li> +</ul> + +<p>Ian Romanick (2):</p> +<ul> + <li>meta/blit: Restore GL_DEPTH_STENCIL_TEXTURE_MODE state for GL_TEXTURE_RECTANGLE</li> + <li>meta: Use internal functions to set texture parameters</li> +</ul> + +<p>Ilia Mirkin (6):</p> +<ul> + <li>st/mesa: use surface format to generate mipmaps when available</li> + <li>glsl: always compute proper varying type, irrespective of varying packing</li> + <li>nvc0: avoid crashing when there are holes in vertex array bindings</li> + <li>nv50,nvc0: fix buffer clearing to respect engine alignment requirements</li> + <li>nv50/ir: fix false global CSE on instructions with multiple defs</li> + <li>st/mesa: treat a write as a read for range purposes</li> +</ul> + +<p>Jason Ekstrand (3):</p> +<ul> + <li>i965/vec4: Use UW type for multiply into accumulator on GEN8+</li> + <li>i965/fs/generator: Take an actual shader stage rather than a string</li> + <li>i965/fs: Always set channel 2 of texture headers in some stages</li> +</ul> + +<p>Jose Fonseca (2):</p> +<ul> + <li>scons: Conditionally use DRM module on pipe-loader.</li> + <li>pipe-loader: Fix PATH_MAX define on MSVC.</li> +</ul> + +<p>Karol Herbst (1):</p> +<ul> + <li>nv50/ir: fix memory corruption when spilling and redoing RA</li> +</ul> + +<p>Kenneth Graunke (2):</p> +<ul> + <li>glsl: Make bitfield_insert/extract and bfi/bfm non-vectorizable.</li> + <li>glsl: Allow implicit int -> uint conversions for bitwise operators (&, ^, |).</li> +</ul> + +<p>Leo Liu (2):</p> +<ul> + <li>vl: add zig zag scan for list 4x4</li> + <li>st/omx/dec/h264: fix corruption when scaling 
matrix present flag set</li> +</ul> + +<p>Marek Olšák (1):</p> +<ul> + <li>radeonsi: don't miss changes to SPI_TMPRING_SIZE</li> +</ul> + +<p>Nicolai Hähnle (11):</p> +<ul> + <li>mesa/bufferobj: make _mesa_delete_buffer_object externally accessible</li> + <li>st/mesa: use _mesa_delete_buffer_object</li> + <li>radeon: use _mesa_delete_buffer_object</li> + <li>i915: use _mesa_delete_buffer_object</li> + <li>i965: use _mesa_delete_buffer_object</li> + <li>util/u_pstipple.c: copy immediates during transformation</li> + <li>radeonsi: extract the VGT_GS_MODE calculation into its own function</li> + <li>radeonsi: ensure that VGT_GS_MODE is sent when necessary</li> + <li>radeonsi: add DCC buffer for sampler views on new CS</li> + <li>st/mesa: use the correct address generation functions in st_TexSubImage blit</li> + <li>radeonsi: fix discard-only fragment shaders (11.1 version)</li> +</ul> + +<p>Timothy Arceri (4):</p> +<ul> + <li>glsl: fix segfault linking subroutine uniform with explicit location</li> + <li>mesa: fix segfault in glUniformSubroutinesuiv()</li> + <li>glsl: fix interface block error message</li> + <li>glsl: create helper to remove outer vertex index array used by some stages</li> +</ul> + + +</div> +</body> +</html> commit ac65994a8ed4b5cf369a094e21876a08cf63dda7 Author: Emil Velikov <emil.veli...@collabora.com> Date: Wed Feb 10 23:59:40 2016 +0000 Update version to 11.1.2 Signed-off-by: Emil Velikov <emil.veli...@collabora.com> diff --git a/VERSION b/VERSION index 668182d..e9ac13b 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -11.1.1 +11.1.2 commit 185ee6b612aca567dfaa31ae1be93e3385af4be4 Author: Nicolai Hähnle <nicolai.haeh...@amd.com> Date: Tue Jan 19 15:18:31 2016 -0500 radeonsi: fix discard-only fragment shaders (11.1 version) When a fragment shader is used that has no outputs but does conditional discard (KILL_IF), all fragments are killed without this patch. 
By comparing various register settings, my conclusion is that the exec mask is either not properly forwarded to the DB by NULL exports or ends up being unused, at least when there is _only_ a NULL export (the ISA documentation claims that NULL exports can be used to override a previously exported exec mask). Of the various approaches I have tried to work around the problem, this one seems to be the least invasive one. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93761 Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Marek Olšák <marek.ol...@amd.com> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1baa2eb..1c1aaa0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2291,6 +2291,9 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base) last_args[6]= uint->zero; last_args[7]= uint->zero; last_args[8]= uint->zero; + + if (info->uses_kill) + si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_R; } /* Specify whether the EXEC mask represents the valid mask */ commit 25fc54992e96886d71e5f266069ca21ac4828a4b Author: Nicolai Hähnle <nicolai.haeh...@amd.com> Date: Sat Jan 16 15:15:13 2016 -0500 st/mesa: use the correct address generation functions in st_TexSubImage blit We need to tell the address generation functions about the dimensionality of the texture to correctly implement the part of Section 3.8.1 (Texture Image Specification) of the OpenGL 2.1 specification which says: "For the purposes of decoding the texture image, TexImage2D is equivalent to calling TexImage3D with corresponding arguments and depth of 1, except that ... * UNPACK_SKIP_IMAGES is ignored." Fixes a low impact bug that was found by chance while browsing the spec and extending piglit tests. 
Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Edward O'Callaghan <eocallag...@alterapraxis.com> (cherry picked from commit 4a448a63adbbece1d9bddacd9428aad7cc68a628) diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 62f149a..a76410f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -805,18 +805,18 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims, /* 1D array textures. * We need to convert gallium coords to GL coords. */ - GLvoid *src = _mesa_image_address3d(unpack, pixels, + GLvoid *src = _mesa_image_address2d(unpack, pixels, width, depth, format, - type, 0, slice, 0); + type, slice, 0); memcpy(map, src, bytesPerRow); } else { ubyte *slice_map = map; for (row = 0; row < (unsigned) height; row++) { - GLvoid *src = _mesa_image_address3d(unpack, pixels, - width, height, format, - type, slice, row, 0); + GLvoid *src = _mesa_image_address(dims, unpack, pixels, + width, height, format, + type, slice, row, 0); memcpy(slice_map, src, bytesPerRow); slice_map += transfer->stride; } commit 37aed859693d5eee5e108d09deda249478cc07ec Author: Leo Liu <leo....@amd.com> Date: Mon Feb 1 13:32:31 2016 -0500 st/omx/dec/h264: fix corruption when scaling matrix present flag set The scaling list should be filled out with zig zag scan v2: integrate zig zag scan for list 4x4 to vl(Christian) v3: move list determination out from the loop(Ilia) Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org> Signed-off-by: Leo Liu <leo....@amd.com> Reviewed-by: Christian König <christian.koe...@amd.com> (cherry picked from commit 6ad2e55a1405ac3757439dae55ed86425bb65806) diff --git a/src/gallium/state_trackers/omx/vid_dec_h264.c b/src/gallium/state_trackers/omx/vid_dec_h264.c index f66ed89..b453682 100644 --- a/src/gallium/state_trackers/omx/vid_dec_h264.c +++ b/src/gallium/state_trackers/omx/vid_dec_h264.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_video.h" #include 
"vl/vl_rbsp.h" +#include "vl/vl_zscan.h" #include "entrypoint.h" #include "vid_dec.h" @@ -205,6 +206,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si const uint8_t *defaultList, const uint8_t *fallbackList) { unsigned lastScale = 8, nextScale = 8; + const int *list; unsigned i; /* (pic|seq)_scaling_list_present_flag[i] */ @@ -214,6 +216,7 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si return; } + list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal; for (i = 0; i < sizeOfScalingList; ++i ) { if (nextScale != 0) { @@ -224,8 +227,8 @@ static void scaling_list(struct vl_rbsp *rbsp, uint8_t *scalingList, unsigned si return; } } - scalingList[i] = nextScale == 0 ? lastScale : nextScale; - lastScale = scalingList[i]; + scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale; + lastScale = scalingList[list[i]]; } } commit 3adf11182110f0e623c173d0658ae08b1012d6f6 Author: Leo Liu <leo....@amd.com> Date: Mon Feb 1 12:04:34 2016 -0500 vl: add zig zag scan for list 4x4 Cc: "11.0 11.1" <mesa-sta...@lists.freedesktop.org> Signed-off-by: Leo Liu <leo....@amd.com> Reviewed-by: Christian König <christian.koe...@amd.com> (cherry picked from commit 4f598f2173c6555a52aad942ce6ea75c65afe21a) diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c index 1c6cdd4..5241471 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.c +++ b/src/gallium/auxiliary/vl/vl_zscan.c @@ -49,6 +49,13 @@ enum VS_OUTPUT VS_O_VTEX = 0 }; +const int vl_zscan_normal_16[] = +{ + /* Zig-Zag scan pattern */ + 0, 1, 4, 8, 5, 2, 3, 6, + 9,12,13,10, 7,11,14,15 +}; + const int vl_zscan_linear[] = { /* Linear scan pattern */ diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h index eacee2d..268cf0a 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.h +++ b/src/gallium/auxiliary/vl/vl_zscan.h @@ -64,6 +64,7 @@ struct vl_zscan_buffer struct pipe_surface *dst; }; 
+extern const int vl_zscan_normal_16[]; extern const int vl_zscan_linear[]; extern const int vl_zscan_normal[]; extern const int vl_zscan_alternate[]; commit f5f021ecc599fae8b668da76cd68a2b0e8c68cb2 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Fri Jan 29 14:45:38 2016 -0500 st/mesa: treat a write as a read for range purposes We use this logic to detect live ranges and then do plain renaming across the whole codebase. As such, to prevent WaW hazards, we have to treat a write as if it were also a read. For example, the following sequence was observed before this patch: 13: UIF TEMP[6].xxxx :0 14: ADD TEMP[6].x, CONST[6].xxxx, -IN[3].yyyy 15: RCP TEMP[7].x, TEMP[3].xxxx 16: MUL TEMP[3].x, TEMP[6].xxxx, TEMP[7].xxxx 17: ADD TEMP[6].x, CONST[7].xxxx, -IN[3].yyyy 18: RCP TEMP[7].x, TEMP[3].xxxx 19: MUL TEMP[4].x, TEMP[6].xxxx, TEMP[7].xxxx While after this patch it becomes: 13: UIF TEMP[7].xxxx :0 14: ADD TEMP[7].x, CONST[6].xxxx, -IN[3].yyyy 15: RCP TEMP[8].x, TEMP[3].xxxx 16: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[8].xxxx 17: ADD TEMP[7].x, CONST[7].xxxx, -IN[3].yyyy 18: RCP TEMP[8].x, TEMP[3].xxxx 19: MUL TEMP[5].x, TEMP[7].xxxx, TEMP[8].xxxx Most importantly note that in the first example, the second RCP is done on the result of the MUL while in the second, the second RCP should have the same value as the first. Looking at the GLSL source, it is apparent that both of the RCP's should have had the same source. Looking at what's going on, the GLSL looks something like float tmin_8; float tmin_10; tmin_10 = tmin_8; ... lots of code ... tmin_8 = tmpvar_17; ... more code that never looks at tmin_8 ... And so we end up with a last_read somewhere at the beginning, and a first_write somewhere at the bottom. For some reason DCE doesn't remove it, but even if that were fixed, DCE doesn't handle 100% of cases, esp including loops. 
With the last_read somewhere high up, we overwrite the previously correct (and large) last_read with a low one, and then proceed to decide to merge all kinds of junk onto this temp. Even if that weren't the case, and there were just some writes after the last read, then we might still overwrite a merged value with one of those. As a result, we should treat a write as a last_read for the purpose of determining the live range. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Reviewed-by: Dave Airlie <airl...@redhat.com> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 047b91771845453826dcdd0019adc7333348b158) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index a32c4cf..a21e229 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -3776,9 +3776,11 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int * last_reads[inst->src[j].index] = (depth == 0) ? i : -2; } for (j = 0; j < num_inst_dst_regs(inst); j++) { - if (inst->dst[j].file == PROGRAM_TEMPORARY) + if (inst->dst[j].file == PROGRAM_TEMPORARY) { if (first_writes[inst->dst[j].index] == -1) first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; + last_reads[inst->dst[j].index] = (depth == 0) ? i : -2; + } } for (j = 0; j < inst->tex_offset_num_offset; j++) { if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) @@ -4292,6 +4294,7 @@ glsl_to_tgsi_visitor::merge_registers(void) /* Update the first_writes and last_reads arrays with the new * values for the merged register index, and mark the newly unused * register index as such. 
*/ + assert(last_reads[j] >= last_reads[i]); last_reads[i] = last_reads[j]; first_writes[j] = -1; last_reads[j] = -1; commit 3ef2a4bb2eb4cec7ee00a7f78da7edc24ceb2a43 Author: François Tigeot <ftig...@wolfpond.org> Date: Sun Jan 17 10:10:21 2016 +0100 gallium: Add DragonFly support Cc: mesa-sta...@lists.freedesktop.org Signed-off-by: Emil Velikov <emil.l.veli...@gmail.com> (cherry picked from commit a48afb92ffda6e149c553ec82a05fee9a17441f8) diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c index d1f9e97..51f2766 100644 --- a/src/gallium/auxiliary/util/u_cpu_detect.c +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -52,7 +52,7 @@ #include <machine/cpu.h> #endif -#if defined(PIPE_OS_FREEBSD) +#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY) #include <sys/types.h> #include <sys/sysctl.h> #endif commit 12888ad942a3e5580d153ed37bbcb4e48ced6a04 Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Sat Jan 30 17:13:33 2016 -0500 nv50/ir: fix false global CSE on instructions with multiple defs If an instruction has multiple defs, we have to do a lot more checks to make sure that we can move it forward. Among other things, various code likes to do a, b = tex() if () c = a else c = b which means that a single phi node will have results pointing at the same instruction. We obviously can't propagate the tex in this case, but properly accounting for this situation is tricky. Just don't try for instructions with multiple defs. This fixes about 20 shaders in shader-db, including the dolphin efb2ram shader. 
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 3ca941d60ed38800038cd545842e0ed3a69946da) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 8a2516b..881836e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2814,6 +2814,8 @@ GlobalCSE::visit(BasicBlock *bb) ik = phi->getSrc(0)->getInsn(); if (!ik) continue; // probably a function input + if (ik->defCount(0xff) > 1) + continue; // too painful to check if we can really push this forward for (s = 1; phi->srcExists(s); ++s) { if (phi->getSrc(s)->refCount() > 1) break; commit 0f7d3d661d1cc3a0f0c57468bc4a378cacf88b0b Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Sat Jan 30 10:02:43 2016 -0500 nv50,nvc0: fix buffer clearing to respect engine alignment requirements It appears that the nvidia render engine is quite picky when it comes to linear surfaces. It doesn't like non-256-byte aligned offsets, and apparently doesn't even do non-256-byte strides. This makes arb_clear_buffer_object-unaligned pass on both nv50 and nvc0. As a side-effect this also allows RGB32 clears to work via GPU data upload instead of synchronizing the buffer to the CPU (nvc0 only). 
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> # tested on GF108, GT215 Tested-by: Nick Sarnie <commendsar...@gmail.com> # GK208 Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 3ca2001b537a2709e7ef60410e7dfad5d38663f4) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 8ba19d2..58bbcd0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -592,6 +592,82 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers, } static void +nv50_clear_buffer_push(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv04_resource *buf = nv04_resource(res); + unsigned count = (size + 3) / 4; + unsigned xcoord = offset & 0xff; + unsigned tmp, i; + + if (data_size == 1) { + tmp = *(unsigned char *)data; + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; + data = &tmp; + data_size = 4; + } else if (data_size == 2) { + tmp = *(unsigned short *)data; + tmp = (tmp << 16) | tmp; + data = &tmp; + data_size = 4; + } + + unsigned data_words = data_size / 4; + + nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nv50->bufctx); + nouveau_pushbuf_validate(push); + + offset &= ~0xff; + + BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); + PUSH_DATA (push, 262144); + PUSH_DATA (push, 65536); + PUSH_DATA (push, 1); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); + PUSH_DATA (push, 0); + PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); + PUSH_DATA (push, size); + PUSH_DATA 
(push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, 1); + PUSH_DATA (push, 0); + PUSH_DATA (push, xcoord); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + + while (count) { + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words; + unsigned nr = nr_data * data_words; + + BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + } + + if (buf->mm) { + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nv50->bufctx, 0); +} + +static void nv50_clear_buffer(struct pipe_context *pipe, struct pipe_resource *res, unsigned offset, unsigned size, @@ -640,9 +716,22 @@ nv50_clear_buffer(struct pipe_context *pipe, assert(size % data_size == 0); + if (offset & 0xff) { + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); + assert(fixup_size % data_size == 0); + nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size); + offset += fixup_size; + size -= fixup_size; + if (!size) + return; + } + elements = size / data_size; height = (elements + 8191) / 8192; width = elements / height; + if (height > 1) + width &= ~0xff; + assert(width > 0); BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4); PUSH_DATAf(push, color.f[0]); @@ -666,13 +755,13 @@ nv50_clear_buffer(struct pipe_context *pipe, BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5); - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); PUSH_DATA (push, nv50_format_table[dst_fmt].rt); PUSH_DATA (push, 0); PUSH_DATA (push, 0); BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); + PUSH_DATA 
(push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100)); PUSH_DATA (push, height); BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -691,26 +780,21 @@ nv50_clear_buffer(struct pipe_context *pipe, BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); PUSH_DATA (push, 0x3c); + BEGIN_NV04(push, NV50_3D(COND_MODE), 1); + PUSH_DATA (push, nv50->cond_condmode); + + if (buf->mm) { + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); + } + if (width * height != elements) { offset += width * height * data_size; width = elements - width * height; - height = 1; - BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2); - PUSH_DATAh(push, buf->bo->offset + buf->offset + offset); - PUSH_DATA (push, buf->bo->offset + buf->offset + offset); - BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2); - PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size)); - PUSH_DATA (push, height); - BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1); - PUSH_DATA (push, 0x3c); + nv50_clear_buffer_push(pipe, res, offset, width * data_size, + data, data_size); } - BEGIN_NV04(push, NV50_3D(COND_MODE), 1); - PUSH_DATA (push, nv50->cond_condmode); - - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence); - nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr); - nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR; } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index aa3e4f2..5b3a153 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -355,27 +355,132 @@ nvc0_clear_render_target(struct pipe_context *pipe, } static void -nvc0_clear_buffer_cpu(struct pipe_context *pipe, - struct pipe_resource *res, - unsigned offset, unsigned size, - const void *data, int data_size) +nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned 
offset, unsigned size, + const void *data, int data_size) { + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nv04_resource *buf = nv04_resource(res); - struct pipe_transfer *pt; - struct pipe_box box; - unsigned elements, i; + unsigned i; - elements = size / data_size; + nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); - u_box_1d(offset, size, &box); + unsigned count = (size + 3) / 4; + unsigned data_words = data_size / 4; - uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE, - &box, &pt); + while (count) { + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words; + unsigned nr = nr_data * data_words; - for (i = 0; i < elements; ++i) - memcpy(&map[i*data_size], data, data_size); + if (!PUSH_SPACE(push, nr + 9)) + break; - buf->vtbl->transfer_unmap(pipe, pt); + BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1); + PUSH_DATA (push, 0x100111); + + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_NIC0(push, NVC0_M2MF(DATA), nr); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + offset += nr * 4; + size -= nr * 4; + } + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_clear_buffer_push_nve4(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf 
*push = nvc0->base.pushbuf; + struct nv04_resource *buf = nv04_resource(res); + unsigned i; + + nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR); + nouveau_pushbuf_bufctx(push, nvc0->bufctx); + nouveau_pushbuf_validate(push); + + unsigned count = (size + 3) / 4; + unsigned data_words = data_size / 4; + + while (count) { + unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words; + unsigned nr = nr_data * data_words; + + if (!PUSH_SPACE(push, nr + 10)) + break; + + BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); + BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2); + PUSH_DATA (push, MIN2(size, nr * 4)); + PUSH_DATA (push, 1); + /* must not be interrupted (trap on QUERY fence, 0x50 works however) */ + BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1); + PUSH_DATA (push, 0x1001); + for (i = 0; i < nr_data; i++) + PUSH_DATAp(push, data, data_words); + + count -= nr; + offset += nr * 4; + size -= nr * 4; + } + + if (buf->mm) { + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence); + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr); + } + + nouveau_bufctx_reset(nvc0->bufctx, 0); +} + +static void +nvc0_clear_buffer_push(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + unsigned tmp; + + if (data_size == 1) { + tmp = *(unsigned char *)data; + tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp; + data = &tmp; + data_size = 4; + } else if (data_size == 2) { + tmp = *(unsigned short *)data; + tmp = (tmp << 16) | tmp; + data = &tmp; + data_size = 4; + } + + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) + nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size); + else + nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size); } static void @@ -400,10 
+505,8 @@ nvc0_clear_buffer(struct pipe_context *pipe, memcpy(&color.ui, data, 16); break; case 12: - /* This doesn't work, RGB32 is not a valid RT format. - * dst_fmt = PIPE_FORMAT_R32G32B32_UINT; - * memcpy(&color.ui, data, 12); - * memset(&color.ui[3], 0, 4); + /* RGB32 is not a valid RT format. This will be handled by the pushbuf + * uploader. */ break; case 8: @@ -435,14 +538,26 @@ nvc0_clear_buffer(struct pipe_context *pipe, assert(size % data_size == 0); if (data_size == 12) { - /* TODO: Find a way to do this with the GPU! */ - nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size); + nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size); return; } + if (offset & 0xff) { + unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset); + assert(fixup_size % data_size == 0); + nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size); + offset += fixup_size; + size -= fixup_size; + if (!size) + return; + } + elements = size / data_size; height = (elements + 16383) / 16384; width = elements / height; + if (height > 1)