Plamena Manolova <plamena.n.manol...@gmail.com> writes: > Adds support for ARB_fragment_shader_interlock. We achieve > the interlock and fragment ordering by issuing a memory fence > via sendc. > > Signed-off-by: Plamena Manolova <plamena.n.manol...@gmail.com>
Reviewed-by: Francisco Jerez <curroje...@riseup.net> > --- > docs/features.txt | 2 +- > docs/relnotes/18.1.0.html | 1 + > src/intel/compiler/brw_eu.h | 3 ++- > src/intel/compiler/brw_eu_defines.h | 2 ++ > src/intel/compiler/brw_eu_emit.c | 7 ++++--- > src/intel/compiler/brw_fs_generator.cpp | 7 ++++++- > src/intel/compiler/brw_fs_nir.cpp | 15 +++++++++++++++ > src/intel/compiler/brw_shader.cpp | 4 ++++ > src/intel/compiler/brw_vec4_generator.cpp | 2 +- > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > 10 files changed, 37 insertions(+), 7 deletions(-) > > diff --git a/docs/features.txt b/docs/features.txt > index e786bbecf4..ed4050cf98 100644 > --- a/docs/features.txt > +++ b/docs/features.txt > @@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any > OpenGL or OpenGL ES ve > GL_ARB_cl_event not started > GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi) > GL_ARB_ES3_2_compatibility DONE (i965/gen8+) > - GL_ARB_fragment_shader_interlock not started > + GL_ARB_fragment_shader_interlock DONE (i965) > GL_ARB_gpu_shader_int64 DONE (i965/gen8+, > nvc0, radeonsi, softpipe, llvmpipe) > GL_ARB_parallel_shader_compile not started, but > Chia-I Wu did some related work in 2014 > GL_ARB_post_depth_coverage DONE (i965, nvc0) > diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html > index deeb23db03..e6a1343c8b 100644 > --- a/docs/relnotes/18.1.0.html > +++ b/docs/relnotes/18.1.0.html > @@ -53,6 +53,7 @@ Note: some of the new features are only available with > certain drivers. 
> <li>GL_EXT_shader_framebuffer_fetch_non_coherent on i965</li> > <li>GL_KHR_blend_equation_advanced on radeonsi</li> > <li>Disk shader cache support for i965 enabled by default</li> > +<li>GL_ARB_fragment_shader_interlock on i965</li> > </ul> > > <h2>Bug fixes</h2> > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index 120a74f035..10c17e2fc6 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -510,7 +510,8 @@ brw_byte_scattered_write(struct brw_codegen *p, > > void > brw_memory_fence(struct brw_codegen *p, > - struct brw_reg dst); > + struct brw_reg dst, > + enum opcode send_op); > > void > brw_pixel_interpolator_query(struct brw_codegen *p, > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 332d627bc3..2980e98a58 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -480,6 +480,8 @@ enum opcode { > > SHADER_OPCODE_GET_BUFFER_SIZE, > > + SHADER_OPCODE_INTERLOCK, > + > VEC4_OPCODE_MOV_BYTES, > VEC4_OPCODE_PACK_BYTES, > VEC4_OPCODE_UNPACK_UNIFORM, > diff --git a/src/intel/compiler/brw_eu_emit.c > b/src/intel/compiler/brw_eu_emit.c > index ee5a048bca..6fdee1a1dc 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p, > > void > brw_memory_fence(struct brw_codegen *p, > - struct brw_reg dst) > + struct brw_reg dst, > + enum opcode send_op) > { > const struct gen_device_info *devinfo = p->devinfo; > const bool commit_enable = > @@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p, > /* Set dst as destination for dependency tracking, the MEMORY_FENCE > * message doesn't write anything back. 
> */ > - insn = next_insn(p, BRW_OPCODE_SEND); > + insn = next_insn(p, send_op); > dst = retype(dst, BRW_REGISTER_TYPE_UW); > brw_set_dest(p, insn, dst); > brw_set_src0(p, insn, dst); > @@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p, > * flush it too. Use a different register so both flushes can be > * pipelined by the hardware. > */ > - insn = next_insn(p, BRW_OPCODE_SEND); > + insn = next_insn(p, send_op); > brw_set_dest(p, insn, offset(dst, 1)); > brw_set_src0(p, insn, offset(dst, 1)); > brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE, > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > index 6d5306a0ee..f21115e34d 100644 > --- a/src/intel/compiler/brw_fs_generator.cpp > +++ b/src/intel/compiler/brw_fs_generator.cpp > @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > break; > > case SHADER_OPCODE_MEMORY_FENCE: > - brw_memory_fence(p, dst); > + brw_memory_fence(p, dst, BRW_OPCODE_SEND); > + break; > + > + case SHADER_OPCODE_INTERLOCK: > + /* The interlock is basically a memory fence issued via sendc */ > + brw_memory_fence(p, dst, BRW_OPCODE_SENDC); > break; > > case SHADER_OPCODE_FIND_LIVE_CHANNEL: { > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 1ce89520bf..1417b31abc 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -4825,6 +4825,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr > break; > } > > + case nir_intrinsic_begin_invocation_interlock: { > + const fs_builder ubld = bld.group(8, 0); > + const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2); > + > + ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 * > + REG_SIZE; > + > + break; > + } > + > + case nir_intrinsic_end_invocation_interlock: { > + /* We don't need to do anything here */ > + break; > + } > + > default: > unreachable("unknown 
intrinsic"); > } > diff --git a/src/intel/compiler/brw_shader.cpp > b/src/intel/compiler/brw_shader.cpp > index 537defd05d..307072369c 100644 > --- a/src/intel/compiler/brw_shader.cpp > +++ b/src/intel/compiler/brw_shader.cpp > @@ -296,6 +296,9 @@ brw_instruction_name(const struct gen_device_info > *devinfo, enum opcode op) > return "typed_surface_write_logical"; > case SHADER_OPCODE_MEMORY_FENCE: > return "memory_fence"; > + case SHADER_OPCODE_INTERLOCK: > + /* For an interlock we actually issue a memory fence via sendc. */ > + return "interlock"; > > case SHADER_OPCODE_BYTE_SCATTERED_READ: > return "byte_scattered_read"; > @@ -1007,6 +1010,7 @@ backend_instruction::has_side_effects() const > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: > case SHADER_OPCODE_MEMORY_FENCE: > + case SHADER_OPCODE_INTERLOCK: > case SHADER_OPCODE_URB_WRITE_SIMD8: > case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: > case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: > diff --git a/src/intel/compiler/brw_vec4_generator.cpp > b/src/intel/compiler/brw_vec4_generator.cpp > index 3d17ff9797..7519ccc9df 100644 > --- a/src/intel/compiler/brw_vec4_generator.cpp > +++ b/src/intel/compiler/brw_vec4_generator.cpp > @@ -1904,7 +1904,7 @@ generate_code(struct brw_codegen *p, > break; > > case SHADER_OPCODE_MEMORY_FENCE: > - brw_memory_fence(p, dst); > + brw_memory_fence(p, dst, BRW_OPCODE_SEND); > break; > > case SHADER_OPCODE_FIND_LIVE_CHANNEL: { > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index b5860f13cb..5a16a42f4e 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -241,6 +241,7 @@ intelInitExtensions(struct gl_context *ctx) > ctx->Extensions.EXT_shader_samples_identical = true; > ctx->Extensions.OES_primitive_bounding_box = true; > ctx->Extensions.OES_texture_buffer = true; > + ctx->Extensions.ARB_fragment_shader_interlock = true; 
> > if (can_do_pipelined_register_writes(brw->screen)) { > ctx->Extensions.ARB_draw_indirect = true; > -- > 2.11.0
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev