Plamena Manolova <plamena.n.manol...@gmail.com> writes:

> Adds support for ARB_fragment_shader_interlock. We achieve
> the interlock and fragment ordering by issuing a memory fence
> via sendc.
>
> Signed-off-by: Plamena Manolova <plamena.n.manol...@gmail.com>

Reviewed-by: Francisco Jerez <curroje...@riseup.net>

> ---
>  docs/features.txt                            |  2 +-
>  docs/relnotes/18.1.0.html                    |  1 +
>  src/intel/compiler/brw_eu.h                  |  3 ++-
>  src/intel/compiler/brw_eu_defines.h          |  2 ++
>  src/intel/compiler/brw_eu_emit.c             |  7 ++++---
>  src/intel/compiler/brw_fs_generator.cpp      |  7 ++++++-
>  src/intel/compiler/brw_fs_nir.cpp            | 15 +++++++++++++++
>  src/intel/compiler/brw_shader.cpp            |  4 ++++
>  src/intel/compiler/brw_vec4_generator.cpp    |  2 +-
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
>  10 files changed, 37 insertions(+), 7 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index e786bbecf4..ed4050cf98 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -300,7 +300,7 @@ Khronos, ARB, and OES extensions that are not part of any 
> OpenGL or OpenGL ES ve
>    GL_ARB_cl_event                                       not started
>    GL_ARB_compute_variable_group_size                    DONE (nvc0, radeonsi)
>    GL_ARB_ES3_2_compatibility                            DONE (i965/gen8+)
> -  GL_ARB_fragment_shader_interlock                      not started
> +  GL_ARB_fragment_shader_interlock                      DONE (i965)
>    GL_ARB_gpu_shader_int64                               DONE (i965/gen8+, 
> nvc0, radeonsi, softpipe, llvmpipe)
>    GL_ARB_parallel_shader_compile                        not started, but 
> Chia-I Wu did some related work in 2014
>    GL_ARB_post_depth_coverage                            DONE (i965, nvc0)
> diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
> index deeb23db03..e6a1343c8b 100644
> --- a/docs/relnotes/18.1.0.html
> +++ b/docs/relnotes/18.1.0.html
> @@ -53,6 +53,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  <li>GL_EXT_shader_framebuffer_fetch_non_coherent on i965</li>
>  <li>GL_KHR_blend_equation_advanced on radeonsi</li>
>  <li>Disk shader cache support for i965 enabled by default</li>
> +<li>GL_ARB_fragment_shader_interlock on i965</li>
>  </ul>
>  
>  <h2>Bug fixes</h2>
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index 120a74f035..10c17e2fc6 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -510,7 +510,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
>  
>  void
>  brw_memory_fence(struct brw_codegen *p,
> -                 struct brw_reg dst);
> +                 struct brw_reg dst,
> +                 enum opcode send_op);
>  
>  void
>  brw_pixel_interpolator_query(struct brw_codegen *p,
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 332d627bc3..2980e98a58 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -480,6 +480,8 @@ enum opcode {
>  
>     SHADER_OPCODE_GET_BUFFER_SIZE,
>  
> +   SHADER_OPCODE_INTERLOCK,
> +
>     VEC4_OPCODE_MOV_BYTES,
>     VEC4_OPCODE_PACK_BYTES,
>     VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index ee5a048bca..6fdee1a1dc 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -3288,7 +3288,8 @@ brw_set_memory_fence_message(struct brw_codegen *p,
>  
>  void
>  brw_memory_fence(struct brw_codegen *p,
> -                 struct brw_reg dst)
> +                 struct brw_reg dst,
> +                 enum opcode send_op)
>  {
>     const struct gen_device_info *devinfo = p->devinfo;
>     const bool commit_enable =
> @@ -3304,7 +3305,7 @@ brw_memory_fence(struct brw_codegen *p,
>     /* Set dst as destination for dependency tracking, the MEMORY_FENCE
>      * message doesn't write anything back.
>      */
> -   insn = next_insn(p, BRW_OPCODE_SEND);
> +   insn = next_insn(p, send_op);
>     dst = retype(dst, BRW_REGISTER_TYPE_UW);
>     brw_set_dest(p, insn, dst);
>     brw_set_src0(p, insn, dst);
> @@ -3316,7 +3317,7 @@ brw_memory_fence(struct brw_codegen *p,
>         * flush it too.  Use a different register so both flushes can be
>         * pipelined by the hardware.
>         */
> -      insn = next_insn(p, BRW_OPCODE_SEND);
> +      insn = next_insn(p, send_op);
>        brw_set_dest(p, insn, offset(dst, 1));
>        brw_set_src0(p, insn, offset(dst, 1));
>        brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index 6d5306a0ee..f21115e34d 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -2277,7 +2277,12 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>           break;
>  
>        case SHADER_OPCODE_MEMORY_FENCE:
> -         brw_memory_fence(p, dst);
> +         brw_memory_fence(p, dst, BRW_OPCODE_SEND);
> +         break;
> +
> +      case SHADER_OPCODE_INTERLOCK:
> +         /* The interlock is basically a memory fence issued via sendc */
> +         brw_memory_fence(p, dst, BRW_OPCODE_SENDC);
>           break;
>  
>        case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index 1ce89520bf..1417b31abc 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -4825,6 +4825,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>        break;
>     }
>  
> +   case nir_intrinsic_begin_invocation_interlock: {
> +      const fs_builder ubld = bld.group(8, 0);
> +      const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> +
> +      ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 *
> +         REG_SIZE;
> +
> +      break;
> +   }
> +
> +   case nir_intrinsic_end_invocation_interlock: {
> +      /* We don't need to do anything here */
> +      break;
> +   }
> +
>     default:
>        unreachable("unknown intrinsic");
>     }
> diff --git a/src/intel/compiler/brw_shader.cpp 
> b/src/intel/compiler/brw_shader.cpp
> index 537defd05d..307072369c 100644
> --- a/src/intel/compiler/brw_shader.cpp
> +++ b/src/intel/compiler/brw_shader.cpp
> @@ -296,6 +296,9 @@ brw_instruction_name(const struct gen_device_info 
> *devinfo, enum opcode op)
>        return "typed_surface_write_logical";
>     case SHADER_OPCODE_MEMORY_FENCE:
>        return "memory_fence";
> +   case SHADER_OPCODE_INTERLOCK:
> +      /* For an interlock we actually issue a memory fence via sendc. */
> +      return "interlock";
>  
>     case SHADER_OPCODE_BYTE_SCATTERED_READ:
>        return "byte_scattered_read";
> @@ -1007,6 +1010,7 @@ backend_instruction::has_side_effects() const
>     case SHADER_OPCODE_TYPED_SURFACE_WRITE:
>     case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
>     case SHADER_OPCODE_MEMORY_FENCE:
> +   case SHADER_OPCODE_INTERLOCK:
>     case SHADER_OPCODE_URB_WRITE_SIMD8:
>     case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
>     case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
> diff --git a/src/intel/compiler/brw_vec4_generator.cpp 
> b/src/intel/compiler/brw_vec4_generator.cpp
> index 3d17ff9797..7519ccc9df 100644
> --- a/src/intel/compiler/brw_vec4_generator.cpp
> +++ b/src/intel/compiler/brw_vec4_generator.cpp
> @@ -1904,7 +1904,7 @@ generate_code(struct brw_codegen *p,
>           break;
>  
>        case SHADER_OPCODE_MEMORY_FENCE:
> -         brw_memory_fence(p, dst);
> +         brw_memory_fence(p, dst, BRW_OPCODE_SEND);
>           break;
>  
>        case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index b5860f13cb..5a16a42f4e 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -241,6 +241,7 @@ intelInitExtensions(struct gl_context *ctx)
>        ctx->Extensions.EXT_shader_samples_identical = true;
>        ctx->Extensions.OES_primitive_bounding_box = true;
>        ctx->Extensions.OES_texture_buffer = true;
> +      ctx->Extensions.ARB_fragment_shader_interlock = true;
>  
>        if (can_do_pipelined_register_writes(brw->screen)) {
>           ctx->Extensions.ARB_draw_indirect = true;
> -- 
> 2.11.0

Attachment: signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to