Am 11.02.2014 02:43, schrieb Dave Airlie: > From: Dave Airlie <airl...@redhat.com> > > This adds support to gallium for a TG4 instruction, > and two CAPs. The first CAP is required for GL_ARB_texture_gather. > > The second CAP is required to expose GL_ARB_gpu_shader5. > > However so far we haven't found any hardware that natively > exposes the textureGatherOffsets feature from GL, so just > lower it for now. If hardware appears for this we can add > another CAP to allow TG4 to take 4 offsets. > > v2: add component selection src and a cap to say > hw can do it. (st can use to help control > GL_ARB_gpu_shader5/GLSL 4.00). Add docs. > > v3: rename to SM5, add docs. > > Signed-off-by: Dave Airlie <airl...@redhat.com> > --- > src/gallium/auxiliary/tgsi/tgsi_info.c | 1 + > src/gallium/docs/source/screen.rst | 6 ++++ > src/gallium/docs/source/tgsi.rst | 36 > ++++++++++++++++++++++++ > src/gallium/drivers/freedreno/freedreno_screen.c | 2 ++ > src/gallium/drivers/i915/i915_screen.c | 2 ++ > src/gallium/drivers/ilo/ilo_screen.c | 2 ++ > src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++ > src/gallium/drivers/nouveau/nv30/nv30_screen.c | 2 ++ > src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 ++ > src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 ++ > src/gallium/drivers/r300/r300_screen.c | 2 ++ > src/gallium/drivers/r600/r600_pipe.c | 2 ++ > src/gallium/drivers/radeonsi/si_pipe.c | 2 ++ > src/gallium/drivers/softpipe/sp_screen.c | 2 ++ > src/gallium/drivers/svga/svga_screen.c | 2 ++ > src/gallium/include/pipe/p_defines.h | 4 ++- > src/gallium/include/pipe/p_shader_tokens.h | 4 ++- > 17 files changed, 73 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c > b/src/gallium/auxiliary/tgsi/tgsi_info.c > index f993600..565f274 100644 > --- a/src/gallium/auxiliary/tgsi/tgsi_info.c > +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c > @@ -221,6 +221,7 @@ static const struct tgsi_opcode_info > opcode_info[TGSI_OPCODE_LAST] = > { 1, 3, 1, 0, 0, 0, OTHR, 
"TXL2", TGSI_OPCODE_TXL2 }, > { 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI }, > { 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI }, > + { 1, 3, 1, 0, 0, 0, OTHR, "TG4", TGSI_OPCODE_TG4 }, > }; > > const struct tgsi_opcode_info * > diff --git a/src/gallium/docs/source/screen.rst > b/src/gallium/docs/source/screen.rst > index bd553f4..6b5a195 100644 > --- a/src/gallium/docs/source/screen.rst > +++ b/src/gallium/docs/source/screen.rst > @@ -182,6 +182,12 @@ The integer capabilities: > vertex components output by a single invocation of a geometry shader. > This is the product of the number of attribute components per vertex and > the number of output vertices. > +* ``PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS``: Max number of components > + in format that texture gather can operate on. 1 == RED, ALPHA etc, > + 4 == All formats. > +* ``PIPE_CAP_TEXTURE_GATHER_SM5``: Whether the texture gather > + hardware implements the SM5 features, component selection, > + shadow comparison, and run-time offsets. > > > .. _pipe_capf: > diff --git a/src/gallium/docs/source/tgsi.rst > b/src/gallium/docs/source/tgsi.rst > index be42572..03c5df8 100644 > --- a/src/gallium/docs/source/tgsi.rst > +++ b/src/gallium/docs/source/tgsi.rst > @@ -986,6 +986,42 @@ XXX doesn't look like most of the opcodes really belong > here. > > dst.z = texture_depth(unit, lod) > > +.. opcode:: TG4 - Texture Gather (as per ARB_texture_gather) > + Gathers the four texels to be used in a bi-linear > + filtering operation and packs them into a single register. > + Only works with 2D, 2D array, cubemaps, and cubemaps arrays. > + For 2D textures, only the addressing modes of the sampler and > + the top level of any mip pyramid are used. Set W to zero. > + It behaves like the TEX instruction, but a filtered > + sample is not generated. 
The four samples that contribute > + to filtering are placed into xyzw in clockwise order, > + starting with the (u,v) texture coordinate delta at the > + following locations (-, +), (+, +), (+, -), (-, -), where > + the magnitude of the deltas are half a texel. > + > + PIPE_CAP_TEXTURE_SM5 enhances this instruction to support > + shadow per-sample depth compares, single component selection, > + and a non-constant offset. It doesn't allow support for the > + GL independent offset to get i0,j0. This would require another > + CAP is hw can do it natively. For now we lower that before > + TGSI. > + > +.. math:: > + > + coord = src0 > + > + component = src1 > + > + dst = texture_gather4 (unit, coord, component) > + > +(with SM5 - cube array shadow) > + > + coord = src0 > + > + compare = src1 > + > + dst = texture_gather (uint, coord, compare) > + So how does component selection work with the latter version? I think it would be nice if you didn't really need two versions (so if you don't support comparisons, the src would just be unused).
> > Integer ISA > ^^^^^^^^^^^^^^^^^^^^^^^^ > diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c > b/src/gallium/drivers/freedreno/freedreno_screen.c > index e1b5dae..c53300f 100644 > --- a/src/gallium/drivers/freedreno/freedreno_screen.c > +++ b/src/gallium/drivers/freedreno/freedreno_screen.c > @@ -203,6 +203,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum > pipe_cap param) > case PIPE_CAP_QUERY_PIPELINE_STATISTICS: > case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > > /* Stream output. */ > diff --git a/src/gallium/drivers/i915/i915_screen.c > b/src/gallium/drivers/i915/i915_screen.c > index 9f08f86..c5d2888 100644 > --- a/src/gallium/drivers/i915/i915_screen.c > +++ b/src/gallium/drivers/i915/i915_screen.c > @@ -216,6 +216,8 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap > cap) > case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: > case PIPE_CAP_TGSI_TEXCOORD: > case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > > case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: > diff --git a/src/gallium/drivers/ilo/ilo_screen.c > b/src/gallium/drivers/ilo/ilo_screen.c > index 9c363ac..2551664 100644 > --- a/src/gallium/drivers/ilo/ilo_screen.c > +++ b/src/gallium/drivers/ilo/ilo_screen.c > @@ -433,6 +433,8 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap > param) > case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: > return true; > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > > default: > diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c > b/src/gallium/drivers/llvmpipe/lp_screen.c > index 43142e7..4ab1949 100644 > --- a/src/gallium/drivers/llvmpipe/lp_screen.c > +++ b/src/gallium/drivers/llvmpipe/lp_screen.c > @@ -235,6 +235,8 @@ 
llvmpipe_get_param(struct pipe_screen *screen, enum > pipe_cap param) > case PIPE_CAP_ENDIANNESS: > return PIPE_ENDIAN_NATIVE; > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > } > /* should only get here on unhandled cases */ > diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c > b/src/gallium/drivers/nouveau/nv30/nv30_screen.c > index 8eee06b..84596b3 100644 > --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c > +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c > @@ -126,6 +126,8 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum > pipe_cap param) > case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: > case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: > case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c > b/src/gallium/drivers/nouveau/nv50/nv50_screen.c > index e636bf8..a59ca62 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c > @@ -196,6 +196,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum > pipe_cap param) > case PIPE_CAP_ENDIANNESS: > return PIPE_ENDIAN_LITTLE; > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > default: > NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > index f84c41b..a2e459b 100644 > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c > @@ -174,6 +174,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum > pipe_cap param) > case PIPE_CAP_ENDIANNESS: > return PIPE_ENDIAN_LITTLE; > case 
PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > default: > NOUVEAU_ERR("unknown PIPE_CAP %d\n", param); > diff --git a/src/gallium/drivers/r300/r300_screen.c > b/src/gallium/drivers/r300/r300_screen.c > index fcb01e8..26d73e4 100644 > --- a/src/gallium/drivers/r300/r300_screen.c > +++ b/src/gallium/drivers/r300/r300_screen.c > @@ -166,6 +166,8 @@ static int r300_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: > case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > > /* SWTCL-only features. */ > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index d9b4509..c488ce8 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -399,6 +399,8 @@ static int r600_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: > case PIPE_CAP_VERTEX_COLOR_CLAMPED: > case PIPE_CAP_USER_VERTEX_BUFFERS: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > > /* Stream output. 
*/ > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index c64621b..c69be58 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -285,6 +285,8 @@ static int si_get_param(struct pipe_screen* pscreen, enum > pipe_cap param) > case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: > case PIPE_CAP_USER_VERTEX_BUFFERS: > case PIPE_CAP_CUBE_MAP_ARRAY: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > > case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: > diff --git a/src/gallium/drivers/softpipe/sp_screen.c > b/src/gallium/drivers/softpipe/sp_screen.c > index 147196e..cef2a34 100644 > --- a/src/gallium/drivers/softpipe/sp_screen.c > +++ b/src/gallium/drivers/softpipe/sp_screen.c > @@ -187,6 +187,8 @@ softpipe_get_param(struct pipe_screen *screen, enum > pipe_cap param) > case PIPE_CAP_ENDIANNESS: > return PIPE_ENDIAN_NATIVE; > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > } > /* should only get here on unhandled cases */ > diff --git a/src/gallium/drivers/svga/svga_screen.c > b/src/gallium/drivers/svga/svga_screen.c > index d5ae69a..ae2623d 100644 > --- a/src/gallium/drivers/svga/svga_screen.c > +++ b/src/gallium/drivers/svga/svga_screen.c > @@ -266,6 +266,8 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap > param) > case PIPE_CAP_QUERY_PIPELINE_STATISTICS: > case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: > case PIPE_CAP_TGSI_VS_LAYER: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > + case PIPE_CAP_TEXTURE_GATHER_SM5: > return 0; > case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: > return 64; > diff --git a/src/gallium/include/pipe/p_defines.h > b/src/gallium/include/pipe/p_defines.h > index 83815cd..764c248 100644 > --- a/src/gallium/include/pipe/p_defines.h > +++ b/src/gallium/include/pipe/p_defines.h > @@ -522,7 +522,9 @@ enum pipe_cap { > 
PIPE_CAP_MIXED_FRAMEBUFFER_SIZES = 86, > PIPE_CAP_TGSI_VS_LAYER = 87, > PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES = 88, > - PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS = 89 > + PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS = 89, > + PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 90, > + PIPE_CAP_TEXTURE_GATHER_SM5 = 91 > }; > > #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) > diff --git a/src/gallium/include/pipe/p_shader_tokens.h > b/src/gallium/include/pipe/p_shader_tokens.h > index 8750bd2..8fa6a0a 100644 > --- a/src/gallium/include/pipe/p_shader_tokens.h > +++ b/src/gallium/include/pipe/p_shader_tokens.h > @@ -453,7 +453,9 @@ struct tgsi_property_data { > #define TGSI_OPCODE_IMUL_HI 180 > #define TGSI_OPCODE_UMUL_HI 181 > > -#define TGSI_OPCODE_LAST 182 > +#define TGSI_OPCODE_TG4 182 > + > +#define TGSI_OPCODE_LAST 183 > > #define TGSI_SAT_NONE 0 /* do not saturate */ > #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ > Also, FWIW, for llvmpipe you'd probably want a native 4-offsets version; I don't think LLVM could completely eliminate the huge amount of duplicated code if you generate 4 texture lookups. Of course, someone would need to implement it first (shouldn't be too difficult). Roland _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev