[Mesa-dev] [PATCH 07/11] i965: Add support for correct GL_CLAMP behavior by clamping coordinates.

Eric Anholt Thu, 12 May 2011 21:10:31 -0700

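GL_CLAMP no longer needs a software fallback: when linear filtering is
in use, the fragment shader saturates the texture coordinates to
[0.0, 1.0] and the sampler is programmed with the clamp-to-border wrap
mode; with nearest filtering GL_CLAMP keeps using clamp-to-edge.

This removes the stupid strict-conformance fallback code I broke when
adding ARB_sampler_objects.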


Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=36572
---
 src/mesa/drivers/dri/i965/brw_draw.c             |   36 --------------------
 src/mesa/drivers/dri/i965/brw_fs.cpp             |   17 +++++++---
 src/mesa/drivers/dri/i965/brw_fs.h               |    6 ++-
 src/mesa/drivers/dri/i965/brw_wm.c               |   10 ++++++
 src/mesa/drivers/dri/i965/brw_wm.h               |    2 +-
 src/mesa/drivers/dri/i965/brw_wm_emit.c          |    5 +++
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c |   38 +++++++++++++++-------
 7 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 9ab5331..2c3c373 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -270,42 +270,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
            return GL_TRUE;
    }
 
-   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
-    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
-    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
-    * we want strict conformance, force the fallback.
-    * Right now, we only do this for 2D textures.
-    */
-   {
-      int u;
-      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
-         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-
-         if (texUnit->Enabled) {
-           struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
-
-            if (texUnit->Enabled & TEXTURE_1D_BIT) {
-               if (sampler->WrapS == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_2D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_3D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP ||
-                   sampler->WrapR == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-         }
-      }
-   }
-      
    /* Nothing stopping us from the fast path now */
    return GL_FALSE;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4ea9f2b..0d520e2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1172,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir)
 }
 
 fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler)
 {
    int mlen;
    int base_mrf = 1;
@@ -1184,7 +1185,12 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg 
dst, fs_reg coordinate)
 
    if (ir->shadow_comparitor) {
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-        emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+        fs_inst *inst = emit(BRW_OPCODE_MOV,
+                             fs_reg(MRF, base_mrf + mlen + i), coordinate);
+
+        if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+           inst->saturate = true;
+
         coordinate.reg_offset++;
       }
       /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@@ -1298,7 +1304,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, 
fs_reg coordinate)
  * surprising in the disassembly.
  */
 fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler)
 {
    int mlen = 1; /* g0 header always present. */
    int base_mrf = 1;
@@ -1457,9 +1464,9 @@ fs_visitor::visit(ir_texture *ir)
    fs_reg dst = fs_reg(this, glsl_type::vec4_type);
 
    if (intel->gen < 5) {
-      inst = emit_texture_gen4(ir, dst, coordinate);
+      inst = emit_texture_gen4(ir, dst, coordinate, sampler);
    } else {
-      inst = emit_texture_gen5(ir, dst, coordinate);
+      inst = emit_texture_gen5(ir, dst, coordinate, sampler);
    }
 
    /* If there's an offset, we already set up m1.  To avoid the implied move,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 3901207..dd63777 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -512,8 +512,10 @@ public:
    fs_reg *emit_general_interpolation(ir_variable *ir);
    void emit_interpolation_setup_gen4();
    void emit_interpolation_setup_gen6();
-   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
-   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
+   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler);
+   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+                             int sampler);
    fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
    fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 06512de..6054918 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -409,6 +409,16 @@ static void brw_wm_populate_key( struct brw_context *brw,
                          swizzles[GET_SWZ(t->_Swizzle, 1)],
                          swizzles[GET_SWZ(t->_Swizzle, 2)],
                          swizzles[GET_SWZ(t->_Swizzle, 3)]);
+
+        if (sampler->MinFilter != GL_NEAREST &&
+            sampler->MagFilter != GL_NEAREST) {
+           if (sampler->WrapS == GL_CLAMP)
+              key->gl_clamp_mask[0] |= 1 << i;
+           if (sampler->WrapT == GL_CLAMP)
+              key->gl_clamp_mask[1] |= 1 << i;
+           if (sampler->WrapR == GL_CLAMP)
+              key->gl_clamp_mask[2] |= 1 << i;
+        }
       }
       else {
          key->tex_swizzles[i] = SWIZZLE_NOOP;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h 
b/src/mesa/drivers/dri/i965/brw_wm.h
index a5f99a0..8ab531b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -71,9 +71,9 @@ struct brw_wm_prog_key {
    GLuint shadowtex_mask:16;
    GLuint yuvtex_mask:16;
    GLuint yuvtex_swap_mask:16; /* UV swaped */
+   uint16_t gl_clamp_mask[3];
 
    GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
-
    GLushort drawable_height;
    GLbitfield64 vp_outputs_written;
    GLuint iz_lookup;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c 
b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index fd4cd89..f61757a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c,
 
    /* Emit the texcoords. */
    for (i = 0; i < nr_texcoords; i++) {
+      if (c->key.gl_clamp_mask[i] & (1 << sampler))
+        brw_set_saturate(p, true);
+
       if (emit & (1<<i))
         brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
       else
         brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
       cur_mrf += mrf_per_channel;
+
+      brw_set_saturate(p, false);
    }
 
    /* Fill in the shadow comparison reference value. */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 7b93bf9..6ac02d2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -29,7 +29,7 @@
   *   Keith Whitwell <ke...@tungstengraphics.com>
   */
                    
-
+#include <stdbool.h>
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
@@ -44,19 +44,27 @@
 
 
 
-/* The brw (and related graphics cores) do not support GL_CLAMP.  The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-static GLuint translate_wrap_mode( GLenum wrap )
+static GLuint translate_wrap_mode(GLenum wrap, bool using_nearest)
 {
    switch( wrap ) {
    case GL_REPEAT: 
       return BRW_TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP;
+   case GL_CLAMP:
+      /* GL_CLAMP is the weird mode where coordinates are clamped to
+       * [0.0, 1.0], so linear filtering of coordinates outside of
+       * [0.0, 1.0] gives you half edge texel value and half border
+       * color.  The fragment shader will clamp the coordinates, and
+       * we set clamp_border here, which gets the result desired.  We
+       * just use clamp(_to_edge) for nearest, because for nearest
+       * clamping to 1.0 gives border color instead of the desired
+       * edge texels.
+       */
+      if (using_nearest)
+        return BRW_TEXCOORDMODE_CLAMP;
+      else
+        return BRW_TEXCOORDMODE_CLAMP_BORDER;
    case GL_CLAMP_TO_EDGE: 
-      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+      return BRW_TEXCOORDMODE_CLAMP;
    case GL_CLAMP_TO_BORDER: 
       return BRW_TEXCOORDMODE_CLAMP_BORDER;
    case GL_MIRRORED_REPEAT: 
@@ -151,11 +159,13 @@ static void brw_update_sampler_state(struct brw_context 
*brw,
    struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
    struct gl_texture_object *texObj = texUnit->_Current;
    struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;
 
    switch (gl_sampler->MinFilter) {
    case GL_NEAREST:
       sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
       sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
       break;
    case GL_LINEAR:
       sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@@ -196,6 +206,7 @@ static void brw_update_sampler_state(struct brw_context 
*brw,
       switch (gl_sampler->MagFilter) {
       case GL_NEAREST:
         sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+        using_nearest = true;
         break;
       case GL_LINEAR:
         sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@@ -205,9 +216,12 @@ static void brw_update_sampler_state(struct brw_context 
*brw,
       }  
    }
 
-   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+                                                 using_nearest);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+                                                 using_nearest);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+                                                 using_nearest);
 
    if (intel->gen >= 6 &&
        sampler->ss0.min_filter != sampler->ss0.mag_filter)
-- 
1.7.5.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/11] i965: Add support for correct GL_CLAMP behavior by clamping coordinates.

Reply via email to