Connor Abbott <cwabbo...@gmail.com> writes:

> sources with file == HW_REG get all their information from the
> fixed_hw_reg field, so we need to get the stride and type from there
> when computing the size.
>
> Signed-off-by: Connor Abbott <connor.w.abb...@intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 24 ++++++++++++++++++------
>  1 file changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 38b9095..64f093b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -696,24 +696,36 @@ fs_inst::regs_read(int arg) const
>        break;
>     }
>  
> +   unsigned stride;
> +   enum brw_reg_type type;
> +
>     switch (src[arg].file) {
>     case BAD_FILE:
>     case UNIFORM:
>     case IMM:
>        return 1;
> +
>     case GRF:
> +      stride = src[arg].stride;
> +      type = src[arg].type;
> +      break;
> +
>     case HW_REG:
> -      if (src[arg].stride == 0) {
> -         return 1;
> -      } else {
> -         int size = components * this->exec_size * type_sz(src[arg].type);
> -         return DIV_ROUND_UP(size * src[arg].stride, 32);
> -      }
> +      stride = src[arg].fixed_hw_reg.hstride;
> +      type = src[arg].fixed_hw_reg.type;
> +      break;
> +
>     case MRF:
>        unreachable("MRF registers are not allowed as sources");
>     default:
>        unreachable("Invalid register file");
>     }
> +
> +   if (stride == 0)
> +      return 1;
> +
> +   int size = components * this->exec_size * type_sz(type);
> +   return DIV_ROUND_UP(size * stride, 32);

I don't think this will work unfortunately, brw_reg::hstride is the log2
of the actual stride, unlike fs_reg::stride.  Did I already mention I'm
appalled by the fact that fs_reg has a number of fields with overlapping
semantics but different representation, one or the other being
applicable depending on the occasion.  I guess it would be more or less
bearable if these data members were declared private and some reasonable
abstraction was provided to access them.

How do you like the attached patch?  It doesn't solve the fundamental
problem but it seems to improve the situation slightly.

>  }
>  
>  bool
> -- 
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

From f09181eadd3ff1cd10f1afeee13e6c4bb86caa91 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <curroje...@riseup.net>
Date: Tue, 14 Jul 2015 15:43:44 +0300
Subject: [PATCH] i965/fs: Factor out universally broken calculation of the
 register component size.

This in principle simple calculation was being open-coded in a number
of places (in a series I haven't yet sent for review there will be a
couple more), all of them were subtly broken in one way or another:
None of them were handling the HW_REG case correctly as pointed out by
Connor, and fs_inst::regs_read() was handling the stride=0 case rather
naively.  This patch solves both problems and factors out the
calculation as a new fs_reg method.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 21 +++++++++++++--------
 src/mesa/drivers/dri/i965/brw_fs.h             |  6 +++---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_ir_fs.h          |  6 ++++++
 4 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index feb4c6c..9f15560 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -78,8 +78,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
    case HW_REG:
    case MRF:
    case ATTR:
-      this->regs_written =
-         DIV_ROUND_UP(MAX2(exec_size * dst.stride, 1) * type_sz(dst.type), 32);
+      this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
+                                        REG_SIZE);
       break;
    case BAD_FILE:
       this->regs_written = 0;
@@ -443,6 +443,15 @@ fs_reg::is_contiguous() const
    return stride == 1;
 }
 
+unsigned
+fs_reg::component_size(unsigned width) const
+{
+   const unsigned stride = (file != HW_REG ? this->stride :
+                            fixed_hw_reg.hstride == 0 ? 0 :
+                            1 << (fixed_hw_reg.hstride - 1));
+   return MAX2(width * stride, 1) * type_sz(type);
+}
+
 int
 fs_visitor::type_size(const struct glsl_type *type)
 {
@@ -703,12 +712,8 @@ fs_inst::regs_read(int arg) const
       return 1;
    case GRF:
    case HW_REG:
-      if (src[arg].stride == 0) {
-         return 1;
-      } else {
-         int size = components * this->exec_size * type_sz(src[arg].type);
-         return DIV_ROUND_UP(size * src[arg].stride, 32);
-      }
+      return DIV_ROUND_UP(components * src[arg].component_size(exec_size),
+                          REG_SIZE);
    case MRF:
       unreachable("MRF registers are not allowed as sources");
    default:
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 5bc7f08..83d5952 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -70,14 +70,14 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
       break;
    case GRF:
    case MRF:
+   case HW_REG:
    case ATTR:
       return byte_offset(reg,
-                         delta * MAX2(bld.dispatch_width() * reg.stride, 1) *
-                         type_sz(reg.type));
+                         delta * reg.component_size(bld.dispatch_width()));
    case UNIFORM:
       reg.reg_offset += delta;
       break;
-   default:
+   case IMM:
       assert(delta == 0);
    }
    return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0fba88e..11c55ab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1601,7 +1601,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
          /* If the instruction writes to more than one register, it needs to
           * be a "compressed" instruction on Gen <= 5.
           */
-         if (inst->exec_size * inst->dst.stride * type_sz(inst->dst.type) > 32)
+         if (inst->dst.component_size(inst->exec_size) > REG_SIZE)
             brw_set_default_compression_control(p, BRW_COMPRESSION_COMPRESSED);
          else
             brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index b48244a..693357f 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -48,6 +48,12 @@ public:
    bool equals(const fs_reg &r) const;
    bool is_contiguous() const;
 
+   /**
+    * Return the size in bytes of a single logical component of the
+    * register assuming the given execution width.
+    */
+   unsigned component_size(unsigned width) const;
+
    /** Smear a channel of the reg to all channels. */
    fs_reg &set_smear(unsigned subreg);
 
-- 
2.4.3

Attachment: signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to