Hi,

the patch below adds support for HSA vector immediates and
instructions storing them directly to memory, which was hitherto
missing on the branch.

Committed as r224554.

Thanks,

Martin


2015-06-16  Martin Jambor  <mjam...@suse.cz>

        * hsa-brig.c (hsa_get_imm_brig_type_len): New function.
        (emit_immediate_scalar_to_data_section): Likewise.
        (emit_immediate_operand): Reimplemented.
        * hsa-gen.c (gen_hsa_insns_for_load): Trimmed long line.
        (gen_hsa_insns_for_store): Added missing comment, trimmed long line,
        added another type exception for vector immediates.

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index d28634d..bb4a2c1 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -785,118 +785,170 @@ enqueue_op (hsa_op_base *op)
   return ret;
 }
 
-/* Emit an immediate BRIG operand IMM.  */
+/* Return the length of the BRIG type TYPE that is going to be streamed out as
+   an immediate constant (so it must not be B1).  */
 
-static void
-emit_immediate_operand (hsa_op_immed *imm)
+static unsigned
+hsa_get_imm_brig_type_len (BrigType16_t type)
 {
-  struct BrigOperandConstantBytes out;
-  uint32_t byteCount;
+  BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
+  BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;
 
-  union
-  {
-    uint8_t b8;
-    uint16_t b16;
-    uint32_t b32;
-    uint64_t b64;
-  } bytes;
-  unsigned len;
+  switch (pack_type)
+    {
+    case BRIG_TYPE_PACK_NONE:
+      break;
+    case BRIG_TYPE_PACK_32:
+      return 4;
+    case BRIG_TYPE_PACK_64:
+      return 8;
+    case BRIG_TYPE_PACK_128:
+      return 16;
+    default:
+      gcc_unreachable ();
+    }
 
-  memset (&out, 0, sizeof (out));
-  switch (imm->type)
+  switch (base_type)
     {
     case BRIG_TYPE_U8:
     case BRIG_TYPE_S8:
-      len = 1;
-      bytes.b8 = (uint8_t) TREE_INT_CST_LOW (imm->value);
-      break;
+    case BRIG_TYPE_B8:
+      return 1;
     case BRIG_TYPE_U16:
     case BRIG_TYPE_S16:
-      bytes.b16 = (uint16_t) TREE_INT_CST_LOW (imm->value);
-      len = 2;
-      break;
-
     case BRIG_TYPE_F16:
-      sorry ("Support for HSA does not implement immediate 16 bit FPU "
-            "operands");
-      len = 2;
-      break;
-
+    case BRIG_TYPE_B16:
+      return 2;
     case BRIG_TYPE_U32:
     case BRIG_TYPE_S32:
-      bytes.b32 = (uint32_t) TREE_INT_CST_LOW (imm->value);
-      len = 4;
-      break;
-
+    case BRIG_TYPE_F32:
+    case BRIG_TYPE_B32:
+      return 4;
     case BRIG_TYPE_U64:
     case BRIG_TYPE_S64:
-      bytes.b64 = (uint64_t) int_cst_value (imm->value);
-      len = 8;
-      break;
-
-    case BRIG_TYPE_F32:
     case BRIG_TYPE_F64:
-      {
-       tree expr = imm->value;
-       tree type = TREE_TYPE (expr);
+    case BRIG_TYPE_B64:
+      return 8;
+    case BRIG_TYPE_B128:
+      return 16;
+    default:
+      gcc_unreachable ();
+    }
+}
 
-       len = GET_MODE_SIZE (TYPE_MODE (type));
+/* Emit one scalar VALUE to the data BRIG section.  If NEED_LEN is not equal to
+   zero, shrink or extend the value to NEED_LEN bytes.  Return how many bytes
+   were written.  */
 
-       /* There are always 32 bits in each long, no matter the size of
-          the hosts long.  */
-       long tmp[6];
+static int
+emit_immediate_scalar_to_data_section (tree value, unsigned need_len)
+{
+  union
+  {
+    uint8_t b8;
+    uint16_t b16;
+    uint32_t b32;
+    uint64_t b64;
+  } bytes;
 
-       gcc_assert (len == 4 || len == 8);
+  memset (&bytes, 0, sizeof (bytes));
+  tree type = TREE_TYPE (value);
+  gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE);
+  unsigned data_len = tree_to_uhwi (TYPE_SIZE (type))/BITS_PER_UNIT;
+  if (INTEGRAL_TYPE_P (type))
+    switch (data_len)
+      {
+      case 1:
+       bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value);
+       break;
+      case 2:
+       bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value);
+       break;
+      case 4:
+       bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value);
+       break;
+      case 8:
+       bytes.b64 = (uint64_t) int_cst_value (value);
+       break;
+      default:
+       gcc_unreachable ();
+      }
+  else if (SCALAR_FLOAT_TYPE_P (type))
+    {
+      if (data_len == 2)
+       {
+         sorry ("Support for HSA does not implement immediate 16 bit FPU "
+                "operands");
+         return 2;
+       }
+      unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type));
+      /* There are always 32 bits in each long, no matter the size of
+        the hosts long.  */
+      long tmp[6];
 
-       real_to_target (tmp, TREE_REAL_CST_PTR (expr), TYPE_MODE (type));
+      real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type));
 
-       if (len == 4)
-         bytes.b32 = (uint32_t) tmp[0];
-       else
-         {
-           bytes.b64 = (uint64_t)(uint32_t) tmp[1];
-           bytes.b64 <<= 32;
-           bytes.b64 |= (uint32_t) tmp[0];
-         }
+      if (int_len == 4)
+       bytes.b32 = (uint32_t) tmp[0];
+      else
+       {
+         bytes.b64 = (uint64_t)(uint32_t) tmp[1];
+         bytes.b64 <<= 32;
+         bytes.b64 |= (uint32_t) tmp[0];
+       }
+    }
+  else
+    gcc_unreachable ();
 
-       break;
-      }
+  int len;
+  if (need_len == 0)
+    len = data_len;
+  else
+    len = need_len;
 
-    case BRIG_TYPE_U8X4:
-    case BRIG_TYPE_S8X4:
-    case BRIG_TYPE_U16X2:
-    case BRIG_TYPE_S16X2:
-    case BRIG_TYPE_F16X2:
-      len = 4;
-      sorry ("Support for HSA does not implement immediate 32bit "
-            "vector operands. ");
-      break;
+  brig_data.add (&bytes, len);
+  return len;
+}
 
-    case BRIG_TYPE_U8X8:
-    case BRIG_TYPE_S8X8:
-    case BRIG_TYPE_U16X4:
-    case BRIG_TYPE_S16X4:
-    case BRIG_TYPE_F16X4:
-    case BRIG_TYPE_U32X2:
-    case BRIG_TYPE_S32X2:
-    case BRIG_TYPE_F32X2:
-      len = 8;
-      sorry ("Support for HSA does not implement immediate 32bit "
-            "vector operands. ");
-      break;
+/* Emit an immediate BRIG operand IMM.  The BRIG type of the immediate might
+   have been massaged to comply with various HSA/BRIG type requirements, so the
+   only important aspect of that is the length (because HSAIL might expect
+   smaller constants or become bit-data).  The data should be represented
+   according to what is in the tree representation.  */
 
-    default:
-      gcc_unreachable ();
-    }
+static void
+emit_immediate_operand (hsa_op_immed *imm)
+{
+  struct BrigOperandConstantBytes out;
+  unsigned total_len = hsa_get_imm_brig_type_len (imm->type);
+
+  /* We do not produce HSAIL array types anywhere.  */
+  gcc_assert (!(imm->type & BRIG_TYPE_ARRAY));
 
+  memset (&out, 0, sizeof (out));
   out.base.byteCount = htole16 (sizeof (out));
   out.base.kind = htole16 (BRIG_KIND_OPERAND_CONSTANT_BYTES);
-  byteCount = htole32 (len);
+  uint32_t byteCount = htole32 (total_len);
   out.type = htole16 (imm->type);
-  out.bytes = brig_data.add (&byteCount, sizeof (byteCount));
-  brig_data.add (&bytes, len);
-
+  out.bytes = htole32 (brig_data.add (&byteCount, sizeof (byteCount)));
   brig_operand.add (&out, sizeof(out));
+
+  if (TREE_CODE (imm->value) == VECTOR_CST)
+    {
+      int i, num = VECTOR_CST_NELTS (imm->value);
+      for (i = 0; i < num; i++)
+       {
+         unsigned actual;
+         actual = emit_immediate_scalar_to_data_section
+           (VECTOR_CST_ELT (imm->value, i), 0);
+         total_len -= actual;
+       }
+      /* Vectors should have the exact size.  */
+      gcc_assert (total_len == 0);
+    }
+  else
+    emit_immediate_scalar_to_data_section (imm->value, total_len);
+
   brig_data.round_size_up (4);
 }
 
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index db5200d..0349efd 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -1318,7 +1318,8 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb,
       addr = gen_hsa_addr (rhs, hbb, ssa_map);
       mem->opcode = BRIG_OPCODE_LD;
       /* Not dest->type, that's possibly extended.  */
-      mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type, false));
+      mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (type,
+                                                                   false));
       mem->operands[0] = dest;
       mem->operands[1] = addr;
       set_reg_def (dest, mem);
@@ -1331,6 +1332,9 @@ gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb,
           rhs);
 }
 
+/* Generate HSAIL instructions storing into memory.  LHS is the destination of
+   the store, SRC is the source operand.  Add instructions to HBB, use SSA_MAP
+   for HSA SSA lookup.  */
 
 static void
 gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
@@ -1343,7 +1347,8 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
   mem->opcode = BRIG_OPCODE_ST;
   if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (src))
     reg->uses.safe_push (mem);
-  mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), false));
+  mem->type = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
+                                                               false));
 
   /* XXX The HSAIL disasm has another constraint: if the source
      is an immediate then it must match the destination type.  If
@@ -1351,7 +1356,32 @@ gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb,
      We're always allocating new operands so we can modify the above
      in place.  */
   if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src))
-    imm->type = mem->type;
+    {
+      if ((imm->type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_NONE)
+       imm->type = mem->type;
+      else
+       {
+         /* ...and all vector immediates apparently need to be vectors of
+            unsigned bytes. */
+         BrigType16_t bt = bittype_for_type (imm->type);
+         gcc_assert (bt == bittype_for_type (mem->type));
+         switch (bt)
+           {
+           case BRIG_TYPE_B32:
+             imm->type = BRIG_TYPE_U8X4;
+             break;
+           case BRIG_TYPE_B64:
+             imm->type = BRIG_TYPE_U8X8;
+             break;
+           case BRIG_TYPE_B128:
+             imm->type = BRIG_TYPE_U8X16;
+             break;
+           default:
+             gcc_unreachable ();
+           }
+       }
+    }
+
   mem->operands[0] = src;
   mem->operands[1] = addr;
   if (addr->reg)

Reply via email to