Re: [PR] Basically override bufferload and bufferstore with TPC intrinsic [tvm]

via GitHub Mon, 02 Mar 2026 21:25:24 -0800


gemini-code-assist[bot] commented on code in PR #18866:
URL: https://github.com/apache/tvm/pull/18866#discussion_r2876161684



##########
src/target/source/codegen_tpc.cc:
##########
@@ -0,0 +1,401 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file codegen_tpc.cc
+ * \brief TPC-C code generator for Habana Gaudi accelerators.
+ *
+ * Generates TPC-C kernel source code from TVM TIR.
+ * Currently targets Gaudi2 with float32 support.
+ */
+
+#include "codegen_tpc.h"
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/ffi/reflection/registry.h>
+#include <tvm/ir/module.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <string>
+#include <vector>
+
+#include "../../tir/transforms/ir_utils.h"
+
+namespace tvm {
+namespace codegen {
+
+using namespace tir;
+
+CodeGenTPC::CodeGenTPC() {
+  // TPC-C doesn't use restrict keyword
+  restrict_keyword_ = "";
+}
+
+void CodeGenTPC::Init(bool output_ssa) {
+  CodeGenC::Init(output_ssa);
+  // Reset TPC-specific state
+  tensor_buffers_.clear();
+  index_space_emitted_ = false;
+}
+
+std::string CodeGenTPC::Finish() {
+  // TPC-C kernel preamble: no special includes needed.
+  // TPC-C has built-in types (float64, int5, tensor) and intrinsics.
+  return CodeGenC::Finish();
+}
+
+// ---------------------------------------------------------------------------
+// Function signature: void main(tensor input0, tensor input1, tensor output)
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PrintFuncPrefix(std::ostream& os) {
+  // TPC-C kernels don't need a prefix like OpenCL's "__kernel"
+}
+
+void CodeGenTPC::PrintExtraAttrs(const PrimFunc& f, std::ostream& os) {
+  // No extra attributes for TPC kernels (unlike CUDA's __launch_bounds__)
+}
+
+void CodeGenTPC::PrintFunctionSignature(const ffi::String& function_name, 
const PrimFunc& func,
+                                        std::ostream& os) {
+  PrintFuncPrefix(os);
+
+  // TPC kernel entry point is always "void main"
+  os << "void main(";
+
+  for (size_t i = 0; i < func->params.size(); ++i) {
+    tir::Var v = func->params[i];
+    LOG(INFO) << v;
+    // auto type = GetType(v);
+    // LOG(INFO) << type.is_scalar();
+    // LOG(INFO) << "type: " << 
v->type_annotation.as<PointerTypeNode>()->element_type.as<PrimTypeNode>();
+    if (i > 0) {
+      os << ", ";
+    }
+
+    if (v.dtype().is_handle()) {
+      // Handle-type parameters are TPC tensor descriptors
+      os << "tensor " << AllocVarID(v.get());
+      // Track this buffer as a TPC tensor for intrinsic-based access
+      tensor_buffers_.insert(v.get());
+    } else {
+      // Scalar parameters (int, float, etc.) stay as-is
+      PrintType(GetType(v), os);
+      os << " " << AllocVarID(v.get());
+    }
+  }
+  os << ")";
+
+  // Register handle data types for buffer access resolution
+  for (const auto& param : func->params) {
+    if (auto* ptr = param->type_annotation.as<PointerTypeNode>()) {
+      if (auto* prim = ptr->element_type.as<PrimTypeNode>()) {
+        LOG(INFO) << prim->dtype;

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   This debugging log should be removed before merging.
   
   ```suggestion
           RegisterHandleType(param.get(), prim->dtype);
   ```



##########
src/target/source/codegen_tpc.cc:
##########
@@ -0,0 +1,401 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file codegen_tpc.cc
+ * \brief TPC-C code generator for Habana Gaudi accelerators.
+ *
+ * Generates TPC-C kernel source code from TVM TIR.
+ * Currently targets Gaudi2 with float32 support.
+ */
+
+#include "codegen_tpc.h"
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/ffi/reflection/registry.h>
+#include <tvm/ir/module.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <string>
+#include <vector>
+
+#include "../../tir/transforms/ir_utils.h"
+
+namespace tvm {
+namespace codegen {
+
+using namespace tir;
+
+CodeGenTPC::CodeGenTPC() {
+  // TPC-C doesn't use restrict keyword
+  restrict_keyword_ = "";
+}
+
+void CodeGenTPC::Init(bool output_ssa) {
+  CodeGenC::Init(output_ssa);
+  // Reset TPC-specific state
+  tensor_buffers_.clear();
+  index_space_emitted_ = false;
+}
+
+std::string CodeGenTPC::Finish() {
+  // TPC-C kernel preamble: no special includes needed.
+  // TPC-C has built-in types (float64, int5, tensor) and intrinsics.
+  return CodeGenC::Finish();
+}
+
+// ---------------------------------------------------------------------------
+// Function signature: void main(tensor input0, tensor input1, tensor output)
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PrintFuncPrefix(std::ostream& os) {
+  // TPC-C kernels don't need a prefix like OpenCL's "__kernel"
+}
+
+void CodeGenTPC::PrintExtraAttrs(const PrimFunc& f, std::ostream& os) {
+  // No extra attributes for TPC kernels (unlike CUDA's __launch_bounds__)
+}
+
+void CodeGenTPC::PrintFunctionSignature(const ffi::String& function_name, 
const PrimFunc& func,
+                                        std::ostream& os) {
+  PrintFuncPrefix(os);
+
+  // TPC kernel entry point is always "void main"
+  os << "void main(";
+
+  for (size_t i = 0; i < func->params.size(); ++i) {
+    tir::Var v = func->params[i];
+    LOG(INFO) << v;
+    // auto type = GetType(v);
+    // LOG(INFO) << type.is_scalar();
+    // LOG(INFO) << "type: " << 
v->type_annotation.as<PointerTypeNode>()->element_type.as<PrimTypeNode>();

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   These lines contain a debugging log and commented-out code that should be 
removed before merging.



##########
src/target/source/codegen_tpc.cc:
##########
@@ -0,0 +1,401 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file codegen_tpc.cc
+ * \brief TPC-C code generator for Habana Gaudi accelerators.
+ *
+ * Generates TPC-C kernel source code from TVM TIR.
+ * Currently targets Gaudi2 with float32 support.
+ */
+
+#include "codegen_tpc.h"
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/ffi/reflection/registry.h>
+#include <tvm/ir/module.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <string>
+#include <vector>
+
+#include "../../tir/transforms/ir_utils.h"
+
+namespace tvm {
+namespace codegen {
+
+using namespace tir;
+
+CodeGenTPC::CodeGenTPC() {
+  // TPC-C doesn't use restrict keyword
+  restrict_keyword_ = "";
+}
+
+void CodeGenTPC::Init(bool output_ssa) {
+  CodeGenC::Init(output_ssa);
+  // Reset TPC-specific state
+  tensor_buffers_.clear();
+  index_space_emitted_ = false;
+}
+
+std::string CodeGenTPC::Finish() {
+  // TPC-C kernel preamble: no special includes needed.
+  // TPC-C has built-in types (float64, int5, tensor) and intrinsics.
+  return CodeGenC::Finish();
+}
+
+// ---------------------------------------------------------------------------
+// Function signature: void main(tensor input0, tensor input1, tensor output)
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PrintFuncPrefix(std::ostream& os) {
+  // TPC-C kernels don't need a prefix like OpenCL's "__kernel"
+}
+
+void CodeGenTPC::PrintExtraAttrs(const PrimFunc& f, std::ostream& os) {
+  // No extra attributes for TPC kernels (unlike CUDA's __launch_bounds__)
+}
+
+void CodeGenTPC::PrintFunctionSignature(const ffi::String& function_name, 
const PrimFunc& func,
+                                        std::ostream& os) {
+  PrintFuncPrefix(os);
+
+  // TPC kernel entry point is always "void main"
+  os << "void main(";
+
+  for (size_t i = 0; i < func->params.size(); ++i) {
+    tir::Var v = func->params[i];
+    LOG(INFO) << v;
+    // auto type = GetType(v);
+    // LOG(INFO) << type.is_scalar();
+    // LOG(INFO) << "type: " << 
v->type_annotation.as<PointerTypeNode>()->element_type.as<PrimTypeNode>();
+    if (i > 0) {
+      os << ", ";
+    }
+
+    if (v.dtype().is_handle()) {
+      // Handle-type parameters are TPC tensor descriptors
+      os << "tensor " << AllocVarID(v.get());
+      // Track this buffer as a TPC tensor for intrinsic-based access
+      tensor_buffers_.insert(v.get());
+    } else {
+      // Scalar parameters (int, float, etc.) stay as-is
+      PrintType(GetType(v), os);
+      os << " " << AllocVarID(v.get());
+    }
+  }
+  os << ")";
+
+  // Register handle data types for buffer access resolution
+  for (const auto& param : func->params) {
+    if (auto* ptr = param->type_annotation.as<PointerTypeNode>()) {
+      if (auto* prim = ptr->element_type.as<PrimTypeNode>()) {
+        LOG(INFO) << prim->dtype;
+        RegisterHandleType(param.get(), prim->dtype);
+      }
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// PreFunctionBody: inject TPC index space initialization
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PreFunctionBody(const PrimFunc& f) {
+  // Emit TPC index space boilerplate at the start of every kernel
+  PrintIndent();
+  stream << "const int5 index_space_start = get_index_space_offset();\n";
+  PrintIndent();
+  stream << "const int5 index_space_end = get_index_space_size() + 
index_space_start;\n";
+  // Shared 5D coordinate vector used by all tensor loads/stores
+  PrintIndent();
+  stream << "int5 " << coords_var_ << " = {0, 0, 0, 0, 0};\n";
+  stream << "\n";
+  index_space_emitted_ = true;
+}
+
+// ---------------------------------------------------------------------------
+// Type printing: TPC-C has unique SIMD vector types
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PrintType(DataType t, std::ostream& os) {
+  LOG(INFO) <<"here";
+  int lanes = t.lanes();
+
+  if (t.is_handle()) {
+    ICHECK(t.is_scalar()) << "TPC: do not support vector of handles";
+    os << "void*";
+    return;
+  }
+
+  if (t.is_void()) {
+    os << "void";
+    return;
+  }
+
+  // TPC-C SIMD vector types for float32:
+  //   64 lanes -> float64 (64 x float32, the native SIMD width)
+  // For int32:
+  //   64 lanes -> int64 (64 x int32)
+  if (t.is_float()) {
+    switch (t.bits()) {
+      case 32:
+        if (lanes == 1) {
+          os << "float";
+        } else if (lanes == 64) {
+          // TPC native SIMD vector: 64 x float32
+          os << "float64";
+        } else {
+          LOG(FATAL) << "TPC: unsupported float32 vector width " << lanes
+                     << " (only scalar and 64-lane supported)";
+        }
+        return;
+      default:
+        LOG(FATAL) << "TPC: unsupported float bit width " << t.bits()
+                   << " (only float32 supported currently)";
+    }
+  }
+
+  if (t.is_int()) {
+    switch (t.bits()) {
+      case 32:
+        if (lanes == 1) {
+          os << "int";
+        } else if (lanes == 64) {
+          os << "int64";
+        } else {
+          LOG(FATAL) << "TPC: unsupported int32 vector width " << lanes;
+        }
+        return;
+      case 16:
+        if (lanes == 1) {
+          os << "short";
+        } else {
+          LOG(FATAL) << "TPC: unsupported int16 vector width " << lanes;
+        }
+        return;
+      case 8:
+        if (lanes == 1) {
+          os << "char";
+        } else {
+          LOG(FATAL) << "TPC: unsupported int8 vector width " << lanes;
+        }
+        return;
+      case 1:
+        os << "bool";
+        return;
+      default:
+        LOG(FATAL) << "TPC: unsupported int bit width " << t.bits();
+    }
+  }
+
+  if (t.is_uint()) {
+    switch (t.bits()) {
+      case 32:
+        if (lanes == 1) {
+          os << "unsigned int";
+        } else if (lanes == 64) {
+          os << "uint64";
+        } else {
+          LOG(FATAL) << "TPC: unsupported uint32 vector width " << lanes;
+        }
+        return;
+      case 16:
+        os << "unsigned short";
+        return;
+      case 8:
+        os << "unsigned char";
+        return;
+      case 1:
+        os << "bool";
+        return;
+      default:
+        LOG(FATAL) << "TPC: unsupported uint bit width " << t.bits();
+    }
+  }
+
+  LOG(FATAL) << "TPC: unknown type " << t;
+}
+
+// ---------------------------------------------------------------------------
+// AttrStmt: intercept thread_extent to generate TPC index space range loops
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::VisitStmt_(const AttrStmtNode* op) {
+  if (op->attr_key == tir::attr::thread_extent) {
+    IterVar iv = Downcast<IterVar>(op->node);
+    std::string tag = iv->thread_tag;
+
+    // Map thread tag to TPC index space dimension
+    int dim = -1;
+    if (tag == "threadIdx.x" || tag == "tpc.index_space.0") {
+      dim = 0;
+    } else if (tag == "threadIdx.y" || tag == "tpc.index_space.1") {
+      dim = 1;
+    } else if (tag == "threadIdx.z" || tag == "tpc.index_space.2") {
+      dim = 2;
+    } else if (tag == "blockIdx.x" || tag == "tpc.index_space.3") {
+      dim = 3;
+    } else if (tag == "blockIdx.y" || tag == "tpc.index_space.4") {
+      dim = 4;
+    }
+
+    if (dim >= 0) {
+      // dim 0 (depth) is the SIMD dimension: 1 index space unit = 64 f32 
elements.
+      // Other dims: 1 unit = 1 element (step=1).
+      // The step can be overridden via a "tpc.step" annotation on the IterVar 
if needed.
+      int step = (dim == 0) ? 64 : 1;

Review Comment:
   ![high](https://www.gstatic.com/codereviewagent/high-priority.svg)
   
   The comment on line 262 (the one directly above the flagged line) says that 
the step can be overridden via a `tpc.step` annotation on the `IterVar`, but 
the implementation on line 263 (`int step = (dim == 0) ? 64 : 1;`) hardcodes 
the step value. This discrepancy is misleading. Please either implement the 
logic to read the `tpc.step` annotation from the `IterVar`, or remove that 
sentence from the comment if this feature is not intended to be supported.



##########
src/target/source/codegen_tpc.h:
##########
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file codegen_tpc.h
+ * \brief Utility to generate TPC-C code for Habana Gaudi accelerators.
+ *
+ * TPC (Tensor Processing Core) is the programmable SIMD engine in Habana 
Gaudi.
+ * TPC-C is a C-like language with vector intrinsics (e.g., float64 = 64xf32 
SIMD).
+ *
+ * Key differences from standard C codegen:
+ * - Function params use `tensor` type instead of pointers
+ * - Buffer access uses TPC intrinsics (v_f32_ld_tnsr_b / v_f32_st_tnsr)
+ * - Thread parallelism via index space (get_index_space_offset/size)
+ * - SIMD vector types: float64 (64xf32), int64 (64xi32), etc.
+ */
+#ifndef TVM_TARGET_SOURCE_CODEGEN_TPC_H_
+#define TVM_TARGET_SOURCE_CODEGEN_TPC_H_
+
+#include <tvm/target/codegen.h>
+#include <tvm/tir/expr.h>
+#include <tvm/tir/op.h>
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "codegen_c.h"
+
+namespace tvm {
+namespace codegen {
+
+class CodeGenTPC final : public CodeGenC {
+ public:
+  CodeGenTPC();
+  void Init(bool output_ssa);
+  std::string Finish();
+
+  /*!
+   * \brief Print TPC function signature.
+   *
+   * TPC kernels use `void main(tensor input0, tensor input1, tensor output, 
...)`
+   * instead of C-style `void func(float* a, float* b, float* c)`.
+   * Handle-type params become `tensor`, scalar params stay as-is.
+   */
+  void PrintFunctionSignature(const ffi::String& function_name, const 
PrimFunc& func,
+                              std::ostream& os) final;
+
+  /*! \brief No prefix needed for TPC (unlike OpenCL's __kernel) */
+  void PrintFuncPrefix(std::ostream& os) final;
+
+  /*! \brief No extra attrs needed for now */
+  void PrintExtraAttrs(const PrimFunc& f, std::ostream& os) final;
+
+  /*!
+   * \brief Inject TPC index space initialization before function body.
+   *
+   * Generates:
+   *   const int5 index_space_start = get_index_space_offset();
+   *   const int5 index_space_end = get_index_space_size() + index_space_start;
+   */
+  void PreFunctionBody(const PrimFunc& f) final;
+
+  /*!
+   * \brief Print TPC-C types.
+   *
+   * Key mappings:
+   *   float32 scalar  -> float
+   *   float32x64      -> float64  (TPC 64-element SIMD vector)
+   *   int32 scalar    -> int
+   *   int32x64        -> int64    (TPC 64-element SIMD int vector)
+   *   handle          -> void*    (for non-tensor handles)
+   */
+  void PrintType(DataType t, std::ostream& os) final;
+  // Bring the Type overload into scope (hidden by DataType override)
+  using CodeGenSourceBase::PrintType;
+
+  /*!
+   * \brief Override AttrStmt to intercept thread_extent and generate TPC 
range loops.
+   *
+   * For thread-tagged IterVars (threadIdx.x/y/z, blockIdx.x/y):
+   *   Emits: const int dimStart = index_space_start[dim] * step;
+   *          const int dimEnd   = index_space_end[dim] * step;
+   *          for (int var = dimStart; var < dimEnd; var += step) { 
coords[dim] = var; ... }
+   * For other AttrStmts: falls back to base class.
+   */
+  void VisitStmt_(const AttrStmtNode* op) final;
+
+  /*!
+   * \brief Override BufferLoad for TPC tensor intrinsics.
+   *
+   * For tensor buffers with vector access:
+   *   v_f32_ld_tnsr_b(coords, tensor_name)
+   * For scalar/local buffers:
+   *   falls back to base class behavior
+   */
+  void VisitExpr_(const BufferLoadNode* op, std::ostream& os) final;
+
+  /*!
+   * \brief Override BufferStore for TPC tensor intrinsics.
+   *
+   * For tensor buffers with vector store:
+   *   v_f32_st_tnsr(coords, tensor_name, value)
+   * For scalar/local buffers:
+   *   falls back to base class behavior
+   */
+  void VisitStmt_(const BufferStoreNode* op) final;
+
+  /*! \brief TPC storage scope handling (no shared memory concept) */
+  void PrintStorageScope(const std::string& scope, std::ostream& os) final;
+
+ private:
+  /*! \brief Whether scope is part of type (TPC: no) */
+  bool IsScopePartOfType() const final { return false; }
+
+  /*!
+   * \brief Track which buffer variables are TPC tensor descriptors.
+   * These are function parameters with handle type that should use
+   * tensor intrinsics instead of pointer arithmetic.
+   */
+  std::unordered_set<const VarNode*> tensor_buffers_;
+
+  /*! \brief Whether index space variables have been emitted */
+  bool index_space_emitted_{false};
+
+  /*!
+   * \brief Name of the shared int5 coordinate variable emitted in 
PreFunctionBody.
+   * All tensor loads/stores reference this variable via 
v_f32_ld_tnsr_b(coords, tensor).
+   */
+  std::string coords_var_{"coords"};

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The `coords_var_` member is initialized once and never modified. It can be 
declared as a `static constexpr const char*` to avoid `std::string` overhead 
and make its immutability explicit.
   
   ```suggestion
     static constexpr const char* coords_var_{"coords"};
   ```



##########
src/target/source/codegen_tpc.cc:
##########
@@ -0,0 +1,401 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file codegen_tpc.cc
+ * \brief TPC-C code generator for Habana Gaudi accelerators.
+ *
+ * Generates TPC-C kernel source code from TVM TIR.
+ * Currently targets Gaudi2 with float32 support.
+ */
+
+#include "codegen_tpc.h"
+
+#include <tvm/arith/analyzer.h>
+#include <tvm/ffi/reflection/registry.h>
+#include <tvm/ir/module.h>
+#include <tvm/tir/stmt_functor.h>
+
+#include <string>
+#include <vector>
+
+#include "../../tir/transforms/ir_utils.h"
+
+namespace tvm {
+namespace codegen {
+
+using namespace tir;
+
+CodeGenTPC::CodeGenTPC() {
+  // TPC-C doesn't use restrict keyword
+  restrict_keyword_ = "";
+}
+
+void CodeGenTPC::Init(bool output_ssa) {
+  CodeGenC::Init(output_ssa);
+  // Reset TPC-specific state
+  tensor_buffers_.clear();
+  index_space_emitted_ = false;
+}
+
+std::string CodeGenTPC::Finish() {
+  // TPC-C kernel preamble: no special includes needed.
+  // TPC-C has built-in types (float64, int5, tensor) and intrinsics.
+  return CodeGenC::Finish();
+}
+
+// ---------------------------------------------------------------------------
+// Function signature: void main(tensor input0, tensor input1, tensor output)
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PrintFuncPrefix(std::ostream& os) {
+  // TPC-C kernels don't need a prefix like OpenCL's "__kernel"
+}
+
+void CodeGenTPC::PrintExtraAttrs(const PrimFunc& f, std::ostream& os) {
+  // No extra attributes for TPC kernels (unlike CUDA's __launch_bounds__)
+}
+
+void CodeGenTPC::PrintFunctionSignature(const ffi::String& function_name, 
const PrimFunc& func,
+                                        std::ostream& os) {
+  PrintFuncPrefix(os);
+
+  // TPC kernel entry point is always "void main"
+  os << "void main(";
+
+  for (size_t i = 0; i < func->params.size(); ++i) {
+    tir::Var v = func->params[i];
+    LOG(INFO) << v;
+    // auto type = GetType(v);
+    // LOG(INFO) << type.is_scalar();
+    // LOG(INFO) << "type: " << 
v->type_annotation.as<PointerTypeNode>()->element_type.as<PrimTypeNode>();
+    if (i > 0) {
+      os << ", ";
+    }
+
+    if (v.dtype().is_handle()) {
+      // Handle-type parameters are TPC tensor descriptors
+      os << "tensor " << AllocVarID(v.get());
+      // Track this buffer as a TPC tensor for intrinsic-based access
+      tensor_buffers_.insert(v.get());
+    } else {
+      // Scalar parameters (int, float, etc.) stay as-is
+      PrintType(GetType(v), os);
+      os << " " << AllocVarID(v.get());
+    }
+  }
+  os << ")";
+
+  // Register handle data types for buffer access resolution
+  for (const auto& param : func->params) {
+    if (auto* ptr = param->type_annotation.as<PointerTypeNode>()) {
+      if (auto* prim = ptr->element_type.as<PrimTypeNode>()) {
+        LOG(INFO) << prim->dtype;
+        RegisterHandleType(param.get(), prim->dtype);
+      }
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// PreFunctionBody: inject TPC index space initialization
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PreFunctionBody(const PrimFunc& f) {
+  // Emit TPC index space boilerplate at the start of every kernel
+  PrintIndent();
+  stream << "const int5 index_space_start = get_index_space_offset();\n";
+  PrintIndent();
+  stream << "const int5 index_space_end = get_index_space_size() + 
index_space_start;\n";
+  // Shared 5D coordinate vector used by all tensor loads/stores
+  PrintIndent();
+  stream << "int5 " << coords_var_ << " = {0, 0, 0, 0, 0};\n";
+  stream << "\n";
+  index_space_emitted_ = true;
+}
+
+// ---------------------------------------------------------------------------
+// Type printing: TPC-C has unique SIMD vector types
+// ---------------------------------------------------------------------------
+
+void CodeGenTPC::PrintType(DataType t, std::ostream& os) {
+  LOG(INFO) <<"here";

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   This debugging log should be removed before merging.
   
   ```suggestion
     int lanes = t.lanes();
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Basically override bufferload and bufferstore with TPC intrinsic [tvm]

Reply via email to