[PATCH] D116238: [mips] Add -mfix4300 flag to enable vr4300 mulmul bugfix pass

2021-12-23 Thread Random via Phabricator via cfe-commits
Random06457 created this revision.
Random06457 added a reviewer: atanasyan.
Herald added subscribers: dang, jrtc27, hiraditya, arichardson, mgorny, sdardis.
Random06457 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Early revisions of the VR4300 have a hardware bug where two consecutive 
multiplications can produce an incorrect result in the second multiply.
This revision adds the `-mfix4300` flag to llvm (and clang) which, when passed, 
provides a software fix for this issue.

**More precise description of the "mulmul" bug:**

  1: mul.[s,d] fd,fs,ft
  2: mul.[s,d] fd,fs,ft  or  [D]MULT[U] rs,rt

When the above sequence is executed by the CPU, if at least one of the source 
operands of the first mul instruction happens to be `sNaN`, `0` or `Infinity`, 
then the second mul instruction may produce an incorrect result.
This can happen both if the two mul instructions are next to each other of if 
the first one is in a delay slot and the second is the first instruction of the 
branch target.

**Description of the fix:**
This fix adds a backend pass to llvm which scans for mul instructions in each 
basic block and happens a nop whenever the following conditions are met:

- The current instruction is a single or double-precision floating-point mul 
instruction.
- The next instrution is either a mul instruction (any kind) or a branch 
instruction.

**Note:**
I chose `-mfix4300` as a name for the flag to follow the GCC nomenclature but I 
don't know if this is a good name.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D116238

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/lib/Target/Mips/CMakeLists.txt
  llvm/lib/Target/Mips/Mips.h
  llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
  llvm/lib/Target/Mips/MipsTargetMachine.cpp
  llvm/test/CodeGen/Mips/vr4300mulmul/mulbranch.ll
  llvm/test/CodeGen/Mips/vr4300mulmul/mulmul.ll

Index: llvm/test/CodeGen/Mips/vr4300mulmul/mulmul.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Mips/vr4300mulmul/mulmul.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+define dso_local i32 @fun(i32 signext %x, i32 signext %y) local_unnamed_addr #0 {
+; CHECK: mul
+; CHECK-NEXT: nop
+; CHECK-NEXT: mul
+  %mul = mul nsw i32 %x, %x
+  %mul1 = mul nsw i32 %y, %y
+  %add = add nuw nsw i32 %mul1, %mul
+  ret i32 %add
+}
Index: llvm/test/CodeGen/Mips/vr4300mulmul/mulbranch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Mips/vr4300mulmul/mulbranch.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define dso_local i32 @my_func(i32 signext %a) local_unnamed_addr #0 {
+; CHECK: mul
+; CHECK-NEXT: nop
+  %mul = mul nsw i32 %a, %a
+  %call = tail call i32 @foo(i32 signext %mul) #2
+  ret i32 %call
+}
+
+declare dso_local i32 @foo(i32 signext) local_unnamed_addr #1
Index: llvm/lib/Target/Mips/MipsTargetMachine.cpp
===
--- llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -292,6 +292,10 @@
   // instructions which can be remapped to a 16 bit instruction.
   addPass(createMicroMipsSizeReducePass());
 
+  // This pass inserts a nop instruction between two back-to-back multiplication
+  // instructions when the "mfix4300" flag is passed.
+  addPass(createMipsMulMulBugPass());
+
   // The delay slot filler pass can potientially create forbidden slot hazards
   // for MIPSR6 and therefore it should go before MipsBranchExpansion pass.
   addPass(createMipsDelaySlotFillerPass());
Index: llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
===
--- /dev/null
+++ llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
@@ -0,0 +1,121 @@
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "mips-r4300-mulmul-fix"
+
+using namespace llvm;
+
+static cl::opt
+EnableMulMulFix("mfix4300", cl::init(false),
+cl::desc("Enable the VR4300 mulmul bug fix."), cl::Hidden);
+
+class MipsMulMulBugFix : public MachineFunctionPass {
+public:
+  MipsMulMulBugFix() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "Mips mulmul bugfix"; }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties:

[PATCH] D116238: [mips] Add -mfix4300 flag to enable vr4300 mulmul bugfix pass

2021-12-24 Thread Random via Phabricator via cfe-commits
Random06457 updated this revision to Diff 396171.
Random06457 added a comment.

Addressed the comments.
I also updated `isFirstMul` to exclude integer multiplications which do not 
produce the bug. That change made me fix the tests too.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116238/new/

https://reviews.llvm.org/D116238

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/Clang.cpp
  llvm/lib/Target/Mips/CMakeLists.txt
  llvm/lib/Target/Mips/Mips.h
  llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
  llvm/lib/Target/Mips/MipsTargetMachine.cpp
  llvm/test/CodeGen/Mips/vr4300mulmul/mulbranch.ll
  llvm/test/CodeGen/Mips/vr4300mulmul/mulmul.ll

Index: llvm/test/CodeGen/Mips/vr4300mulmul/mulmul.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Mips/vr4300mulmul/mulmul.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+define dso_local float @fun_s(float %x) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_s
+; CHECK: mul.s
+; CHECK-NEXT: nop
+; CHECK: mul.s
+  %mul = fmul float %x, %x
+  %mul1 = fmul float %mul, %x
+  ret float %mul1
+}
+
+define dso_local double @fun_d(double %x) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_d
+; CHECK: mul.d
+; CHECK-NEXT: nop
+; CHECK: mul.d
+  %mul = fmul double %x, %x
+  %mul1 = fmul double %mul, %x
+  ret double %mul1
+}
\ No newline at end of file
Index: llvm/test/CodeGen/Mips/vr4300mulmul/mulbranch.ll
===
--- /dev/null
+++ llvm/test/CodeGen/Mips/vr4300mulmul/mulbranch.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define dso_local void @fun_s(float %a) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_s
+; CHECK: mul.s
+; CHECK-NEXT: nop
+  %mul = fmul float %a, %a
+  tail call void @foo_s(float %mul) #2
+  ret void
+}
+
+declare dso_local void @foo_s(float) local_unnamed_addr #1
+
+; Function Attrs: nounwind
+define dso_local void @fun_d(double %a) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_d
+; CHECK: mul.d
+; CHECK-NEXT: nop
+  %mul = fmul double %a, %a
+  tail call void @foo_d(double %mul) #2
+  ret void
+}
+
+declare dso_local void @foo_d(double) local_unnamed_addr #1
\ No newline at end of file
Index: llvm/lib/Target/Mips/MipsTargetMachine.cpp
===
--- llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -45,6 +45,10 @@
 
 #define DEBUG_TYPE "mips"
 
+static cl::opt
+EnableMulMulFix("mfix4300", cl::init(false),
+cl::desc("Enable the VR4300 mulmul bug fix."), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
   // Register the target.
   RegisterTargetMachine X(getTheMipsTarget());
@@ -292,6 +296,11 @@
   // instructions which can be remapped to a 16 bit instruction.
   addPass(createMicroMipsSizeReducePass());
 
+  // This pass inserts a nop instruction between two back-to-back multiplication
+  // instructions when the "mfix4300" flag is passed.
+  if (EnableMulMulFix)
+addPass(createMipsMulMulBugPass());
+
   // The delay slot filler pass can potientially create forbidden slot hazards
   // for MIPSR6 and therefore it should go before MipsBranchExpansion pass.
   addPass(createMipsDelaySlotFillerPass());
Index: llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
===
--- /dev/null
+++ llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
@@ -0,0 +1,111 @@
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "mips-r4300-mulmul-fix"
+
+using namespace llvm;
+
+namespace {
+
+class MipsMulMulBugFix : public MachineFunctionPass {
+public:
+  MipsMulMulBugFix() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "Mips mulmul bugfix"; }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  static char ID;
+
+private:
+  bool fixMulMulBB(MachineBasicBlock &MBB, const MipsInstrInfo &MipsII);
+};
+
+} // namespace
+
+char MipsMulMulBugFix::ID = 0;
+
+bool MipsMulMulBugFix::runOnMachineFunction(MachineFunction &MF) {
+
+  const MipsInstrInfo &MipsII =
+  *static_cast(MF.getSubtarget().getInstrInfo());
+
+  bool Modified = false;
+
+  for (auto &MBB : MF)
+Modified |= fixMul