[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-llvm

Author: Fabian Mora (fabianmcg)


Changes

This patch adds the `gather`, `masked_load`, `masked_store`, and `scatter` 
operations to the `ptr` dialect. It also implements translation from these 
operations to LLVM intrinsics:
- `ptr.gather` -> `llvm.masked.gather`
- `ptr.masked_load` -> `llvm.masked.load`
- `ptr.masked_store` -> `llvm.masked.store`
- `ptr.scatter` -> `llvm.masked.scatter`

Example:
```mlir
llvm.func @mixed_masked_ops_address_spaces(%ptr: !ptr.ptr<#llvm.address_space<3>>,
    %ptrs: vector<4x!ptr.ptr<#llvm.address_space<3>>>,
    %mask: vector<4xi1>, %value: vector<4xf64>, %passthrough: vector<4xf64>) {
  %0 = ptr.gather %ptrs, %mask, %passthrough alignment = 8 :
    vector<4x!ptr.ptr<#llvm.address_space<3>>> -> vector<4xf64>
  ptr.scatter %value, %ptrs, %mask alignment = 8 : vector<4xf64>,
    vector<4x!ptr.ptr<#llvm.address_space<3>>>
  %1 = ptr.masked_load %ptr, %mask, %passthrough alignment = 8 :
    !ptr.ptr<#llvm.address_space<3>> -> vector<4xf64>
  ptr.masked_store %value, %ptr, %mask alignment = 8 : vector<4xf64>,
    !ptr.ptr<#llvm.address_space<3>>
  llvm.return
}
```
Translates to:
```llvm
define void @mixed_masked_ops_address_spaces(ptr addrspace(3) %0, <4 x ptr addrspace(3)> %1, <4 x i1> %2, <4 x double> %3, <4 x double> %4) {
  %6 = call <4 x double> @llvm.masked.gather.v4f64.v4p3(<4 x ptr addrspace(3)> %1, i32 8, <4 x i1> %2, <4 x double> %4)
  call void @llvm.masked.scatter.v4f64.v4p3(<4 x double> %3, <4 x ptr addrspace(3)> %1, i32 8, <4 x i1> %2)
  %7 = call <4 x double> @llvm.masked.load.v4f64.p3(ptr addrspace(3) %0, i32 8, <4 x i1> %2, <4 x double> %4)
  call void @llvm.masked.store.v4f64.p3(<4 x double> %3, ptr addrspace(3) %0, i32 8, <4 x i1> %2)
  ret void
}
```

---

Patch is 35.92 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/156368.diff


5 Files Affected:

- (modified) mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td (+236-2) 
- (modified) mlir/lib/Dialect/Ptr/IR/PtrDialect.cpp (+143-13) 
- (modified) mlir/lib/Target/LLVMIR/Dialect/Ptr/PtrToLLVMIRTranslation.cpp (+119) 
- (modified) mlir/test/Dialect/Ptr/ops.mlir (+70) 
- (modified) mlir/test/Target/LLVMIR/ptr.mlir (+114) 


``diff
diff --git a/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td 
b/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td
index 1c88efced950e..170513d57c7be 100644
--- a/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td
+++ b/mlir/include/mlir/Dialect/Ptr/IR/PtrOps.td
@@ -17,6 +17,46 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
 include "mlir/IR/OpAsmInterface.td"
 
+//===--===//
+// Common props
+//===--===//
+
+def AlignmentProp : OptionalProp;
+
+//===--===//
+// Common types
+//===--===//
+
+// A shaped value type with value semantics and rank.
+class Ptr_ShapedValueType allowedTypes, list preds = []> :
+  ShapedContainerType,
+/*descr=*/[{A shaped type with value semantics and rank.}],
+/*cppType=*/"::mlir::ShapedType">;
+
+// A shaped pointer type with value semantics and rank.
+class Ptr_ShapedPtrType : Ptr_ShapedValueType<[Ptr_PtrType], [HasRankPred]>;
+
+// A shaped value type of rank 1 of any element type.
+def Ptr_Any1DType :
+  Ptr_ShapedValueType<[AnyType], [HasAnyRankOfPred<[1]>]>;
+
+// A shaped value type of rank 1 of `i1` element type.
+def Ptr_Mask1DType :
+  Ptr_ShapedValueType<[I1], [HasAnyRankOfPred<[1]>]>;
+
+// A shaped value type of rank 1 of pointer element type.
+def Ptr_Ptr1DType :
+  Ptr_ShapedValueType<[Ptr_PtrType], [HasAnyRankOfPred<[1]>]>;
+
+// Gets the type ID of a type.  
+class TypeIDType :
+StrFunc<"$" # name # ".getType().getTypeID()">;
+
+// Checks that all type IDs match.
+class AllTypeIDsMatch names> :
+AllMatchSameOperatorTrait.result, "type IDs">;
+
 
//===--===//
 // FromPtrOp
 
//===--===//
@@ -56,6 +96,58 @@ def Ptr_FromPtrOp : Pointer_Op<"from_ptr", [
   let hasVerifier = 1;
 }
 
+//===--===//
+// GatherOp
+//===--===//
+
+def Ptr_GatherOp : Pointer_Op<"gather", [
+DeclareOpInterfaceMethods,
+TypesMatchWith<"result and mask must be compatible", "result", "mask", [{
+  ::llvm::cast($_self).clone(
+IntegerType::get($_self.getContext(), 1))
+}]>,
+AllTypesMatch<["result", "passthrough"]>,
+// Check the shapes are compatible and both use the same shaped container
+// type.
+AllShapesMatch<["result", "ptrs"]>, AllT

[llvm-branch-commits] [llvm] [AArch64][SME] Support agnostic ZA functions in the MachineSMEABIPass (PR #149064)

2025-09-02 Thread Sander de Smalen via llvm-branch-commits

https://github.com/sdesmalen-arm edited 
https://github.com/llvm/llvm-project/pull/149064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Correctly disassemble TSB instruction (PR #156362)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156362

>From fe7aa772ca06338ec5a42b2a3fe9e78a22559149 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:22:53 +0300
Subject: [PATCH] [AArch64] Correctly disassemble TSB instruction

TSB instruction has one operand, but the generated disassembler didn't
decode this operand. AArch64InstPrinter had a workaround for this.

This instruction can now be disassembled correctly.
---
 llvm/lib/Target/AArch64/AArch64SystemOperands.td   | 2 +-
 llvm/lib/Target/AArch64/CMakeLists.txt | 3 +--
 .../lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 7 ---
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td 
b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 1b0e90b0e0dc3..65b752ed40c90 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -362,7 +362,7 @@ def lookupTSBByName : SearchIndex {
   let Key = ["Name"];
 }
 
-def : TSB<"csync", 0>;
+def : TSB<"csync", 2>;
 
 
//===--===//
 // PRFM (prefetch) instruction options.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt 
b/llvm/lib/Target/AArch64/CMakeLists.txt
index 833ce48ea1d7a..79b56ea9cf850 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer 
-asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands
-  -ignore-fully-defined-operands)
+  -ignore-non-decodable-operands)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp 
b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 54b58e948daf2..2552ee3009338 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -365,13 +365,6 @@ void AArch64InstPrinter::printInst(const MCInst *MI, 
uint64_t Address,
 return;
   }
 
-  // Instruction TSB is specified as a one operand instruction, but 'csync' is
-  // not encoded, so for printing it is treated as a special case here:
-  if (Opcode == AArch64::TSB) {
-O << "\ttsb\tcsync";
-return;
-  }
-
   if (!PrintAliases || !printAliasInstr(MI, Address, STI, O))
 printInstruction(MI, Address, STI, O);
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fold 64-bit immediate into copy to AV class (PR #155615)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155615

>From 8961a0c7eb2c5fc7f93ad2d79e8dd2b6b3eab03a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 26 Aug 2025 23:53:57 +0900
Subject: [PATCH] AMDGPU: Fold 64-bit immediate into copy to AV class

This is in preparation for patches which will introduce more
copies to AV registers.
---
 llvm/lib/Target/AMDGPU/SIDefines.h| 10 +--
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 25 --
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp|  6 +-
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir | 85 ---
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir| 26 +++---
 5 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h 
b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
 for (unsigned MovOp :
  {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
   AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+  AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
   const MCInstrDesc &MovDesc = TII->get(MovOp);
   assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
   const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
   const TargetRegisterClass *MovSrcRC =
   TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-  if (UseSubReg)
-MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-  if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-break;
+  if (MovSrcRC) {
+if (UseSubReg)
+  MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+  break;
+
+// FIXME: This is mutating the instruction only and deferring the 
actual
+// fold of the immediate
+  } else {
+// For the _IMM_PSEUDO cases, there can be value restrictions on the
+// immediate to verify. Technically we should always verify this, but 
it
+// only matters for these concrete cases.
+// TODO: Handle non-imm case if it's useful.
+if (!OpToFold.isImm() ||
+!TII->isImmOperandLegal(MovDesc, 1, 
*OpToFold.getEffectiveImmVal()))
+  break;
+  }
 
   MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
   MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 887092182f7d1..2b187c641da1c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) 
{
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-// TODO: We could fold this, but it's a strange case. The immediate value
-// can't be directly folded into any real use. We would have to spread new
-// immediate legality checks around and only accept subregister extracts 
for
-// profitability.
+return true;
   default:
 return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir 
b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mi

[llvm-branch-commits] [llvm] release/21.x: [CMake][AIX] Enable CMP0182: Create shared library archives by default (#155686) (PR #156504)

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:

@hubert-reinterpretcast What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/156504
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/21.x: [libc++][AIX] Fixup problems with ABI list checking (#155643) (PR #156502)

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/156502

Backport b8456e2a9698aa927d7b3f9c38213f3219aa0498

Requested by: @amy-kwan

>From 31292e1a3428feffb46e5a853699b1c587bb04e7 Mon Sep 17 00:00:00 2001
From: David Tenty 
Date: Wed, 27 Aug 2025 18:28:26 -0400
Subject: [PATCH] [libc++][AIX] Fixup problems with ABI list checking (#155643)

There are some problems with our ABI list checking exposed by recent
compiler/cmake upgrades.

- For symcheck, there are typos in how the XCOFF magic numbers are defined:
we intended the second two digits to be a hex value, but our syntax doesn't
say that, so the check will never match a valid XCOFF file.
- AIX triples can have version numbers. Those need to be discarded when
looking for a libc++ ABI list, like we do for other targets.

(cherry picked from commit b8456e2a9698aa927d7b3f9c38213f3219aa0498)
---
 libcxx/lib/abi/CMakeLists.txt | 3 +++
 libcxx/utils/libcxx/sym_check/util.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libcxx/lib/abi/CMakeLists.txt b/libcxx/lib/abi/CMakeLists.txt
index 7c08bd06c50b2..8f277aad2dcd5 100644
--- a/libcxx/lib/abi/CMakeLists.txt
+++ b/libcxx/lib/abi/CMakeLists.txt
@@ -16,6 +16,9 @@ function(cxx_abi_list_identifier result triple abi_library 
abi_version unstable
   elseif("${triple}" MATCHES "freebsd")
 # Ignore the major and minor versions of freebsd targets.
 string(REGEX REPLACE "freebsd[0-9]+\\.[0-9]+" "freebsd" triple "${triple}")
+  elseif("${triple}" MATCHES "aix")
+# Ignore the V.R.M.F version string of aix targets.
+string(REGEX REPLACE "aix[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+" "aix" triple 
"${triple}")
   endif()
   list(APPEND abi_properties "${triple}")
   list(APPEND abi_properties "${abi_library}")
diff --git a/libcxx/utils/libcxx/sym_check/util.py 
b/libcxx/utils/libcxx/sym_check/util.py
index fc7ba4244ab5a..dbc886f29ddea 100644
--- a/libcxx/utils/libcxx/sym_check/util.py
+++ b/libcxx/utils/libcxx/sym_check/util.py
@@ -95,7 +95,7 @@ def is_xcoff_or_big_ar(filename):
 with open(filename, "rb") as f:
 magic_bytes = f.read(7)
 return (
-magic_bytes[:4] in [b"\x01DF", b"\x01F7"]  # XCOFF32  # XCOFF64
+magic_bytes[:2] in [b"\x01\xDF", b"\x01\xF7"]  # XCOFF32  # XCOFF64
 or magic_bytes == b""
 )
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64][SME] Support agnostic ZA functions in the MachineSMEABIPass (PR #149064)

2025-09-02 Thread Gaƫtan Bossu via llvm-branch-commits


@@ -250,6 +286,9 @@ struct MachineSMEABI : public MachineFunctionPass {
 SmallVector BundleStates;

gbossu wrote:

We are starting to accumulate a lot of state, which makes the code harder to
follow, since any member function can modify it instead of there being clear
ins/outs.

I know we discussed that already
[here](https://github.com/llvm/llvm-project/pull/149062#discussion_r2276758787),
but I feel it would be nice not to delay the refactoring too much. Even having
a first step that collects all the info in a struct would help. We could then
pass that info around by const ref to any function that needs it. If some info
needs to be mutable, then it should not be in the struct and should instead be
a clear in/out parameter.

Doing something like this would clearly decouple the "collection" phase from
the "let me correctly handle the state changes" phase.
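To make the suggestion concrete, here is a minimal, self-contained sketch of the pattern; the struct and function names are hypothetical and are not taken from the actual MachineSMEABIPass code:

```cpp
#include <vector>

// Hypothetical names for illustration only.
// Step 1: a "collection" phase gathers everything the later phases need.
struct BlockInfo {
  unsigned BlockID;
  bool NeedsZASave;
};

struct FunctionInfo {
  std::vector<BlockInfo> Blocks;
  bool HasAgnosticZA = false;
};

FunctionInfo collectInfo(unsigned NumBlocks, bool HasAgnosticZA) {
  FunctionInfo Info;
  Info.HasAgnosticZA = HasAgnosticZA;
  for (unsigned I = 0; I < NumBlocks; ++I)
    Info.Blocks.push_back({I, /*NeedsZASave=*/(I % 2 == 0)});
  return Info;
}

// Step 2: later phases take the collected info by const ref and return what
// they produce, instead of mutating shared member state.
std::vector<unsigned> planStateChanges(const FunctionInfo &Info) {
  std::vector<unsigned> BlocksNeedingSave;
  for (const BlockInfo &B : Info.Blocks)
    if (B.NeedsZASave || Info.HasAgnosticZA)
      BlocksNeedingSave.push_back(B.BlockID);
  return BlocksNeedingSave;
}

int main() {
  FunctionInfo Info = collectInfo(/*NumBlocks=*/4, /*HasAgnosticZA=*/false);
  return planStateChanges(Info).empty() ? 1 : 0;
}
```

The point is simply that the collection phase produces one bundle of facts, and everything downstream either reads it by const reference or declares its outputs explicitly.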

https://github.com/llvm/llvm-project/pull/149064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Add static methods for resource initialization and constructor from handle (PR #155866)

2025-09-02 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota edited 
https://github.com/llvm/llvm-project/pull/155866
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fix DPP combiner using isOperandLegal on incomplete inst (PR #155595)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155595

>From a6e2e0d83c2724f04313372df0deda5d1f889ed6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 27 Aug 2025 19:39:38 +0900
Subject: [PATCH 1/2] AMDGPU: Fix DPP combiner using isOperandLegal on
 incomplete inst

It is not safe to use isOperandLegal on an instruction that does
not have a complete set of operands. Unfortunately, the APIs are
not set up in a convenient way to speculatively check whether an operand
would be legal in a hypothetical instruction. Build all the operands
and then verify they are legal afterwards. This is clumsy; we should have
a more direct check for whether these operands give a legal instruction.

This seems to fix a missed optimization in the gfx11 test. The
fold was firing for gfx1150, but not gfx1100. Both should support
vop3 literals, so I'm not sure why it wasn't working before.
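
A rough, self-contained sketch of the build-everything-then-verify flow described above; the types here are plain stand-ins rather than MachineInstr/TII, so this only illustrates the control flow, not the actual combiner change in the diff below:

```cpp
#include <cstddef>
#include <optional>
#include <string>
#include <vector>

// Plain stand-ins for MachineInstr and the target's legality query.
struct Operand {
  std::string Name;
  long long Imm;
};

struct Inst {
  std::string Opcode;
  std::vector<Operand> Operands;
};

// Stand-in for TII->isOperandLegal(); the rule itself is arbitrary here.
bool isOperandLegal(const Inst &I, std::size_t Idx) {
  return I.Operands[Idx].Imm >= -64 && I.Operands[Idx].Imm <= 64;
}

// Build the complete candidate instruction first, then check every operand;
// bail out (returning nothing) if any operand is illegal, rather than
// querying legality on a half-built instruction.
std::optional<Inst> buildAndVerify(std::string Opcode,
                                   std::vector<Operand> Ops) {
  Inst Candidate{std::move(Opcode), std::move(Ops)};
  for (std::size_t I = 0; I < Candidate.Operands.size(); ++I)
    if (!isOperandLegal(Candidate, I))
      return std::nullopt;
  return Candidate;
}

int main() {
  auto Ok = buildAndVerify("v_add_dpp", {{"src0", 8}, {"src1", 16}});
  auto Bad = buildAndVerify("v_add_dpp", {{"src0", 8}, {"src1", 1024}});
  return (Ok.has_value() && !Bad.has_value()) ? 0 : 1;
}
```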
---
 llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp  | 36 ++-
 .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 18 --
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp 
b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 184929a5a50f6..81582613e93ae 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -250,7 +250,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
   ++NumOperands;
 }
 if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
-  if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
+  if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::sdst)) {
 DPPInst.add(*SDst);
 ++NumOperands;
   }
@@ -296,11 +296,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
 auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
 assert(Src0);
 int Src0Idx = NumOperands;
-if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
-  LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
-  Fail = true;
-  break;
-}
+
 DPPInst.add(*Src0);
 DPPInst->getOperand(NumOperands).setIsKill(false);
 ++NumOperands;
@@ -319,7 +315,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
 }
 auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
 if (Src1) {
-  int OpNum = NumOperands;
+  assert(AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1) &&
+ "dpp version of instruction missing src1");
   // If subtarget does not support SGPRs for src1 operand then the
   // requirements are the same as for src0. We check src0 instead because
   // pseudos are shared between subtargets and allow SGPR for src1 on all.
@@ -327,13 +324,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
 assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
getOperandSize(*DPPInst, NumOperands, *MRI) &&
"Src0 and Src1 operands should have the same size");
-OpNum = Src0Idx;
-  }
-  if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
-LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
-Fail = true;
-break;
   }
+
   DPPInst.add(*Src1);
   ++NumOperands;
 }
@@ -349,9 +341,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
 }
 auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
 if (Src2) {
-  if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
-  !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
-LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
+  if (!AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src2)) {
+LLVM_DEBUG(dbgs() << "  failed: dpp does not have src2\n");
 Fail = true;
 break;
   }
@@ -431,6 +422,19 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr 
&OrigMI,
 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
 DPPInst.addImm(CombBCZ ? 1 : 0);
+
+for (AMDGPU::OpName Op :
+ {AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2}) {
+  int OpIdx = AMDGPU::getNamedOperandIdx(DPPOp, Op);
+  if (OpIdx == -1)
+break;
+
+  if (!TII->isOperandLegal(*DPPInst, OpIdx)) {
+LLVM_DEBUG(dbgs() << "  failed: src operand is illegal\n");
+Fail = true;
+break;
+  }
+}
   } while (false);
 
   if (Fail) {
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir 
b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index fb20e72a77103..3725384e885ee 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,6 +1,6 @@
-# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 
-run-pass=gcn-dpp-combine -verify-machineinstrs -o - 

[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread Mehdi Amini via llvm-branch-commits


@@ -56,6 +96,58 @@ def Ptr_FromPtrOp : Pointer_Op<"from_ptr", [
   let hasVerifier = 1;
 }
 
+//===--===//
+// GatherOp
+//===--===//
+
+def Ptr_GatherOp : Pointer_Op<"gather", [
+DeclareOpInterfaceMethods,
+TypesMatchWith<"result and mask must be compatible", "result", "mask", [{
+  ::llvm::cast($_self).clone(
+IntegerType::get($_self.getContext(), 1))
+}]>,
+AllTypesMatch<["result", "passthrough"]>,
+// Check the shapes are compatible and both use the same shaped container
+// type.
+AllShapesMatch<["result", "ptrs"]>, AllTypeIDsMatch<["result", "ptrs"]>
+  ]> {
+  let summary = "Gather operation";
+  let description = [{
+The `gather` operation performs conditional loads from multiple memory
+locations specified by `ptrs` based on a mask `mask`. Elements of the
+result corresponding to masked-off lanes are taken from the passthrough
+operand.
+
+The mask operand is a shaped type of `i1` elements that must have the same
+shape as the result type.
+
+Examples:
+```mlir
+// Gather values from multiple memory locations
+%result = ptr.gather %ptrs, %mask, %passthrough :
+  vector<4x!ptr.ptr<#ptr.generic_space>> -> vector<4xf32>
+
+// Gather with alignment
+%result = ptr.gather %ptrs, %mask, %passthrough alignment = 8 :
+  vector<4x!ptr.ptr<#ptr.generic_space>> -> vector<4xf32>
+```
+  }];
+  let arguments = (ins Ptr_Ptr1DType:$ptrs,
+   Ptr_Mask1DType:$mask,
+   Ptr_Any1DType:$passthrough,
+   AlignmentProp:$alignment);
+  let results = (outs Ptr_Any1DType:$result);
+  let assemblyFormat = [{
+$ptrs `,` $mask `,` $passthrough (`alignment` `=` $alignment^)?
+attr-dict `:` qualified(type($ptrs)) `->` type($result)

joker-eph wrote:

What is the `qualified` trying to address here?

https://github.com/llvm/llvm-project/pull/156368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Remove post-decoding instruction mutations (PR #156364)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156364

>From b60b806cae55aa12437ff99ad2b1f6c3d3c8da34 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:30:01 +0300
Subject: [PATCH] [AArch64] Remove post-decoding instruction mutations

These instructions can now be fully decoded automatically.
---
 .../lib/Target/AArch64/AArch64InstrFormats.td | 30 +-
 llvm/lib/Target/AArch64/CMakeLists.txt|  3 +-
 .../Disassembler/AArch64Disassembler.cpp  | 49 
 .../MCTargetDesc/AArch64MCTargetDesc.h|  3 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 56 ++-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|  8 ++-
 6 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 8958ad129269c..b0b012214813d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1561,13 +1561,11 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
-let OperandNamespace = "AArch64" in {
-  let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
-defm VectorIndex0 : VectorIndex;
-defm VectorIndex032b : VectorIndex;
-  }
+let DecoderMethod = "DecodeZeroImm" in {
+  defm VectorIndex0 : VectorIndex;
+  defm VectorIndex032b : VectorIndex;
 }
 defm VectorIndex1 : VectorIndex;
@@ -1617,9 +1615,8 @@ def sme_elm_idx0_0 : Operand, TImmLeaf {
   let ParserMatchClass = Imm0_0Operand;
+  let DecoderMethod = "DecodeZeroImm";
   let PrintMethod = "printMatrixIndex";
-  let OperandNamespace = "AArch64";
-  let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
 def sme_elm_idx0_1 : Operand, TImmLeaf;
 
 def uimm0s2range : Operand, ImmLeaf {
+  let DecoderMethod = "DecodeZeroImm";
   let PrintMethod = "printImmRangeScale<2, 1>";
   let ParserMatchClass = UImm0s2RangeOperand;
-  let OperandNamespace = "AArch64";
-  let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
 
 def uimm0s4range : Operand, ImmLeaf {
+  let DecoderMethod = "DecodeZeroImm";
   let PrintMethod = "printImmRangeScale<4, 3>";
   let ParserMatchClass = UImm0s4RangeOperand;
-  let OperandNamespace = "AArch64";
-  let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
 
 def uimm1s2range : Operand, ImmLeaf {
+  bits<0> idx;
   let Inst{20-16} = 0b1;
 }
 def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b1;
 }
 def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00010;
 }
 def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00010;
 }
 def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00100;
 }
   }
@@ -8267,15 +8267,19 @@ multiclass UMov {
   // streaming mode.
   let Predicates = [HasNEONandIsStreamingSafe] in {
 def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b1;
 }
 def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00010;
 }
 def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00100;
 }
 def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b01000;
 }
 def : SIMDMovAlias<"mov", ".s",
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt 
b/llvm/lib/Target/AArch64/CMakeLists.txt
index 79b56ea9cf850..803943fd57c4d 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -7,8 +7,7 @@ tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 8c1e9f61693fb..fb4930ea1a755 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -130,6 +130,16 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned 
RegMask,
   return Success;
 }
 
+static void DecodeMPRRegisterClass(MCInst &Inst,
+

[llvm-branch-commits] [llvm] [RISCV] Remove post-decoding instruction adjustments (PR #156360)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156360

>From fd7e685e86a7f20048293d8bb9f5a60b613b3737 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:18:06 +0300
Subject: [PATCH] [RISCV] Remove post-decoding instruction adjustments

---
 llvm/lib/Target/RISCV/CMakeLists.txt  |  3 +--
 .../RISCV/Disassembler/RISCVDisassembler.cpp  | 23 +--
 llvm/lib/Target/RISCV/RISCVInstrFormatsC.td   |  1 -
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td  |  8 +--
 llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td   |  4 
 5 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt 
b/llvm/lib/Target/RISCV/CMakeLists.txt
index 720361dc3da5b..531238ae85029 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM RISCVGenCompressInstEmitter.inc 
-gen-compress-inst-emitter)
 tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred)
 tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler
-  --specialize-decoders-per-bitwidth
-  -ignore-non-decodable-operands)
+  --specialize-decoders-per-bitwidth)
 tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering)
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp 
b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index b1b7ea5246fda..e31b826d8e22a 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -46,8 +46,6 @@ class RISCVDisassembler : public MCDisassembler {
   raw_ostream &CStream) const override;
 
 private:
-  void addSPOperands(MCInst &MI) const;
-
   DecodeStatus getInstruction48(MCInst &Instr, uint64_t &Size,
 ArrayRef Bytes, uint64_t Address,
 raw_ostream &CStream) const;
@@ -196,6 +194,10 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst 
&Inst, uint32_t RegNo,
   return MCDisassembler::Success;
 }
 
+static void DecodeSPRegisterClass(MCInst &Inst, const MCDisassembler *Decoder) 
{
+  Inst.addOperand(MCOperand::createReg(RISCV::X2));
+}
+
 static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -600,15 +602,6 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, 
uint32_t Insn,
 
 #include "RISCVGenDisassemblerTables.inc"
 
-// Add implied SP operand for C.*SP compressed instructions. The SP operand
-// isn't explicitly encoded in the instruction.
-void RISCVDisassembler::addSPOperands(MCInst &MI) const {
-  const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
-  for (unsigned i = 0; i < MCID.getNumOperands(); i++)
-if (MCID.operands()[i].RegClass == RISCV::SPRegClassID)
-  MI.insert(MI.begin() + i, MCOperand::createReg(RISCV::X2));
-}
-
 namespace {
 
 struct DecoderListEntry {
@@ -774,12 +767,8 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst 
&MI, uint64_t &Size,
 LLVM_DEBUG(dbgs() << "Trying " << Entry.Desc << " table:\n");
 DecodeStatus Result =
 decodeInstruction(Entry.Table, MI, Insn, Address, this, STI);
-if (Result == MCDisassembler::Fail)
-  continue;
-
-addSPOperands(MI);
-
-return Result;
+if (Result != MCDisassembler::Fail)
+  return Result;
   }
 
   return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td 
b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 209c3fae63f45..4c7cd05723ac8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -54,7 +54,6 @@ class RVInst16CSS funct3, bits<2> opcode, dag outs, 
dag ins,
 : RVInst16 {
   bits<10> imm;
   bits<5> rs2;
-  bits<5> rs1;
 
   let Inst{15-13} = funct3;
   let Inst{12-7} = imm{5-0};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index bfc766dfc27e5..9fc73662d9704 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -230,13 +230,17 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 class CStackLoad funct3, string OpcodeStr,
  DAGOperand cls, DAGOperand opnd>
 : RVInst16CI;
+ OpcodeStr, "$rd, ${imm}(${rs1})"> {
+  bits<0> rs1;
+}
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 class CStackStore funct3, string OpcodeStr,
   DAGOperand cls, DAGOperand opnd>
 : RVInst16CSS;
+  OpcodeStr, "$rs2, ${imm}(${rs1})"> {
+  bits<0> rs1;
+}
 
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 class CLoad_r

[llvm-branch-commits] [llvm] AMDGPU: Handle true16 disassembly of ds_write_b8/b16 (PR #156406)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/156406
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Correctly disassemble TSB instruction (PR #156362)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156362

>From 8b1424a14a78d15c2ecb356cdc4df80a796a0050 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:22:53 +0300
Subject: [PATCH] [AArch64] Correctly disassemble TSB instruction

TSB instruction has one operand, but the generated disassembler didn't
decode this operand. AArch64InstPrinter had a workaround for this.

This instruction can now be disassembled correctly.
---
 llvm/lib/Target/AArch64/AArch64SystemOperands.td   | 2 +-
 llvm/lib/Target/AArch64/CMakeLists.txt | 3 +--
 .../lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 7 ---
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td 
b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 1b0e90b0e0dc3..65b752ed40c90 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -362,7 +362,7 @@ def lookupTSBByName : SearchIndex {
   let Key = ["Name"];
 }
 
-def : TSB<"csync", 0>;
+def : TSB<"csync", 2>;
 
 
//===--===//
 // PRFM (prefetch) instruction options.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt 
b/llvm/lib/Target/AArch64/CMakeLists.txt
index 833ce48ea1d7a..79b56ea9cf850 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer 
-asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands
-  -ignore-fully-defined-operands)
+  -ignore-non-decodable-operands)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp 
b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 54b58e948daf2..2552ee3009338 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -365,13 +365,6 @@ void AArch64InstPrinter::printInst(const MCInst *MI, 
uint64_t Address,
 return;
   }
 
-  // Instruction TSB is specified as a one operand instruction, but 'csync' is
-  // not encoded, so for printing it is treated as a special case here:
-  if (Opcode == AArch64::TSB) {
-O << "\ttsb\tcsync";
-return;
-  }
-
   if (!PrintAliases || !printAliasInstr(MI, Address, STI, O))
 printInstruction(MI, Address, STI, O);
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Remarks] Remove redundant size from StringRefs (NFC) (PR #156357)

2025-09-02 Thread Tobias Stadler via llvm-branch-commits

https://github.com/tobias-stadler created 
https://github.com/llvm/llvm-project/pull/156357

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread Mehdi Amini via llvm-branch-commits


@@ -17,6 +17,46 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
 include "mlir/IR/OpAsmInterface.td"
 
+//===--===//
+// Common props
+//===--===//
+
+def AlignmentProp : OptionalProp;
+
+//===--===//
+// Common types
+//===--===//
+
+// A shaped value type with value semantics and rank.
+class Ptr_ShapedValueType allowedTypes, list preds = []> :
+  ShapedContainerType,
+/*descr=*/[{A shaped type with value semantics and rank.}],
+/*cppType=*/"::mlir::ShapedType">;
+
+// A shaped pointer type with value semantics and rank.
+class Ptr_ShapedPtrType : Ptr_ShapedValueType<[Ptr_PtrType], [HasRankPred]>;
+
+// A shaped value type of rank 1 of any element type.
+def Ptr_Any1DType :
+  Ptr_ShapedValueType<[AnyType], [HasAnyRankOfPred<[1]>]>;
+
+// A shaped value type of rank 1 of `i1` element type.
+def Ptr_Mask1DType :
+  Ptr_ShapedValueType<[I1], [HasAnyRankOfPred<[1]>]>;
+
+// A shaped value type of rank 1 of pointer element type.
+def Ptr_Ptr1DType :
+  Ptr_ShapedValueType<[Ptr_PtrType], [HasAnyRankOfPred<[1]>]>;
+
+// Gets the type ID of a type.  
+class TypeIDType :
+StrFunc<"$" # name # ".getType().getTypeID()">;
+
+// Checks that all type IDs match.
+class AllTypeIDsMatch names> :

joker-eph wrote:

Nit: does not seem like a ptr-specific helper?

https://github.com/llvm/llvm-project/pull/156368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Correctly disassemble TSB instruction (PR #156362)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156362

>From c82858d3ba2267af6638e29d2601715b582a2968 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:22:53 +0300
Subject: [PATCH] [AArch64] Correctly disassemble TSB instruction

TSB instruction has one operand, but the generated disassembler didn't
decode this operand. AArch64InstPrinter had a workaround for this.

This instruction can now be disassembled correctly.
---
 llvm/lib/Target/AArch64/AArch64SystemOperands.td   | 2 +-
 llvm/lib/Target/AArch64/CMakeLists.txt | 3 +--
 .../lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 7 ---
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td 
b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 1b0e90b0e0dc3..65b752ed40c90 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -362,7 +362,7 @@ def lookupTSBByName : SearchIndex {
   let Key = ["Name"];
 }
 
-def : TSB<"csync", 0>;
+def : TSB<"csync", 2>;
 
 
//===--===//
 // PRFM (prefetch) instruction options.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt 
b/llvm/lib/Target/AArch64/CMakeLists.txt
index 833ce48ea1d7a..79b56ea9cf850 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer 
-asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands
-  -ignore-fully-defined-operands)
+  -ignore-non-decodable-operands)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp 
b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 54b58e948daf2..2552ee3009338 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -365,13 +365,6 @@ void AArch64InstPrinter::printInst(const MCInst *MI, 
uint64_t Address,
 return;
   }
 
-  // Instruction TSB is specified as a one operand instruction, but 'csync' is
-  // not encoded, so for printing it is treated as a special case here:
-  if (Opcode == AArch64::TSB) {
-O << "\ttsb\tcsync";
-return;
-  }
-
   if (!PrintAliases || !printAliasInstr(MI, Address, STI, O))
 printInstruction(MI, Address, STI, O);
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Hexagon] Remove post-decoding instruction adjustments (PR #156359)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156359

>From 26edc73312a06af78a0e3d31f86a2c5bcf97f734 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:16:14 +0300
Subject: [PATCH] [Hexagon] Remove post-decoding instruction adjustments

These instructions can now be fully decoded automatically.
---
 llvm/lib/Target/Hexagon/CMakeLists.txt|   3 +-
 .../Disassembler/HexagonDisassembler.cpp  |  62 ++---
 .../Target/Hexagon/HexagonDepInstrFormats.td  | 129 --
 llvm/lib/Target/Hexagon/HexagonOperands.td|  10 +-
 4 files changed, 46 insertions(+), 158 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt 
b/llvm/lib/Target/Hexagon/CMakeLists.txt
index b615536af03be..d758260a8ab5d 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -7,8 +7,7 @@ tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
-tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands)
+tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp 
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index de10092cbe3c8..f22afd1e15d74 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -173,6 +173,16 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned 
tmp,
 const MCDisassembler *Decoder);
 static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
 const MCDisassembler *Decoder);
+
+static void n1ConstDecoder(MCInst &MI, const MCDisassembler *Decoder) {
+  MCContext &Ctx = Decoder->getContext();
+  MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(-1, Ctx)));
+}
+
+static void sgp10ConstDecoder(MCInst &MI, const MCDisassembler *Decoder) {
+  MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+}
+
 #include "HexagonDepDecoders.inc"
 #include "HexagonGenDisassemblerTables.inc"
 
@@ -349,21 +359,6 @@ void HexagonDisassembler::remapInstruction(MCInst &Instr) 
const {
   }
 }
 
-static void adjustDuplex(MCInst &MI, MCContext &Context) {
-  switch (MI.getOpcode()) {
-  case Hexagon::SA1_setin1:
-MI.insert(MI.begin() + 1,
-  MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
-break;
-  case Hexagon::SA1_dec:
-MI.insert(MI.begin() + 2,
-  MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
-break;
-  default:
-break;
-  }
-}
-
 DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
ArrayRef Bytes,
uint64_t Address,
@@ -468,12 +463,10 @@ DecodeStatus 
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 CurrentExtender = TmpExtender;
 if (Result != DecodeStatus::Success)
   return DecodeStatus::Fail;
-adjustDuplex(*MILow, getContext());
 Result = decodeInstruction(
 DecodeHigh, *MIHigh, (Instruction >> 16) & 0x1fff, Address, this, STI);
 if (Result != DecodeStatus::Success)
   return DecodeStatus::Fail;
-adjustDuplex(*MIHigh, getContext());
 MCOperand OPLow = MCOperand::createInst(MILow);
 MCOperand OPHigh = MCOperand::createInst(MIHigh);
 MI.addOperand(OPLow);
@@ -499,41 +492,6 @@ DecodeStatus 
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 
   }
 
-  switch (MI.getOpcode()) {
-  case Hexagon::J4_cmpeqn1_f_jumpnv_nt:
-  case Hexagon::J4_cmpeqn1_f_jumpnv_t:
-  case Hexagon::J4_cmpeqn1_fp0_jump_nt:
-  case Hexagon::J4_cmpeqn1_fp0_jump_t:
-  case Hexagon::J4_cmpeqn1_fp1_jump_nt:
-  case Hexagon::J4_cmpeqn1_fp1_jump_t:
-  case Hexagon::J4_cmpeqn1_t_jumpnv_nt:
-  case Hexagon::J4_cmpeqn1_t_jumpnv_t:
-  case Hexagon::J4_cmpeqn1_tp0_jump_nt:
-  case Hexagon::J4_cmpeqn1_tp0_jump_t:
-  case Hexagon::J4_cmpeqn1_tp1_jump_nt:
-  case Hexagon::J4_cmpeqn1_tp1_jump_t:
-  case Hexagon::J4_cmpgtn1_f_jumpnv_nt:
-  case Hexagon::J4_cmpgtn1_f_jumpnv_t:
-  case Hexagon::J4_cmpgtn1_fp0_jump_nt:
-  case Hexagon::J4_cmpgtn1_fp0_jump_t:
-  case Hexagon::J4_cmpgtn1_fp1_jump_nt:
-  case Hexagon::J4_cmpgtn1_fp1_jump_t:
-  case Hexagon::J4_cmpgtn1_t_jumpnv_nt:
-  case Hexagon::J4_cmpgtn1_t_jumpnv_t:
-  case Hexagon::J4_cmpgtn1_tp0_jump_nt:
-  case Hexagon::J4_cmpgtn1_tp0_jump_t:
-  case Hexagon::J4_cmpgtn1_tp1_jump_nt:
-  case Hexagon::J4_cmpgtn1_tp1_jump_t:

[llvm-branch-commits] [clang-tools-extra] [clangd] Show type hint for simple cases of dependent 'auto' (PR #156284)

2025-09-02 Thread Nathan Ridge via llvm-branch-commits


@@ -633,13 +633,30 @@ class InlayHintVisitor : public 
RecursiveASTVisitor {
 }
 
 if (auto *AT = D->getType()->getContainedAutoType()) {
-  if (AT->isDeduced() && !D->getType()->isDependentType()) {
-// Our current approach is to place the hint on the variable
-// and accordingly print the full type
-// (e.g. for `const auto& x = 42`, print `const int&`).
-// Alternatively, we could place the hint on the `auto`
-// (and then just print the type deduced for the `auto`).
-addTypeHint(D->getLocation(), D->getType(), /*Prefix=*/": ");
+  if (AT->isDeduced()) {
+QualType T;
+// If the type is dependent, HeuristicResolver *may* be able to
+// resolve it to something that's useful to print. In other
+// cases, it can't, and the resulting type would just be printed
+// as "", in which case don't hint it at all.
+if (D->getType()->isDependentType()) {
+  if (D->hasInit()) {
+QualType Resolved = Resolver->resolveExprToType(D->getInit());
+if (Resolved != AST.DependentTy) {

HighCommander4 wrote:

`TemplateTypeParmType` returns true for `isDependentType()`, so it would cause 
us to not issue a hint in the example from the previous comment.

https://github.com/llvm/llvm-project/pull/156284
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RISCV] Remove post-decoding instruction adjustments (PR #156360)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

s-barannikov wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/156360
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#156364**
* **#156363**
* **#156362**
* **#156361**
* **#156360** šŸ‘ˆ (this PR)
* **#156359**
* **#156358**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/156360
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] Update cgdata-* tests to not use subshells (PR #156533)

2025-09-02 Thread Aiden Grossman via llvm-branch-commits

https://github.com/boomanaiden154 created 
https://github.com/llvm/llvm-project/pull/156533

Subshells are not supported in the lit internal shell. We can remove
them by constructing the sed commands directly inside a separate file.

Towards #102700.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Change FLAT classes to use RegisterOperand parameters (PR #156581)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/156581

This will make it easier to precisely express operand constraints
without having the implicit getLdStRegisterOperand at the bottom.
Also prunes out the use of AV classes in some instructions where AGPRs
are not relevant.

>From b85bf0819fe5a8eafb96a09cb47062068dc41961 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 3 Sep 2025 12:06:53 +0900
Subject: [PATCH] AMDGPU: Change FLAT classes to use RegisterOperand parameters

This will make it easier to precisely express operand constraints
without having the implicit getLdStRegisterOperand at the bottom.
Also prunes out the use of AV classes in some instructions where AGPRs
are not relevant.
---
 llvm/lib/Target/AMDGPU/FLATInstructions.td | 421 ++---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td   |   1 +
 2 files changed, 208 insertions(+), 214 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td 
b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 19f95c5ac4c37..2bbd4dde7df6e 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -215,11 +215,10 @@ class GlobalSaddrTable  {
 // same encoding value as exec_hi, so it isn't possible to use that if
 // saddr is 32-bit (which isn't handled here yet).
 class FLAT_Load_Pseudo<
-string opName, RegisterClass regClass, bit HasTiedOutput = 0,
+string opName, RegisterOperand vdata_op, bit HasTiedOutput = 0,
 bit HasSaddr = 0, bit EnableSaddr = 0>
 : FLAT_Pseudo {
 
-  defvar vdata_op = getLdStRegisterOperand.ret;
   let OutOperandList = (outs vdata_op:$vdst);
   let InOperandList = !con(
 !if(EnableSaddr,
@@ -242,7 +241,7 @@ class FLAT_Load_Pseudo<
   let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
 }
 
-multiclass FLAT_Flat_Load_Pseudo {
+multiclass FLAT_Flat_Load_Pseudo {
   def "" : FLAT_Load_Pseudo,
 GlobalSaddrTable<0, opName>;
   let OtherPredicates = [HasFlatGVSMode] in
@@ -251,19 +250,19 @@ multiclass FLAT_Flat_Load_Pseudo {
-  defm "" : FLAT_Flat_Load_Pseudo;
+  defm "" : FLAT_Flat_Load_Pseudo;
   let True16Predicate = UseRealTrue16Insts in
-defm _t16 : FLAT_Flat_Load_Pseudo, 
True16D16Table;
+defm _t16 : FLAT_Flat_Load_Pseudo, 
True16D16Table;
 }
 
-class FLAT_Store_Pseudo  : FLAT_Pseudo<
   opName,
   (outs),
   !con(
 !if(EnableSaddr,
-  (ins VGPR_32:$vaddr, getLdStRegisterOperand.ret:$vdata, 
SReg_64_XEXEC_XNULL:$saddr),
-  (ins VReg_64:$vaddr, getLdStRegisterOperand.ret:$vdata)),
+  (ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64_XEXEC_XNULL:$saddr),
+  (ins VReg_64:$vaddr, vdataClass:$vdata)),
   (ins flat_offset:$offset, CPol_0:$cpol)),
   " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), 
"")#"$offset$cpol"> {
   let mayLoad  = 0;
@@ -273,7 +272,7 @@ class FLAT_Store_Pseudo  {
+multiclass FLAT_Flat_Store_Pseudo {
   def "" : FLAT_Store_Pseudo,
 GlobalSaddrTable<0, opName>;
   let OtherPredicates = [HasFlatGVSMode] in
@@ -282,20 +281,21 @@ multiclass FLAT_Flat_Store_Pseudo {
 }
 
 multiclass FLAT_Flat_Store_Pseudo_t16 {
-  defm "" : FLAT_Flat_Store_Pseudo;
+  defm "" : FLAT_Flat_Store_Pseudo;
 
   defvar Name16 = opName#"_t16";
   let OtherPredicates = [HasFlatGVSMode, HasTrue16BitInsts] in {
-def _t16 : FLAT_Store_Pseudo,
+def _t16 : FLAT_Store_Pseudo,
   GlobalSaddrTable<0, Name16>,
   True16D16Table;
-   def _SADDR_t16 : FLAT_Store_Pseudo,
+   def _SADDR_t16 : FLAT_Store_Pseudo,
   GlobalSaddrTable<1, Name16>,
   True16D16Table;
   }
 }
 
-multiclass FLAT_Global_Load_Pseudo {
+multiclass FLAT_Global_Load_Pseudo {
   let is_flat_global = 1 in {
 def "" : FLAT_Load_Pseudo,
   GlobalSaddrTable<0, opName>;
@@ -305,21 +305,21 @@ multiclass FLAT_Global_Load_Pseudo {
-  defm "" : FLAT_Global_Load_Pseudo;
+  defm "" : FLAT_Global_Load_Pseudo;
 
   defvar Name16 = opName#"_t16";
   let OtherPredicates = [HasTrue16BitInsts],
   SubtargetPredicate = HasFlatGlobalInsts, is_flat_global = 1 in {
-def _t16 : FLAT_Load_Pseudo,
+def _t16 : FLAT_Load_Pseudo,
   GlobalSaddrTable<0, Name16>,
   True16D16Table;
-def _SADDR_t16 : FLAT_Load_Pseudo,
+def _SADDR_t16 : FLAT_Load_Pseudo,
   GlobalSaddrTable<1, Name16>,
   True16D16Table;
   }
 }
 
-class FLAT_Global_Load_AddTid_Pseudo  : FLAT_Pseudo<
   opName,
   (outs regClass:$vdst),
@@ -337,7 +337,7 @@ class FLAT_Global_Load_AddTid_Pseudo  {
   def "" : FLAT_Global_Load_AddTid_Pseudo,
 GlobalSaddrTable<0, opName>;
@@ -345,7 +345,7 @@ multiclass FLAT_Global_Load_AddTid_Pseudo;
 }
 
-multiclass FLAT_Global_Store_Pseudo {
+multiclass FLAT_Global_Store_Pseudo {
   let is_flat_global = 1 in {
 def "" : FLAT_Store_Pseudo,
   GlobalSaddrTable<0, opName>;
@@ -355,15 +355,15 @@ multiclass FLAT_Global_Store_Pseudo {
 }
 
 multiclass FLAT_Global_Store_Pseudo_t16 {
-  defm "" : FLAT_Global_Store_Pseudo;
+  defm "" : FLAT_Global_Store_Pseudo;
 

[llvm-branch-commits] [llvm] AMDGPU: Change DS classes to use RegisterOperand parameters (PR #156580)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/156580

Start stripping out the uses of getLdStRegisterOperand. This
added a confusing level of indirection where the class at the
definition point was not the actual class used. This was also
pulling in the AV class usage for targets where it isn't
relevant. This was also inflexible for special cases.

Also fixes using default arguments which only served to wrap the
class argument in a RegisterOperand.

This should be done for all the memory instructions.
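
A minimal TableGen sketch of the pattern this series moves toward (the multiclass name and the `EXAMPLE_LOAD_DWORD` instantiation below are illustrative, not taken from the patch; only `FLAT_Load_Pseudo`, `GlobalSaddrTable`, and `VGPROp_32` appear in the series itself):

```tablegen
// The instruction class takes the RegisterOperand directly, so the class
// named at the definition site is the class that is actually used.
multiclass Example_Load_Pseudo<string opName, RegisterOperand vdata_op> {
  def "" : FLAT_Load_Pseudo<opName, vdata_op>,
           GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Load_Pseudo<opName, vdata_op, 0, 1, 1>,
               GlobalSaddrTable<1, opName>;
}

// Instantiation sites now spell out the operand class instead of going
// through getLdStRegisterOperand:
defm EXAMPLE_LOAD_DWORD : Example_Load_Pseudo<"example_load_dword", VGPROp_32>;
```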

>From fcc371c8e749152ea4717c66c61082c34dade856 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 3 Sep 2025 10:58:25 +0900
Subject: [PATCH] AMDGPU: Change DS classes to use RegisterOperand parameters

Start stripping out the uses of getLdStRegisterOperand. This
added a confusing level of indirection where the class at the
definition point was not the actual class used. This was also
pulling in the AV class usage for targets where it isn't
relevant. This was also inflexible for special cases.

Also fixes using default arguments which only served to wrap the
class argument in a RegisterOperand.

This should be done for all the memory instructions.
---
 llvm/lib/Target/AMDGPU/DSInstructions.td | 319 ---
 llvm/lib/Target/AMDGPU/SIInstrInfo.td|  11 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td |  20 ++
 3 files changed, 197 insertions(+), 153 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 7552326c39468..960f3282fb6f6 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -130,10 +130,10 @@ class DS_Real  
:
 
 // DS Pseudo instructions
 
-class DS_0A1D_NORET
+class DS_0A1D_NORET
 : DS_Pseudo.ret:$data0, Offset:$offset, gds:$gds),
+  (ins rc:$data0, Offset:$offset, gds:$gds),
   " $data0$offset$gds"> {
 
   let has_addr = 0;
@@ -141,10 +141,10 @@ class DS_0A1D_NORET
   let has_vdst = 0;
 }
 
-class DS_1A1D_NORET
+class DS_1A1D_NORET
 : DS_Pseudo.ret:$data0, Offset:$offset, 
gds:$gds),
+  (ins VGPR_32:$addr, rc:$data0, Offset:$offset, gds:$gds),
   " $addr, $data0$offset$gds"> {
 
   let has_data1 = 0;
@@ -152,7 +152,7 @@ class DS_1A1D_NORET
   let IsAtomicNoRet = 1;
 }
 
-multiclass DS_1A1D_NORET_mc {
+multiclass DS_1A1D_NORET_mc {
   def "" : DS_1A1D_NORET;
 
   let has_m0_read = 0 in {
@@ -160,23 +160,23 @@ multiclass DS_1A1D_NORET_mc {
   }
 }
 
-multiclass DS_1A1D_NORET_t16
+multiclass DS_1A1D_NORET_t16
 : DS_1A1D_NORET_mc {
   let has_m0_read = 0 in {
 let True16Predicate = UseRealTrue16Insts in {
-  def "_t16" : DS_1A1D_NORET,
+  def "_t16" : DS_1A1D_NORET,
 True16D16Table;
 }
   }
 }
 
-multiclass DS_1A1D_NORET_mc_gfx9 {
+multiclass DS_1A1D_NORET_mc_gfx9 {
   let has_m0_read = 0 in {
 def "" : DS_1A1D_NORET;
   }
 }
 
-class DS_1A2D_NORET
+class DS_1A2D_NORET
 : DS_Pseudo
   let IsAtomicNoRet = 1;
 }
 
-multiclass DS_1A2D_NORET_mc {
+// DS_xx2D cases should only be instantiated with VGPR operand classes.
+multiclass DS_1A2D_NORET_mc {
+  assert OperandIsVGPR.ret,
+ "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_NORET;
 
   let has_m0_read = 0 in {
@@ -194,12 +198,12 @@ multiclass DS_1A2D_NORET_mc {
 
 // All data operands are replaced with AGPRs in this form.
 let SubtargetPredicate = isGFX90APlus in {
-  def _agpr : DS_1A2D_NORET.ret>;
+  def _agpr : DS_1A2D_NORET.ret>;
 }
   }
 }
 
-class DS_1A2D_Off8_NORET 
+class DS_1A2D_Off8_NORET 
 : DS_Pseudo
   let has_offset = 0;
 }
 
-multiclass DS_1A2D_Off8_NORET_mc  {
+multiclass DS_1A2D_Off8_NORET_mc  {
+  assert OperandIsVGPR.ret,
+ "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_Off8_NORET;
 
   let has_m0_read = 0 in {
 def _gfx9 : DS_1A2D_Off8_NORET;
 
 let SubtargetPredicate = isGFX90APlus in {
-  def _agpr : DS_1A2D_Off8_NORET.ret>;
+  def _agpr : DS_1A2D_Off8_NORET.ret>;
 }
   }
 }
 
-class DS_0A1D_RET_GDS.ret,
-  RegisterOperand src_op = getLdStRegisterOperand.ret>
+class DS_0A1D_RET_GDS
 : DS_Pseudo.ret>
+class DS_1A1D_RET 
 : DS_Pseudo {
+multiclass DS_1A1D_RET_mc  {
   def "" : DS_1A1D_RET;
 
   let has_m0_read = 0 in {
@@ -256,15 +261,15 @@ multiclass DS_1A1D_RET_mc  {
   }
 }
 
-multiclass DS_1A1D_RET_mc_gfx9  {
+multiclass DS_1A1D_RET_mc_gfx9  
{
   let has_m0_read = 0 in {
 def "" : DS_1A1D_RET;
   }
 }
 
 class DS_1A2D_RET: DS_Pseudo: DS_Pseudo {
@@ -273,20 +278,23 @@ class DS_1A2D_RET {
+  RegisterOperand dst_rc = VGPROp_32,
+  RegisterOperand src_rc = dst_rc> {
+  assert !and(OperandIsVGPR.ret, OperandIsVGPR.ret),
+ "DS with 2 data operands should be declared with VGPRs";
+
   def "" : DS_1A2D_RET;
 
   let has_m0_read = 0 in {
 def _gfx9 : DS_1A2D_RET;
-def _agpr : DS_1A2D_RET.ret,
-getEquivalentAGPRCl

[llvm-branch-commits] [llvm] AMDGPU: Change FLAT classes to use RegisterOperand parameters (PR #156581)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests

* **#156581** šŸ‘ˆ (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/156581)
* **#156580** (https://app.graphite.dev/github/pr/llvm/llvm-project/156580)
* **#156420** (https://app.graphite.dev/github/pr/llvm/llvm-project/156420)
* **#156419** (https://app.graphite.dev/github/pr/llvm/llvm-project/156419)
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/156581
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Change DS classes to use RegisterOperand parameters (PR #156580)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests

* **#156581** (https://app.graphite.dev/github/pr/llvm/llvm-project/156581)
* **#156580** šŸ‘ˆ (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/156580)
* **#156420** (https://app.graphite.dev/github/pr/llvm/llvm-project/156420)
* **#156419** (https://app.graphite.dev/github/pr/llvm/llvm-project/156419)
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/156580
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Change FLAT classes to use RegisterOperand parameters (PR #156581)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/156581
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Change DS classes to use RegisterOperand parameters (PR #156580)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/156580
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread Utkarsh Saxena via llvm-branch-commits

https://github.com/usx95 updated 
https://github.com/llvm/llvm-project/pull/154316

>From 0be0b1335e8855e4e128edcd4661888c299d1287 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena 
Date: Tue, 19 Aug 2025 12:00:53 +
Subject: [PATCH] Identify DeclRefExpr as a use of an origin

---
 clang/lib/Analysis/LifetimeSafety.cpp | 90 +--
 .../Sema/warn-lifetime-safety-dataflow.cpp| 26 ++
 2 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/clang/lib/Analysis/LifetimeSafety.cpp 
b/clang/lib/Analysis/LifetimeSafety.cpp
index 9397c530a9af2..770840870a585 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -119,6 +119,7 @@ class OriginManager {
 return AllOrigins.back();
   }
 
+  // TODO: Mark this method as const once we remove the call to getOrCreate.
   OriginID get(const Expr &E) {
 // Origin of DeclRefExpr is that of the declaration it refers to.
 if (const auto *DRE = dyn_cast(&E))
@@ -315,22 +316,28 @@ class ReturnOfOriginFact : public Fact {
 };
 
 class UseFact : public Fact {
-  OriginID UsedOrigin;
   const Expr *UseExpr;
+  // True if this use is a write operation (e.g., left-hand side of 
assignment).
+  // Write operations are exempted from use-after-free checks.
+  bool IsWritten = false;
 
 public:
   static bool classof(const Fact *F) { return F->getKind() == Kind::Use; }
 
-  UseFact(OriginID UsedOrigin, const Expr *UseExpr)
-  : Fact(Kind::Use), UsedOrigin(UsedOrigin), UseExpr(UseExpr) {}
+  UseFact(const Expr *UseExpr) : Fact(Kind::Use), UseExpr(UseExpr) {}
 
-  OriginID getUsedOrigin() const { return UsedOrigin; }
+  OriginID getUsedOrigin(const OriginManager &OM) const {
+// TODO: Remove const cast and make OriginManager::get as const.
+return const_cast(OM).get(*UseExpr);
+  }
   const Expr *getUseExpr() const { return UseExpr; }
+  void markAsWritten() { IsWritten = true; }
+  bool isWritten() const { return IsWritten; }
 
   void dump(llvm::raw_ostream &OS, const OriginManager &OM) const override {
 OS << "Use (";
-OM.dump(getUsedOrigin(), OS);
-OS << ")\n";
+OM.dump(getUsedOrigin(OM), OS);
+OS << " " << (isWritten() ? "Write" : "Read") << ")\n";
   }
 };
 
@@ -428,6 +435,8 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
 addAssignOriginFact(*VD, *InitExpr);
   }
 
+  void VisitDeclRefExpr(const DeclRefExpr *DRE) { handleUse(DRE); }
+
   void VisitCXXNullPtrLiteralExpr(const CXXNullPtrLiteralExpr *N) {
 /// TODO: Handle nullptr expr as a special 'null' loan. Uninitialized
 /// pointers can use the same type of loan.
@@ -461,10 +470,6 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
   }
 }
   }
-} else if (UO->getOpcode() == UO_Deref) {
-  // This is a pointer use, like '*p'.
-  OriginID OID = FactMgr.getOriginMgr().get(*UO->getSubExpr());
-  CurrentBlockFacts.push_back(FactMgr.createFact(OID, UO));
 }
   }
 
@@ -479,20 +484,13 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
   }
 
   void VisitBinaryOperator(const BinaryOperator *BO) {
-if (BO->isAssignmentOp()) {
-  const Expr *LHSExpr = BO->getLHS();
-  const Expr *RHSExpr = BO->getRHS();
-
-  // We are interested in assignments like `ptr1 = ptr2` or `ptr = &var`
-  // LHS must be a pointer/reference type that can be an origin.
-  // RHS must also represent an origin (either another pointer/ref or an
-  // address-of).
-  if (const auto *DRE_LHS = dyn_cast(LHSExpr))
-if (const auto *VD_LHS =
-dyn_cast(DRE_LHS->getDecl()->getCanonicalDecl());
-VD_LHS && hasOrigin(VD_LHS->getType()))
-  addAssignOriginFact(*VD_LHS, *RHSExpr);
-}
+if (BO->isAssignmentOp())
+  handleAssignment(BO->getLHS(), BO->getRHS());
+  }
+
+  void VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) {
+if (OCE->isAssignmentOp() && OCE->getNumArgs() == 2)
+  handleAssignment(OCE->getArg(0), OCE->getArg(1));
   }
 
   void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) {
@@ -559,9 +557,48 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
 return false;
   }
 
+  void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
+// Find the underlying variable declaration for the left-hand side.
+if (const auto *DRE_LHS =
+dyn_cast(LHSExpr->IgnoreParenImpCasts())) {
+  markUseAsWrite(DRE_LHS);
+  if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl()))
+if (hasOrigin(VD_LHS->getType()))
+  // We are interested in assignments like `ptr1 = ptr2` or `ptr = 
&var`
+  // LHS must be a pointer/reference type that can be an origin.
+  // RHS must also represent an origin (either another pointer/ref or 
an
+  // address-of).
+  addAssignOriginFact(*VD_LHS, *RHSExpr);
+}
+  }
+
+  // A DeclRefExpr is a use of the referenced decl. It is checked for
+  // use-aft

[llvm-branch-commits] [llvm] Add deactivation symbol operand to ConstantPtrAuth. (PR #133537)

2025-09-02 Thread Peter Collingbourne via llvm-branch-commits

https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/133537

>From e728f3444624a5f47f0af84c21fb3a584f3e05b7 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne 
Date: Fri, 1 Aug 2025 17:27:41 -0700
Subject: [PATCH] Add verifier check

Created using spr 1.3.6-beta.1
---
 llvm/lib/IR/Verifier.cpp   | 5 +
 llvm/test/Verifier/ptrauth-constant.ll | 6 ++
 2 files changed, 11 insertions(+)
 create mode 100644 llvm/test/Verifier/ptrauth-constant.ll

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3ff9895e161c4..3478c2c450ae7 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2627,6 +2627,11 @@ void Verifier::visitConstantPtrAuth(const 
ConstantPtrAuth *CPA) {
 
   Check(CPA->getDiscriminator()->getBitWidth() == 64,
 "signed ptrauth constant discriminator must be i64 constant integer");
+
+  Check(isa(CPA->getDeactivationSymbol()) ||
+CPA->getDeactivationSymbol()->isNullValue(),
+"signed ptrauth constant deactivation symbol must be a global value "
+"or null");
 }
 
 bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
diff --git a/llvm/test/Verifier/ptrauth-constant.ll 
b/llvm/test/Verifier/ptrauth-constant.ll
new file mode 100644
index 0..fdd6352cf8469
--- /dev/null
+++ b/llvm/test/Verifier/ptrauth-constant.ll
@@ -0,0 +1,6 @@
+; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
+
+@g = external global i8
+
+; CHECK: signed ptrauth constant deactivation symbol must be a global value or null
+@ptr = global ptr ptrauth (ptr @g, i32 0, i64 65535, ptr null, ptr inttoptr 
(i64 16 to ptr))

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add agpr variants of multi-data DS instructions (PR #156420)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/156420
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Use xvperm.w for cross-lane access within a single vector (PR #151634)

2025-09-02 Thread via llvm-branch-commits

zhaoqi5 wrote:

Ping.

https://github.com/llvm/llvm-project/pull/151634
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AVR] Remove workarounds for instructions using Z register (PR #156361)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156361

>From 6ff221b947f637f100c1e8db3d2df2651beff3af Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:18:57 +0300
Subject: [PATCH] [AVR] Remove workarounds for instructions using Z register

The generated disassembler can now correctly decode these instructions.
---
 llvm/lib/Target/AVR/AVRInstrFormats.td  |  1 +
 llvm/lib/Target/AVR/AVRInstrInfo.td |  4 +++-
 llvm/lib/Target/AVR/CMakeLists.txt  |  3 +--
 .../Target/AVR/Disassembler/AVRDisassembler.cpp |  5 +
 .../Target/AVR/MCTargetDesc/AVRInstPrinter.cpp  | 17 -
 5 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td 
b/llvm/lib/Target/AVR/AVRInstrFormats.td
index e1e65b56370cc..eb4daf74545b0 100644
--- a/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -79,6 +79,7 @@ class FRdRr opcode, bits<2> f, dag outs, dag ins, 
string asmstr,
 
//===--===//
 class FZRd t, dag outs, dag ins, string asmstr, list pattern>
 : AVRInst16 {
+  bits<0> z;
   bits<5> rd;
 
   let Inst{15 - 12} = 0b1001;
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td 
b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 958e1383acef2..70efda46093c4 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1230,7 +1230,9 @@ let Uses = [R1, R0] in {
 
   let Defs = [R31R30] in 
   def SPMZPi : F16<0b100101011000, (outs), (ins ZREG:$z), "spm $z+", []>,
-   Requires<[HasSPMX]>;
+   Requires<[HasSPMX]> {
+bits<0> z;
+  }
 }
 
 // Read data from IO location operations.
diff --git a/llvm/lib/Target/AVR/CMakeLists.txt 
b/llvm/lib/Target/AVR/CMakeLists.txt
index 2d5cb7e048778..a31c545f48ba3 100644
--- a/llvm/lib/Target/AVR/CMakeLists.txt
+++ b/llvm/lib/Target/AVR/CMakeLists.txt
@@ -6,8 +6,7 @@ tablegen(LLVM AVRGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM AVRGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM AVRGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AVRGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AVRGenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands)
+tablegen(LLVM AVRGenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM AVRGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM AVRGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM AVRGenRegisterInfo.inc -gen-register-info)
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp 
b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 56b3cf7f88e2a..98fd0ac3d44ee 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -91,6 +91,11 @@ static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, 
unsigned RegNo,
   return MCDisassembler::Success;
 }
 
+static void DecodeZREGRegisterClass(MCInst &Inst,
+const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createReg(AVR::R31R30));
+}
+
 static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
  const MCDisassembler *Decoder) {
   unsigned addr = 0;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp 
b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 481219164a0f9..5adffeed04bda 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -101,23 +101,6 @@ const char 
*AVRInstPrinter::getPrettyRegisterName(MCRegister Reg,
 void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   raw_ostream &O) {
   const MCOperandInfo &MOI = this->MII.get(MI->getOpcode()).operands()[OpNo];
-  if (MOI.RegClass == AVR::ZREGRegClassID) {
-// Special case for the Z register, which sometimes doesn't have an operand
-// in the MCInst.
-O << "Z";
-return;
-  }
-
-  if (OpNo >= MI->size()) {
-// Not all operands are correctly disassembled at the moment. This means
-// that some machine instructions won't have all the necessary operands
-// set.
-// To avoid asserting, print <unknown> instead until the necessary support
-// has been implemented.
-O << "<unknown>";
-return;
-  }
-
   const MCOperand &Op = MI->getOperand(OpNo);
 
   if (Op.isReg()) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add tests for every mfma intrinsic v-to-a mapping (PR #153026)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/153026

>From 46f04e8aefd98d782131616030857eb51dc8b1fb Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 11 Aug 2025 19:12:49 +0900
Subject: [PATCH 1/2] AMDGPU: Add tests for every mfma intrinsic v-to-a mapping

Make sure the MFMA VGPR to AGPR InstrMapping table is complete.
I think I got everything, except the full cross product of input
types with the mfma scale intrinsics. Also makes sure we have
coverage for smfmac and mfma_scale cases.
---
 .../rewrite-vgpr-mfma-to-agpr.gfx90a.ll   | 141 +++
 .../rewrite-vgpr-mfma-to-agpr.gfx950.ll   | 664 ++
 .../AMDGPU/rewrite-vgpr-mfma-to-agpr.ll   | 867 ++
 3 files changed, 1672 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.gfx90a.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.gfx950.ll

diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.gfx90a.ll 
b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.gfx90a.ll
new file mode 100644
index 0..7d00b12e7334a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.gfx90a.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mcpu=gfx90a -amdgpu-mfma-vgpr-form < %s | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+define void @test_rewrite_mfma_i32_32x32x8i8(i32 %arg0, i32 %arg1, ptr 
addrspace(1) %ptr) #0 {
+; CHECK-LABEL: test_rewrite_mfma_i32_32x32x8i8:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:global_load_dwordx4 a[12:15], v[2:3], off offset:48
+; CHECK-NEXT:global_load_dwordx4 a[8:11], v[2:3], off offset:32
+; CHECK-NEXT:global_load_dwordx4 a[4:7], v[2:3], off offset:16
+; CHECK-NEXT:global_load_dwordx4 a[0:3], v[2:3], off
+; CHECK-NEXT:s_waitcnt vmcnt(0)
+; CHECK-NEXT:v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[0:15]
+; CHECK-NEXT:;;#ASMSTART
+; CHECK-NEXT:; use a[0:15]
+; CHECK-NEXT:;;#ASMEND
+; CHECK-NEXT:s_setpc_b64 s[30:31]
+  %src2 = load <16 x i32>, ptr addrspace(1) %ptr
+  %mai = call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 %arg0, i32 %arg1, 
<16 x i32> %src2, i32 0, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<16 x i32> %mai)
+  ret void
+}
+
+define void @test_rewrite_mfma_i32_16x16x16i8(i32 %arg0, i32 %arg1, ptr 
addrspace(1) %ptr) #0 {
+; CHECK-LABEL: test_rewrite_mfma_i32_16x16x16i8:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:global_load_dwordx4 a[0:3], v[2:3], off
+; CHECK-NEXT:s_waitcnt vmcnt(0)
+; CHECK-NEXT:v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[0:3]
+; CHECK-NEXT:;;#ASMSTART
+; CHECK-NEXT:; use a[0:3]
+; CHECK-NEXT:;;#ASMEND
+; CHECK-NEXT:s_setpc_b64 s[30:31]
+  %src2 = load <4 x i32>, ptr addrspace(1) %ptr
+  %mai = call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 %arg0, i32 %arg1, 
<4 x i32> %src2, i32 0, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x i32> %mai)
+  ret void
+}
+
+define void @test_rewrite_mfma_f32_32x32x2bf16(<2 x i16> %arg0, <2 x i16> 
%arg1, ptr addrspace(1) %ptr) #0 {
+; CHECK-LABEL: test_rewrite_mfma_f32_32x32x2bf16:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:global_load_dwordx4 a[28:31], v[2:3], off offset:112
+; CHECK-NEXT:global_load_dwordx4 a[24:27], v[2:3], off offset:96
+; CHECK-NEXT:global_load_dwordx4 a[20:23], v[2:3], off offset:80
+; CHECK-NEXT:global_load_dwordx4 a[16:19], v[2:3], off offset:64
+; CHECK-NEXT:global_load_dwordx4 a[12:15], v[2:3], off offset:48
+; CHECK-NEXT:global_load_dwordx4 a[8:11], v[2:3], off offset:32
+; CHECK-NEXT:global_load_dwordx4 a[4:7], v[2:3], off offset:16
+; CHECK-NEXT:global_load_dwordx4 a[0:3], v[2:3], off
+; CHECK-NEXT:s_waitcnt vmcnt(0)
+; CHECK-NEXT:v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[0:31]
+; CHECK-NEXT:;;#ASMSTART
+; CHECK-NEXT:; use a[0:31]
+; CHECK-NEXT:;;#ASMEND
+; CHECK-NEXT:s_setpc_b64 s[30:31]
+  %src2 = load <32 x float>, ptr addrspace(1) %ptr
+  %mai = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %arg0, 
<2 x i16> %arg1, <32 x float> %src2, i32 0, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<32 x float> %mai)
+  ret void
+}
+
+define void @test_rewrite_mfma_f32_16x16x2bf16(<2 x i16> %arg0, <2 x i16> 
%arg1, ptr addrspace(1) %ptr) #0 {
+; CHECK-LABEL: test_rewrite_mfma_f32_16x16x2bf16:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:global_load_dwordx4 a[12:15], v[2:3], off offset:48
+; CHECK-NEXT:global_load_dwordx4 a[8:11], v[2:3], off offset:32
+; CHECK-NEXT:global_load_dwordx4 a[4:7], v[2:3], off offset:16
+; CHECK-NEXT:global_load_dwordx4 a[0:3], v[2:3], off
+; CHECK-NEXT:s_waitcn

[llvm-branch-commits] [lld] Make lld tests use lit internal shell by default (PR #156538)

2025-09-02 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay approved this pull request.


https://github.com/llvm/llvm-project/pull/156538
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add test for mfma rewrite pass respecting optnone (PR #153025)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/153025

>From e9015799806374bc266257627df96ae2c2dfd43e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 11 Aug 2025 19:05:44 +0900
Subject: [PATCH] AMDGPU: Add test for mfma rewrite pass respecting optnone

---
 .../AMDGPU/rewrite-vgpr-mfma-to-agpr.ll   | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll 
b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
index 343a5c8511ee9..6f7809f46d10a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll
@@ -3,6 +3,40 @@
 
 target triple = "amdgcn-amd-amdhsa"
 
+define amdgpu_kernel void @respect_optnone(double %arg0, double %arg1, ptr 
addrspace(1) %ptr) #4 {
+; CHECK-LABEL: respect_optnone:
+; CHECK:   ; %bb.0: ; %bb
+; CHECK-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0
+; CHECK-NEXT:s_load_dwordx2 s[2:3], s[4:5], 0x8
+; CHECK-NEXT:s_nop 0
+; CHECK-NEXT:s_load_dwordx2 s[4:5], s[4:5], 0x10
+; CHECK-NEXT:s_mov_b32 s6, 0x3ff
+; CHECK-NEXT:v_and_b32_e64 v0, v0, s6
+; CHECK-NEXT:s_mov_b32 s6, 3
+; CHECK-NEXT:v_lshlrev_b32_e64 v0, s6, v0
+; CHECK-NEXT:s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:global_load_dwordx2 v[0:1], v0, s[4:5]
+; CHECK-NEXT:v_mov_b64_e32 v[2:3], s[0:1]
+; CHECK-NEXT:v_mov_b64_e32 v[4:5], s[2:3]
+; CHECK-NEXT:s_waitcnt vmcnt(0)
+; CHECK-NEXT:s_nop 0
+; CHECK-NEXT:v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3], v[4:5], v[0:1]
+; CHECK-NEXT:s_nop 5
+; CHECK-NEXT:v_accvgpr_write_b32 a0, v0
+; CHECK-NEXT:v_accvgpr_write_b32 a1, v1
+; CHECK-NEXT:;;#ASMSTART
+; CHECK-NEXT:; use a[0:1]
+; CHECK-NEXT:;;#ASMEND
+; CHECK-NEXT:s_endpgm
+bb:
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr double, ptr addrspace(1) %ptr, i32 %id
+  %src2 = load double, ptr addrspace(1) %gep
+  %mai = call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %arg0, double 
%arg1, double %src2, i32 0, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(double %mai)
+  ret void
+}
+
 define amdgpu_kernel void @test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma(ptr 
addrspace(1) %arg) #0 {
 ; CHECK-LABEL: test_mfma_f32_32x32x1f32_rewrite_vgpr_mfma:
 ; CHECK:   ; %bb.0: ; %bb
@@ -859,3 +893,4 @@ attributes #0 = { nounwind 
"amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-p
 attributes #1 = { mustprogress nofree norecurse nounwind willreturn 
"amdgpu-waves-per-eu"="8,8" }
 attributes #2 = { convergent nocallback nofree nosync nounwind willreturn 
memory(none) }
 attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn 
memory(none) }
+attributes #4 = { nounwind noinline optnone }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [AArch64][BTI] Add BTI at EH entries. (#155308) (PR #156170)

2025-09-02 Thread David Green via llvm-branch-commits

https://github.com/davemgreen updated 
https://github.com/llvm/llvm-project/pull/156170

>From 51081359fa4a76c9d20489084ce2a399b5bfc520 Mon Sep 17 00:00:00 2001
From: Shashi Shankar 
Date: Sat, 30 Aug 2025 11:56:03 +0200
Subject: [PATCH 1/2] [AArch64][BTI] Add BTI at EH entries. (#155308)

Mark EH landing pads as indirect-branch targets (BTI j) and treat WinEH
funclet entries as call-like (BTI c). Add lit tests for ELF and COFF.
Tests:
Adds lit tests: bti-ehpad.ll and wineh-bti-funclet.ll.

Fixes: #149267

Signed-off-by: Shashi Shankar 
(cherry picked from commit 1b37b9e6d788d7058381b68b5ab265bcb6181335)
---
 .../Target/AArch64/AArch64BranchTargets.cpp   | 46 ---
 llvm/test/CodeGen/AArch64/bti-ehpad.ll| 44 +++
 .../AArch64/sign-return-address-pauth-lr.ll   | 16 ++--
 .../test/CodeGen/AArch64/wineh-bti-funclet.ll | 79 +++
 llvm/test/CodeGen/AArch64/wineh-bti.ll|  2 +-
 5 files changed, 167 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/bti-ehpad.ll
 create mode 100644 llvm/test/CodeGen/AArch64/wineh-bti-funclet.ll

diff --git a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp 
b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
index 3436dc9ef4521..137ff898e86a3 100644
--- a/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
+++ b/llvm/lib/Target/AArch64/AArch64BranchTargets.cpp
@@ -30,6 +30,14 @@ using namespace llvm;
 #define AARCH64_BRANCH_TARGETS_NAME "AArch64 Branch Targets"
 
 namespace {
+// BTI HINT encoding: base (32) plus 'c' (2) and/or 'j' (4).
+enum : unsigned {
+  BTIBase = 32,   // Base immediate for BTI HINT
+  BTIC = 1u << 1, // 2
+  BTIJ = 1u << 2, // 4
+  BTIMask = BTIC | BTIJ,
+};
+
 class AArch64BranchTargets : public MachineFunctionPass {
 public:
   static char ID;
@@ -42,6 +50,7 @@ class AArch64BranchTargets : public MachineFunctionPass {
   void addBTI(MachineBasicBlock &MBB, bool CouldCall, bool CouldJump,
   bool NeedsWinCFI);
 };
+
 } // end anonymous namespace
 
 char AArch64BranchTargets::ID = 0;
@@ -62,9 +71,8 @@ bool 
AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
   if (!MF.getInfo()->branchTargetEnforcement())
 return false;
 
-  LLVM_DEBUG(
-  dbgs() << "** AArch64 Branch Targets  **\n"
- << "** Function: " << MF.getName() << '\n');
+  LLVM_DEBUG(dbgs() << "** AArch64 Branch Targets  **\n"
+<< "** Function: " << MF.getName() << '\n');
   const Function &F = MF.getFunction();
 
   // LLVM does not consider basic blocks which are the targets of jump tables
@@ -103,6 +111,12 @@ bool 
AArch64BranchTargets::runOnMachineFunction(MachineFunction &MF) {
 JumpTableTargets.count(&MBB))
   CouldJump = true;
 
+if (MBB.isEHPad()) {
+  if (HasWinCFI && (MBB.isEHFuncletEntry() || MBB.isCleanupFuncletEntry()))
+CouldCall = true;
+  else
+CouldJump = true;
+}
 if (CouldCall || CouldJump) {
   addBTI(MBB, CouldCall, CouldJump, HasWinCFI);
   MadeChange = true;
@@ -130,7 +144,12 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, 
bool CouldCall,
 
   auto MBBI = MBB.begin();
 
-  // Skip the meta instructions, those will be removed anyway.
+  // If the block starts with EH_LABEL(s), skip them first.
+  while (MBBI != MBB.end() && MBBI->isEHLabel()) {
+++MBBI;
+  }
+
+  // Skip meta/CFI/etc. (and EMITBKEY) to reach the first executable insn.
   for (; MBBI != MBB.end() &&
  (MBBI->isMetaInstruction() || MBBI->getOpcode() == AArch64::EMITBKEY);
++MBBI)
@@ -138,16 +157,21 @@ void AArch64BranchTargets::addBTI(MachineBasicBlock &MBB, 
bool CouldCall,
 
   // SCTLR_EL1.BT[01] is set to 0 by default which means
   // PACI[AB]SP are implicitly BTI C so no BTI C instruction is needed there.
-  if (MBBI != MBB.end() && HintNum == 34 &&
+  if (MBBI != MBB.end() && ((HintNum & BTIMask) == BTIC) &&
   (MBBI->getOpcode() == AArch64::PACIASP ||
MBBI->getOpcode() == AArch64::PACIBSP))
 return;
 
-  if (HasWinCFI && MBBI->getFlag(MachineInstr::FrameSetup)) {
-BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-TII->get(AArch64::SEH_Nop));
+  // Insert BTI exactly at the first executable instruction.
+  const DebugLoc DL = MBB.findDebugLoc(MBBI);
+  MachineInstr *BTI = BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT))
+  .addImm(HintNum)
+  .getInstr();
+
+  // WinEH: put .seh_nop after BTI when the first real insn is FrameSetup.
+  if (HasWinCFI && MBBI != MBB.end() &&
+  MBBI->getFlag(MachineInstr::FrameSetup)) {
+auto AfterBTI = std::next(MachineBasicBlock::iterator(BTI));
+BuildMI(MBB, AfterBTI, DL, TII->get(AArch64::SEH_Nop));
   }
-  BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-  TII->get(AArch64::HINT))
-  .addImm(HintNum);
 }
diff --git a/llvm/test/CodeGen/AArch64/bti-ehpad.ll 
b/llvm/test/CodeGen/AArch64/b

[llvm-branch-commits] [llvm] AMDGPU: Reorder arguments of DS_Real_gfx12 (PR #156405)

2025-09-02 Thread Pierre van Houtryve via llvm-branch-commits

https://github.com/Pierre-vh approved this pull request.

Add NFC to the title?

https://github.com/llvm/llvm-project/pull/156405
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Refactor isImmOperandLegal (PR #155607)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155607

>From 96904665ffd481eab0087e1a7c2edcc6ef0bb915 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 27 Aug 2025 15:35:53 +0900
Subject: [PATCH] AMDGPU: Refactor isImmOperandLegal

The goal is to expose more variants that can operate without
preconstructed MachineInstrs or MachineOperands.
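
As a usage sketch (the `canFoldImmWithoutMI` helper below is hypothetical and only illustrates the new `int64_t` overload added in this patch; `SIInstrInfo` and `MCInstrDesc` are the existing LLVM types):

```cpp
// With the int64_t overload, legality can be queried for a prospective
// immediate before any MachineInstr or MachineOperand is constructed.
static bool canFoldImmWithoutMI(const SIInstrInfo &TII,
                                const MCInstrDesc &Desc, unsigned OpNo,
                                int64_t Imm) {
  return TII.isImmOperandLegal(Desc, OpNo, Imm);
}
```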
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 38 ---
 llvm/lib/Target/AMDGPU/SIInstrInfo.h  |  6 +++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp|  7 
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  9 -
 4 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d3bda9f3875e3..887092182f7d1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4572,19 +4572,24 @@ static bool compareMachineOp(const MachineOperand &Op0,
   }
 }
 
-bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
-const MachineOperand &MO) const {
-  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
-
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
-
+bool SIInstrInfo::isLiteralOperandLegal(const MCInstrDesc &InstDesc,
+const MCOperandInfo &OpInfo) const {
   if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
 return true;
 
-  if (OpInfo.RegClass < 0)
+  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
 return false;
 
-  if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
+  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(OpInfo))
+return true;
+
+  return ST.hasVOP3Literal();
+}
+
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+int64_t ImmVal) const {
+  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
+  if (isInlineConstant(ImmVal, OpInfo.OperandType)) {
 if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
 OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
  AMDGPU::OpName::src2))
@@ -4592,13 +4597,18 @@ bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc 
&InstDesc, unsigned OpNo,
 return RI.opCanUseInlineConstant(OpInfo.OperandType);
   }
 
-  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
-return false;
+  return isLiteralOperandLegal(InstDesc, OpInfo);
+}
 
-  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
-return true;
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+const MachineOperand &MO) const {
+  if (MO.isImm())
+return isImmOperandLegal(InstDesc, OpNo, MO.getImm());
 
-  return ST.hasVOP3Literal();
+  assert((MO.isTargetIndex() || MO.isFI() || MO.isGlobal()) &&
+ "unexpected imm-like operand kind");
+  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
+  return isLiteralOperandLegal(InstDesc, OpInfo);
 }
 
 bool SIInstrInfo::isLegalAV64PseudoImm(uint64_t Imm) const {
@@ -6268,7 +6278,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, 
unsigned OpIdx,
   return false;
   }
 }
-  } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
+  } else if (AMDGPU::isSISrcOperand(InstDesc.operands()[i]) &&
  !isInlineConstant(Op, InstDesc.operands()[i])) {
 // The same literal may be used multiple times.
 if (!UsedLiteral)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 2f9f5c54406a3..1070d4824aa14 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1183,6 +1183,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
  const MachineOperand &MO) const;
 
+  bool isLiteralOperandLegal(const MCInstrDesc &InstDesc,
+ const MCOperandInfo &OpInfo) const;
+
+  bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+ int64_t ImmVal) const;
+
   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
  const MachineOperand &MO) const {
 return isImmOperandLegal(MI.getDesc(), OpNo, MO);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 18ee9c16b3ff9..da19a6faa9e0f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2720,13 +2720,6 @@ bool isInlineValue(unsigned Reg) {
 #undef CASE_GFXPRE11_GFX11PLUS_TO
 #undef MAP_REG2REG
 
-bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
-  assert(OpNo < Desc.NumOperands);
-  unsigned OpType = Desc.operands()[OpNo].Oper


[llvm-branch-commits] [lld] Update cgdata-* tests to not use subshells (PR #156533)

2025-09-02 Thread Petr Hosek via llvm-branch-commits

https://github.com/petrhosek approved this pull request.


https://github.com/llvm/llvm-project/pull/156533
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] `do concurrent` to device mapping lit tests (PR #155992)

2025-09-02 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy edited 
https://github.com/llvm/llvm-project/pull/155992
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Extend `do concurrent` mapping to device (PR #155987)

2025-09-02 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy edited 
https://github.com/llvm/llvm-project/pull/155987
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] CodeGen: Optionally emit PAuth relocations as IRELATIVE relocations. (PR #133533)

2025-09-02 Thread Peter Collingbourne via llvm-branch-commits

https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/133533


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add translations to LLVMIR for ptr ops. (PR #156355)

2025-09-02 Thread Christian Ulmann via llvm-branch-commits

https://github.com/Dinistro edited 
https://github.com/llvm/llvm-project/pull/156355
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clangd] Show type hint for simple cases of dependent 'auto' (PR #156284)

2025-09-02 Thread Nathan Ridge via llvm-branch-commits

https://github.com/HighCommander4 ready_for_review 
https://github.com/llvm/llvm-project/pull/156284
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/21.x: [sanitizer_common] Older Haiku needs _GNU_SOURCE (#156291) (PR #156303)

2025-09-02 Thread David CARLIER via llvm-branch-commits

https://github.com/devnexen approved this pull request.


https://github.com/llvm/llvm-project/pull/156303
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add translations to LLVMIR for ptr ops. (PR #156355)

2025-09-02 Thread via llvm-branch-commits

github-actions[bot] wrote:

āš ļø We detected that you are using a GitHub private e-mail address to contribute 
to the repo. Please turn off [Keep my email addresses 
private](https://github.com/settings/emails) setting in your account. See 
[LLVM Developer 
Policy](https://llvm.org/docs/DeveloperPolicy.html#email-addresses) and [LLVM 
Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it)
 for more information.

https://github.com/llvm/llvm-project/pull/156355
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/21.x: [libc++] Implement comparison operators for `tuple` added in C++23 (#148799) (PR #151808)

2025-09-02 Thread Hristo Hristov via llvm-branch-commits

Zingam wrote:

@frederick-vs-ja If this isn't landing in LLVM 21, can you update the release
notes, etc.?

https://github.com/llvm/llvm-project/pull/151808
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Reorder arguments of DS_Real_gfx12 (PR #156405)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/156405

This helps shrink the diff in a future change.

>From c3c9b84d25cec1357823ca94dfc9873b941746c4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 2 Sep 2025 14:06:22 +0900
Subject: [PATCH] AMDGPU: Reorder arguments of DS_Real_gfx12

This helps shrink the diff in a future change.
---
 llvm/lib/Target/AMDGPU/DSInstructions.td | 41 +++-
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 2de89e1262e9c..a9376250931b6 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1360,8 +1360,10 @@ class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12 
op, DS_Pseudo ps, int ef,
 // GFX12.
 
//===--===//
 
-multiclass DS_Real_gfx12 op, string name = !tolower(NAME)> {
-  defvar ps = !cast(NAME);
+multiclass DS_Real_gfx12 op,
+ DS_Pseudo ps = !cast(NAME),
+ string name = !tolower(NAME)> {
+
   let AssemblerPredicate = isGFX12Plus in {
 let DecoderNamespace = "GFX12" in
   def _gfx12 :
@@ -1372,14 +1374,20 @@ multiclass DS_Real_gfx12 op, string name = 
!tolower(NAME)> {
   } // End AssemblerPredicate
 }
 
-defm DS_MIN_F32   : DS_Real_gfx12<0x012, "ds_min_num_f32">;
-defm DS_MAX_F32   : DS_Real_gfx12<0x013, "ds_max_num_f32">;
-defm DS_MIN_RTN_F32   : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">;
-defm DS_MAX_RTN_F32   : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">;
-defm DS_MIN_F64   : DS_Real_gfx12<0x052, "ds_min_num_f64">;
-defm DS_MAX_F64   : DS_Real_gfx12<0x053, "ds_max_num_f64">;
-defm DS_MIN_RTN_F64   : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">;
-defm DS_MAX_RTN_F64   : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">;
+// Helper to avoid repeating the pseudo-name if we only need to set
+// the gfx12 name.
+multiclass DS_Real_gfx12_with_name op, string name> {
+  defm "" : DS_Real_gfx12(NAME), name>;
+}
+
+defm DS_MIN_F32   : DS_Real_gfx12_with_name<0x012, "ds_min_num_f32">;
+defm DS_MAX_F32   : DS_Real_gfx12_with_name<0x013, "ds_max_num_f32">;
+defm DS_MIN_RTN_F32   : DS_Real_gfx12_with_name<0x032, 
"ds_min_num_rtn_f32">;
+defm DS_MAX_RTN_F32   : DS_Real_gfx12_with_name<0x033, 
"ds_max_num_rtn_f32">;
+defm DS_MIN_F64   : DS_Real_gfx12_with_name<0x052, "ds_min_num_f64">;
+defm DS_MAX_F64   : DS_Real_gfx12_with_name<0x053, "ds_max_num_f64">;
+defm DS_MIN_RTN_F64   : DS_Real_gfx12_with_name<0x072, 
"ds_min_num_rtn_f64">;
+defm DS_MAX_RTN_F64   : DS_Real_gfx12_with_name<0x073, 
"ds_max_num_rtn_f64">;
 defm DS_COND_SUB_U32  : DS_Real_gfx12<0x098>;
 defm DS_SUB_CLAMP_U32 : DS_Real_gfx12<0x099>;
 defm DS_COND_SUB_RTN_U32  : DS_Real_gfx12<0x0a8>;
@@ -1395,7 +1403,7 @@ defm DS_LOAD_TR6_B96  : DS_Real_gfx12<0x0fb>;
 defm DS_LOAD_TR16_B128: DS_Real_gfx12<0x0fc>;
 defm DS_LOAD_TR8_B64  : DS_Real_gfx12<0x0fd>;
 
-defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12<0x0e0,
+defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx12_with_name<0x0e0,
   "ds_bvh_stack_push4_pop1_rtn_b32">;
 defm DS_BVH_STACK_PUSH8_POP1_RTN_B32  : DS_Real_gfx12<0x0e1>;
 defm DS_BVH_STACK_PUSH8_POP2_RTN_B64  : DS_Real_gfx12<0x0e2>;
@@ -1424,8 +1432,8 @@ def : MnemonicAlias<"ds_load_tr_b128", 
"ds_load_tr16_b128">, Requires<[isGFX1250
 // GFX11.
 
//===--===//
 
-multiclass DS_Real_gfx11 op, string name = !tolower(NAME)> {
-  defvar ps = !cast(NAME);
+multiclass DS_Real_gfx11 op, DS_Pseudo ps = !cast(NAME),
+ string name = !tolower(NAME)> {
   let AssemblerPredicate = isGFX11Only in {
 let DecoderNamespace = "GFX11" in
   def _gfx11 :
@@ -1436,8 +1444,11 @@ multiclass DS_Real_gfx11 op, string name = 
!tolower(NAME)> {
   } // End AssemblerPredicate
 }
 
-multiclass DS_Real_gfx11_gfx12 op, string name = !tolower(NAME)>
-  : DS_Real_gfx11, DS_Real_gfx12;
+multiclass DS_Real_gfx11_gfx12 op,
+   string name = !tolower(NAME),
+   DS_Pseudo ps = !cast(NAME)>
+  : DS_Real_gfx11,
+DS_Real_gfx12;
 
 defm DS_WRITE_B32   : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">;
 defm DS_WRITE2_B32  : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread Mehdi Amini via llvm-branch-commits

https://github.com/joker-eph approved this pull request.


https://github.com/llvm/llvm-project/pull/156368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread Utkarsh Saxena via llvm-branch-commits

usx95 wrote:

I think the liveness analysis can be built on top of this. A UseFact with a
write, e.g. `a = b`, kills the value in `a` and gens the value of `b`. All
other facts essentially gen the origins involved. WDYT?
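
To make that concrete, here is a minimal, self-contained sketch of such a
backward gen/kill transfer. This is not the LifetimeSafety API; the `Fact`
struct, `transferBlock` helper, and the plain `OriginID` alias are stand-ins
made up for illustration only:

```cpp
// Hypothetical sketch of a backward liveness transfer over per-block facts:
// a written-to use (`a = b`) kills the assigned origin, every other use gens
// the origin it consumes. Types here are stand-ins, not the real classes.
#include <cstdint>
#include <set>
#include <vector>

using OriginID = uint32_t;

struct Fact {
  OriginID Origin;      // origin this fact refers to
  bool IsWrite = false; // true for the LHS use of an assignment
};

std::set<OriginID> transferBlock(const std::vector<Fact> &Facts,
                                 std::set<OriginID> LiveOut) {
  std::set<OriginID> Live = std::move(LiveOut);
  // Walk the block's facts backwards, applying kill-then-gen per fact.
  for (auto It = Facts.rbegin(); It != Facts.rend(); ++It) {
    if (It->IsWrite)
      Live.erase(It->Origin);  // kill: the old value is overwritten here
    else
      Live.insert(It->Origin); // gen: the origin is read here
  }
  return Live;
}
```

The fixed point over the CFG would then be the usual backward union over
successor blocks.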

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread Utkarsh Saxena via llvm-branch-commits


@@ -559,9 +553,49 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
 return false;
   }
 
+  void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
+// Find the underlying variable declaration for the left-hand side.
+if (const auto *DRE_LHS =
+dyn_cast(LHSExpr->IgnoreParenImpCasts())) {
+  markUseAsWrite(DRE_LHS);
+  if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl()))
+if (hasOrigin(VD_LHS->getType()))
+  // We are interested in assignments like `ptr1 = ptr2` or `ptr = 
&var`
+  // LHS must be a pointer/reference type that can be an origin.
+  // RHS must also represent an origin (either another pointer/ref or 
an
+  // address-of).
+  addAssignOriginFact(*VD_LHS, *RHSExpr);
+}
+  }
+
+  // A DeclRefExpr is a use of the referenced decl. It is checked for
+  // use-after-free unless it is being written to (e.g. on the left-hand side
+  // of an assignment).
+  void handleUse(const DeclRefExpr *DRE) {
+const auto *VD = dyn_cast(DRE->getDecl());
+if (VD && hasOrigin(VD->getType())) {
+  OriginID OID = FactMgr.getOriginMgr().get(*VD);
+  UseFact *UF = FactMgr.createFact(OID, DRE);
+  CurrentBlockFacts.push_back(UF);
+  assert(!UseFacts.contains(DRE));

usx95 wrote:

Two different references to a declaration are given two distinct DREs.
In `int a; int b = a + a;`, the two `a`s on the RHS would have two different
DREs to the underlying decl.
Since we do not visit the same expression twice, we can assume that we have
never seen this usage before.
Does that answer your question?
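
As a standalone illustration (a sketch using Clang's tooling and AST-matcher
APIs; this little check is hypothetical and not part of the patch), counting
the `DeclRefExpr` nodes for `a` in that snippet shows one node per textual use:

```cpp
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace clang::ast_matchers;

int main() {
  // Each textual reference to `a` is a distinct DeclRefExpr node in the AST.
  auto AST = tooling::buildASTFromCode("int a; int b = a + a;");
  auto Matches = match(declRefExpr(to(varDecl(hasName("a")))).bind("dre"),
                       AST->getASTContext());
  llvm::outs() << Matches.size() << "\n"; // prints 2: one use fact per DRE
  return 0;
}
```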

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][HeuristicResolver] Default argument heuristic for template template parameters (PR #156404)

2025-09-02 Thread Nathan Ridge via llvm-branch-commits

https://github.com/HighCommander4 created 
https://github.com/llvm/llvm-project/pull/156404

Fixes https://github.com/clangd/clangd/issues/2478

>From 7c8f310155914c17e8e3c6d485acb4464800ddd1 Mon Sep 17 00:00:00 2001
From: Nathan Ridge 
Date: Tue, 2 Sep 2025 01:06:07 -0400
Subject: [PATCH] [clang][HeuristicResolver] Default argument heuristic for
 template template parameters

Fixes https://github.com/clangd/clangd/issues/2478
---
 clang/lib/Sema/HeuristicResolver.cpp  | 19 +++
 .../unittests/Sema/HeuristicResolverTest.cpp  | 18 ++
 2 files changed, 37 insertions(+)

diff --git a/clang/lib/Sema/HeuristicResolver.cpp 
b/clang/lib/Sema/HeuristicResolver.cpp
index 8b424610feeda..6bfd1db602d4e 100644
--- a/clang/lib/Sema/HeuristicResolver.cpp
+++ b/clang/lib/Sema/HeuristicResolver.cpp
@@ -260,6 +260,25 @@ QualType HeuristicResolverImpl::simplifyType(QualType 
Type, const Expr *E,
 }
   }
 }
+
+// Similarly, heuristically replace a template template parameter with its
+// default argument if it has one.
+if (const auto *TST =
+dyn_cast_if_present(T.Type)) {
+  if (const auto *TTPD = dyn_cast_if_present(
+  TST->getTemplateName().getAsTemplateDecl())) {
+if (TTPD->hasDefaultArgument()) {
+  const auto &DefaultArg = TTPD->getDefaultArgument().getArgument();
+  if (DefaultArg.getKind() == TemplateArgument::Template) {
+if (const auto *CTD = dyn_cast_if_present(
+DefaultArg.getAsTemplate().getAsTemplateDecl())) {
+  return {Ctx.getCanonicalTagType(CTD->getTemplatedDecl())};
+}
+  }
+}
+  }
+}
+
 // Check if the expression refers to an explicit object parameter of
 // templated type. If so, heuristically treat it as having the type of the
 // enclosing class.
diff --git a/clang/unittests/Sema/HeuristicResolverTest.cpp 
b/clang/unittests/Sema/HeuristicResolverTest.cpp
index cdbb4fe7c7eda..a69605e9f7466 100644
--- a/clang/unittests/Sema/HeuristicResolverTest.cpp
+++ b/clang/unittests/Sema/HeuristicResolverTest.cpp
@@ -545,6 +545,24 @@ TEST(HeuristicResolver, 
MemberExpr_DefaultTemplateArgument_Recursive) {
   cxxMethodDecl(hasName("foo")).bind("output"));
 }
 
+TEST(HeuristicResolver, MemberExpr_DefaultTemplateTemplateArgument) {
+  std::string Code = R"cpp(
+template 
+struct vector {
+  void push_back(T);
+};
+template  class Container = vector>
+void foo(Container c, Element e) {
+  c.push_back(e);
+}
+  )cpp";
+  // Test resolution of "push_back" in "c.push_back(e)".
+  expectResolution(
+  Code, &HeuristicResolver::resolveMemberExpr,
+  cxxDependentScopeMemberExpr(hasMemberName("push_back")).bind("input"),
+  cxxMethodDecl(hasName("push_back")).bind("output"));
+}
+
 TEST(HeuristicResolver, MemberExpr_ExplicitObjectParameter) {
   std::string Code = R"cpp(
 struct Foo {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Use xvperm.w for cross-lane access within a single vector (PR #151634)

2025-09-02 Thread via llvm-branch-commits

https://github.com/zhaoqi5 updated 
https://github.com/llvm/llvm-project/pull/151634

>From 29e45d02001d415207d4a05b3d8a398674939b19 Mon Sep 17 00:00:00 2001
From: Qi Zhao 
Date: Fri, 1 Aug 2025 11:30:19 +0800
Subject: [PATCH 1/2] [LoongArch] Use xvperm.w for cross-lane access within a
 single vector

---
 .../LoongArch/LoongArchISelLowering.cpp   | 44 +++
 .../lasx/shuffle-as-permute-and-shuffle.ll| 18 ++--
 2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index c55997414289a..6cd44acbed5ed 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1990,6 +1990,48 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, 
ArrayRef Mask, MVT VT,
   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
 }
 
+/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef Mask,
+  MVT VT, SDValue V1, SDValue V2,
+  SelectionDAG &DAG) {
+  // LoongArch LASX only have XVPERM_W.
+  if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
+return SDValue();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfSize = NumElts / 2;
+  bool FrontLo = true, FrontHi = true;
+  bool BackLo = true, BackHi = true;
+
+  auto inRange = [](int val, int low, int high) {
+return (val == -1) || (val >= low && val < high);
+  };
+
+  for (unsigned i = 0; i < HalfSize; ++i) {
+int Fronti = Mask[i];
+int Backi = Mask[i + HalfSize];
+
+FrontLo &= inRange(Fronti, 0, HalfSize);
+FrontHi &= inRange(Fronti, HalfSize, NumElts);
+BackLo &= inRange(Backi, 0, HalfSize);
+BackHi &= inRange(Backi, HalfSize, NumElts);
+  }
+
+  // If both the lower and upper 128-bit parts access only one half of the
+  // vector (either lower or upper), avoid using xvperm.w. The latency of
+  // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
+  if ((FrontLo && (BackLo || BackHi)) || (FrontHi && (BackLo || BackHi)))
+return SDValue();
+
+  SmallVector Masks;
+  for (unsigned i = 0; i < NumElts; ++i)
+Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
+  : DAG.getConstant(Mask[i], DL, MVT::i64));
+  SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
+
+  return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
+}
+
 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef 
Mask,
 MVT VT, SDValue V1, SDValue V2,
@@ -2396,6 +2438,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, 
ArrayRef Mask, MVT VT,
 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
Subtarget)))
   return Result;
+if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
+  return Result;
 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
  V1, V2, DAG)))
   return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll 
b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index fed085843485a..5f76d9951df9c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -61,13 +61,8 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
 ; CHECK-LABEL: shuffle_v8i32:
 ; CHECK:   # %bb.0:
 ; CHECK-NEXT:pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT:xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT:pcalau12i $a0, %pc_hi20(.LCPI4_1)
-; CHECK-NEXT:xvld $xr1, $a0, %pc_lo12(.LCPI4_1)
-; CHECK-NEXT:xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:xvld $xr1, $a0, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:xvperm.w $xr0, $xr0, $xr1
 ; CHECK-NEXT:ret
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> 
   ret <8 x i32> %shuffle
@@ -117,13 +112,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %a) {
 ; CHECK-LABEL: shuffle_v8f32:
 ; CHECK:   # %bb.0:
 ; CHECK-NEXT:pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT:xvld $xr2, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT:pcalau12i $a0, %pc_hi20(.LCPI8_1)
-; CHECK-NEXT:xvld $xr1, $a0, %pc_lo12(.LCPI8_1)
-; CHECK-NEXT:xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT:xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT:xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT:xvori.b $xr0, $xr1, 0
+; CHECK-NEXT:xvld $xr1, $a0, %pc_lo12(.LCPI8_0)
+; CHECK-NEXT:xvperm.w $xr0, $xr0, $xr1

[llvm-branch-commits] [llvm] release/21.x: [X86] getScalarMaskingNode - if the mask is zero just return the blended passthrough and preserved source value (#153575) (PR #156430)

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/156430

Backport ba707db840516b2246c6a31ef8a96e41939deeb5

Requested by: @nikic

>From 543fc154306659f184bb82082b414eaf4536cbf0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Tue, 2 Sep 2025 09:44:15 +0100
Subject: [PATCH] [X86] getScalarMaskingNode - if the mask is zero just return
 the blended passthrough and preserved source value (#153575)

We already handle the case where the mask is one, so I added the other case,
where the op is replaced with a MOVSH/S/D blend.

This assumes the scalar passthrough is op0.

I had to adjust the test case for #98306 as AFAICT it'd been over-reduced.

Fixes #153570

(cherry picked from commit ba707db840516b2246c6a31ef8a96e41939deeb5)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp| 18 ++
 .../CodeGen/X86/avx512cfmulsh-instrinsics.ll   |  6 +++---
 llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll | 16 
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a548170e654a1..578519b1cc3c9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26236,10 +26236,9 @@ static SDValue getScalarMaskingNode(SDValue Op, 
SDValue Mask,
 SDValue PreservedSrc,
 const X86Subtarget &Subtarget,
 SelectionDAG &DAG) {
-
-  if (auto *MaskConst = dyn_cast(Mask))
-if (MaskConst->getZExtValue() & 0x1)
-  return Op;
+  auto *MaskConst = dyn_cast(Mask);
+  if (MaskConst && (MaskConst->getZExtValue() & 0x1))
+return Op;
 
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
@@ -26255,6 +26254,17 @@ static SDValue getScalarMaskingNode(SDValue Op, 
SDValue Mask,
 
   if (PreservedSrc.isUndef())
 PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+
+  if (MaskConst) {
+assert((MaskConst->getZExtValue() & 0x1) == 0 && "Expected false mask");
+// Discard op and blend passthrough with scalar op src/dst.
+SmallVector ShuffleMask(VT.getVectorNumElements());
+std::iota(ShuffleMask.begin(), ShuffleMask.end(), 0);
+ShuffleMask[0] = VT.getVectorNumElements();
+return DAG.getVectorShuffle(VT, dl, Op.getOperand(0), PreservedSrc,
+ShuffleMask);
+  }
+
   return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
 }
 
diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll 
b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
index e449c7192e4bf..b60d7a5463d6b 100644
--- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
@@ -278,14 +278,14 @@ define <4 x float> 
@test_int_x86_avx512fp16_maskz_cfcmadd_sh(<4 x float> %x0, <4
   ret <4 x float> %res
 }
 
-define <4 x float> @PR98306() {
+define <4 x float> @PR98306(i8 %m) {
 ; CHECK-LABEL: PR98306:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:kxorw %k0, %k0, %k1
+; CHECK-NEXT:kmovd %edi, %k1
 ; CHECK-NEXT:vmovaps {{.*#+}} xmm1 = 
[7.8125E-3,1.050912E+6,4.203776E+6,1.6815616E+7]
 ; CHECK-NEXT:vmovaps {{.*#+}} xmm0 = 
[3.2E+1,4.03288064E+8,8.0658432E+8,1.61318502E+9]
 ; CHECK-NEXT:vfmaddcsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 {%k1} 
{z}
 ; CHECK-NEXT:retq
-  %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> 
, <4 x float> , <4 x float> , i8 0, i32 4)
+  %res = call <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.csh(<4 x float> 
, <4 x float> , <4 x float> , i8 %m, i32 4)
   ret <4 x float> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll 
b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
index 627a94799424c..b1bacd92f073b 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -1361,3 +1361,19 @@ define <32 x half> 
@test_mm512_castph256_ph512_freeze(<16 x half> %a0) nounwind
   %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> 
   ret <32 x half> %res
 }
+
+define <8 x half> @PR153570(ptr %p) {
+; CHECK-LABEL: PR153570:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vpbroadcastw {{.*#+}} xmm0 = 
[2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0,2.0E+0]
+; CHECK-NEXT:vpbroadcastw {{.*#+}} xmm1 = 
[1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; CHECK-NEXT:vmulsh {rn-sae}, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:vpbroadcastw {{.*#+}} xmm2 = 
[-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
+; CHECK-NEXT:vmovsh %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:vmovaps %xmm1, (%rdi)
+; CHECK-NEXT:retq
+  %r = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> 
, <8 x half> , <8 x half> , i8 0, i32 8)
+  store <8 x half> %r, ptr %p, align 16
+  %r1 = tail call <8 x half> @llvm.x86.avx512fp16.mask.mul.sh.round(<8 x half> 
, <8 x half> , <8 x half> , i8 1, i32 8)
+  ret <8 

[llvm-branch-commits] [mlir] [mlir][ptr] Add translations to LLVMIR for ptr ops. (PR #156355)

2025-09-02 Thread Mehdi Amini via llvm-branch-commits

https://github.com/joker-eph approved this pull request.

LG overall

https://github.com/llvm/llvm-project/pull/156355
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread Mehdi Amini via llvm-branch-commits


@@ -56,6 +96,58 @@ def Ptr_FromPtrOp : Pointer_Op<"from_ptr", [
   let hasVerifier = 1;
 }
 
+//===--===//
+// GatherOp
+//===--===//
+
+def Ptr_GatherOp : Pointer_Op<"gather", [
+DeclareOpInterfaceMethods,
+TypesMatchWith<"result and mask must be compatible", "result", "mask", [{
+  ::llvm::cast($_self).clone(
+IntegerType::get($_self.getContext(), 1))
+}]>,
+AllTypesMatch<["result", "passthrough"]>,
+// Check the shapes are compatible and both use the same shaped container
+// type.
+AllShapesMatch<["result", "ptrs"]>, AllTypeIDsMatch<["result", "ptrs"]>
+  ]> {
+  let summary = "Gather operation";
+  let description = [{
+The `gather` operation performs conditional loads from multiple memory
+locations specified by `ptrs` based on a mask `mask`. Elements of the
+result corresponding to masked-off lanes are taken from the passthrough
+operand.
+
+The mask operand is a shaped type of `i1` elements that must have the same
+shape as the result type.
+
+Examples:
+```mlir
+// Gather values from multiple memory locations
+%result = ptr.gather %ptrs, %mask, %passthrough :
+  vector<4x!ptr.ptr<#ptr.generic_space>> -> vector<4xf32>
+
+// Gather with alignment
+%result = ptr.gather %ptrs, %mask, %passthrough alignment = 8 :
+  vector<4x!ptr.ptr<#ptr.generic_space>> -> vector<4xf32>
+```
+  }];
+  let arguments = (ins Ptr_Ptr1DType:$ptrs,
+   Ptr_Mask1DType:$mask,
+   Ptr_Any1DType:$passthrough,
+   AlignmentProp:$alignment);
+  let results = (outs Ptr_Any1DType:$result);
+  let assemblyFormat = [{
+$ptrs `,` $mask `,` $passthrough (`alignment` `=` $alignment^)?
+attr-dict `:` qualified(type($ptrs)) `->` type($result)

joker-eph wrote:

I was mostly trying to figure out what the impact is **in this case**, because
the examples show only `vector`, which isn't impacted by this.

https://github.com/llvm/llvm-project/pull/156368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread Fabian Mora via llvm-branch-commits


@@ -56,6 +96,58 @@ def Ptr_FromPtrOp : Pointer_Op<"from_ptr", [
   let hasVerifier = 1;
 }
 
+//===--===//
+// GatherOp
+//===--===//
+
+def Ptr_GatherOp : Pointer_Op<"gather", [
+DeclareOpInterfaceMethods,
+TypesMatchWith<"result and mask must be compatible", "result", "mask", [{
+  ::llvm::cast($_self).clone(
+IntegerType::get($_self.getContext(), 1))
+}]>,
+AllTypesMatch<["result", "passthrough"]>,
+// Check the shapes are compatible and both use the same shaped container
+// type.
+AllShapesMatch<["result", "ptrs"]>, AllTypeIDsMatch<["result", "ptrs"]>
+  ]> {
+  let summary = "Gather operation";
+  let description = [{
+The `gather` operation performs conditional loads from multiple memory
+locations specified by `ptrs` based on a mask `mask`. Elements of the
+result corresponding to masked-off lanes are taken from the passthrough
+operand.
+
+The mask operand is a shaped type of `i1` elements that must have the same
+shape as the result type.
+
+Examples:
+```mlir
+// Gather values from multiple memory locations
+%result = ptr.gather %ptrs, %mask, %passthrough :
+  vector<4x!ptr.ptr<#ptr.generic_space>> -> vector<4xf32>
+
+// Gather with alignment
+%result = ptr.gather %ptrs, %mask, %passthrough alignment = 8 :
+  vector<4x!ptr.ptr<#ptr.generic_space>> -> vector<4xf32>
+```
+  }];
+  let arguments = (ins Ptr_Ptr1DType:$ptrs,
+   Ptr_Mask1DType:$mask,
+   Ptr_Any1DType:$passthrough,
+   AlignmentProp:$alignment);
+  let results = (outs Ptr_Any1DType:$result);
+  let assemblyFormat = [{
+$ptrs `,` $mask `,` $passthrough (`alignment` `=` $alignment^)?
+attr-dict `:` qualified(type($ptrs)) `->` type($result)

fabianmcg wrote:

You're right, I'll remove.

https://github.com/llvm/llvm-project/pull/156368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fix adding m0 uses to gfx94/gfx12 ds atomics (PR #156402)

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

This was using the legacy multiclass which assumes the base form
has an m0 use. Use the versions which assume no m0 as the base name.
Most of the diff is shuffling around the pattern classes to avoid trying
to match the nonexistent m0-having form.

---
Full diff: https://github.com/llvm/llvm-project/pull/156402.diff


1 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/DSInstructions.td (+57-51) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 611695bd26d3a..2de89e1262e9c 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -468,6 +468,56 @@ class DSAtomicRetPat {
+  let OtherPredicates = [LDSRequiresM0Init] in {
+def : DSAtomicRetPat(frag#"_local_m0_"#vt)>;
+  }
+
+  let OtherPredicates = [NotLDSRequiresM0Init] in {
+def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt,
+ !cast(frag#"_local_"#vt)>;
+  }
+
+  let OtherPredicates = [HasGDS] in {
+def : DSAtomicRetPat(frag#"_region_m0_"#vt),
+ /* complexity */ 0, /* gds */ 1>;
+  }
+}
+
+multiclass DSAtomicRetNoRetPat_NoM0_mc {
+  def : DSAtomicRetPat(frag#"_local_"#vt)>;
+  def : DSAtomicRetPat(frag#"_local_noret_"#vt), /* complexity 
*/ 1>;
+}
+
+multiclass DSAtomicRetNoRetPat_mc {
+  let OtherPredicates = [LDSRequiresM0Init] in {
+def : DSAtomicRetPat(frag#"_local_m0_"#vt)>;
+def : DSAtomicRetPat(frag#"_local_m0_noret_"#vt), /* 
complexity */ 1>;
+  }
+
+  let OtherPredicates = [NotLDSRequiresM0Init] in {
+defm : DSAtomicRetNoRetPat_NoM0_mc<
+  !cast(!cast(inst)#"_gfx9"),
+  !cast(!cast(noRetInst)#"_gfx9"),
+  vt, frag>;
+  }
+
+  let OtherPredicates = [HasGDS] in {
+def : DSAtomicRetPat(frag#"_region_m0_"#vt),
+ /* complexity */ 0, /* gds */ 1>;
+def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt),
+ /* complexity */ 1, /* gds */ 1>;
+  }
+}
+
 defm DS_ADD_U32   : DS_1A1D_NORET_mc<"ds_add_u32">;
 defm DS_SUB_U32   : DS_1A1D_NORET_mc<"ds_sub_u32">;
 defm DS_RSUB_U32  : DS_1A1D_NORET_mc<"ds_rsub_u32">;
@@ -518,10 +568,10 @@ let SubtargetPredicate = HasLdsAtomicAddF64 in {
 } // End SubtargetPredicate = HasLdsAtomicAddF64
 
 let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
-  defm DS_PK_ADD_F16  : DS_1A1D_NORET_mc<"ds_pk_add_f16">;
-  defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32>;
-  defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc<"ds_pk_add_bf16">;
-  defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32>;
+  defm DS_PK_ADD_F16  : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">;
+  defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32>;
+  defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">;
+  defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32>;
 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
 
 defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
@@ -1088,50 +1138,6 @@ defm : DSWritePat_mc ;
 
 } // End AddedComplexity = 100
 
-multiclass DSAtomicRetPat_mc {
-  let OtherPredicates = [LDSRequiresM0Init] in {
-def : DSAtomicRetPat(frag#"_local_m0_"#vt)>;
-  }
-
-  let OtherPredicates = [NotLDSRequiresM0Init] in {
-def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt,
- !cast(frag#"_local_"#vt)>;
-  }
-
-  let OtherPredicates = [HasGDS] in {
-def : DSAtomicRetPat(frag#"_region_m0_"#vt),
- /* complexity */ 0, /* gds */ 1>;
-  }
-}
-
-multiclass DSAtomicRetNoRetPat_mc {
-  let OtherPredicates = [LDSRequiresM0Init] in {
-def : DSAtomicRetPat(frag#"_local_m0_"#vt)>;
-def : DSAtomicRetPat(frag#"_local_m0_noret_"#vt), /* 
complexity */ 1>;
-  }
-
-  let OtherPredicates = [NotLDSRequiresM0Init] in {
-def : DSAtomicRetPat(!cast(inst)#"_gfx9"), vt,
- !cast(frag#"_local_"#vt)>;
-def : DSAtomicRetPat(!cast(noRetInst)#"_gfx9"), 
vt,
- !cast(frag#"_local_noret_"#vt), /* 
complexity */ 1>;
-  }
-
-  let OtherPredicates = [HasGDS] in {
-def : DSAtomicRetPat(frag#"_region_m0_"#vt),
- /* complexity */ 0, /* gds */ 1>;
-def : DSAtomicRetPat(frag#"_region_m0_noret_"#vt),
- /* complexity */ 1, /* gds */ 1>;
-  }
-}
-
-
-
 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
 // Caution, the order of src and cmp is the *opposite* of the 
BUFFER_ATOMIC_CMPSWAP opcode.
 class DSAtomicCmpXChgSwapped;
-defm : DSAtomicRetNoRetPat_mc;
+defm : DSAtomicRetNoRetPat_NoM0_mc;
+defm : DSAtomicRetNoRetPat_NoM0_mc;
 }
 
 let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
@@ -1262,7 +1268,7 @@ class DSAtomicRetPatIntrinsic;
+defm : DSAtomicRetNoRetPat_NoM0_mc;
 } // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
 
 let OtherPredicates = 

[llvm-branch-commits] [llvm] release/21.x: [X86] getScalarMaskingNode - if the mask is zero just return the blended passthrough and preserved source value (#153575) (PR #156430)

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/156430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [X86] getScalarMaskingNode - if the mask is zero just return the blended passthrough and preserved source value (#153575) (PR #156430)

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:

@phoebewang What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/156430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][ptr] Add `gather`, `masked_load`, `masked_store`, and `scatter` ops (PR #156368)

2025-09-02 Thread Mehdi Amini via llvm-branch-commits

https://github.com/joker-eph approved this pull request.


https://github.com/llvm/llvm-project/pull/156368
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread GƔbor HorvƔth via llvm-branch-commits

https://github.com/Xazax-hun edited 
https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread GƔbor HorvƔth via llvm-branch-commits

https://github.com/Xazax-hun commented:

Overall looks good, some small questions inline. 

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread GƔbor HorvƔth via llvm-branch-commits


@@ -556,8 +554,47 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
 return false;
   }
 
+  void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
+// Find the underlying variable declaration for the left-hand side.
+if (const auto *DRE_LHS =
+dyn_cast(LHSExpr->IgnoreParenImpCasts())) {
+  markUseAsWrite(DRE_LHS);
+  if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl()))
+if (hasOrigin(VD_LHS->getType()))
+  // We are interested in assignments like `ptr1 = ptr2` or `ptr = 
&var`
+  // LHS must be a pointer/reference type that can be an origin.
+  // RHS must also represent an origin (either another pointer/ref or 
an
+  // address-of).
+  addAssignOriginFact(*VD_LHS, *RHSExpr);
+}
+  }
+
+  // A DeclRefExpr is a use of the referenced decl. It is checked for
+  // use-after-free unless it is being written to (e.g. on the left-hand side
+  // of an assignment).
+  void handleUse(const DeclRefExpr *DRE) {
+const auto *VD = dyn_cast(DRE->getDecl());
+if (VD && hasOrigin(VD->getType())) {

Xazax-hun wrote:

Same, do we need the declaration for anything?

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread GƔbor HorvƔth via llvm-branch-commits


@@ -556,8 +554,47 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
 return false;
   }
 
+  void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
+// Find the underlying variable declaration for the left-hand side.
+if (const auto *DRE_LHS =
+dyn_cast(LHSExpr->IgnoreParenImpCasts())) {
+  markUseAsWrite(DRE_LHS);
+  if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl()))

Xazax-hun wrote:

Could we use the expression type directly instead of the type of the decl?

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread GƔbor HorvƔth via llvm-branch-commits

Xazax-hun wrote:

> I think the liveness analysis can be built on top of this. 

I see! I was not sure what the layering would be. Makes sense to me. 

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Mark all DeclRefExpr as usages of the corresp. origin (PR #154316)

2025-09-02 Thread GƔbor HorvƔth via llvm-branch-commits


@@ -556,8 +554,47 @@ class FactGeneratorVisitor : public 
ConstStmtVisitor {
 return false;
   }
 
+  void handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) {
+// Find the underlying variable declaration for the left-hand side.
+if (const auto *DRE_LHS =
+dyn_cast(LHSExpr->IgnoreParenImpCasts())) {
+  markUseAsWrite(DRE_LHS);
+  if (const auto *VD_LHS = dyn_cast(DRE_LHS->getDecl()))
+if (hasOrigin(VD_LHS->getType()))
+  // We are interested in assignments like `ptr1 = ptr2` or `ptr = 
&var`
+  // LHS must be a pointer/reference type that can be an origin.
+  // RHS must also represent an origin (either another pointer/ref or 
an
+  // address-of).
+  addAssignOriginFact(*VD_LHS, *RHSExpr);
+}
+  }
+
+  // A DeclRefExpr is a use of the referenced decl. It is checked for
+  // use-after-free unless it is being written to (e.g. on the left-hand side
+  // of an assignment).
+  void handleUse(const DeclRefExpr *DRE) {
+const auto *VD = dyn_cast(DRE->getDecl());
+if (VD && hasOrigin(VD->getType())) {
+  UseFact *UF = FactMgr.createFact(DRE);
+  CurrentBlockFacts.push_back(UF);
+  assert(!UseFacts.contains(DRE));
+  UseFacts[DRE] = UF;
+}
+  }
+
+  void markUseAsWrite(const DeclRefExpr *DRE) {
+assert(UseFacts.contains(DRE));
+UseFacts[DRE]->markAsWritten();
+  }
+
   FactManager &FactMgr;
   llvm::SmallVector CurrentBlockFacts;
+  // To distinguish between reads and writes for use-after-free checks, this 
map
+  // stores the `UseFact` for each `DeclRefExpr`. We initially identify all
+  // `DeclRefExpr`s as "read" uses. When an assignment is processed, the use
+  // corresponding to the left-hand side is updated to be a "write", thereby
+  // exempting it from the check.
+  llvm::DenseMap UseFacts;

Xazax-hun wrote:

What is the plan for writes we have no DREs for? Like:
```
  (cond ? a : b) = ptr;
```

https://github.com/llvm/llvm-project/pull/154316
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [release] Correct download links for Windows on Arm packages (#156459) (PR #156462)

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/156462

Backport e591df63e583a39b2b8356dae7024df82d2f2204

Requested by: @DavidSpickett

>From 6ee50cd80cc912d5b0906399aad7166f79e7a10d Mon Sep 17 00:00:00 2001
From: David Spickett 
Date: Tue, 2 Sep 2025 15:13:03 +0100
Subject: [PATCH] [release] Correct download links for Windows on Arm packages
 (#156459)

Mistakenly repeated the https://github.com... part twice.

Found while editing the links for 21.1.0.

(cherry picked from commit e591df63e583a39b2b8356dae7024df82d2f2204)
---
 llvm/utils/release/github-upload-release.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/release/github-upload-release.py 
b/llvm/utils/release/github-upload-release.py
index 2b4e57d6348e9..d58bb544e17dd 100755
--- a/llvm/utils/release/github-upload-release.py
+++ b/llvm/utils/release/github-upload-release.py
@@ -67,7 +67,7 @@ def create_release(repo, release, tag=None, name=None, 
message=None):
 
 
 
-
+
 
 Download links will appear here once builds have completed. 
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [release] Correct download links for Windows on Arm packages (#156459) (PR #156462)

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/156462
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add version of isImmOperandLegal for MCInstrDesc (PR #155560)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155560

>From 493b50264a7f348de647e9817fbd2dcbff81b95a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 27 Aug 2025 15:17:00 +0900
Subject: [PATCH] AMDGPU: Add version of isImmOperandLegal for MCInstrDesc

This avoids the need for a pre-constructed instruction, at least
for the first argument.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 11 +--
 llvm/lib/Target/AMDGPU/SIInstrInfo.h   | 27 +++---
 2 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d43924d46b005..c5e8f95748cf1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4572,9 +4572,8 @@ static bool compareMachineOp(const MachineOperand &Op0,
   }
 }
 
-bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
 const MachineOperand &MO) const {
-  const MCInstrDesc &InstDesc = MI.getDesc();
   const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
 
   assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
@@ -4586,9 +4585,9 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr 
&MI, unsigned OpNo,
 return false;
 
   if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
-if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
-OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
-AMDGPU::OpName::src2))
+if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
+OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
+ AMDGPU::OpName::src2))
   return false;
 return RI.opCanUseInlineConstant(OpInfo.OperandType);
   }
@@ -4596,7 +4595,7 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr 
&MI, unsigned OpNo,
   if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
 return false;
 
-  if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
+  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
 return true;
 
   return ST.hasVOP3Literal();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index f7c7bb509c9ef..958af0ff1147f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -533,13 +533,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
   }
 
-  static bool isVOP3(const MachineInstr &MI) {
-return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
+  static bool isVOP3(const MCInstrDesc &Desc) {
+return Desc.TSFlags & SIInstrFlags::VOP3;
   }
 
-  bool isVOP3(uint16_t Opcode) const {
-return get(Opcode).TSFlags & SIInstrFlags::VOP3;
-  }
+  static bool isVOP3(const MachineInstr &MI) { return isVOP3(MI.getDesc()); }
+
+  bool isVOP3(uint16_t Opcode) const { return isVOP3(get(Opcode)); }
 
   static bool isSDWA(const MachineInstr &MI) {
 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
@@ -841,13 +841,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
   }
 
-  static bool isMAI(const MachineInstr &MI) {
-return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
+  static bool isMAI(const MCInstrDesc &Desc) {
+return Desc.TSFlags & SIInstrFlags::IsMAI;
   }
 
-  bool isMAI(uint16_t Opcode) const {
-return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
-  }
+  static bool isMAI(const MachineInstr &MI) { return isMAI(MI.getDesc()); }
+
+  bool isMAI(uint16_t Opcode) const { return isMAI(get(Opcode)); }
 
   static bool isMFMA(const MachineInstr &MI) {
 return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
@@ -1180,9 +1180,14 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
 return isInlineConstant(*MO.getParent(), MO.getOperandNo());
   }
 
-  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
+  bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
  const MachineOperand &MO) const;
 
+  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
+ const MachineOperand &MO) const {
+return isImmOperandLegal(MI.getDesc(), OpNo, MO);
+  }
+
   /// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
   bool isLegalAV64PseudoImm(uint64_t Imm) const;
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fold 64-bit immediate into copy to AV class (PR #155615)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155615

>From bc2070b9e0d85b51ddff16ee22faac470afbce53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 26 Aug 2025 23:53:57 +0900
Subject: [PATCH] AMDGPU: Fold 64-bit immediate into copy to AV class

This is in preparation for patches which will introduce more
copies to av registers.
---
 llvm/lib/Target/AMDGPU/SIDefines.h| 10 +--
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 25 --
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp|  6 +-
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir | 85 ---
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir| 26 +++---
 5 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h 
b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
 for (unsigned MovOp :
  {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
   AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+  AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
   const MCInstrDesc &MovDesc = TII->get(MovOp);
   assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
   const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
   const TargetRegisterClass *MovSrcRC =
   TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-  if (UseSubReg)
-MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-  if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-break;
+  if (MovSrcRC) {
+if (UseSubReg)
+  MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+  break;
+
+// FIXME: This is mutating the instruction only and deferring the 
actual
+// fold of the immediate
+  } else {
+// For the _IMM_PSEUDO cases, there can be value restrictions on the
+// immediate to verify. Technically we should always verify this, but 
it
+// only matters for these concrete cases.
+// TODO: Handle non-imm case if it's useful.
+if (!OpToFold.isImm() ||
+!TII->isImmOperandLegal(MovDesc, 1, 
*OpToFold.getEffectiveImmVal()))
+  break;
+  }
 
   MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
   MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 887092182f7d1..2b187c641da1c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) 
{
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-// TODO: We could fold this, but it's a strange case. The immediate value
-// can't be directly folded into any real use. We would have to spread new
-// immediate legality checks around and only accept subregister extracts 
for
-// profitability.
+return true;
   default:
 return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir 
b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mi

[llvm-branch-commits] [llvm] AMDGPU: Refactor isImmOperandLegal (PR #155607)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155607

>From 2ee13abfc257a65f5723039c3419371a6cb50ad6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 27 Aug 2025 15:35:53 +0900
Subject: [PATCH] AMDGPU: Refactor isImmOperandLegal

The goal is to expose more variants that can operate without
preconstructed MachineInstrs or MachineOperands.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp| 38 ---
 llvm/lib/Target/AMDGPU/SIInstrInfo.h  |  6 +++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp|  7 
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |  9 -
 4 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d3bda9f3875e3..887092182f7d1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4572,19 +4572,24 @@ static bool compareMachineOp(const MachineOperand &Op0,
   }
 }
 
-bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
-const MachineOperand &MO) const {
-  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
-
-  assert(MO.isImm() || MO.isTargetIndex() || MO.isFI() || MO.isGlobal());
-
+bool SIInstrInfo::isLiteralOperandLegal(const MCInstrDesc &InstDesc,
+const MCOperandInfo &OpInfo) const {
   if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE)
 return true;
 
-  if (OpInfo.RegClass < 0)
+  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
 return false;
 
-  if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
+  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(OpInfo))
+return true;
+
+  return ST.hasVOP3Literal();
+}
+
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+int64_t ImmVal) const {
+  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
+  if (isInlineConstant(ImmVal, OpInfo.OperandType)) {
 if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
 OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
  AMDGPU::OpName::src2))
@@ -4592,13 +4597,18 @@ bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc 
&InstDesc, unsigned OpNo,
 return RI.opCanUseInlineConstant(OpInfo.OperandType);
   }
 
-  if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
-return false;
+  return isLiteralOperandLegal(InstDesc, OpInfo);
+}
 
-  if (!isVOP3(InstDesc) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
-return true;
+bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+const MachineOperand &MO) const {
+  if (MO.isImm())
+return isImmOperandLegal(InstDesc, OpNo, MO.getImm());
 
-  return ST.hasVOP3Literal();
+  assert((MO.isTargetIndex() || MO.isFI() || MO.isGlobal()) &&
+ "unexpected imm-like operand kind");
+  const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
+  return isLiteralOperandLegal(InstDesc, OpInfo);
 }
 
 bool SIInstrInfo::isLegalAV64PseudoImm(uint64_t Imm) const {
@@ -6268,7 +6278,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, 
unsigned OpIdx,
   return false;
   }
 }
-  } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
+  } else if (AMDGPU::isSISrcOperand(InstDesc.operands()[i]) &&
  !isInlineConstant(Op, InstDesc.operands()[i])) {
 // The same literal may be used multiple times.
 if (!UsedLiteral)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 2f9f5c54406a3..1070d4824aa14 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1183,6 +1183,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
  const MachineOperand &MO) const;
 
+  bool isLiteralOperandLegal(const MCInstrDesc &InstDesc,
+ const MCOperandInfo &OpInfo) const;
+
+  bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo,
+ int64_t ImmVal) const;
+
   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
  const MachineOperand &MO) const {
 return isImmOperandLegal(MI.getDesc(), OpNo, MO);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp 
b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 18ee9c16b3ff9..da19a6faa9e0f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2720,13 +2720,6 @@ bool isInlineValue(unsigned Reg) {
 #undef CASE_GFXPRE11_GFX11PLUS_TO
 #undef MAP_REG2REG
 
-bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
-  assert(OpNo < Desc.NumOperands);
-  unsigned OpType = Desc.operands()[OpNo].Oper

[llvm-branch-commits] [llvm] AMDGPU: Fix fixme for out of bounds indexing in usesConstantBus check (PR #155603)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155603

>From 5c284f46a1063d5d0788c25a0d37ba019c171d54 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 27 Aug 2025 16:19:23 +0900
Subject: [PATCH 1/2] AMDGPU: Fix fixme for out of bounds indexing in
 usesConstantBus check

This loop over all the operands in the MachineInstr will eventually
go past the end of the MCInstrDesc's explicit operands. We don't
need the instr desc to compute the constant bus usage, just the
register and whether it's implicit or not. The check here is slightly
conservative; e.g., a random vcc implicit use appended to an instruction
will falsely report a constant bus use.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 62 --
 llvm/lib/Target/AMDGPU/SIInstrInfo.h   |  4 ++
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c5e8f95748cf1..4cf8fd5eb594f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4758,6 +4758,35 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr 
&MI,
   return Inst32;
 }
 
+bool SIInstrInfo::physRegUsesConstantBus(const MachineOperand &RegOp) const {
+  // Null is free
+  Register Reg = RegOp.getReg();
+  if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
+return false;
+
+  // SGPRs use the constant bus
+
+  // FIXME: implicit registers that are not part of the MCInstrDesc's implicit
+  // physical register operands should also count.
+  if (RegOp.isImplicit())
+return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
+
+  // Normal exec read does not count.
+  if ((Reg == AMDGPU::EXEC || Reg == AMDGPU::EXEC_LO) && RegOp.isImplicit())
+return false;
+
+  // SGPRs use the constant bus
+  return AMDGPU::SReg_32RegClass.contains(Reg) ||
+ AMDGPU::SReg_64RegClass.contains(Reg);
+}
+
+bool SIInstrInfo::regUsesConstantBus(const MachineOperand &RegOp,
+ const MachineRegisterInfo &MRI) const {
+  Register Reg = RegOp.getReg();
+  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
+ : physRegUsesConstantBus(RegOp);
+}
+
 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
   const MachineOperand &MO,
   const MCOperandInfo &OpInfo) const {
@@ -4765,23 +4794,9 @@ bool SIInstrInfo::usesConstantBus(const 
MachineRegisterInfo &MRI,
   if (!MO.isReg())
 return !isInlineConstant(MO, OpInfo);
 
-  if (!MO.isUse())
-return false;
-
-  if (MO.getReg().isVirtual())
-return RI.isSGPRClass(MRI.getRegClass(MO.getReg()));
-
-  // Null is free
-  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)
-return false;
-
-  // SGPRs use the constant bus
-  if (MO.isImplicit()) {
-return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
-   MO.getReg() == AMDGPU::VCC_LO;
-  }
-  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
- AMDGPU::SReg_64RegClass.contains(MO.getReg());
+  Register Reg = MO.getReg();
+  return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
+ : physRegUsesConstantBus(MO);
 }
 
 static Register findImplicitSGPRRead(const MachineInstr &MI) {
@@ -6250,13 +6265,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr 
&MI, unsigned OpIdx,
 continue;
   const MachineOperand &Op = MI.getOperand(i);
   if (Op.isReg()) {
-RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
-if (!SGPRsUsed.count(SGPR) &&
-// FIXME: This can access off the end of the operands() array.
-usesConstantBus(MRI, Op, InstDesc.operands().begin()[i])) {
-  if (--ConstantBusLimit <= 0)
-return false;
-  SGPRsUsed.insert(SGPR);
+if (Op.isUse()) {
+  RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
+  if (regUsesConstantBus(Op, MRI) && SGPRsUsed.insert(SGPR).second) {
+if (--ConstantBusLimit <= 0)
+  return false;
+  }
 }
   } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
  !isInlineConstant(Op, InstDesc.operands()[i])) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 958af0ff1147f..2f9f5c54406a3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1195,6 +1195,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   /// This function will return false if you pass it a 32-bit instruction.
   bool hasVALU32BitEncoding(unsigned Opcode) const;
 
+  bool physRegUsesConstantBus(const MachineOperand &Reg) const;
+  bool regUsesConstantBus(const MachineOperand &Reg,
+  const MachineRegisterInfo &MRI) const;
+
   /// Returns true if this operand uses the constant bus.
   

[llvm-branch-commits] [llvm] AMDGPU: Handle true16 disassembly of ds_write_b8/b16 (PR #156406)

2025-09-02 Thread Joe Nash via llvm-branch-commits


@@ -4447,76 +4447,76 @@
 # GFX11: ds_store_b128 v255, v[2:5] offset:65535 ; encoding: 
[0xff,0xff,0x7c,0xdb,0xff,0x02,0x00,0x00]
 0xff,0xff,0x7c,0xdb,0xff,0x02,0x00,0x00
 
-# GFX11: ds_store_b16 v0, v1 ; encoding: 
[0x00,0x00,0x7c,0xd8,0x00,0x01,0x00,0x00]
+# GFX11: ds_store_b16 v0, v1.l   ; encoding: 
[0x00,0x00,0x7c,0xd8,0x00,0x01,0x00,0x00]

Sisyph wrote:

Yes, I think Jay is right, though the docs here are somewhat misleading. The
operand is labeled as a vgpr with 16 bits. However, there is no way to encode
the swizzle (i.e. v0.h) in DS. At least I can't find any reference that these
would use the hi bit of the operand field the way vop1/2/C does.

https://github.com/llvm/llvm-project/pull/156406
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fold 64-bit immediate into copy to AV class (PR #155615)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155615

>From bc2070b9e0d85b51ddff16ee22faac470afbce53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 26 Aug 2025 23:53:57 +0900
Subject: [PATCH] AMDGPU: Fold 64-bit immediate into copy to AV class

This is in preparation for patches which will introduce more
copies to av registers.
---
 llvm/lib/Target/AMDGPU/SIDefines.h| 10 +--
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 25 --
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp|  6 +-
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir | 85 ---
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir| 26 +++---
 5 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h 
b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
 for (unsigned MovOp :
  {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
   AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+  AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
   const MCInstrDesc &MovDesc = TII->get(MovOp);
   assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
   const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
   const TargetRegisterClass *MovSrcRC =
   TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-  if (UseSubReg)
-MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-  if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-break;
+  if (MovSrcRC) {
+if (UseSubReg)
+  MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+  break;
+
+// FIXME: This is mutating the instruction only and deferring the 
actual
+// fold of the immediate
+  } else {
+// For the _IMM_PSEUDO cases, there can be value restrictions on the
+// immediate to verify. Technically we should always verify this, but 
it
+// only matters for these concrete cases.
+// TODO: Handle non-imm case if it's useful.
+if (!OpToFold.isImm() ||
+!TII->isImmOperandLegal(MovDesc, 1, 
*OpToFold.getEffectiveImmVal()))
+  break;
+  }
 
   MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
   MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 887092182f7d1..2b187c641da1c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) 
{
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-// TODO: We could fold this, but it's a strange case. The immediate value
-// can't be directly folded into any real use. We would have to spread new
-// immediate legality checks around and only accept subregister extracts 
for
-// profitability.
+return true;
   default:
 return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir 
b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mi

[llvm-branch-commits] [clang] [Analyzer] No longer crash with VLA operands to unary type traits (PR #154738)

2025-09-02 Thread Balazs Benics via llvm-branch-commits

https://github.com/steakhal updated 
https://github.com/llvm/llvm-project/pull/154738

>From 656763c898bff7783d87ed7d17c3050c631fe06d Mon Sep 17 00:00:00 2001
From: Aaron Ballman 
Date: Fri, 1 Aug 2025 12:31:56 -0400
Subject: [PATCH] [Analyzer] No longer crash with VLA operands to unary type
 traits (#151719)

sizeof was handled correctly, but __datasizeof and _Countof were not.

Fixes #151711

(cherry picked from commit 17327482f045b7119e116320db3e9c12fcf250ae with 
adjustments)
Dropping the ReleaseNotes part of the original patch.
---
 clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp |  3 ++-
 clang/test/Analysis/engine/gh151711.cpp   | 18 ++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Analysis/engine/gh151711.cpp

diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
index fa8e669b6bb2f..ab29f86cec326 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp
@@ -916,7 +916,8 @@ VisitUnaryExprOrTypeTraitExpr(const 
UnaryExprOrTypeTraitExpr *Ex,
   QualType T = Ex->getTypeOfArgument();
 
   for (ExplodedNode *N : CheckedSet) {
-if (Ex->getKind() == UETT_SizeOf) {
+if (Ex->getKind() == UETT_SizeOf || Ex->getKind() == UETT_DataSizeOf ||
+Ex->getKind() == UETT_CountOf) {
   if (!T->isIncompleteType() && !T->isConstantSizeType()) {
 assert(T->isVariableArrayType() && "Unknown non-constant-sized type.");
 
diff --git a/clang/test/Analysis/engine/gh151711.cpp 
b/clang/test/Analysis/engine/gh151711.cpp
new file mode 100644
index 0..a9950a7a3b9d0
--- /dev/null
+++ b/clang/test/Analysis/engine/gh151711.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify 
%s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify 
-x c %s
+
+void clang_analyzer_dump(int);
+
+// Ensure that VLA types are correctly handled by unary type traits in the
+// expression engine. Previously, __datasizeof and _Countof both caused failed
+// assertions.
+void gh151711(int i) {
+  clang_analyzer_dump(sizeof(int[i++]));   // expected-warning {{Unknown}}
+#ifdef __cplusplus
+  // __datasizeof is only available in C++.
+  clang_analyzer_dump(__datasizeof(int[i++])); // expected-warning {{Unknown}}
+#else
+  // _Countof is only available in C.
+  clang_analyzer_dump(_Countof(int[i++])); // expected-warning {{Unknown}}
+#endif
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Add llvm.protected.field.ptr intrinsic and pre-ISel lowering. (PR #151647)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -2850,6 +2850,12 @@ def int_experimental_convergence_anchor
 def int_experimental_convergence_loop
   : DefaultAttrsIntrinsic<[llvm_token_ty], [], [IntrNoMem, IntrConvergent]>;
 
+//===- Structure Protection Intrinsics 
===//
+
+def int_protected_field_ptr :
+  DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i1_ty],

nikic wrote:

Should this use `llvm_anyptr_ty` to avoid hardcoding the address space?

https://github.com/llvm/llvm-project/pull/151647
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Add llvm.protected.field.ptr intrinsic and pre-ISel lowering. (PR #151647)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -461,6 +463,162 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
   return Changed;
 }
 
+namespace {
+
+enum class PointerEncoding {
+  Rotate,
+  PACCopyable,
+  PACNonCopyable,
+};
+
+bool expandProtectedFieldPtr(Function &Intr) {
+  Module &M = *Intr.getParent();
+
+  SmallPtrSet DSsToDeactivate;
+  SmallPtrSet LoadsStores;
+
+  Type *Int8Ty = Type::getInt8Ty(M.getContext());
+  Type *Int64Ty = Type::getInt64Ty(M.getContext());
+  PointerType *PtrTy = PointerType::get(M.getContext(), 0);
+
+  Function *SignIntr =
+  Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ptrauth_sign, {});
+  Function *AuthIntr =
+  Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ptrauth_auth, {});
+
+  auto *EmuFnTy = FunctionType::get(Int64Ty, {Int64Ty, Int64Ty}, false);
+  FunctionCallee EmuSignIntr = M.getOrInsertFunction("__emupac_pacda", 
EmuFnTy);
+  FunctionCallee EmuAuthIntr = M.getOrInsertFunction("__emupac_autda", 
EmuFnTy);
+
+  auto CreateSign = [&](IRBuilder<> &B, Value *Val, Value *Disc,
+   OperandBundleDef DSBundle) {
+Function *F = B.GetInsertBlock()->getParent();
+Attribute FSAttr = F->getFnAttribute("target-features");
+if (FSAttr.isValid() && FSAttr.getValueAsString().contains("+pauth"))
+  return B.CreateCall(SignIntr, {Val, B.getInt32(2), Disc}, DSBundle);
+return B.CreateCall(EmuSignIntr, {Val, Disc}, DSBundle);
+  };
+
+  auto CreateAuth = [&](IRBuilder<> &B, Value *Val, Value *Disc,
+   OperandBundleDef DSBundle) {
+Function *F = B.GetInsertBlock()->getParent();
+Attribute FSAttr = F->getFnAttribute("target-features");
+if (FSAttr.isValid() && FSAttr.getValueAsString().contains("+pauth"))
+  return B.CreateCall(AuthIntr, {Val, B.getInt32(2), Disc}, DSBundle);
+return B.CreateCall(EmuAuthIntr, {Val, Disc}, DSBundle);
+  };
+
+  auto GetDeactivationSymbol = [&](CallInst *Call) -> GlobalValue * {
+if (auto Bundle =
+Call->getOperandBundle(LLVMContext::OB_deactivation_symbol))
+  return cast(Bundle->Inputs[0]);
+return nullptr;
+  };
+
+  for (User *U : Intr.users()) {
+auto *Call = cast(U);
+auto *DS = GetDeactivationSymbol(Call);
+
+for (Use &U : Call->uses()) {
+  if (auto *LI = dyn_cast(U.getUser())) {
+if (isa(LI->getType())) {
+  LoadsStores.insert(LI);
+  continue;
+}
+  }
+  if (auto *SI = dyn_cast(U.getUser())) {
+if (U.getOperandNo() == 1 &&
+isa(SI->getValueOperand()->getType())) {
+  LoadsStores.insert(SI);
+  continue;
+}
+  }
+  // Comparisons against null cannot be used to recover the original
+  // pointer so we allow them.
+  if (auto *CI = dyn_cast(U.getUser())) {
+if (auto *Op = dyn_cast(CI->getOperand(0)))
+  if (Op->isNullValue())
+continue;
+if (auto *Op = dyn_cast(CI->getOperand(1)))
+  if (Op->isNullValue())
+continue;
+  }
+  if (DS)
+DSsToDeactivate.insert(DS);
+}
+  }
+
+  for (Instruction *I : LoadsStores) {
+auto *PointerOperand = isa(I)
+   ? cast(I)->getPointerOperand()
+   : cast(I)->getPointerOperand();

nikic wrote:

getLoadStorePointerOperand
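
A minimal sketch of that simplification, reusing names from the quoted hunk
(the wrapper function is made up for illustration; the helper itself lives in
llvm/IR/Instructions.h):

```cpp
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustration only: getLoadStorePointerOperand() accepts either a LoadInst
// or a StoreInst, so the isa/cast ternary in the quoted hunk collapses to a
// single call. The wrapper name here is hypothetical.
static CallInst *getProtectedFieldCall(Instruction *I) {
  Value *PointerOperand = getLoadStorePointerOperand(I);
  return cast<CallInst>(PointerOperand);
}
```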

https://github.com/llvm/llvm-project/pull/151647
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Add llvm.protected.field.ptr intrinsic and pre-ISel lowering. (PR #151647)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -461,6 +463,162 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
   return Changed;
 }
 
+namespace {
+
+enum class PointerEncoding {
+  Rotate,
+  PACCopyable,
+  PACNonCopyable,
+};
+
+bool expandProtectedFieldPtr(Function &Intr) {
+  Module &M = *Intr.getParent();
+
+  SmallPtrSet DSsToDeactivate;
+  SmallPtrSet LoadsStores;
+
+  Type *Int8Ty = Type::getInt8Ty(M.getContext());
+  Type *Int64Ty = Type::getInt64Ty(M.getContext());
+  PointerType *PtrTy = PointerType::get(M.getContext(), 0);
+
+  Function *SignIntr =
+  Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ptrauth_sign, {});
+  Function *AuthIntr =
+  Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ptrauth_auth, {});
+
+  auto *EmuFnTy = FunctionType::get(Int64Ty, {Int64Ty, Int64Ty}, false);
+  FunctionCallee EmuSignIntr = M.getOrInsertFunction("__emupac_pacda", 
EmuFnTy);
+  FunctionCallee EmuAuthIntr = M.getOrInsertFunction("__emupac_autda", 
EmuFnTy);
+
+  auto CreateSign = [&](IRBuilder<> &B, Value *Val, Value *Disc,
+   OperandBundleDef DSBundle) {
+Function *F = B.GetInsertBlock()->getParent();
+Attribute FSAttr = F->getFnAttribute("target-features");
+if (FSAttr.isValid() && FSAttr.getValueAsString().contains("+pauth"))
+  return B.CreateCall(SignIntr, {Val, B.getInt32(2), Disc}, DSBundle);
+return B.CreateCall(EmuSignIntr, {Val, Disc}, DSBundle);
+  };
+
+  auto CreateAuth = [&](IRBuilder<> &B, Value *Val, Value *Disc,
+   OperandBundleDef DSBundle) {
+Function *F = B.GetInsertBlock()->getParent();
+Attribute FSAttr = F->getFnAttribute("target-features");
+if (FSAttr.isValid() && FSAttr.getValueAsString().contains("+pauth"))
+  return B.CreateCall(AuthIntr, {Val, B.getInt32(2), Disc}, DSBundle);
+return B.CreateCall(EmuAuthIntr, {Val, Disc}, DSBundle);
+  };
+
+  auto GetDeactivationSymbol = [&](CallInst *Call) -> GlobalValue * {
+if (auto Bundle =
+Call->getOperandBundle(LLVMContext::OB_deactivation_symbol))
+  return cast(Bundle->Inputs[0]);
+return nullptr;
+  };
+
+  for (User *U : Intr.users()) {
+auto *Call = cast(U);
+auto *DS = GetDeactivationSymbol(Call);
+
+for (Use &U : Call->uses()) {
+  if (auto *LI = dyn_cast(U.getUser())) {
+if (isa(LI->getType())) {
+  LoadsStores.insert(LI);
+  continue;
+}
+  }
+  if (auto *SI = dyn_cast(U.getUser())) {
+if (U.getOperandNo() == 1 &&
+isa(SI->getValueOperand()->getType())) {
+  LoadsStores.insert(SI);
+  continue;
+}
+  }
+  // Comparisons against null cannot be used to recover the original
+  // pointer so we allow them.
+  if (auto *CI = dyn_cast(U.getUser())) {
+if (auto *Op = dyn_cast(CI->getOperand(0)))
+  if (Op->isNullValue())
+continue;
+if (auto *Op = dyn_cast(CI->getOperand(1)))
+  if (Op->isNullValue())
+continue;
+  }
+  if (DS)
+DSsToDeactivate.insert(DS);
+}
+  }
+
+  for (Instruction *I : LoadsStores) {
+auto *PointerOperand = isa(I)
+   ? cast(I)->getPointerOperand()
+   : cast(I)->getPointerOperand();
+auto *Call = cast(PointerOperand);
+
+auto *Disc = Call->getArgOperand(1);
+bool UseHWEncoding = 
cast(Call->getArgOperand(2))->getZExtValue();
+
+GlobalValue *DS = GetDeactivationSymbol(Call);
+OperandBundleDef DSBundle("deactivation-symbol", DS);
+
+if (auto *LI = dyn_cast(I)) {
+  IRBuilder<> B(LI->getNextNode());
+  auto *LIInt = cast(B.CreatePtrToInt(LI, B.getInt64Ty()));
+  Value *Auth;
+  if (UseHWEncoding) {
+Auth = CreateAuth(B, LIInt, Disc, DSBundle);
+  } else {
+Auth = B.CreateAdd(LIInt, Disc);
+Auth = B.CreateIntrinsic(
+Auth->getType(), Intrinsic::fshr,
+{Auth, Auth, ConstantInt::get(Auth->getType(), 16)});
+  }
+  LI->replaceAllUsesWith(B.CreateIntToPtr(Auth, B.getPtrTy()));
+  LIInt->setOperand(0, LI);

nikic wrote:

I found this code to be somewhat confusing. I think it would be more obvious to 
directly create a new load from the llvm.protected.field.ptr argument instead 
of rewriting things in this way and then later stripping out the intrinsic call.
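
A rough sketch of that alternative, reusing names from the quoted hunk (Call,
Disc, DSBundle, UseHWEncoding, CreateAuth); this only illustrates the
suggestion and is not the patch as posted:

```cpp
// Sketch: emit a fresh load through the intrinsic's pointer argument, decode
// that value, and drop the original load, instead of retargeting the original
// load's ptrtoint and stripping the intrinsic in a later pass over
// Intr.users().
if (auto *LI = dyn_cast<LoadInst>(I)) {
  IRBuilder<> B(LI);
  Value *RawPtr = Call->getArgOperand(0);
  LoadInst *NewLI = B.CreateAlignedLoad(LI->getType(), RawPtr, LI->getAlign());
  Value *Enc = B.CreatePtrToInt(NewLI, B.getInt64Ty());
  Value *Dec;
  if (UseHWEncoding) {
    Dec = CreateAuth(B, Enc, Disc, DSBundle);
  } else {
    // Same software decoding as the quoted hunk: add the discriminator, then
    // rotate right by 16.
    Dec = B.CreateAdd(Enc, Disc);
    Dec = B.CreateIntrinsic(Dec->getType(), Intrinsic::fshr,
                            {Dec, Dec, ConstantInt::get(Dec->getType(), 16)});
  }
  LI->replaceAllUsesWith(B.CreateIntToPtr(Dec, B.getPtrTy()));
  LI->eraseFromParent();
}
```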

https://github.com/llvm/llvm-project/pull/151647
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Add llvm.protected.field.ptr intrinsic and pre-ISel lowering. (PR #151647)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals all --version 5
+; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck 
--check-prefix=NOPAUTH %s
+; RUN: opt -passes=pre-isel-intrinsic-lowering -mattr=+pauth -S < %s | 
FileCheck --check-prefix=PAUTH %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+@ds1 = external global i8
+@ds2 = external global i8
+@ds3 = external global i8
+@ds4 = external global i8
+@ds5 = external global i8
+@ds6 = external global i8
+
+;.
+; NOPAUTH: @ds1 = external global i8
+; NOPAUTH: @ds2 = external global i8
+; NOPAUTH: @ds3 = external global i8
+; NOPAUTH: @ds4 = external global i8
+; NOPAUTH: @ds5 = external global i8
+; NOPAUTH: @ds6 = hidden alias i8, inttoptr (i64 3573751839 to ptr)
+;.
+; PAUTH: @ds1 = external global i8
+; PAUTH: @ds2 = external global i8
+; PAUTH: @ds3 = external global i8
+; PAUTH: @ds4 = external global i8
+; PAUTH: @ds5 = external global i8
+; PAUTH: @ds6 = hidden alias i8, inttoptr (i64 3573751839 to ptr)
+;.
+define ptr @load_hw(ptr %ptrptr) {
+; NOPAUTH-LABEL: define ptr @load_hw(
+; NOPAUTH-SAME: ptr [[PTRPTR:%.*]]) {
+; NOPAUTH-NEXT:[[PTR:%.*]] = load ptr, ptr [[PTRPTR]], align 8
+; NOPAUTH-NEXT:[[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; NOPAUTH-NEXT:[[TMP2:%.*]] = call i64 @__emupac_autda(i64 [[TMP1]], i64 
1) [ "deactivation-symbol"(ptr @ds1) ]
+; NOPAUTH-NEXT:[[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; NOPAUTH-NEXT:ret ptr [[TMP3]]
+;
+; PAUTH-LABEL: define ptr @load_hw(
+; PAUTH-SAME: ptr [[PTRPTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; PAUTH-NEXT:[[PTR:%.*]] = load ptr, ptr [[PTRPTR]], align 8
+; PAUTH-NEXT:[[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; PAUTH-NEXT:[[TMP2:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP1]], i32 
2, i64 1) [ "deactivation-symbol"(ptr @ds1) ]
+; PAUTH-NEXT:[[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; PAUTH-NEXT:ret ptr [[TMP3]]
+;
+  %protptrptr = call ptr @llvm.protected.field.ptr(ptr %ptrptr, i64 1, i1 
true) [ "deactivation-symbol"(ptr @ds1) ]
+  %ptr = load ptr, ptr %protptrptr
+  ret ptr %ptr
+}
+
+define void @store_hw(ptr %ptrptr, ptr %ptr) {
+; NOPAUTH-LABEL: define void @store_hw(
+; NOPAUTH-SAME: ptr [[PTRPTR:%.*]], ptr [[PTR:%.*]]) {
+; NOPAUTH-NEXT:[[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; NOPAUTH-NEXT:[[TMP2:%.*]] = call i64 @__emupac_pacda(i64 [[TMP1]], i64 
2) [ "deactivation-symbol"(ptr @ds2) ]
+; NOPAUTH-NEXT:[[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; NOPAUTH-NEXT:store ptr [[TMP3]], ptr [[PTRPTR]], align 8
+; NOPAUTH-NEXT:ret void
+;
+; PAUTH-LABEL: define void @store_hw(
+; PAUTH-SAME: ptr [[PTRPTR:%.*]], ptr [[PTR:%.*]]) #[[ATTR0]] {
+; PAUTH-NEXT:[[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; PAUTH-NEXT:[[TMP2:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[TMP1]], i32 
2, i64 2) [ "deactivation-symbol"(ptr @ds2) ]
+; PAUTH-NEXT:[[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; PAUTH-NEXT:store ptr [[TMP3]], ptr [[PTRPTR]], align 8
+; PAUTH-NEXT:ret void
+;
+  %protptrptr = call ptr @llvm.protected.field.ptr(ptr %ptrptr, i64 2, i1 
true) [ "deactivation-symbol"(ptr @ds2) ]
+  store ptr %ptr, ptr %protptrptr
+  ret void
+}
+
+define ptr @load_sw(ptr %ptrptr) {
+; NOPAUTH-LABEL: define ptr @load_sw(
+; NOPAUTH-SAME: ptr [[PTRPTR:%.*]]) {
+; NOPAUTH-NEXT:[[PTR:%.*]] = load ptr, ptr [[PTRPTR]], align 8
+; NOPAUTH-NEXT:[[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
+; NOPAUTH-NEXT:[[TMP2:%.*]] = add i64 [[TMP1]], 1
+; NOPAUTH-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 16)

nikic wrote:

Something I don't get is how the deactivation is going to work in the non-hw 
case. You don't attach the deactivation symbol anywhere here (and in fact, 
cannot attach it to all the relevant instructions, as the add is not a call).

https://github.com/llvm/llvm-project/pull/151647
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Add llvm.protected.field.ptr intrinsic and pre-ISel lowering. (PR #151647)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -461,6 +463,162 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
   return Changed;
 }
 
+namespace {
+
+enum class PointerEncoding {
+  Rotate,
+  PACCopyable,
+  PACNonCopyable,
+};

nikic wrote:

Unused?

https://github.com/llvm/llvm-project/pull/151647
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Add llvm.protected.field.ptr intrinsic and pre-ISel lowering. (PR #151647)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -461,6 +463,162 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(
   return Changed;
 }
 
+namespace {
+
+enum class PointerEncoding {
+  Rotate,
+  PACCopyable,
+  PACNonCopyable,
+};
+
+bool expandProtectedFieldPtr(Function &Intr) {
+  Module &M = *Intr.getParent();
+
+  SmallPtrSet DSsToDeactivate;
+  SmallPtrSet LoadsStores;
+
+  Type *Int8Ty = Type::getInt8Ty(M.getContext());
+  Type *Int64Ty = Type::getInt64Ty(M.getContext());
+  PointerType *PtrTy = PointerType::get(M.getContext(), 0);
+
+  Function *SignIntr =
+  Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ptrauth_sign, {});
+  Function *AuthIntr =
+  Intrinsic::getOrInsertDeclaration(&M, Intrinsic::ptrauth_auth, {});
+
+  auto *EmuFnTy = FunctionType::get(Int64Ty, {Int64Ty, Int64Ty}, false);
+  FunctionCallee EmuSignIntr = M.getOrInsertFunction("__emupac_pacda", 
EmuFnTy);
+  FunctionCallee EmuAuthIntr = M.getOrInsertFunction("__emupac_autda", 
EmuFnTy);
+
+  auto CreateSign = [&](IRBuilder<> &B, Value *Val, Value *Disc,
+   OperandBundleDef DSBundle) {
+Function *F = B.GetInsertBlock()->getParent();
+Attribute FSAttr = F->getFnAttribute("target-features");
+if (FSAttr.isValid() && FSAttr.getValueAsString().contains("+pauth"))
+  return B.CreateCall(SignIntr, {Val, B.getInt32(2), Disc}, DSBundle);
+return B.CreateCall(EmuSignIntr, {Val, Disc}, DSBundle);
+  };
+
+  auto CreateAuth = [&](IRBuilder<> &B, Value *Val, Value *Disc,
+   OperandBundleDef DSBundle) {
+Function *F = B.GetInsertBlock()->getParent();
+Attribute FSAttr = F->getFnAttribute("target-features");
+if (FSAttr.isValid() && FSAttr.getValueAsString().contains("+pauth"))
+  return B.CreateCall(AuthIntr, {Val, B.getInt32(2), Disc}, DSBundle);
+return B.CreateCall(EmuAuthIntr, {Val, Disc}, DSBundle);
+  };
+
+  auto GetDeactivationSymbol = [&](CallInst *Call) -> GlobalValue * {
+if (auto Bundle =
+Call->getOperandBundle(LLVMContext::OB_deactivation_symbol))
+  return cast(Bundle->Inputs[0]);
+return nullptr;
+  };
+
+  for (User *U : Intr.users()) {
+auto *Call = cast(U);
+auto *DS = GetDeactivationSymbol(Call);
+
+for (Use &U : Call->uses()) {
+  if (auto *LI = dyn_cast(U.getUser())) {
+if (isa(LI->getType())) {
+  LoadsStores.insert(LI);
+  continue;
+}
+  }
+  if (auto *SI = dyn_cast(U.getUser())) {
+if (U.getOperandNo() == 1 &&
+isa(SI->getValueOperand()->getType())) {
+  LoadsStores.insert(SI);
+  continue;
+}
+  }
+  // Comparisons against null cannot be used to recover the original
+  // pointer so we allow them.
+  if (auto *CI = dyn_cast(U.getUser())) {
+if (auto *Op = dyn_cast(CI->getOperand(0)))
+  if (Op->isNullValue())
+continue;
+if (auto *Op = dyn_cast(CI->getOperand(1)))
+  if (Op->isNullValue())
+continue;
+  }
+  if (DS)
+DSsToDeactivate.insert(DS);
+}
+  }
+
+  for (Instruction *I : LoadsStores) {
+auto *PointerOperand = isa(I)
+   ? cast(I)->getPointerOperand()
+   : cast(I)->getPointerOperand();
+auto *Call = cast(PointerOperand);
+
+auto *Disc = Call->getArgOperand(1);
+bool UseHWEncoding = 
cast(Call->getArgOperand(2))->getZExtValue();
+
+GlobalValue *DS = GetDeactivationSymbol(Call);
+OperandBundleDef DSBundle("deactivation-symbol", DS);
+
+if (auto *LI = dyn_cast(I)) {
+  IRBuilder<> B(LI->getNextNode());
+  auto *LIInt = cast(B.CreatePtrToInt(LI, B.getInt64Ty()));
+  Value *Auth;
+  if (UseHWEncoding) {
+Auth = CreateAuth(B, LIInt, Disc, DSBundle);
+  } else {
+Auth = B.CreateAdd(LIInt, Disc);
+Auth = B.CreateIntrinsic(
+Auth->getType(), Intrinsic::fshr,
+{Auth, Auth, ConstantInt::get(Auth->getType(), 16)});
+  }
+  LI->replaceAllUsesWith(B.CreateIntToPtr(Auth, B.getPtrTy()));
+  LIInt->setOperand(0, LI);
+} else {
+  auto *SI = cast(I);
+  IRBuilder<> B(SI);
+  auto *SIValInt =
+  B.CreatePtrToInt(SI->getValueOperand(), B.getInt64Ty());
+  Value *Sign;
+  if (UseHWEncoding) {
+Sign = CreateSign(B, SIValInt, Disc, DSBundle);
+  } else {
+Sign = B.CreateIntrinsic(
+SIValInt->getType(), Intrinsic::fshl,
+{SIValInt, SIValInt, ConstantInt::get(SIValInt->getType(), 16)});
+Sign = B.CreateSub(Sign, Disc);
+  }
+  SI->setOperand(0, B.CreateIntToPtr(Sign, B.getPtrTy()));
+}
+  }
+
+  for (User *U : llvm::make_early_inc_range(Intr.users())) {
+auto *Call = cast(U);
+auto *Pointer = Call->getArgOperand(0);
+
+Call->replaceAllUsesWith(Pointer);
+Call->eraseFromParent();
+  }
+
+  if (!DSsToDeactivate.empty()) {
+Constant *Nop =
+ConstantExpr::getIntToPtr(Cons

[llvm-branch-commits] Utils: Inhibit load/store folding through phis for llvm.protected.field.ptr. (PR #151649)

2025-09-02 Thread Nikita Popov via llvm-branch-commits


@@ -697,8 +697,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
 Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
   LoadInst *FirstLI = cast(PN.getIncomingValue(0));
 
-  // Can't forward swifterror through a phi.
-  if (FirstLI->getOperand(0)->isSwiftError())
+  if (!shouldFoldOperandThroughPhi(FirstLI->getOperand(0)))

nikic wrote:

I think my general preference would be to not add a new API for this and just 
call canReplaceOperandWithVariable() here.
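
For reference, a minimal sketch of that variant, mirroring the quoted hunk
(illustration only; it assumes this sits in InstCombine where InstCombinerImpl
is visible, and that the protected-field restriction would live inside
canReplaceOperandWithVariable()):

```cpp
#include "llvm/Transforms/Utils/Local.h"

Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
  LoadInst *FirstLI = cast<LoadInst>(PN.getIncomingValue(0));

  // Operand 0 of a load is its pointer operand; the existing helper already
  // rejects swifterror pointers there, so no new API is needed.
  if (!canReplaceOperandWithVariable(FirstLI, 0))
    return nullptr;

  // The rest of the existing fold logic would follow here; returning null is
  // only a placeholder for this sketch.
  return nullptr;
}
```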

https://github.com/llvm/llvm-project/pull/151649
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/21.x: [libc++][AIX] Fixup problems with ABI list checking (#155643) (PR #156502)

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:

@hubert-reinterpretcast What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/156502
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [compiler-rt] [libcxx] [libcxxabi] [libunwind] [lldb] [llvm] [mlir] [openmp] release/21.x: [CMake][AIX] quote the string AIX `if` conditions (PR #1565

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/156505

Backport 63195d3d7a8bde05590f91a38398f986bb4265b2 
3e6ec475b756559560cba4a16c2bc755aa8caee5

Requested by: @amy-kwan

>From 7df62021a2bcceff725ec5bb24960bf987711f4b Mon Sep 17 00:00:00 2001
From: David Tenty 
Date: Wed, 20 Aug 2025 12:45:41 -0400
Subject: [PATCH 1/2] [NFC][CMake] quote ${CMAKE_SYSTEM_NAME} consistently
 (#154537)

A CMake change included in CMake 4.0 makes `AIX` into a variable
(similar to `APPLE`, etc.)
https://gitlab.kitware.com/cmake/cmake/-/commit/ff03db6657c38c8cf992877ea66174c33d0bcb0b

However, `${CMAKE_SYSTEM_NAME}` unfortunately also expands exactly to
`AIX` and `if` auto-expands variable names in CMake. That means you get
a double expansion if you write:

`if (${CMAKE_SYSTEM_NAME}  MATCHES "AIX")`
which becomes:
`if (AIX  MATCHES "AIX")`
which is as if you wrote:
`if (ON MATCHES "AIX")`

You can prevent this by quoting the expansion of "${CMAKE_SYSTEM_NAME}",
due to policy
[CMP0054](https://cmake.org/cmake/help/latest/policy/CMP0054.html#policy:CMP0054)
which is on by default in 4.0+. Most of the LLVM CMake already does
this, but this PR fixes the remaining cases where we do not.

(cherry picked from commit 63195d3d7a8bde05590f91a38398f986bb4265b2)
---
 clang-tools-extra/clangd/CMakeLists.txt   |  4 +--
 clang/bindings/python/tests/CMakeLists.txt|  2 +-
 clang/tools/libclang/CMakeLists.txt   |  6 ++---
 compiler-rt/cmake/config-ix.cmake |  2 +-
 compiler-rt/lib/asan/CMakeLists.txt   |  4 +--
 libcxx/CMakeLists.txt |  6 ++---
 libcxxabi/CMakeLists.txt  |  4 +--
 libcxxabi/src/CMakeLists.txt  |  2 +-
 libunwind/src/CMakeLists.txt  |  2 +-
 lldb/source/Host/CMakeLists.txt   |  2 +-
 .../Process/FreeBSDKernel/CMakeLists.txt  |  2 +-
 lldb/tools/driver/CMakeLists.txt  |  2 +-
 llvm/CMakeLists.txt   | 14 +-
 llvm/cmake/config-ix.cmake|  8 +++---
 llvm/cmake/modules/AddLLVM.cmake  | 26 +--
 llvm/cmake/modules/HandleLLVMOptions.cmake| 25 +++---
 llvm/lib/Support/CMakeLists.txt   |  4 +--
 llvm/lib/Target/CMakeLists.txt|  2 +-
 llvm/lib/TargetParser/CMakeLists.txt  |  2 +-
 llvm/tools/llvm-jitlink/CMakeLists.txt|  4 +--
 .../Support/DynamicLibrary/CMakeLists.txt |  6 ++---
 mlir/cmake/modules/FindSyclRuntime.cmake  |  2 +-
 offload/cmake/OpenMPTesting.cmake |  2 +-
 openmp/CMakeLists.txt |  2 +-
 openmp/cmake/OpenMPTesting.cmake  |  2 +-
 openmp/runtime/CMakeLists.txt |  2 +-
 openmp/runtime/cmake/LibompHandleFlags.cmake  |  6 ++---
 openmp/runtime/cmake/config-ix.cmake  |  2 +-
 openmp/runtime/src/CMakeLists.txt |  4 +--
 third-party/benchmark/src/CMakeLists.txt  |  4 +--
 third-party/unittest/CMakeLists.txt   |  2 +-
 31 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/clang-tools-extra/clangd/CMakeLists.txt 
b/clang-tools-extra/clangd/CMakeLists.txt
index a1e9da41b4b32..b68b565e90686 100644
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -6,7 +6,7 @@ add_subdirectory(support)
 
 # Configure the Features.inc file.
 if (NOT DEFINED CLANGD_BUILD_XPC)
-  if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+  if("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin")
 set(CLANGD_BUILD_XPC_DEFAULT ON)
   else ()
 set(CLANGD_BUILD_XPC_DEFAULT OFF)
@@ -192,7 +192,7 @@ if(CLANGD_TIDY_CHECKS)
 endif()
 
 add_subdirectory(refactor/tweaks)
-if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
   # FIXME: Make fuzzer not use linux-specific APIs, build it everywhere.
   add_subdirectory(fuzzer)
 endif()
diff --git a/clang/bindings/python/tests/CMakeLists.txt 
b/clang/bindings/python/tests/CMakeLists.txt
index a0ddabc21bb41..d9a6bbf452bd6 100644
--- a/clang/bindings/python/tests/CMakeLists.txt
+++ b/clang/bindings/python/tests/CMakeLists.txt
@@ -35,7 +35,7 @@ if(WIN32)
 endif()
 
 # The Python FFI interface is broken on AIX: 
https://bugs.python.org/issue38628.
-if(${CMAKE_SYSTEM_NAME} MATCHES "AIX")
+if("${CMAKE_SYSTEM_NAME}" MATCHES "AIX")
   set(RUN_PYTHON_TESTS FALSE)
 endif()
 
diff --git a/clang/tools/libclang/CMakeLists.txt 
b/clang/tools/libclang/CMakeLists.txt
index 2b1e266f07392..e0ff7605b68b8 100644
--- a/clang/tools/libclang/CMakeLists.txt
+++ b/clang/tools/libclang/CMakeLists.txt
@@ -93,7 +93,7 @@ if(MSVC)
   set(LLVM_EXPORTED_SYMBOL_FILE)
 endif()
 
-if (UNIX AND NOT APPLE AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX" AND NOT 
CYGWIN)
+if (UNIX AND NOT APPLE AND NOT "${CMAKE_SYSTEM_NAME}" MATCHES "AIX" AND NOT 
CYGWIN)
   set(LLVM_EXPORTED_SYMBOL_FILE)
   set(USE_VERSION_SCRIPT ${LLVM_HAVE_LINK_VERSION_SCRIPT})
 endif()
@@ -125,7 +125,7 @@ else()
  

[llvm-branch-commits] [clang] [clang-tools-extra] [compiler-rt] [libcxx] [libcxxabi] [libunwind] [lldb] [llvm] [mlir] [openmp] release/21.x: [CMake][AIX] quote the string AIX `if` conditions (PR #1565

2025-09-02 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/156505
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [compiler-rt] [libcxx] [libcxxabi] [libunwind] [lldb] [llvm] [mlir] [openmp] release/21.x: [CMake][AIX] quote the string AIX `if` conditions (PR #1565

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:

@hubert-reinterpretcast What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/156505
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Reorder arguments of DS_Real_gfx12 (NFC) (PR #156405)

2025-09-02 Thread Stanislav Mekhanoshin via llvm-branch-commits

https://github.com/rampitec approved this pull request.


https://github.com/llvm/llvm-project/pull/156405
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][HeuristicResolver] Default argument heuristic for template template parameters (PR #156404)

2025-09-02 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Nathan Ridge (HighCommander4)


Changes

Fixes https://github.com/clangd/clangd/issues/2478

---
Full diff: https://github.com/llvm/llvm-project/pull/156404.diff


2 Files Affected:

- (modified) clang/lib/Sema/HeuristicResolver.cpp (+19) 
- (modified) clang/unittests/Sema/HeuristicResolverTest.cpp (+18) 


``diff
diff --git a/clang/lib/Sema/HeuristicResolver.cpp 
b/clang/lib/Sema/HeuristicResolver.cpp
index 8b424610feeda..6bfd1db602d4e 100644
--- a/clang/lib/Sema/HeuristicResolver.cpp
+++ b/clang/lib/Sema/HeuristicResolver.cpp
@@ -260,6 +260,25 @@ QualType HeuristicResolverImpl::simplifyType(QualType 
Type, const Expr *E,
 }
   }
 }
+
+// Similarly, heuristically replace a template template parameter with its
+// default argument if it has one.
+if (const auto *TST =
+dyn_cast_if_present(T.Type)) {
+  if (const auto *TTPD = dyn_cast_if_present(
+  TST->getTemplateName().getAsTemplateDecl())) {
+if (TTPD->hasDefaultArgument()) {
+  const auto &DefaultArg = TTPD->getDefaultArgument().getArgument();
+  if (DefaultArg.getKind() == TemplateArgument::Template) {
+if (const auto *CTD = dyn_cast_if_present(
+DefaultArg.getAsTemplate().getAsTemplateDecl())) {
+  return {Ctx.getCanonicalTagType(CTD->getTemplatedDecl())};
+}
+  }
+}
+  }
+}
+
 // Check if the expression refers to an explicit object parameter of
 // templated type. If so, heuristically treat it as having the type of the
 // enclosing class.
diff --git a/clang/unittests/Sema/HeuristicResolverTest.cpp 
b/clang/unittests/Sema/HeuristicResolverTest.cpp
index cdbb4fe7c7eda..a69605e9f7466 100644
--- a/clang/unittests/Sema/HeuristicResolverTest.cpp
+++ b/clang/unittests/Sema/HeuristicResolverTest.cpp
@@ -545,6 +545,24 @@ TEST(HeuristicResolver, 
MemberExpr_DefaultTemplateArgument_Recursive) {
   cxxMethodDecl(hasName("foo")).bind("output"));
 }
 
+TEST(HeuristicResolver, MemberExpr_DefaultTemplateTemplateArgument) {
+  std::string Code = R"cpp(
+template 
+struct vector {
+  void push_back(T);
+};
+template  class Container = vector>
+void foo(Container c, Element e) {
+  c.push_back(e);
+}
+  )cpp";
+  // Test resolution of "push_back" in "c.push_back(e)".
+  expectResolution(
+  Code, &HeuristicResolver::resolveMemberExpr,
+  cxxDependentScopeMemberExpr(hasMemberName("push_back")).bind("input"),
+  cxxMethodDecl(hasName("push_back")).bind("output"));
+}
+
 TEST(HeuristicResolver, MemberExpr_ExplicitObjectParameter) {
   std::string Code = R"cpp(
 struct Foo {

``




https://github.com/llvm/llvm-project/pull/156404
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Provide a custom decoder for LDR_ZA/STR_ZA (PR #156363)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156363

>From ed950c319568da2a902fde1f1899e9cdbbebf7cb Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:27:48 +0300
Subject: [PATCH] [AArch64] Provide a custom decoder for LDR_ZA/STR_ZA

These are the only instructions that encode two operands in the same
field. Instead of fixing them after they have been incorrectly decoded,
provide a custom decoder.
---
 .../Disassembler/AArch64Disassembler.cpp  | 29 ---
 llvm/lib/Target/AArch64/SMEInstrFormats.td|  4 +++
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 23e46b84f6278..8c1e9f61693fb 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1563,6 +1563,25 @@ static DecodeStatus DecodePRFMRegInstruction(MCInst 
&Inst, uint32_t insn,
   return Success;
 }
 
+static DecodeStatus
+DecodeSMESpillFillInstruction(MCInst &Inst, uint32_t Bits, uint64_t Addr,
+  const MCDisassembler *Decoder) {
+  unsigned RvBits = fieldFromInstruction(Bits, 13, 2);
+  unsigned RnBits = fieldFromInstruction(Bits, 5, 5);
+  unsigned Imm4Bits = fieldFromInstruction(Bits, 0, 4);
+
+  DecodeSimpleRegisterClass(
+  Inst, RvBits, Addr, Decoder);
+  Inst.addOperand(MCOperand::createImm(Imm4Bits));
+  DecodeSimpleRegisterClass(Inst, RnBits,
+   Addr, Decoder);
+  // Spill and fill instructions have a single immediate used for both
+  // the vector select offset and optional memory offset. Replicate
+  // the decoded immediate.
+  Inst.addOperand(MCOperand::createImm(Imm4Bits));
+  return Success;
+}
+
 #include "AArch64GenDisassemblerTables.inc"
 #include "AArch64GenInstrInfo.inc"
 
@@ -1621,16 +1640,6 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst 
&MI, uint64_t &Size,
   }
 }
 
-if (MI.getOpcode() == AArch64::LDR_ZA ||
-MI.getOpcode() == AArch64::STR_ZA) {
-  // Spill and fill instructions have a single immediate used for both
-  // the vector select offset and optional memory offset. Replicate
-  // the decoded immediate.
-  const MCOperand &Imm4Op = MI.getOperand(2);
-  assert(Imm4Op.isImm() && "Unexpected operand type!");
-  MI.addOperand(Imm4Op);
-}
-
 if (Result != MCDisassembler::Fail)
   return Result;
   }
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td 
b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index b3005d5120229..40ec371fe79d3 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1108,6 +1108,10 @@ class sme_spill_fill_base
 : I,
   Sched<[]> {
+  // 'offset' operand is encoded in the same bits as 'imm4'. There is currently
+  // no way to tell TableGen about this.
+  let DecoderMethod = "DecodeSMESpillFillInstruction";
+  bits<0> ZAt;
   bits<2> Rv;
   bits<5> Rn;
   bits<4> imm4;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AVR] Remove workarounds for instructions using Z register (PR #156361)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156361

>From d3e77cd38331d0e492c704ac3073ec084be88b21 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:18:57 +0300
Subject: [PATCH] [AVR] Remove workarounds for instructions using Z register

The generated disassembler can now correctly decode these instructions.
---
 llvm/lib/Target/AVR/AVRInstrFormats.td  |  1 +
 llvm/lib/Target/AVR/AVRInstrInfo.td |  4 +++-
 llvm/lib/Target/AVR/CMakeLists.txt  |  3 +--
 .../Target/AVR/Disassembler/AVRDisassembler.cpp |  6 ++
 .../Target/AVR/MCTargetDesc/AVRInstPrinter.cpp  | 17 -
 5 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td 
b/llvm/lib/Target/AVR/AVRInstrFormats.td
index e1e65b56370cc..eb4daf74545b0 100644
--- a/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -79,6 +79,7 @@ class FRdRr opcode, bits<2> f, dag outs, dag ins, 
string asmstr,
 
//===--===//
 class FZRd t, dag outs, dag ins, string asmstr, list pattern>
 : AVRInst16 {
+  bits<0> z;
   bits<5> rd;
 
   let Inst{15 - 12} = 0b1001;
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td 
b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 958e1383acef2..70efda46093c4 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1230,7 +1230,9 @@ let Uses = [R1, R0] in {
 
   let Defs = [R31R30] in 
   def SPMZPi : F16<0b100101011000, (outs), (ins ZREG:$z), "spm $z+", []>,
-   Requires<[HasSPMX]>;
+   Requires<[HasSPMX]> {
+bits<0> z;
+  }
 }
 
 // Read data from IO location operations.
diff --git a/llvm/lib/Target/AVR/CMakeLists.txt 
b/llvm/lib/Target/AVR/CMakeLists.txt
index 2d5cb7e048778..a31c545f48ba3 100644
--- a/llvm/lib/Target/AVR/CMakeLists.txt
+++ b/llvm/lib/Target/AVR/CMakeLists.txt
@@ -6,8 +6,7 @@ tablegen(LLVM AVRGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM AVRGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM AVRGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AVRGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AVRGenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands)
+tablegen(LLVM AVRGenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM AVRGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM AVRGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM AVRGenRegisterInfo.inc -gen-register-info)
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp 
b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 56b3cf7f88e2a..d874697185fac 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -91,6 +91,12 @@ static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, 
unsigned RegNo,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeZREGRegisterClass(MCInst &Inst,
+const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createReg(AVR::R31R30));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
  const MCDisassembler *Decoder) {
   unsigned addr = 0;
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp 
b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 481219164a0f9..5adffeed04bda 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -101,23 +101,6 @@ const char 
*AVRInstPrinter::getPrettyRegisterName(MCRegister Reg,
 void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   raw_ostream &O) {
   const MCOperandInfo &MOI = this->MII.get(MI->getOpcode()).operands()[OpNo];
-  if (MOI.RegClass == AVR::ZREGRegClassID) {
-// Special case for the Z register, which sometimes doesn't have an operand
-// in the MCInst.
-O << "Z";
-return;
-  }
-
-  if (OpNo >= MI->size()) {
-// Not all operands are correctly disassembled at the moment. This means
-// that some machine instructions won't have all the necessary operands
-// set.
-// To avoid asserting, print  instead until the necessary support
-// has been implemented.
-O << "";
-return;
-  }
-
   const MCOperand &Op = MI->getOperand(OpNo);
 
   if (Op.isReg()) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Remove post-decoding instruction mutations (PR #156364)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156364

>From 46fc93b95a9607f4b9c5f8883c313d94ef519d65 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:30:01 +0300
Subject: [PATCH] [AArch64] Remove post-decoding instruction mutations

These instructions can now be fully decoded automatically.
---
 .../lib/Target/AArch64/AArch64InstrFormats.td | 30 +-
 llvm/lib/Target/AArch64/CMakeLists.txt|  3 +-
 .../Disassembler/AArch64Disassembler.cpp  | 54 +-
 .../MCTargetDesc/AArch64MCTargetDesc.h|  6 --
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 56 ++-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|  8 ++-
 6 files changed, 106 insertions(+), 51 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index feff59061aa16..e0e299cf4afdf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1559,13 +1559,11 @@ def VectorIndexHOperand : AsmVectorIndex<0, 7>;
 def VectorIndexSOperand : AsmVectorIndex<0, 3>;
 def VectorIndexDOperand : AsmVectorIndex<0, 1>;
 
-let OperandNamespace = "AArch64" in {
-  let OperandType = "OPERAND_IMPLICIT_IMM_0" in {
-defm VectorIndex0 : VectorIndex;
-defm VectorIndex032b : VectorIndex;
-  }
+let DecoderMethod = "DecodeZeroImm" in {
+  defm VectorIndex0 : VectorIndex;
+  defm VectorIndex032b : VectorIndex;
 }
 defm VectorIndex1 : VectorIndex;
@@ -1615,9 +1613,8 @@ def sme_elm_idx0_0 : Operand, TImmLeaf {
   let ParserMatchClass = Imm0_0Operand;
+  let DecoderMethod = "DecodeZeroImm";
   let PrintMethod = "printMatrixIndex";
-  let OperandNamespace = "AArch64";
-  let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
 def sme_elm_idx0_1 : Operand, TImmLeaf;
 
 def uimm0s2range : Operand, ImmLeaf {
+  let DecoderMethod = "DecodeZeroImm";
   let PrintMethod = "printImmRangeScale<2, 1>";
   let ParserMatchClass = UImm0s2RangeOperand;
-  let OperandNamespace = "AArch64";
-  let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
 
 def uimm0s4range : Operand, ImmLeaf {
+  let DecoderMethod = "DecodeZeroImm";
   let PrintMethod = "printImmRangeScale<4, 3>";
   let ParserMatchClass = UImm0s4RangeOperand;
-  let OperandNamespace = "AArch64";
-  let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
 
 def uimm1s2range : Operand, ImmLeaf {
+  bits<0> idx;
   let Inst{20-16} = 0b1;
 }
 def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b1;
 }
 def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00010;
 }
 def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00010;
 }
 def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00100;
 }
   }
@@ -8265,15 +8265,19 @@ multiclass UMov {
   // streaming mode.
   let Predicates = [HasNEONandIsStreamingSafe] in {
 def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b1;
 }
 def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00010;
 }
 def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b00100;
 }
 def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> {
+  bits<0> idx;
   let Inst{20-16} = 0b01000;
 }
 def : SIMDMovAlias<"mov", ".s",
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt 
b/llvm/lib/Target/AArch64/CMakeLists.txt
index 79b56ea9cf850..803943fd57c4d 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -7,8 +7,7 @@ tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 8c1e9f61693fb..647a6a3d76ef8 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -130,6 +130,18 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned 
RegMask,
   return Success;
 }
 
+static DecodeStatus DecodeMPRRegisterClass(MCIns

[llvm-branch-commits] [llvm] [AArch64] Correctly disassemble TSB instruction (PR #156362)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156362

>From b62d8435beaf6fda78ff37f10152159426891d95 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:22:53 +0300
Subject: [PATCH] [AArch64] Correctly disassemble TSB instruction

The TSB instruction has one operand, but the generated disassembler didn't
decode this operand. AArch64InstPrinter had a workaround for this.

This instruction can now be disassembled correctly.
---
 llvm/lib/Target/AArch64/AArch64SystemOperands.td   | 2 +-
 llvm/lib/Target/AArch64/CMakeLists.txt | 3 +--
 .../lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 7 ---
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td 
b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 1b0e90b0e0dc3..65b752ed40c90 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -362,7 +362,7 @@ def lookupTSBByName : SearchIndex {
   let Key = ["Name"];
 }
 
-def : TSB<"csync", 0>;
+def : TSB<"csync", 2>;
 
 
//===--===//
 // PRFM (prefetch) instruction options.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt 
b/llvm/lib/Target/AArch64/CMakeLists.txt
index 833ce48ea1d7a..79b56ea9cf850 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer 
-asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands
-  -ignore-fully-defined-operands)
+  -ignore-non-decodable-operands)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp 
b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 54b58e948daf2..2552ee3009338 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -365,13 +365,6 @@ void AArch64InstPrinter::printInst(const MCInst *MI, 
uint64_t Address,
 return;
   }
 
-  // Instruction TSB is specified as a one operand instruction, but 'csync' is
-  // not encoded, so for printing it is treated as a special case here:
-  if (Opcode == AArch64::TSB) {
-O << "\ttsb\tcsync";
-return;
-  }
-
   if (!PrintAliases || !printAliasInstr(MI, Address, STI, O))
 printInstruction(MI, Address, STI, O);
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RISCV] Remove post-decoding instruction adjustments (PR #156360)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156360

>From 4188fa46342e6747f985d232677f6a690fa9972c Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:18:06 +0300
Subject: [PATCH] [RISCV] Remove post-decoding instruction adjustments

---
 llvm/lib/Target/RISCV/CMakeLists.txt  |  3 +--
 .../RISCV/Disassembler/RISCVDisassembler.cpp  | 25 ++-
 llvm/lib/Target/RISCV/RISCVInstrFormatsC.td   |  1 -
 llvm/lib/Target/RISCV/RISCVInstrInfoC.td  |  8 --
 llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td   |  4 +++
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt 
b/llvm/lib/Target/RISCV/CMakeLists.txt
index 720361dc3da5b..531238ae85029 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM RISCVGenCompressInstEmitter.inc 
-gen-compress-inst-emitter)
 tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred)
 tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler
-  --specialize-decoders-per-bitwidth
-  -ignore-non-decodable-operands)
+  --specialize-decoders-per-bitwidth)
 tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering)
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp 
b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index b1b7ea5246fda..89df9d82f8780 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -46,8 +46,6 @@ class RISCVDisassembler : public MCDisassembler {
   raw_ostream &CStream) const override;
 
 private:
-  void addSPOperands(MCInst &MI) const;
-
   DecodeStatus getInstruction48(MCInst &Instr, uint64_t &Size,
 ArrayRef Bytes, uint64_t Address,
 raw_ostream &CStream) const;
@@ -196,6 +194,12 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst 
&Inst, uint32_t RegNo,
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeSPRegisterClass(MCInst &Inst,
+  const MCDisassembler *Decoder) {
+  Inst.addOperand(MCOperand::createReg(RISCV::X2));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -600,15 +604,6 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, 
uint32_t Insn,
 
 #include "RISCVGenDisassemblerTables.inc"
 
-// Add implied SP operand for C.*SP compressed instructions. The SP operand
-// isn't explicitly encoded in the instruction.
-void RISCVDisassembler::addSPOperands(MCInst &MI) const {
-  const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
-  for (unsigned i = 0; i < MCID.getNumOperands(); i++)
-if (MCID.operands()[i].RegClass == RISCV::SPRegClassID)
-  MI.insert(MI.begin() + i, MCOperand::createReg(RISCV::X2));
-}
-
 namespace {
 
 struct DecoderListEntry {
@@ -774,12 +769,8 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst 
&MI, uint64_t &Size,
 LLVM_DEBUG(dbgs() << "Trying " << Entry.Desc << " table:\n");
 DecodeStatus Result =
 decodeInstruction(Entry.Table, MI, Insn, Address, this, STI);
-if (Result == MCDisassembler::Fail)
-  continue;
-
-addSPOperands(MI);
-
-return Result;
+if (Result != MCDisassembler::Fail)
+  return Result;
   }
 
   return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td 
b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 209c3fae63f45..4c7cd05723ac8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -54,7 +54,6 @@ class RVInst16CSS funct3, bits<2> opcode, dag outs, 
dag ins,
 : RVInst16 {
   bits<10> imm;
   bits<5> rs2;
-  bits<5> rs1;
 
   let Inst{15-13} = funct3;
   let Inst{12-7} = imm{5-0};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td 
b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index bfc766dfc27e5..9fc73662d9704 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -230,13 +230,17 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 class CStackLoad funct3, string OpcodeStr,
  DAGOperand cls, DAGOperand opnd>
 : RVInst16CI;
+ OpcodeStr, "$rd, ${imm}(${rs1})"> {
+  bits<0> rs1;
+}
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
 class CStackStore funct3, string OpcodeStr,
   DAGOperand cls, DAGOperand opnd>
 : RVInst16CSS;
+  OpcodeStr, "$rs2, ${imm}(${rs1})"> {
+  bit

[llvm-branch-commits] [llvm] [AArch64] Correctly disassemble TSB instruction (PR #156362)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156362

>From b62d8435beaf6fda78ff37f10152159426891d95 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:22:53 +0300
Subject: [PATCH] [AArch64] Correctly disassemble TSB instruction

The TSB instruction has one operand, but the generated disassembler didn't
decode it; AArch64InstPrinter had a workaround for this.

The instruction can now be disassembled correctly.
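
A minimal sketch of what the printer side can look like once the operand is
actually decoded. The printTSBOperand name is illustrative, and the
AArch64TSB::lookupTSBByEncoding helper is assumed to come from the
TableGen-generated system-operand tables; neither is taken from this patch.

```cpp
// Hypothetical sketch, not the actual AArch64InstPrinter code: with the csync
// operand decoded (CRm encoding 2), the printer resolves its name through the
// generated system-operand table instead of special-casing the TSB opcode.
// AArch64TSB is assumed to be provided by AArch64GenSystemOperands.inc.
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printTSBOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
  int64_t Encoding = MI->getOperand(OpNo).getImm();
  if (const auto *Entry = AArch64TSB::lookupTSBByEncoding(Encoding))
    O << Entry->Name;      // prints "csync" for encoding 2
  else
    O << '#' << Encoding;  // unknown encodings fall back to a raw immediate
}
```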
---
 llvm/lib/Target/AArch64/AArch64SystemOperands.td   | 2 +-
 llvm/lib/Target/AArch64/CMakeLists.txt | 3 +--
 .../lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 7 ---
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 1b0e90b0e0dc3..65b752ed40c90 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -362,7 +362,7 @@ def lookupTSBByName : SearchIndex {
   let Key = ["Name"];
 }
 
-def : TSB<"csync", 0>;
+def : TSB<"csync", 2>;
 
 
//===--===//
 // PRFM (prefetch) instruction options.
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 833ce48ea1d7a..79b56ea9cf850 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -8,8 +8,7 @@ tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands
-  -ignore-fully-defined-operands)
+  -ignore-non-decodable-operands)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 54b58e948daf2..2552ee3009338 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -365,13 +365,6 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
 return;
   }
 
-  // Instruction TSB is specified as a one operand instruction, but 'csync' is
-  // not encoded, so for printing it is treated as a special case here:
-  if (Opcode == AArch64::TSB) {
-O << "\ttsb\tcsync";
-return;
-  }
-
   if (!PrintAliases || !printAliasInstr(MI, Address, STI, O))
 printInstruction(MI, Address, STI, O);
 



[llvm-branch-commits] [llvm] [Hexagon] Remove post-decoding instruction adjustments (PR #156359)

2025-09-02 Thread Sergei Barannikov via llvm-branch-commits

https://github.com/s-barannikov updated 
https://github.com/llvm/llvm-project/pull/156359

>From 95a0d748eae948d6c520dd4706cac52cc830ecfd Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Mon, 1 Sep 2025 20:16:14 +0300
Subject: [PATCH] [Hexagon] Remove post-decoding instruction adjustments

These instructions can now be fully decoded automatically.
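
As a rough illustration of the mechanism (the operand name below is
illustrative, not the actual HexagonOperands.td definition), an operand with
no encoded bits can name a custom decoder in TableGen, so the generated tables
materialize it during decoding and no post-decode MCInst fixup is needed:

```tablegen
// Hypothetical sketch, assuming a decoder like n1ConstDecoder from this patch:
// the operand carries no instruction bits, so its DecoderMethod is invoked
// unconditionally and simply appends the implied constant to the MCInst.
def n1ConstOperand : Operand<i32> {
  let DecoderMethod = "n1ConstDecoder";
}
```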
---
 llvm/lib/Target/Hexagon/CMakeLists.txt|   3 +-
 .../Disassembler/HexagonDisassembler.cpp  |  65 ++---
 .../Target/Hexagon/HexagonDepInstrFormats.td  | 129 --
 llvm/lib/Target/Hexagon/HexagonOperands.td|  10 +-
 4 files changed, 49 insertions(+), 158 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index b615536af03be..d758260a8ab5d 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -7,8 +7,7 @@ tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
-tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler
-  -ignore-non-decodable-operands)
+tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler)
 tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index de10092cbe3c8..0639878c1256f 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -173,6 +173,19 @@ static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
 const MCDisassembler *Decoder);
 static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
 const MCDisassembler *Decoder);
+
+static DecodeStatus n1ConstDecoder(MCInst &MI, const MCDisassembler *Decoder) {
+  MCContext &Ctx = Decoder->getContext();
+  MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(-1, Ctx)));
+  return DecodeStatus::Success;
+}
+
+static DecodeStatus sgp10ConstDecoder(MCInst &MI,
+  const MCDisassembler *Decoder) {
+  MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+  return DecodeStatus::Success;
+}
+
 #include "HexagonDepDecoders.inc"
 #include "HexagonGenDisassemblerTables.inc"
 
@@ -349,21 +362,6 @@ void HexagonDisassembler::remapInstruction(MCInst &Instr) const {
   }
 }
 
-static void adjustDuplex(MCInst &MI, MCContext &Context) {
-  switch (MI.getOpcode()) {
-  case Hexagon::SA1_setin1:
-MI.insert(MI.begin() + 1,
-  MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
-break;
-  case Hexagon::SA1_dec:
-MI.insert(MI.begin() + 2,
-  MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
-break;
-  default:
-break;
-  }
-}
-
 DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
@@ -468,12 +466,10 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 CurrentExtender = TmpExtender;
 if (Result != DecodeStatus::Success)
   return DecodeStatus::Fail;
-adjustDuplex(*MILow, getContext());
 Result = decodeInstruction(
 DecodeHigh, *MIHigh, (Instruction >> 16) & 0x1fff, Address, this, STI);
 if (Result != DecodeStatus::Success)
   return DecodeStatus::Fail;
-adjustDuplex(*MIHigh, getContext());
 MCOperand OPLow = MCOperand::createInst(MILow);
 MCOperand OPHigh = MCOperand::createInst(MIHigh);
 MI.addOperand(OPLow);
@@ -499,41 +495,6 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 
   }
 
-  switch (MI.getOpcode()) {
-  case Hexagon::J4_cmpeqn1_f_jumpnv_nt:
-  case Hexagon::J4_cmpeqn1_f_jumpnv_t:
-  case Hexagon::J4_cmpeqn1_fp0_jump_nt:
-  case Hexagon::J4_cmpeqn1_fp0_jump_t:
-  case Hexagon::J4_cmpeqn1_fp1_jump_nt:
-  case Hexagon::J4_cmpeqn1_fp1_jump_t:
-  case Hexagon::J4_cmpeqn1_t_jumpnv_nt:
-  case Hexagon::J4_cmpeqn1_t_jumpnv_t:
-  case Hexagon::J4_cmpeqn1_tp0_jump_nt:
-  case Hexagon::J4_cmpeqn1_tp0_jump_t:
-  case Hexagon::J4_cmpeqn1_tp1_jump_nt:
-  case Hexagon::J4_cmpeqn1_tp1_jump_t:
-  case Hexagon::J4_cmpgtn1_f_jumpnv_nt:
-  case Hexagon::J4_cmpgtn1_f_jumpnv_t:
-  case Hexagon::J4_cmpgtn1_fp0_jump_nt:
-  case Hexagon::J4_cmpgtn1_fp0_jump_t:
-  case Hexagon::J4_cmpgtn1_fp1_jump_nt:
-  case Hexagon::J4_cmpgtn1_fp1_jump_t:
-  case Hexagon::J4_cmpgtn1_t_jumpnv_nt:
-  case Hexagon::J4_cmpgtn1_t_jumpnv_t:
-  case Hexagon::J4_cmpgtn1_tp0_jump_nt:

[llvm-branch-commits] [llvm] AMDGPU: Fold 64-bit immediate into copy to AV class (PR #155615)

2025-09-02 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/155615

>From 05821956deebe21b8dd2bdd0a5962a0987d42775 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 26 Aug 2025 23:53:57 +0900
Subject: [PATCH] AMDGPU: Fold 64-bit immediate into copy to AV class

This is in preparation for patches which will introduce more copies to AV registers.
---
 llvm/lib/Target/AMDGPU/SIDefines.h| 10 +--
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 25 --
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp|  6 +-
 .../CodeGen/AMDGPU/fold-imm-copy-agpr.mir | 85 ---
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir| 26 +++---
 5 files changed, 70 insertions(+), 82 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 268b153c6c924..150e05b59c29f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -237,16 +237,16 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_AC_FP32,
   OPERAND_REG_INLINE_AC_FP64,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants. Does not accept registers.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   // Operand for source modifiers for VOP instructions
   OPERAND_INPUT_MODS,
 
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
-  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
-  // constants.
-  OPERAND_INLINE_C_AV64_PSEUDO,
-
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
 
@@ -254,7 +254,7 @@ enum OperandType : unsigned {
   OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_FP64,
 
   OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT32,
-  OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_FP64,
+  OPERAND_REG_INLINE_AC_LAST = OPERAND_INLINE_C_AV64_PSEUDO,
 
   OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a116b57c85a88..92eaa8b29ccb8 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1296,7 +1296,8 @@ void SIFoldOperandsImpl::foldOperand(
 for (unsigned MovOp :
  {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
   AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
-  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO}) {
+  AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
+  AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
   const MCInstrDesc &MovDesc = TII->get(MovOp);
   assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
 
@@ -1312,11 +1313,23 @@ void SIFoldOperandsImpl::foldOperand(
   const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
   const TargetRegisterClass *MovSrcRC =
   TRI->getRegClass(MovDesc.operands()[SrcIdx].RegClass);
-
-  if (UseSubReg)
-MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
-  if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
-break;
+  if (MovSrcRC) {
+if (UseSubReg)
+  MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
+if (!MRI->constrainRegClass(SrcReg, MovSrcRC))
+  break;
+
+// FIXME: This is mutating the instruction only and deferring the actual
+// fold of the immediate
+  } else {
+// For the _IMM_PSEUDO cases, there can be value restrictions on the
+// immediate to verify. Technically we should always verify this, but it
+// only matters for these concrete cases.
+// TODO: Handle non-imm case if it's useful.
+if (!OpToFold.isImm() ||
+!TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
+  break;
+  }
 
   MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
   MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 887092182f7d1..2b187c641da1c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3444,12 +3444,8 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::V_ACCVGPR_READ_B32_e64:
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
-return true;
   case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
-// TODO: We could fold this, but it's a strange case. The immediate value
-// can't be directly folded into any real use. We would have to spread new
-// immediate legality checks around and only accept subregister extracts for
-// profitability.
+return true;
   default:
 return false;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mir 
b/llvm/test/CodeGen/AMDGPU/fold-imm-copy-agpr.mi
