https://github.com/brendandahl created https://github.com/llvm/llvm-project/pull/90906
Adds a builtin and intrinsic for the f32.load_f16 instruction. The instruction loads an f16 value from memory and puts it in an f32. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md Note: the current spec has f32.load_f16 as opcode 0xFD0120, but this is incorrect and will be changed to 0xFC30 soon. >From 14313fa9ef33b4cbc8cf18f280ee885b38015ca4 Mon Sep 17 00:00:00 2001 From: Brendan Dahl <brendan.d...@gmail.com> Date: Wed, 1 May 2024 21:53:39 +0000 Subject: [PATCH] [WebAssembly] Implement prototype f32.load_f16 instruction. Adds a builtin and intrinsic for the f32.load_f16 instruction. The instruction loads an f16 value from memory and puts it in an f32. Specified at: https://github.com/WebAssembly/half-precision/blob/29a9b9462c9285d4ccc1a5dc39214ddfd1892658/proposals/half-precision/Overview.md Note: the current spec has f32.load_f16 as opcode 0xFD0120, but this is incorrect and will be changed to 0xFC30 soon. 
--- clang/include/clang/Basic/BuiltinsWebAssembly.def | 3 +++ clang/lib/CodeGen/CGBuiltin.cpp | 5 +++++ clang/test/CodeGen/builtins-wasm.c | 9 +++++++-- llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 12 ++++++++++++ .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 1 + .../Target/WebAssembly/WebAssemblyISelLowering.cpp | 8 ++++++++ .../lib/Target/WebAssembly/WebAssemblyInstrMemory.td | 5 +++++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 7 +++++++ llvm/test/CodeGen/WebAssembly/half-precision.ll | 12 ++++++++++++ llvm/test/MC/WebAssembly/simd-encodings.s | 5 ++++- 10 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/half-precision.ll diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 7e950914ad946d..8b0a1d4579d84c 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -190,6 +190,9 @@ TARGET_BUILTIN(__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8, "V8sV16ScV16Sc", TARGET_BUILTIN(__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4, "V4iV16ScV16ScV4i", "nc", "relaxed-simd") TARGET_BUILTIN(__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4, "V4fV8UsV8UsV4f", "nc", "relaxed-simd") +// Half-Precision (fp16) +TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fi*", "nU", "half-precision") + // Reference Types builtins // Some builtins are custom type-checked - see 't' as part of the third argument, // in which case the argument spec (second argument) is unused. 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a370734e00d3e1..e9d465bd2a6b01 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21257,6 +21257,11 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32); return Builder.CreateCall(Callee, {LHS, RHS, Acc}); } + case WebAssembly::BI__builtin_wasm_loadf16_f32: { + Value *Addr = EmitScalarExpr(E->getArg(0)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32); + return Builder.CreateCall(Callee, {Addr}); + } case WebAssembly::BI__builtin_wasm_table_get: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 9a323da9a8e846..a845d5429039d4 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32 -// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64 +// RUN: %clang_cc1 -triple wasm32-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling 
-target-feature +bulk-memory -target-feature +atomics -target-feature +half-precision -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY32 +// RUN: %clang_cc1 -triple wasm64-unknown-unknown -target-feature +reference-types -target-feature +simd128 -target-feature +relaxed-simd -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -target-feature +half-precision -flax-vector-conversions=none -O3 -emit-llvm -o - %s | FileCheck %s -check-prefixes WEBASSEMBLY,WEBASSEMBLY64 // RUN: not %clang_cc1 -triple wasm64-unknown-unknown -target-feature +reference-types -target-feature +nontrapping-fptoint -target-feature +exception-handling -target-feature +bulk-memory -target-feature +atomics -flax-vector-conversions=none -O3 -emit-llvm -o - %s 2>&1 | FileCheck %s -check-prefixes MISSING-SIMD // SIMD convenience types @@ -802,6 +802,11 @@ f32x4 relaxed_dot_bf16x8_add_f32_f32x4(u16x8 a, u16x8 b, f32x4 c) { // WEBASSEMBLY-NEXT: ret } +float load_f16_f32(int *addr) { + return __builtin_wasm_loadf16_f32(addr); + // WEBASSEMBLY: call float @llvm.wasm.loadf16.f32(ptr %{{.*}}) +} + __externref_t externref_null() { return __builtin_wasm_ref_null_extern(); // WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern() diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index b93a5e7be1b51e..f8142a8ca9e934 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -321,6 +321,18 @@ def int_wasm_relaxed_dot_bf16x8_add_f32: [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4f32_ty], [IntrNoMem, IntrSpeculatable]>; +//===----------------------------------------------------------------------===// +// Half-precision intrinsics (experimental) +//===----------------------------------------------------------------------===// + +// TODO: Replace these intrinsics 
with normal ISel patterns once the half-precision +// instructions are merged to the proposal. +def int_wasm_loadf16_f32: + Intrinsic<[llvm_float_ty], + [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; + //===----------------------------------------------------------------------===// // Thread-local storage intrinsics diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 15aeaaeb8c4a4e..d3b496ae591798 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -206,6 +206,7 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) { WASM_LOAD_STORE(LOAD16_SPLAT) WASM_LOAD_STORE(LOAD_LANE_I16x8) WASM_LOAD_STORE(STORE_LANE_I16x8) + WASM_LOAD_STORE(LOAD_F16_F32) return 1; WASM_LOAD_STORE(LOAD_I32) WASM_LOAD_STORE(LOAD_F32) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 64bcadf3f5677c..ed52fe53bc6096 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -906,6 +906,14 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = Align(8); Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad; return true; + case Intrinsic::wasm_loadf16_f32: + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::f16; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = Align(2); + Info.flags = MachineMemOperand::MOLoad; + return true; default: return false; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 01c0909af72e5e..e4baf842462a96 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -72,6 +72,9 @@ 
defm LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33, []>; defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>; defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>; +// Half Precision +defm LOAD_F16_F32 : WebAssemblyLoad<F32, "f32.load_f16", 0xfc30, [HasHalfPrecision]>; + // Pattern matching multiclass LoadPat<ValueType ty, SDPatternOperator kind, string Name> { @@ -111,6 +114,8 @@ defm : LoadPat<i64, extloadi8, "LOAD8_U_I64">; defm : LoadPat<i64, extloadi16, "LOAD16_U_I64">; defm : LoadPat<i64, extloadi32, "LOAD32_U_I64">; +defm : LoadPat<f32, int_wasm_loadf16_f32, "LOAD_F16_F32">; + // Defines atomic and non-atomic stores, regular and truncating multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode, list<Predicate> reqs = []> { diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index af95dfa25a1893..ccf72192eb11c5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -38,6 +38,13 @@ multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, asmstr_s, simdop, HasRelaxedSIMD>; } +multiclass HALF_PRECISION_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, + list<dag> pattern_r, string asmstr_r = "", + string asmstr_s = "", bits<32> simdop = -1> { + defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, + asmstr_s, simdop, HasHalfPrecision>; +} + defm "" : ARGUMENT<V128, v16i8>; defm "" : ARGUMENT<V128, v8i16>; diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll new file mode 100644 index 00000000000000..582771d3f95fcb --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision 
| FileCheck %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+half-precision | FileCheck %s + +declare float @llvm.wasm.loadf16.f32(ptr) + +; CHECK-LABEL: ldf16_32: +; CHECK: f32.load_f16 $push[[NUM0:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: return $pop[[NUM0]]{{$}} +define float @ldf16_32(ptr %p) { + %v = call float @llvm.wasm.loadf16.f32(ptr %p) + ret float %v +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index c6c554990c2c4f..e7c3761f381d03 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -1,4 +1,4 @@ -# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd < %s | FileCheck %s +# RUN: llvm-mc -no-type-check -show-encoding -triple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd,+half-precision < %s | FileCheck %s main: .functype main () -> () @@ -839,4 +839,7 @@ main: # CHECK: i32x4.relaxed_dot_i8x16_i7x16_add_s # encoding: [0xfd,0x93,0x02] i32x4.relaxed_dot_i8x16_i7x16_add_s + # CHECK: f32.load_f16 48 # encoding: [0xfc,0x30,0x01,0x30] + f32.load_f16 48 + end_function _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits