yonghong-song updated this revision to Diff 523569.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.
- Fixed the previous llvm-objdump issue with '> 16bit' 'gotol' insns.
- Basic cpu=v4 functionality should now be complete in llvm; further work
  will focus on the kernel.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D144829/new/
https://reviews.llvm.org/D144829
Files:
clang/lib/Basic/Targets/BPF.cpp
clang/lib/Basic/Targets/BPF.h
clang/test/Misc/target-invalid-cpu-note.c
llvm/lib/Target/BPF/BPF.td
llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
llvm/lib/Target/BPF/BPFISelLowering.cpp
llvm/lib/Target/BPF/BPFInstrFormats.td
llvm/lib/Target/BPF/BPFInstrInfo.td
llvm/lib/Target/BPF/BPFMIPeephole.cpp
llvm/lib/Target/BPF/BPFSubtarget.cpp
llvm/lib/Target/BPF/BPFSubtarget.h
llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
llvm/test/CodeGen/BPF/bswap.ll
llvm/test/CodeGen/BPF/sdiv_smod.ll
llvm/test/CodeGen/BPF/sext_ld.ll
llvm/test/CodeGen/BPF/sext_mov.ll
Index: llvm/test/CodeGen/BPF/sext_mov.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/BPF/sext_mov.ll
@@ -0,0 +1,109 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; short f1(int a) {
+; return (char)a;
+; }
+; int f2(int a) {
+; return (char)a;
+; }
+; long f3(int a) {
+; return (char)a;
+; }
+; int f4(int a) {
+; return (short)a;
+; }
+; long f5(int a) {
+; return (short)a;
+; }
+; long f6(long a) {
+; return (int)a;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c
+
+; Function Attrs: nounwind
+define dso_local i16 @f1(i32 noundef %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %conv = trunc i32 %0 to i8
+ %conv1 = sext i8 %conv to i16
+ ret i16 %conv1
+}
+; CHECK: w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: nounwind
+define dso_local i32 @f2(i32 noundef %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %conv = trunc i32 %0 to i8
+ %conv1 = sext i8 %conv to i32
+ ret i32 %conv1
+}
+; CHECK: w0 = (s8)w1 # encoding: [0xbc,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: nounwind
+define dso_local i64 @f3(i32 noundef %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %conv = trunc i32 %0 to i8
+ %conv1 = sext i8 %conv to i64
+ ret i64 %conv1
+}
+; CHECK: r0 = (s8)r1 # encoding: [0xbf,0x10,0x08,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: nounwind
+define dso_local i32 @f4(i32 noundef %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %conv = trunc i32 %0 to i16
+ %conv1 = sext i16 %conv to i32
+ ret i32 %conv1
+}
+; CHECK: w0 = (s16)w1 # encoding: [0xbc,0x10,0x10,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: nounwind
+define dso_local i64 @f5(i32 noundef %a) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %conv = trunc i32 %0 to i16
+ %conv1 = sext i16 %conv to i64
+ ret i64 %conv1
+}
+; CHECK: r0 = (s16)r1 # encoding: [0xbf,0x10,0x10,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: nounwind
+define dso_local i64 @f6(i64 noundef %a) #0 {
+entry:
+ %a.addr = alloca i64, align 8
+ store i64 %a, ptr %a.addr, align 8, !tbaa !7
+ %0 = load i64, ptr %a.addr, align 8, !tbaa !7
+ %conv = trunc i64 %0 to i32
+ %conv1 = sext i32 %conv to i64
+ ret i64 %conv1
+}
+; CHECK: r0 = (s32)r1 # encoding: [0xbf,0x10,0x20,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"long", !5, i64 0}
Index: llvm/test/CodeGen/BPF/sext_ld.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/BPF/sext_ld.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; int f1(char *p) {
+; return *p;
+; }
+; int f2(short *p) {
+; return *p;
+; }
+; int f3(int *p) {
+; return *p;
+; }
+; long f4(char *p) {
+; return *p;
+; }
+; long f5(short *p) {
+; return *p;
+; }
+; long f6(int *p) {
+; return *p;
+; }
+; long f7(long *p) {
+; return *p;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i32 @f1(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i8, ptr %p, align 1, !tbaa !3
+ %conv = sext i8 %0 to i32
+; CHECK: w0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+ ret i32 %conv
+}
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i32 @f2(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i16, ptr %p, align 2, !tbaa !6
+ %conv = sext i16 %0 to i32
+; CHECK: w0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+ ret i32 %conv
+}
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i32 @f3(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %p, align 4, !tbaa !8
+; CHECK: w0 = *(u32 *)(r1 + 0) # encoding: [0x61,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+ ret i32 %0
+}
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i64 @f4(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i8, ptr %p, align 1, !tbaa !3
+ %conv = sext i8 %0 to i64
+ ret i64 %conv
+; CHECK: r0 = *(s8 *)(r1 + 0) # encoding: [0x91,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+}
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i64 @f5(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i16, ptr %p, align 2, !tbaa !6
+ %conv = sext i16 %0 to i64
+ ret i64 %conv
+; CHECK: r0 = *(s16 *)(r1 + 0) # encoding: [0x89,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+}
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i64 @f6(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, ptr %p, align 4, !tbaa !8
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+; CHECK: r0 = *(s32 *)(r1 + 0) # encoding: [0x81,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+}
+
+; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn
+define dso_local i64 @f7(ptr nocapture noundef readonly %p) local_unnamed_addr #0 {
+entry:
+ %0 = load i64, ptr %p, align 8, !tbaa !10
+ ret i64 %0
+; CHECK: r0 = *(u64 *)(r1 + 0) # encoding: [0x79,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+}
+
+attributes #0 = { argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 68665544c7d59735e9c0bb32b08829c006c7c594)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"short", !4, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"int", !4, i64 0}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"long", !4, i64 0}
Index: llvm/test/CodeGen/BPF/sdiv_smod.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/BPF/sdiv_smod.ll
@@ -0,0 +1,77 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; int foo(int a, int b, int c) {
+; return a/b + a%c;
+; }
+; long bar(long a, long b, long c) {
+; return a/b + a%c;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c
+
+; Function Attrs: nounwind
+define dso_local i32 @foo(i32 noundef %a, i32 noundef %b, i32 noundef %c) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ %c.addr = alloca i32, align 4
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ store i32 %b, ptr %b.addr, align 4, !tbaa !3
+ store i32 %c, ptr %c.addr, align 4, !tbaa !3
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %1 = load i32, ptr %b.addr, align 4, !tbaa !3
+ %div = sdiv i32 %0, %1
+ %2 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %3 = load i32, ptr %c.addr, align 4, !tbaa !3
+ %rem = srem i32 %2, %3
+ %add = add nsw i32 %div, %rem
+ ret i32 %add
+}
+
+; CHECK: w0 = w1
+; CHECK-NEXT: *(u32 *)(r10 - 8) = w2
+; CHECK-NEXT: *(u32 *)(r10 - 4) = w0
+; CHECK-NEXT: *(u32 *)(r10 - 12) = w3
+; CHECK-NEXT: w1 s%= w3 # encoding: [0x9c,0x31,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT: w0 s/= w2 # encoding: [0x3c,0x20,0x01,0x00,0x00,0x00,0x00,0x00]
+
+; Function Attrs: nounwind
+define dso_local i64 @bar(i64 noundef %a, i64 noundef %b, i64 noundef %c) #0 {
+entry:
+ %a.addr = alloca i64, align 8
+ %b.addr = alloca i64, align 8
+ %c.addr = alloca i64, align 8
+ store i64 %a, ptr %a.addr, align 8, !tbaa !7
+ store i64 %b, ptr %b.addr, align 8, !tbaa !7
+ store i64 %c, ptr %c.addr, align 8, !tbaa !7
+ %0 = load i64, ptr %a.addr, align 8, !tbaa !7
+ %1 = load i64, ptr %b.addr, align 8, !tbaa !7
+ %div = sdiv i64 %0, %1
+ %2 = load i64, ptr %a.addr, align 8, !tbaa !7
+ %3 = load i64, ptr %c.addr, align 8, !tbaa !7
+ %rem = srem i64 %2, %3
+ %add = add nsw i64 %div, %rem
+ ret i64 %add
+}
+
+; CHECK: r0 = r1
+; CHECK-NEXT: *(u64 *)(r10 - 16) = r2
+; CHECK-NEXT: *(u64 *)(r10 - 8) = r0
+; CHECK-NEXT: *(u64 *)(r10 - 24) = r3
+; CHECK-NEXT: r1 s%= r3 # encoding: [0x9f,0x31,0x01,0x00,0x00,0x00,0x00,0x00]
+; CHECK-NEXT: r0 s/= r2 # encoding: [0x3f,0x20,0x01,0x00,0x00,0x00,0x00,0x00]
+
+attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"long", !5, i64 0}
Index: llvm/test/CodeGen/BPF/bswap.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/BPF/bswap.ll
@@ -0,0 +1,72 @@
+; RUN: llc -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; Source:
+; long foo(int a, int b, long c) {
+; a = __builtin_bswap16(a);
+; b = __builtin_bswap32(b);
+; c = __builtin_bswap64(c);
+; return a + b + c;
+; }
+; Compilation flags:
+; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes t.c
+
+; Function Attrs: nounwind
+define dso_local i64 @foo(i32 noundef %a, i32 noundef %b, i64 noundef %c) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ %c.addr = alloca i64, align 8
+ store i32 %a, ptr %a.addr, align 4, !tbaa !3
+ store i32 %b, ptr %b.addr, align 4, !tbaa !3
+ store i64 %c, ptr %c.addr, align 8, !tbaa !7
+ %0 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %conv = trunc i32 %0 to i16
+ %1 = call i16 @llvm.bswap.i16(i16 %conv)
+ %conv1 = zext i16 %1 to i32
+ store i32 %conv1, ptr %a.addr, align 4, !tbaa !3
+ %2 = load i32, ptr %b.addr, align 4, !tbaa !3
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ store i32 %3, ptr %b.addr, align 4, !tbaa !3
+ %4 = load i64, ptr %c.addr, align 8, !tbaa !7
+ %5 = call i64 @llvm.bswap.i64(i64 %4)
+ store i64 %5, ptr %c.addr, align 8, !tbaa !7
+ %6 = load i32, ptr %a.addr, align 4, !tbaa !3
+ %7 = load i32, ptr %b.addr, align 4, !tbaa !3
+ %add = add nsw i32 %6, %7
+ %conv2 = sext i32 %add to i64
+ %8 = load i64, ptr %c.addr, align 8, !tbaa !7
+ %add3 = add nsw i64 %conv2, %8
+ ret i64 %add3
+}
+
+; CHECK: w0 = w1
+; CHECK-NEXT: r0 = bswap16 r0 # encoding: [0xd7,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
+; CHECK-NEXT: r2 = bswap32 r2 # encoding: [0xd7,0x02,0x00,0x00,0x20,0x00,0x00,0x00]
+; CHECK-NEXT: *(u32 *)(r10 - 8) = w2
+; CHECK-NEXT: *(u32 *)(r10 - 4) = w0
+; CHECK-NEXT: w0 += w2
+; CHECK-NEXT: r3 = bswap64 r3 # encoding: [0xd7,0x03,0x00,0x00,0x40,0x00,0x00,0x00]
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i16 @llvm.bswap.i16(i16) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.bswap.i32(i32) #1
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.bswap.i64(i64) #1
+
+attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 569bd3b841e3167ddd7c6ceeddb282d3c280e761)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !8, i64 0}
+!8 = !{!"long", !5, i64 0}
Index: llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
===================================================================
--- llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -79,7 +79,7 @@
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
// The target is the 3rd operand of cond inst and the 1st of uncond inst.
- int16_t Imm;
+ int32_t Imm;
if (isConditionalBranch(Inst)) {
Imm = Inst.getOperand(2).getImm();
} else if (isUnconditionalBranch(Inst))
Index: llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/BPF/MCTargetDesc/BPFMCFixups.h
@@ -0,0 +1,27 @@
+//=======-- BPFMCFixups.h - BPF-specific fixup entries ------*- C++ -*-=======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace BPF {
+enum FixupKind {
+  // 32-bit PC-relative fixup for the 'gotol' (JMPL) branch target.
+ FK_BPF_PCRel_4 = FirstTargetFixupKind,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace BPF
+} // end namespace llvm
+
+#endif
Index: llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
===================================================================
--- llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -95,6 +96,8 @@
Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_4));
else if (MI.getOpcode() == BPF::LD_imm64)
Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+ else if (MI.getOpcode() == BPF::JMPL)
+ Fixups.push_back(MCFixup::create(0, Expr, (MCFixupKind)BPF::FK_BPF_PCRel_4));
else
// bb label
Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
Index: llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
===================================================================
--- llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
+++ llvm/lib/Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
@@ -10,6 +10,8 @@
//
//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
#include "MCTargetDesc/BPFInstPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -100,8 +102,13 @@
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
- int16_t Imm = Op.getImm();
- O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ if (MI->getOpcode() == BPF::JMPL) {
+ int32_t Imm = Op.getImm();
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ } else {
+ int16_t Imm = Op.getImm();
+ O << ((Imm >= 0) ? "+" : "") << formatImm(Imm);
+ }
} else if (Op.isExpr()) {
printExpr(Op.getExpr(), O);
} else {
Index: llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
===================================================================
--- llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -6,12 +6,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/BPFMCFixups.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/EndianStream.h"
#include <cassert>
@@ -41,7 +43,10 @@
return false;
}
- unsigned getNumFixupKinds() const override { return 1; }
+ unsigned getNumFixupKinds() const override {
+ return BPF::NumTargetFixupKinds;
+ }
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
bool writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const override;
@@ -49,6 +54,20 @@
} // end anonymous namespace
+const MCFixupKindInfo &
+BPFAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+  static const MCFixupKindInfo Infos[BPF::NumTargetFixupKinds] = {
+ { "FK_BPF_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
const MCSubtargetInfo *STI) const {
if ((Count % 8) != 0)
@@ -85,6 +104,11 @@
Data[Fixup.getOffset() + 1] = 0x1;
support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
}
+ } else if (Fixup.getTargetKind() == BPF::FK_BPF_PCRel_4) {
+    // The input Value is in bytes; encode it as insn units from the next insn.
+ Value = (uint32_t)((Value - 8) / 8);
+ support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4], Value,
+ Endian);
} else {
assert(Fixup.getKind() == FK_PCRel_2);
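
The (Value - 8) / 8 conversion above is worth a worked example: BPF branch
offsets count 8-byte instructions relative to the instruction following the
branch, while the MC layer hands applyFixup a byte distance measured from
the start of the branch insn (the fixup is created with offset 0). A sketch
under those assumptions; encodeGotolOffset is an illustrative name.

  #include <cassert>
  #include <cstdint>

  // Convert a byte distance (from the start of the branch insn) into the
  // insn-count offset stored in gotol's 32-bit imm field.
  constexpr int32_t encodeGotolOffset(int64_t byteDistance) {
    return int32_t((byteDistance - 8) / 8);
  }

  int main() {
    assert(encodeGotolOffset(8) == 0);   // target is the very next insn
    assert(encodeGotolOffset(16) == 1);  // skip one insn
    assert(encodeGotolOffset(800008) == 100000); // > INT16_MAX: needs gotol
    return 0;
  }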
Index: llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
===================================================================
--- llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -57,8 +57,7 @@
BPF_ABS = 0x1,
BPF_IND = 0x2,
BPF_MEM = 0x3,
- BPF_LEN = 0x4,
- BPF_MSH = 0x5,
+ BPF_SMEM = 0x4,
BPF_ATOMIC = 0x6
};
@@ -178,7 +177,7 @@
uint8_t InstMode = getInstMode(Insn);
if ((InstClass == BPF_LDX || InstClass == BPF_STX) &&
getInstSize(Insn) != BPF_DW &&
- (InstMode == BPF_MEM || InstMode == BPF_ATOMIC) &&
+ (InstMode == BPF_MEM || InstMode == BPF_SMEM || InstMode == BPF_ATOMIC) &&
STI.hasFeature(BPF::ALU32))
Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address,
this, STI);
Index: llvm/lib/Target/BPF/BPFSubtarget.h
===================================================================
--- llvm/lib/Target/BPF/BPFSubtarget.h
+++ llvm/lib/Target/BPF/BPFSubtarget.h
@@ -57,6 +57,9 @@
// whether we should enable MCAsmInfo DwarfUsesRelocationsAcrossSections
bool UseDwarfRIS;
+ // whether cpu v4 is enabled.
+ bool CPUv4;
+
public:
// This constructor initializes the data members to match that
// of the specified triple.
@@ -72,6 +75,7 @@
bool getHasJmp32() const { return HasJmp32; }
bool getHasAlu32() const { return HasAlu32; }
bool getUseDwarfRIS() const { return UseDwarfRIS; }
+ bool getCPUv4() const { return CPUv4; }
const BPFInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const BPFFrameLowering *getFrameLowering() const override {
Index: llvm/lib/Target/BPF/BPFSubtarget.cpp
===================================================================
--- llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -38,6 +38,7 @@
HasJmp32 = false;
HasAlu32 = false;
UseDwarfRIS = false;
+ CPUv4 = false;
}
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -55,6 +56,13 @@
HasAlu32 = true;
return;
}
+ if (CPU == "v4") {
+ HasJmpExt = true;
+ HasJmp32 = true;
+ HasAlu32 = true;
+ CPUv4 = true;
+ return;
+ }
}
BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU,
Index: llvm/lib/Target/BPF/BPFMIPeephole.cpp
===================================================================
--- llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <set>
+#include <map>
using namespace llvm;
@@ -301,7 +302,9 @@
static char ID;
MachineFunction *MF;
+ const BPFInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ bool IsCPUv4;
BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
@@ -311,7 +314,12 @@
// Initialize class variables.
void initialize(MachineFunction &MFParm);
+ bool in16BitRange(int Num);
+ void buildReverseCondOpMap();
bool eliminateRedundantMov();
+ bool adjustBranch();
+
+ std::map<unsigned, unsigned> ReverseCondOpMap;
public:
@@ -322,14 +330,20 @@
initialize(MF);
- return eliminateRedundantMov();
+ bool Changed;
+ Changed = eliminateRedundantMov();
+ if (IsCPUv4)
+ Changed = adjustBranch() || Changed;
+ return Changed;
}
};
// Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
+ TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
+ IsCPUv4 = MF->getSubtarget<BPFSubtarget>().getCPUv4();
LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}
@@ -374,6 +388,256 @@
return Eliminated;
}
+// FIXME: this is very crude, do we have a better way to do this?
+void BPFMIPreEmitPeephole::buildReverseCondOpMap() {
+ ReverseCondOpMap[BPF::JEQ_rr] = BPF::JNE_rr;
+ ReverseCondOpMap[BPF::JNE_rr] = BPF::JEQ_rr;
+ ReverseCondOpMap[BPF::JUGT_rr] = BPF::JULE_rr;
+ ReverseCondOpMap[BPF::JULE_rr] = BPF::JUGT_rr;
+ ReverseCondOpMap[BPF::JUGE_rr] = BPF::JULT_rr;
+ ReverseCondOpMap[BPF::JULT_rr] = BPF::JUGE_rr;
+ ReverseCondOpMap[BPF::JSGT_rr] = BPF::JSLE_rr;
+ ReverseCondOpMap[BPF::JSLE_rr] = BPF::JSGT_rr;
+ ReverseCondOpMap[BPF::JSGE_rr] = BPF::JSLT_rr;
+ ReverseCondOpMap[BPF::JSLT_rr] = BPF::JSGE_rr;
+
+ ReverseCondOpMap[BPF::JEQ_ri] = BPF::JNE_ri;
+ ReverseCondOpMap[BPF::JNE_ri] = BPF::JEQ_ri;
+ ReverseCondOpMap[BPF::JUGT_ri] = BPF::JULE_ri;
+ ReverseCondOpMap[BPF::JULE_ri] = BPF::JUGT_ri;
+ ReverseCondOpMap[BPF::JUGE_ri] = BPF::JULT_ri;
+ ReverseCondOpMap[BPF::JULT_ri] = BPF::JUGE_ri;
+ ReverseCondOpMap[BPF::JSGT_ri] = BPF::JSLE_ri;
+ ReverseCondOpMap[BPF::JSLE_ri] = BPF::JSGT_ri;
+ ReverseCondOpMap[BPF::JSGE_ri] = BPF::JSLT_ri;
+ ReverseCondOpMap[BPF::JSLT_ri] = BPF::JSGE_ri;
+
+ ReverseCondOpMap[BPF::JEQ_rr_32] = BPF::JNE_rr_32;
+ ReverseCondOpMap[BPF::JNE_rr_32] = BPF::JEQ_rr_32;
+ ReverseCondOpMap[BPF::JUGT_rr_32] = BPF::JULE_rr_32;
+ ReverseCondOpMap[BPF::JULE_rr_32] = BPF::JUGT_rr_32;
+ ReverseCondOpMap[BPF::JUGE_rr_32] = BPF::JULT_rr_32;
+ ReverseCondOpMap[BPF::JULT_rr_32] = BPF::JUGE_rr_32;
+ ReverseCondOpMap[BPF::JSGT_rr_32] = BPF::JSLE_rr_32;
+ ReverseCondOpMap[BPF::JSLE_rr_32] = BPF::JSGT_rr_32;
+ ReverseCondOpMap[BPF::JSGE_rr_32] = BPF::JSLT_rr_32;
+ ReverseCondOpMap[BPF::JSLT_rr_32] = BPF::JSGE_rr_32;
+
+ ReverseCondOpMap[BPF::JEQ_ri_32] = BPF::JNE_ri_32;
+ ReverseCondOpMap[BPF::JNE_ri_32] = BPF::JEQ_ri_32;
+ ReverseCondOpMap[BPF::JUGT_ri_32] = BPF::JULE_ri_32;
+ ReverseCondOpMap[BPF::JULE_ri_32] = BPF::JUGT_ri_32;
+ ReverseCondOpMap[BPF::JUGE_ri_32] = BPF::JULT_ri_32;
+ ReverseCondOpMap[BPF::JULT_ri_32] = BPF::JUGE_ri_32;
+ ReverseCondOpMap[BPF::JSGT_ri_32] = BPF::JSLE_ri_32;
+ ReverseCondOpMap[BPF::JSLE_ri_32] = BPF::JSGT_ri_32;
+ ReverseCondOpMap[BPF::JSGE_ri_32] = BPF::JSLT_ri_32;
+ ReverseCondOpMap[BPF::JSLT_ri_32] = BPF::JSGE_ri_32;
+}
+
+bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
+  // The cut-off is not precisely the 16bit range since new insns are
+  // added during the transformation. So be a little bit conservative
+  // and only use half of the range.
+  return Num >= (INT16_MIN >> 1) && Num <= (INT16_MAX >> 1);
+}
+
+// Before cpu=v4, only a 16bit branch target offset (-0x8000 to 0x7fff)
+// is supported for both unconditional (JMP) and conditional (JEQ, JSGT,
+// etc.) branches. In certain cases, e.g., full unrolling, the branch
+// target offset might exceed the 16bit range, in which case llvm
+// generates incorrect code as the offset is truncated to 16bit.
+//
+// To fix this rare case, a new insn JMPL is introduced, which
+// supports a 32bit branch target offset. The compiler does not use
+// this insn during instruction selection. Rather, the BPF backend
+// estimates the branch target offset and does the JMP -> JMPL and
+// JEQ -> JEQ + JMPL conversion if the estimated branch target offset
+// is beyond the 16bit range.
+bool BPFMIPreEmitPeephole::adjustBranch() {
+ bool Changed = false;
+ int CurrNumInsns = 0;
+ std::map<MachineBasicBlock *, int> SoFarNumInsns;
+ std::map<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
+ std::vector<MachineBasicBlock *> MBBs;
+
+ buildReverseCondOpMap();
+
+ MachineBasicBlock *PrevBB = nullptr;
+ for (MachineBasicBlock &MBB : *MF) {
+ // MBB.size() is the number of insns in this basic block, including some
+ // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
+ // Typically we have way more normal insns than DEBUG_VALUE insns.
+  // Also, if a conditional branch like JEQ needs to be converted to
+  // JEQ + JMPL, new insns are introduced as shown below.
+ CurrNumInsns += (int)MBB.size();
+ SoFarNumInsns[&MBB] = CurrNumInsns;
+ if (PrevBB != nullptr)
+ FollowThroughBB[PrevBB] = &MBB;
+ PrevBB = &MBB;
+    // A list of original BBs to make later traversal easier.
+ MBBs.push_back(&MBB);
+ }
+ FollowThroughBB[PrevBB] = nullptr;
+
+ for (unsigned i = 0; i < MBBs.size(); i++) {
+ // We have four cases here:
+ // (1). no terminator, simple follow through.
+ // (2). jmp to another bb.
+ // (3). conditional jmp to another bb or follow through.
+ // (4). conditional jmp followed by an unconditional jmp.
+ MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
+
+ MachineBasicBlock *MBB = MBBs[i];
+ for (MachineInstr &Term : MBB->terminators()) {
+ if (Term.isConditionalBranch()) {
+ assert(CondJmp == nullptr);
+ CondJmp = &Term;
+ } else if (Term.isUnconditionalBranch()) {
+ assert(UncondJmp == nullptr);
+ UncondJmp = &Term;
+ }
+ }
+
+ // (1). no terminator, simple follow through.
+ if (!CondJmp && !UncondJmp)
+ continue;
+
+ MachineBasicBlock *CondTargetBB, *JmpBB;
+ CurrNumInsns = SoFarNumInsns[MBB];
+
+ // (2). jmp to another bb.
+ if (!CondJmp && UncondJmp) {
+ JmpBB = UncondJmp->getOperand(0).getMBB();
+ if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
+ continue;
+
+      // Replace this insn with a JMPL.
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+ UncondJmp->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ const BasicBlock *TermBB = MBB->getBasicBlock();
+ int Dist;
+
+ // (3). conditional jmp to another bb or follow through.
+ if (!UncondJmp) {
+ CondTargetBB = CondJmp->getOperand(2).getMBB();
+ MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
+ Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+ if (in16BitRange(Dist))
+ continue;
+
+ // We have
+ // B2: ...
+ // if (cond) goto B5
+ // B3: ...
+ // where B2 -> B5 is beyond 16bit range.
+ //
+ // We do not have 32bit cond jmp insn. So we try to do
+ // the following.
+ // B2: ...
+ // if (!cond) goto B3
+      //   New_B0: gotol B5
+      //   B3:     ...
+      // One new basic block (New_B0) is created.
+ MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
+
+      // Insert New_B0 into the function block list.
+ MachineFunction::iterator MBB_I = ++MBB->getIterator();
+ MF->insert(MBB_I, New_B0);
+
+ // replace B2 cond jump
+ unsigned ReverseOpcode = ReverseCondOpMap[CondJmp->getOpcode()];
+ if (CondJmp->getOperand(1).isReg())
+        BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(ReverseOpcode))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addReg(CondJmp->getOperand(1).getReg())
+ .addMBB(FollowBB);
+ else
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(ReverseOpcode))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addImm(CondJmp->getOperand(1).getImm())
+ .addMBB(FollowBB);
+
+ MBB->removeSuccessor(CondTargetBB);
+ MBB->addSuccessor(New_B0);
+
+      // Populate the gotol insn in New_B0.
+ BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
+ .addMBB(CondTargetBB);
+
+ New_B0->addSuccessor(CondTargetBB);
+ CondJmp->eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // (4). conditional jmp followed by an unconditional jmp.
+ CondTargetBB = CondJmp->getOperand(2).getMBB();
+ JmpBB = UncondJmp->getOperand(0).getMBB();
+
+ // We have
+ // B2: ...
+ // if (cond) goto B5
+ // JMP B7
+ // B3: ...
+ //
+ // If only B2->B5 is out of 16bit range, we can do
+ // B2: ...
+ // if (cond) goto new_B
+ // JMP B7
+ // New_B: gotol B5
+ // B3: ...
+ //
+ // If only 'JMP B7' is out of 16bit range, we can replace
+ // 'JMP B7' with 'JMPL B7'.
+ //
+    // If both B2->B5 and 'JMP B7' are out of range, do
+    // both of the above transformations.
+ Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
+ if (!in16BitRange(Dist)) {
+ MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);
+
+      // Insert New_B into the function block list.
+ MF->insert(++MBB->getIterator(), New_B);
+
+ // replace B2 cond jump
+ if (CondJmp->getOperand(1).isReg())
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addReg(CondJmp->getOperand(1).getReg())
+ .addMBB(New_B);
+ else
+ BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
+ .addReg(CondJmp->getOperand(0).getReg())
+ .addImm(CondJmp->getOperand(1).getImm())
+ .addMBB(New_B);
+
+ if (CondTargetBB != JmpBB)
+ MBB->removeSuccessor(CondTargetBB);
+ MBB->addSuccessor(New_B);
+
+      // Populate the gotol insn in New_B.
+ BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);
+
+ New_B->addSuccessor(CondTargetBB);
+ CondJmp->eraseFromParent();
+ Changed = true;
+ }
+
+ if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
+ BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
+ UncondJmp->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
} // end default namespace
INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
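
For reference, case (3) of adjustBranch() performs the rewrite sketched
below (register names and the condition are illustrative; block names
follow the comments in the pass):

  B2: ...
      if r1 == r3 goto B5       # 16-bit offset would truncate
  B3: ...

becomes

  B2: ...
      if r1 != r3 goto B3       # condition reversed via ReverseCondOpMap
      gotol B5                  # new block holding a 32-bit JMPL
  B3: ...

Case (4) applies the same trick to the conditional branch and additionally
relaxes the trailing JMP to JMPL when its own offset is out of range.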
Index: llvm/lib/Target/BPF/BPFInstrInfo.td
===================================================================
--- llvm/lib/Target/BPF/BPFInstrInfo.td
+++ llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -53,6 +53,8 @@
def BPFIsBigEndian : Predicate<"!CurDAG->getDataLayout().isLittleEndian()">;
def BPFHasALU32 : Predicate<"Subtarget->getHasAlu32()">;
def BPFNoALU32 : Predicate<"!Subtarget->getHasAlu32()">;
+def BPFHasCPUv4 : Predicate<"Subtarget->getCPUv4()">;
+def BPFNoCPUv4 : Predicate<"!Subtarget->getCPUv4()">;
def brtarget : Operand<OtherVT> {
let PrintMethod = "printBrTargetOperand";
@@ -240,18 +242,19 @@
}
// ALU instructions
-class ALU_RI<BPFOpClass Class, BPFArithOp Opc,
+class ALU_RI<BPFOpClass Class, BPFArithOp Opc, int off,
dag outs, dag ins, string asmstr, list<dag> pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value, outs, ins, asmstr, pattern> {
bits<4> dst;
bits<32> imm;
let Inst{51-48} = dst;
+ let Inst{47-32} = off;
let Inst{31-0} = imm;
let BPFClass = Class;
}
-class ALU_RR<BPFOpClass Class, BPFArithOp Opc,
+class ALU_RR<BPFOpClass Class, BPFArithOp Opc, int off,
dag outs, dag ins, string asmstr, list<dag> pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_X.Value, outs, ins, asmstr, pattern> {
bits<4> dst;
@@ -259,26 +262,27 @@
let Inst{55-52} = src;
let Inst{51-48} = dst;
+ let Inst{47-32} = off;
let BPFClass = Class;
}
-multiclass ALU<BPFArithOp Opc, string OpcodeStr, SDNode OpNode> {
- def _rr : ALU_RR<BPF_ALU64, Opc,
+multiclass ALU<BPFArithOp Opc, int off, string OpcodeStr, SDNode OpNode> {
+ def _rr : ALU_RR<BPF_ALU64, Opc, off,
(outs GPR:$dst),
(ins GPR:$src2, GPR:$src),
"$dst "#OpcodeStr#" $src",
[(set GPR:$dst, (OpNode i64:$src2, i64:$src))]>;
- def _ri : ALU_RI<BPF_ALU64, Opc,
+ def _ri : ALU_RI<BPF_ALU64, Opc, off,
(outs GPR:$dst),
(ins GPR:$src2, i64imm:$imm),
"$dst "#OpcodeStr#" $imm",
[(set GPR:$dst, (OpNode GPR:$src2, i64immSExt32:$imm))]>;
- def _rr_32 : ALU_RR<BPF_ALU, Opc,
+ def _rr_32 : ALU_RR<BPF_ALU, Opc, off,
(outs GPR32:$dst),
(ins GPR32:$src2, GPR32:$src),
"$dst "#OpcodeStr#" $src",
[(set GPR32:$dst, (OpNode i32:$src2, i32:$src))]>;
- def _ri_32 : ALU_RI<BPF_ALU, Opc,
+ def _ri_32 : ALU_RI<BPF_ALU, Opc, off,
(outs GPR32:$dst),
(ins GPR32:$src2, i32imm:$imm),
"$dst "#OpcodeStr#" $imm",
@@ -287,18 +291,23 @@
let Constraints = "$dst = $src2" in {
let isAsCheapAsAMove = 1 in {
- defm ADD : ALU<BPF_ADD, "+=", add>;
- defm SUB : ALU<BPF_SUB, "-=", sub>;
- defm OR : ALU<BPF_OR, "|=", or>;
- defm AND : ALU<BPF_AND, "&=", and>;
- defm SLL : ALU<BPF_LSH, "<<=", shl>;
- defm SRL : ALU<BPF_RSH, ">>=", srl>;
- defm XOR : ALU<BPF_XOR, "^=", xor>;
- defm SRA : ALU<BPF_ARSH, "s>>=", sra>;
-}
- defm MUL : ALU<BPF_MUL, "*=", mul>;
- defm DIV : ALU<BPF_DIV, "/=", udiv>;
- defm MOD : ALU<BPF_MOD, "%=", urem>;
+ defm ADD : ALU<BPF_ADD, 0, "+=", add>;
+ defm SUB : ALU<BPF_SUB, 0, "-=", sub>;
+ defm OR : ALU<BPF_OR, 0, "|=", or>;
+ defm AND : ALU<BPF_AND, 0, "&=", and>;
+ defm SLL : ALU<BPF_LSH, 0, "<<=", shl>;
+ defm SRL : ALU<BPF_RSH, 0, ">>=", srl>;
+ defm XOR : ALU<BPF_XOR, 0, "^=", xor>;
+ defm SRA : ALU<BPF_ARSH, 0, "s>>=", sra>;
+}
+ defm MUL : ALU<BPF_MUL, 0, "*=", mul>;
+ defm DIV : ALU<BPF_DIV, 0, "/=", udiv>;
+ defm MOD : ALU<BPF_MOD, 0, "%=", urem>;
+
+ let Predicates = [BPFHasCPUv4] in {
+ defm SDIV : ALU<BPF_DIV, 1, "s/=", sdiv>;
+ defm SMOD : ALU<BPF_MOD, 1, "s%=", srem>;
+ }
}
class NEG_RR<BPFOpClass Class, BPFArithOp Opc,
@@ -338,26 +347,49 @@
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LD_imm64 : LD_IMM64<0, "=">;
-def MOV_rr : ALU_RR<BPF_ALU64, BPF_MOV,
+def MOV_rr : ALU_RR<BPF_ALU64, BPF_MOV, 0,
(outs GPR:$dst),
(ins GPR:$src),
"$dst = $src",
[]>;
-def MOV_ri : ALU_RI<BPF_ALU64, BPF_MOV,
+def MOV_ri : ALU_RI<BPF_ALU64, BPF_MOV, 0,
(outs GPR:$dst),
(ins i64imm:$imm),
"$dst = $imm",
[(set GPR:$dst, (i64 i64immSExt32:$imm))]>;
-def MOV_rr_32 : ALU_RR<BPF_ALU, BPF_MOV,
+def MOV_rr_32 : ALU_RR<BPF_ALU, BPF_MOV, 0,
(outs GPR32:$dst),
(ins GPR32:$src),
"$dst = $src",
[]>;
-def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV,
+def MOV_ri_32 : ALU_RI<BPF_ALU, BPF_MOV, 0,
(outs GPR32:$dst),
(ins i32imm:$imm),
"$dst = $imm",
[(set GPR32:$dst, (i32 i32immSExt32:$imm))]>;
+
+let Predicates = [BPFHasCPUv4] in {
+ def MOVS_rr_8 : ALU_RR<BPF_ALU64, BPF_MOV, 8,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s8)$src",
+ [(set GPR:$dst, (sra (shl GPR:$src, (i64 56)), (i64 56)))]>;
+ def MOVS_rr_16 : ALU_RR<BPF_ALU64, BPF_MOV, 16,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s16)$src",
+ [(set GPR:$dst, (sra (shl GPR:$src, (i64 48)), (i64 48)))]>;
+ def MOVS_rr_32 : ALU_RR<BPF_ALU64, BPF_MOV, 32,
+ (outs GPR:$dst), (ins GPR:$src),
+ "$dst = (s32)$src",
+ [(set GPR:$dst, (sra (shl GPR:$src, (i64 32)), (i64 32)))]>;
+ def MOVS_rr_32_8 : ALU_RR<BPF_ALU, BPF_MOV, 8,
+ (outs GPR32:$dst), (ins GPR32:$src),
+ "$dst = (s8)$src",
+ [(set GPR32:$dst, (sra (shl GPR32:$src, (i32 24)), (i32 24)))]>;
+ def MOVS_rr_32_16 : ALU_RR<BPF_ALU, BPF_MOV, 16,
+ (outs GPR32:$dst), (ins GPR32:$src),
+ "$dst = (s16)$src",
+ [(set GPR32:$dst, (sra (shl GPR32:$src, (i32 16)), (i32 16)))]>;
+}
}
def FI_ri
@@ -421,8 +453,8 @@
def STD : STOREi64<BPF_DW, "u64", store>;
// LOAD instructions
-class LOAD<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
- : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+ : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
(outs GPR:$dst),
(ins MEMri:$addr),
"$dst = *("#OpcodeStr#" *)($addr)",
@@ -436,8 +468,8 @@
let BPFClass = BPF_LDX;
}
-class LOADi64<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
+class LOADi64<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD<SizeOp, ModOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
let isCodeGenOnly = 1 in {
def CORE_MEM : TYPE_LD_ST<BPF_MEM.Value, BPF_W.Value,
@@ -451,7 +483,7 @@
"$dst = core_alu32_mem($opcode, $src, $offset)",
[]>;
let Constraints = "$dst = $src" in {
- def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH,
+ def CORE_SHIFT : ALU_RR<BPF_ALU64, BPF_LSH, 0,
(outs GPR:$dst),
(ins u64imm:$opcode, GPR:$src, u64imm:$offset),
"$dst = core_shift($opcode, $src, $offset)",
@@ -460,12 +492,18 @@
}
let Predicates = [BPFNoALU32] in {
- def LDW : LOADi64<BPF_W, "u32", zextloadi32>;
- def LDH : LOADi64<BPF_H, "u16", zextloadi16>;
- def LDB : LOADi64<BPF_B, "u8", zextloadi8>;
+ def LDW : LOADi64<BPF_W, BPF_MEM, "u32", zextloadi32>;
+ def LDH : LOADi64<BPF_H, BPF_MEM, "u16", zextloadi16>;
+ def LDB : LOADi64<BPF_B, BPF_MEM, "u8", zextloadi8>;
+}
+
+let Predicates = [BPFHasCPUv4] in {
+ def LDWS : LOADi64<BPF_W, BPF_SMEM, "s32", sextloadi32>;
+ def LDHS : LOADi64<BPF_H, BPF_SMEM, "s16", sextloadi16>;
+ def LDBS : LOADi64<BPF_B, BPF_SMEM, "s8", sextloadi8>;
}
-def LDD : LOADi64<BPF_DW, "u64", load>;
+def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
@@ -479,6 +517,18 @@
let BPFClass = BPF_JMP;
}
+class BRANCH_LONG<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
+ : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
+ (outs),
+ (ins brtarget:$BrDst),
+ !strconcat(OpcodeStr, " $BrDst"),
+ Pattern> {
+ bits<32> BrDst;
+
+ let Inst{31-0} = BrDst;
+ let BPFClass = BPF_JMP32;
+}
+
class CALL<string OpcodeStr>
: TYPE_ALU_JMP<BPF_CALL.Value, BPF_K.Value,
(outs),
@@ -506,6 +556,7 @@
// Jump always
let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
def JMP : BRANCH<BPF_JA, "goto", [(br bb:$BrDst)]>;
+ def JMPL : BRANCH_LONG<BPF_JA, "gotol", []>;
}
// Jump and link
@@ -835,7 +886,7 @@
}
// bswap16, bswap32, bswap64
-class BSWAP<bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pattern>
+class BSWAP<BPFOpClass Class, bits<32> SizeOp, string OpcodeStr, BPFSrcType SrcType, list<dag> Pattern>
: TYPE_ALU_JMP<BPF_END.Value, SrcType.Value,
(outs GPR:$dst),
(ins GPR:$src),
@@ -845,21 +896,29 @@
let Inst{51-48} = dst;
let Inst{31-0} = SizeOp;
- let BPFClass = BPF_ALU;
+ let BPFClass = Class;
}
let Constraints = "$dst = $src" in {
+ let Predicates = [BPFHasCPUv4] in {
+ def BSWAP16 : BSWAP<BPF_ALU64, 16, "bswap16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def BSWAP32 : BSWAP<BPF_ALU64, 32, "bswap32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def BSWAP64 : BSWAP<BPF_ALU64, 64, "bswap64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ }
+
+ let Predicates = [BPFNoCPUv4] in {
let Predicates = [BPFIsLittleEndian] in {
- def BE16 : BSWAP<16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
- def BE32 : BSWAP<32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
- def BE64 : BSWAP<64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ def BE16 : BSWAP<BPF_ALU, 16, "be16", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def BE32 : BSWAP<BPF_ALU, 32, "be32", BPF_TO_BE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def BE64 : BSWAP<BPF_ALU, 64, "be64", BPF_TO_BE, [(set GPR:$dst, (bswap GPR:$src))]>;
}
let Predicates = [BPFIsBigEndian] in {
- def LE16 : BSWAP<16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
- def LE32 : BSWAP<32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
- def LE64 : BSWAP<64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
+ def LE16 : BSWAP<BPF_ALU, 16, "le16", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+ def LE32 : BSWAP<BPF_ALU, 32, "le32", BPF_TO_LE, [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+ def LE64 : BSWAP<BPF_ALU, 64, "le64", BPF_TO_LE, [(set GPR:$dst, (bswap GPR:$src))]>;
}
+ }
}
let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
@@ -898,7 +957,7 @@
def LD_IND_W : LOAD_IND<BPF_W, "u32", int_bpf_load_word>;
let isCodeGenOnly = 1 in {
- def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV,
+ def MOV_32_64 : ALU_RR<BPF_ALU, BPF_MOV, 0,
(outs GPR:$dst), (ins GPR32:$src),
"$dst = $src", []>;
}
@@ -940,8 +999,8 @@
def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
}
-class LOAD32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
- : TYPE_LD_ST<BPF_MEM.Value, SizeOp.Value,
+class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
+ : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
(outs GPR32:$dst),
(ins MEMri:$addr),
"$dst = *("#OpcodeStr#" *)($addr)",
@@ -955,13 +1014,18 @@
let BPFClass = BPF_LDX;
}
-class LOADi32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
- : LOAD32<SizeOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
+class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
+ : LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
- def LDW32 : LOADi32<BPF_W, "u32", load>;
- def LDH32 : LOADi32<BPF_H, "u16", zextloadi16>;
- def LDB32 : LOADi32<BPF_B, "u8", zextloadi8>;
+ def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
+ def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
+ def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
+}
+
+let Predicates = [BPFHasCPUv4], DecoderNamespace = "BPFALU32" in {
+ def LDH32S : LOADi32<BPF_H, BPF_SMEM, "s16", sextloadi16>;
+ def LDB32S : LOADi32<BPF_B, BPF_SMEM, "s8", sextloadi8>;
}
let Predicates = [BPFHasALU32] in {
@@ -973,6 +1037,7 @@
(STW32 (EXTRACT_SUBREG GPR:$src, sub_32), ADDRri:$dst)>;
def : Pat<(i32 (extloadi8 ADDRri:$src)), (i32 (LDB32 ADDRri:$src))>;
def : Pat<(i32 (extloadi16 ADDRri:$src)), (i32 (LDH32 ADDRri:$src))>;
+
def : Pat<(i64 (zextloadi8 ADDRri:$src)),
(SUBREG_TO_REG (i64 0), (LDB32 ADDRri:$src), sub_32)>;
def : Pat<(i64 (zextloadi16 ADDRri:$src)),
Index: llvm/lib/Target/BPF/BPFInstrFormats.td
===================================================================
--- llvm/lib/Target/BPF/BPFInstrFormats.td
+++ llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -90,6 +90,7 @@
def BPF_ABS : BPFModeModifer<0x1>;
def BPF_IND : BPFModeModifer<0x2>;
def BPF_MEM : BPFModeModifer<0x3>;
+def BPF_SMEM : BPFModeModifer<0x4>;
def BPF_ATOMIC : BPFModeModifer<0x6>;
class BPFAtomicFlag<bits<4> val> {
Index: llvm/lib/Target/BPF/BPFISelLowering.cpp
===================================================================
--- llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -102,7 +102,8 @@
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
- setOperationAction(ISD::SREM, VT, Expand);
+ if (!STI.getCPUv4())
+ setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -141,9 +142,11 @@
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ if (!STI.getCPUv4()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ }
}
setBooleanContents(ZeroOrOneBooleanContent);
Index: llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -193,15 +193,17 @@
default:
break;
case ISD::SDIV: {
- DebugLoc Empty;
- const DebugLoc &DL = Node->getDebugLoc();
- if (DL != Empty)
- errs() << "Error at line " << DL.getLine() << ": ";
- else
- errs() << "Error: ";
- errs() << "Unsupport signed division for DAG: ";
- Node->print(errs(), CurDAG);
- errs() << "Please convert to unsigned div/mod.\n";
+ if (!Subtarget->getCPUv4()) {
+ DebugLoc Empty;
+ const DebugLoc &DL = Node->getDebugLoc();
+ if (DL != Empty)
+ errs() << "Error at line " << DL.getLine() << ": ";
+ else
+ errs() << "Error: ";
+ errs() << "Unsupport signed division for DAG: ";
+ Node->print(errs(), CurDAG);
+ errs() << "Please convert to unsigned div/mod.\n";
+ }
break;
}
case ISD::INTRINSIC_W_CHAIN: {
Index: llvm/lib/Target/BPF/BPF.td
===================================================================
--- llvm/lib/Target/BPF/BPF.td
+++ llvm/lib/Target/BPF/BPF.td
@@ -30,6 +30,7 @@
def : Proc<"v1", []>;
def : Proc<"v2", []>;
def : Proc<"v3", [ALU32]>;
+def : Proc<"v4", [ALU32]>;
def : Proc<"probe", []>;
def BPFInstPrinter : AsmWriter {
Index: clang/test/Misc/target-invalid-cpu-note.c
===================================================================
--- clang/test/Misc/target-invalid-cpu-note.c
+++ clang/test/Misc/target-invalid-cpu-note.c
@@ -73,7 +73,7 @@
// RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF
// BPF: error: unknown target CPU 'not-a-cpu'
-// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}}
+// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, v4, probe{{$}}
// RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR
// AVR: error: unknown target CPU 'not-a-cpu'
Index: clang/lib/Basic/Targets/BPF.h
===================================================================
--- clang/lib/Basic/Targets/BPF.h
+++ clang/lib/Basic/Targets/BPF.h
@@ -106,7 +106,7 @@
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
bool setCPU(const std::string &Name) override {
- if (Name == "v3") {
+ if (Name == "v3" || Name == "v4") {
HasAlu32 = true;
}
Index: clang/lib/Basic/Targets/BPF.cpp
===================================================================
--- clang/lib/Basic/Targets/BPF.cpp
+++ clang/lib/Basic/Targets/BPF.cpp
@@ -32,7 +32,7 @@
}
static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
- "v3", "probe"};
+ "v3", "v4", "probe"};
bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
return llvm::is_contained(ValidCPUNames, Name);