https://github.com/mmha updated https://github.com/llvm/llvm-project/pull/166191
>From 30c9a5cf5f2ca7192f16eaae0eb37e13425b39b4 Mon Sep 17 00:00:00 2001 From: Morris Hafner <[email protected]> Date: Tue, 4 Nov 2025 01:00:07 +0800 Subject: [PATCH 1/3] [CIR] Implement __builtin_object_size and __builtin_dynamic_object_size * Add cir.objsize operation to CIR dialect * Add lowering for cir.objsize operation to LLVM dialect * Add codegen for __builtin_object_size and __builtin_dynamic_object_size Note that this does not support the pass_object_size attribute yet. --- clang/include/clang/CIR/Dialect/IR/CIROps.td | 42 +++++++++++++ clang/include/clang/CIR/MissingFeatures.h | 1 + clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 60 +++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 8 +++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 23 +++++++ .../test/CIR/CodeGen/builtin-object-size.cpp | 38 ++++++++++++ 6 files changed, 172 insertions(+) create mode 100644 clang/test/CIR/CodeGen/builtin-object-size.cpp diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 2b361ed0982c6..4cf0b817e8056 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4089,6 +4089,48 @@ def CIR_PrefetchOp : CIR_Op<"prefetch"> { }]; } +//===----------------------------------------------------------------------===// +// ObjSizeOp +//===----------------------------------------------------------------------===// + +def CIR_ObjSizeOp : CIR_Op<"objsize", [Pure]> { + let summary = "Implements the llvm.objsize builtin"; + let description = [{ + The `cir.objsize` operation models the behavior of the `llvm.objectsize` + intrinsic in Clang. It returns the number of accessible bytes past ptr. + + When the `min` attribute is present, the operation returns the minimum + guaranteed accessible size. When absent (max mode), it returns the maximum + possible object size. Additionally, when the object size is unknown, min + mode returns 0 while max mode returns -1. Corresponds to `llvm.objectsize`'s + `min` argument. + + The `dynamic` attribute determines if the value should be evaluated at + runtime. Corresponds to `llvm.objectsize`'s `dynamic` argument. + + Example: + + ```mlir + %size = cir.objsize min %ptr : !cir.ptr<i32> -> i64 + %dsize = cir.objsize max dynamic %ptr : !cir.ptr<i32> -> i64 + ``` + }]; + + let arguments = (ins + CIR_PointerType:$ptr, + UnitAttr:$min, + UnitAttr:$dynamic + ); + + let results = (outs CIR_AnyFundamentalIntType:$result); + + let assemblyFormat = [{ + (`min` $min^) : (`max`)? + (`dynamic` $dynamic^)? + $ptr `:` qualified(type($ptr)) `->` qualified(type($result)) attr-dict + }]; +} + //===----------------------------------------------------------------------===// // PtrDiffOp //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 48ef8be9fb782..ae96f6b932571 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -213,6 +213,7 @@ struct MissingFeatures { static bool builtinCallMathErrno() { return false; } static bool builtinCheckKind() { return false; } static bool cgCapturedStmtInfo() { return false; } + static bool countedBySize() { return false; } static bool cgFPOptionsRAII() { return false; } static bool checkBitfieldClipping() { return false; } static bool cirgenABIInfo() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index e35100ffe4b6b..6617895fa8832 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -459,6 +459,19 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return emitCall(e->getCallee()->getType(), CIRGenCallee::forDirect(fnOp), e, returnValue); } + case Builtin::BI__builtin_dynamic_object_size: + case Builtin::BI__builtin_object_size: { + unsigned type = + e->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); + auto resType = mlir::cast<cir::IntType>(convertType(e->getType())); + + // We pass this builtin onto the optimizer so that it can figure out the + // object size in more complex cases. + bool isDynamic = builtinID == Builtin::BI__builtin_dynamic_object_size; + return RValue::get(emitBuiltinObjectSize(e->getArg(0), type, resType, + /*EmittedE=*/nullptr, isDynamic)); + } + case Builtin::BI__builtin_prefetch: { auto evaluateOperandAsInt = [&](const Expr *arg) { Expr::EvalResult res; @@ -641,3 +654,50 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) { mlir::Value vaList = emitVAListRef(ve->getSubExpr()).getPointer(); return cir::VAArgOp::create(builder, loc, type, vaList); } + +/// Returns a Value corresponding to the size of the given expression. +/// This Value may be either of the following: +/// +/// - Reference an argument if `pass_object_size` is used. +/// - A call to a `cir.objsize`. +/// +/// emittedE is the result of emitting `e` as a scalar expr. If it's non-null +/// and we wouldn't otherwise try to reference a pass_object_size parameter, +/// we'll call `cir.objsize` on emittedE, rather than emitting e. +mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type, + cir::IntType resType, + mlir::Value emittedE, + bool isDynamic) { + assert(!cir::MissingFeatures::opCallImplicitObjectSizeArgs()); + + // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't + // evaluate e for side-effects. In either case, just like original LLVM + // lowering, we shouldn't lower to `cir.objsize`. + if (type == 3 || (!emittedE && e->HasSideEffects(getContext()))) + return builder.getConstInt(getLoc(e->getSourceRange()), resType, + (type & 2) ? 0 : -1); + + mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e); + assert(mlir::isa<cir::PointerType>(ptr.getType()) && + "Non-pointer passed to __builtin_object_size?"); + + assert(!cir::MissingFeatures::countedBySize()); + + // LLVM intrinsics (which CIR lowers to at some point, only supports 0 + // and 2, account for that right now. + const bool min = ((type & 2) != 0); + // TODO(cir): Heads up for LLVM lowering, For GCC compatibility, + // __builtin_object_size treat NULL as unknown size. + auto op = cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()), + resType, ptr, min, isDynamic); + return op.getResult(); +} + +mlir::Value CIRGenFunction::evaluateOrEmitBuiltinObjectSize( + const Expr *e, unsigned type, cir::IntType resType, mlir::Value emittedE, + bool isDynamic) { + uint64_t objectSize; + if (!e->tryEvaluateObjectSize(objectSize, getContext(), type)) + return emitBuiltinObjectSize(e, type, resType, emittedE, isDynamic); + return builder.getConstInt(getLoc(e->getSourceRange()), resType, objectSize); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index e5cecaa573a6e..3d3d4fa410d1a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1304,6 +1304,14 @@ class CIRGenFunction : public CIRGenTypeCache { RValue emitBuiltinExpr(const clang::GlobalDecl &gd, unsigned builtinID, const clang::CallExpr *e, ReturnValueSlot returnValue); + mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type, + cir::IntType resType, mlir::Value emittedE, + bool isDynamic); + + mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e, + unsigned type, cir::IntType resType, + mlir::Value emittedE, bool isDynamic); + RValue emitCall(const CIRGenFunctionInfo &funcInfo, const CIRGenCallee &callee, ReturnValueSlot returnValue, const CallArgList &args, cir::CIRCallOpInterface *callOp, diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 5a6193fa8d840..6322b2979fa31 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2816,6 +2816,29 @@ static void collectUnreachable(mlir::Operation *parent, } } +mlir::LogicalResult CIRToLLVMObjSizeOpLowering::matchAndRewrite( + cir::ObjSizeOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::Type llvmResTy = getTypeConverter()->convertType(op.getType()); + mlir::Location loc = op->getLoc(); + + mlir::IntegerType i1Ty = rewriter.getI1Type(); + + auto i1Val = [&rewriter, &loc, &i1Ty](bool val) { + return mlir::LLVM::ConstantOp::create(rewriter, loc, i1Ty, val); + }; + + replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.objectsize", llvmResTy, + { + adaptor.getPtr(), + i1Val(op.getMin()), + i1Val(true), + i1Val(op.getDynamic()), + }); + + return mlir::LogicalResult::success(); +} + void ConvertCIRToLLVMPass::processCIRAttrs(mlir::ModuleOp module) { // Lower the module attributes to LLVM equivalents. if (mlir::Attribute tripleAttr = diff --git a/clang/test/CIR/CodeGen/builtin-object-size.cpp b/clang/test/CIR/CodeGen/builtin-object-size.cpp new file mode 100644 index 0000000000000..e077a2b57bf1d --- /dev/null +++ b/clang/test/CIR/CodeGen/builtin-object-size.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +typedef unsigned long size_t; + +// CIR-LABEL: @_Z4testPc +// LLVM-LABEL: define {{.*}} i64 @_Z4testPc +// OGCG-LABEL: define {{.*}} i64 @_Z4testPc +size_t test(char *ptr) { + // CIR: cir.objsize max {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 false) + return __builtin_object_size(ptr, 0); +} + +// CIR-LABEL: @_Z8test_minPc +// LLVM-LABEL: define {{.*}} i64 @_Z8test_minPc +// OGCG-LABEL: define {{.*}} i64 @_Z8test_minPc +size_t test_min(char *ptr) { + // CIR: cir.objsize min {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 true, i1 true, i1 false) + return __builtin_object_size(ptr, 2); +} + +// CIR-LABEL: @_Z17test_dynamic_sizePc +// LLVM-LABEL: define {{.*}} i64 @_Z17test_dynamic_sizePc +// OGCG-LABEL: define {{.*}} i64 @_Z17test_dynamic_sizePc +size_t test_dynamic_size(char *ptr) { + // CIR: cir.objsize max dynamic {{.*}} : !cir.ptr<!void> -> !u64i + // LLVM: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 true) + // OGCG: call i64 @llvm.objectsize.i64.p0(ptr %{{.*}}, i1 false, i1 true, i1 true) + return __builtin_dynamic_object_size(ptr, 0); +} >From 4d4fe2ae34d7b8516e71a203fa56bb5ff466ef97 Mon Sep 17 00:00:00 2001 From: Morris Hafner <[email protected]> Date: Tue, 4 Nov 2025 01:09:13 +0800 Subject: [PATCH 2/3] clang-format, comment updates --- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 14 ++++---------- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 17 ++++++++++------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 6617895fa8832..ef58b8c592c63 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -655,14 +655,10 @@ mlir::Value CIRGenFunction::emitVAArg(VAArgExpr *ve) { return cir::VAArgOp::create(builder, loc, type, vaList); } -/// Returns a Value corresponding to the size of the given expression. -/// This Value may be either of the following: +/// Returns a Value corresponding to the size of the given expression by +/// emitting a `cir.objsize` operation. /// -/// - Reference an argument if `pass_object_size` is used. -/// - A call to a `cir.objsize`. -/// -/// emittedE is the result of emitting `e` as a scalar expr. If it's non-null -/// and we wouldn't otherwise try to reference a pass_object_size parameter, +/// emittedE is the result of emitting `e` as a scalar expr. If it's non-null, /// we'll call `cir.objsize` on emittedE, rather than emitting e. mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type, cir::IntType resType, @@ -675,7 +671,7 @@ mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type, // lowering, we shouldn't lower to `cir.objsize`. if (type == 3 || (!emittedE && e->HasSideEffects(getContext()))) return builder.getConstInt(getLoc(e->getSourceRange()), resType, - (type & 2) ? 0 : -1); + (type & 2) ? 0 : -1); mlir::Value ptr = emittedE ? emittedE : emitScalarExpr(e); assert(mlir::isa<cir::PointerType>(ptr.getType()) && @@ -686,8 +682,6 @@ mlir::Value CIRGenFunction::emitBuiltinObjectSize(const Expr *e, unsigned type, // LLVM intrinsics (which CIR lowers to at some point, only supports 0 // and 2, account for that right now. const bool min = ((type & 2) != 0); - // TODO(cir): Heads up for LLVM lowering, For GCC compatibility, - // __builtin_object_size treat NULL as unknown size. auto op = cir::ObjSizeOp::create(builder, getLoc(e->getSourceRange()), resType, ptr, min, isDynamic); return op.getResult(); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 6322b2979fa31..470c22631d8e5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -2828,13 +2828,16 @@ mlir::LogicalResult CIRToLLVMObjSizeOpLowering::matchAndRewrite( return mlir::LLVM::ConstantOp::create(rewriter, loc, i1Ty, val); }; - replaceOpWithCallLLVMIntrinsicOp(rewriter, op, "llvm.objectsize", llvmResTy, - { - adaptor.getPtr(), - i1Val(op.getMin()), - i1Val(true), - i1Val(op.getDynamic()), - }); + replaceOpWithCallLLVMIntrinsicOp( + rewriter, op, "llvm.objectsize", llvmResTy, + { + adaptor.getPtr(), + i1Val(op.getMin()), + // For GCC compatibility, __builtin_object_size treat NULL as unknown + // size. + i1Val(true), + i1Val(op.getDynamic()), + }); return mlir::LogicalResult::success(); } >From 2dc7585dd6a1b80d4cef03c0144b8fa6127c50c2 Mon Sep 17 00:00:00 2001 From: Morris Hafner <[email protected]> Date: Tue, 4 Nov 2025 01:10:19 +0800 Subject: [PATCH 3/3] more clang-format --- clang/lib/CIR/CodeGen/CIRGenFunction.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 3d3d4fa410d1a..115fa6dba2aeb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1305,12 +1305,14 @@ class CIRGenFunction : public CIRGenTypeCache { const clang::CallExpr *e, ReturnValueSlot returnValue); mlir::Value emitBuiltinObjectSize(const clang::Expr *e, unsigned type, - cir::IntType resType, mlir::Value emittedE, - bool isDynamic); + cir::IntType resType, mlir::Value emittedE, + bool isDynamic); mlir::Value evaluateOrEmitBuiltinObjectSize(const clang::Expr *e, - unsigned type, cir::IntType resType, - mlir::Value emittedE, bool isDynamic); + unsigned type, + cir::IntType resType, + mlir::Value emittedE, + bool isDynamic); RValue emitCall(const CIRGenFunctionInfo &funcInfo, const CIRGenCallee &callee, ReturnValueSlot returnValue, _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
