================
@@ -962,9 +964,38 @@ CIRGenModule::getConstantArrayFromStringLiteral(const
StringLiteral *e) {
return builder.getString(str, eltTy, finalSize);
}
- errorNYI(e->getSourceRange(),
- "getConstantArrayFromStringLiteral: wide characters");
- return mlir::Attribute();
+ auto arrayTy = mlir::cast<cir::ArrayType>(convertType(e->getType()));
+
+ auto arrayEltTy = mlir::cast<cir::IntType>(arrayTy.getElementType());
+
+ uint64_t arraySize = arrayTy.getSize();
+ unsigned literalSize = e->getLength();
+
+ // Check if the string is all null bytes before building the vector.
+ // In most non-zero cases, this will break out on the first element.
+ // Padding bytes (if literalSize < arraySize) are implicitly zero.
+ bool isAllZero = true;
+ for (unsigned i = 0; i < literalSize; ++i) {
+ if (e->getCodeUnit(i) != 0) {
+ isAllZero = false;
+ break;
+ }
+ }
+
+ if (isAllZero)
+ return cir::ZeroAttr::get(arrayTy);
+
+ // Otherwise emit a constant array holding the characters.
+ SmallVector<mlir::Attribute> elements;
+ elements.reserve(arraySize);
+ for (unsigned i = 0; i < literalSize; ++i)
+ elements.push_back(cir::IntAttr::get(arrayEltTy, e->getCodeUnit(i)));
+ // Pad with zeros if needed.
+ for (uint64_t i = literalSize; i < arraySize; ++i)
----------------
andykaylor wrote:
I can see why you'd be inclined to add this zero padding, but classic codegen
doesn't do it, and I wonder if it's necessary. Have you seen a case where
`arraySize` is greater than `literalSize`? Maybe you could just assert that
the two sizes are equal.
https://github.com/llvm/llvm-project/pull/171541
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits