MarcusJohnson91 created this revision. MarcusJohnson91 added reviewers: aaron.ballman, efriedma. MarcusJohnson91 added a project: clang. MarcusJohnson91 requested review of this revision.
Split from D103426 <https://reviews.llvm.org/D103426>

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D106756

Files:
  clang/include/clang/AST/FormatString.h
  clang/lib/AST/FormatString.cpp
  clang/lib/AST/PrintfFormatString.cpp
  clang/lib/AST/ScanfFormatString.cpp
  clang/test/Sema/format-strings-int-typedefs.c

Index: clang/test/Sema/format-strings-int-typedefs.c =================================================================== --- clang/test/Sema/format-strings-int-typedefs.c +++ clang/test/Sema/format-strings-int-typedefs.c @@ -12,6 +12,10 @@ printf("%td", 42.0); // expected-warning {{format specifies type 'ptrdiff_t' (aka 'int')}} printf("%lc", 42.0); // expected-warning {{format specifies type 'wint_t' (aka 'int')}} printf("%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + printf("%l16c", 42.0); // expected-warning {{format specifies type 'char16_t' (aka 'int')}} + printf("%l16s", 42.0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}} + printf("%l32c", 42.0); // expected-warning {{format specifies type 'char32_t' (aka 'int')}} + printf("%l32s", 42.0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}} printf("%S", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} printf("%C", 42.0); // expected-warning {{format specifies type 'wchar_t' (aka 'int')}} @@ -21,6 +25,10 @@ wprintf(L"%td", 42.0); // expected-warning {{format specifies type 'ptrdiff_t' (aka 'int')}} wprintf(L"%lc", 42.0); // expected-warning {{format specifies type 'wint_t' (aka 'int')}} wprintf(L"%ls", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wprintf(L"%l16c", 42.0); // expected-warning {{format specifies type 'char16_t' (aka 'int')}} + wprintf(L"%l16s", 42.0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}} + wprintf(L"%l32c", 42.0); // expected-warning {{format specifies type 'char32_t' (aka 'int')}} + wprintf(L"%l32s", 42.0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}} wprintf(L"%S", 42.0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} wprintf(L"%C", 42.0); // expected-warning {{format specifies type 'wchar_t' (aka 'int')}} @@ -30,6 +38,10 @@ scanf("%td", 0); // 
expected-warning {{format specifies type 'ptrdiff_t *' (aka 'int *')}} scanf("%lc", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} scanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + scanf("%l16c", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}} + scanf("%l16s", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}} + scanf("%l32c", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}} + scanf("%l32s", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}} scanf("%S", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} scanf("%C", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} @@ -39,6 +51,10 @@ wscanf("%td", 0); // expected-warning {{format specifies type 'ptrdiff_t *' (aka 'int *')}} wscanf("%lc", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} wscanf("%ls", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} + wscanf("%l16c", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}} + wscanf("%l16s", 0); // expected-warning {{format specifies type 'char16_t *' (aka 'int *')}} + wscanf("%l32c", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}} + wscanf("%l32s", 0); // expected-warning {{format specifies type 'char32_t *' (aka 'int *')}} wscanf("%S", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} wscanf("%C", 0); // expected-warning {{format specifies type 'wchar_t *' (aka 'int *')}} Index: clang/lib/AST/ScanfFormatString.cpp =================================================================== --- clang/lib/AST/ScanfFormatString.cpp +++ clang/lib/AST/ScanfFormatString.cpp @@ -261,6 +261,8 @@ case LengthModifier::AsInt32: case LengthModifier::AsInt3264: case LengthModifier::AsWide: + case LengthModifier::AsUTF16: + case 
LengthModifier::AsUTF32: case LengthModifier::AsShortLong: return ArgType::Invalid(); } @@ -302,6 +304,8 @@ case LengthModifier::AsInt32: case LengthModifier::AsInt3264: case LengthModifier::AsWide: + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: case LengthModifier::AsShortLong: return ArgType::Invalid(); } @@ -329,14 +333,18 @@ // Char, string and scanlist. case ConversionSpecifier::cArg: - case ConversionSpecifier::sArg: + case ConversionSpecifier::CArg: case ConversionSpecifier::ScanListArg: switch (LM.getKind()) { case LengthModifier::None: return ArgType::PtrTo(ArgType::AnyCharTy); case LengthModifier::AsLong: case LengthModifier::AsWide: - return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); + return ArgType::PtrTo(ArgType::WCStrTy); + case LengthModifier::AsUTF16: + return ArgType(ArgType::Char16Ty); + case LengthModifier::AsUTF32: + return ArgType(ArgType::Char32Ty); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: return ArgType::PtrTo(ArgType::CStrTy); @@ -347,13 +355,17 @@ default: return ArgType::Invalid(); } - case ConversionSpecifier::CArg: + case ConversionSpecifier::sArg: case ConversionSpecifier::SArg: // FIXME: Mac OS X specific? 
switch (LM.getKind()) { case LengthModifier::None: case LengthModifier::AsWide: - return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); + return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t *")); + case LengthModifier::AsUTF16: + return ArgType::PtrTo(ArgType(Ctx.getChar16Type(), "char16_t *")); + case LengthModifier::AsUTF32: + return ArgType::PtrTo(ArgType(Ctx.getChar32Type(), "char32_t *")); case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); @@ -398,6 +410,8 @@ case LengthModifier::AsInt32: case LengthModifier::AsInt3264: case LengthModifier::AsWide: + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: case LengthModifier::AsShortLong: return ArgType::Invalid(); } @@ -435,10 +449,14 @@ return false; // Pointer to a character. - if (PT->isAnyCharacterType()) { + if (PT->isAnyCharacterType(LangOpt)) { CS.setKind(ConversionSpecifier::sArg); if (PT->isWideCharType()) LM.setKind(LengthModifier::AsWideChar); + else if (PT->isChar16Type(LangOpt)) + LM.setKind(LengthModifier::AsUTF16); + else if (PT->isChar32Type(LangOpt)) + LM.setKind(LengthModifier::AsUTF32); else LM.setKind(LengthModifier::None); Index: clang/lib/AST/PrintfFormatString.cpp =================================================================== --- clang/lib/AST/PrintfFormatString.cpp +++ clang/lib/AST/PrintfFormatString.cpp @@ -494,6 +494,10 @@ case LengthModifier::AsLong: case LengthModifier::AsWide: return ArgType(ArgType::WIntTy, "wint_t"); + case LengthModifier::AsUTF16: + return ArgType(ArgType::Char16Ty, "char16_t"); + case LengthModifier::AsUTF32: + return ArgType(ArgType::Char32Ty, "char32_t"); case LengthModifier::AsShort: if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) return Ctx.IntTy; @@ -535,6 +539,8 @@ case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: case LengthModifier::AsWide: + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: return 
ArgType::Invalid(); } @@ -567,6 +573,8 @@ case LengthModifier::AsPtrDiff: return ArgType::makePtrdiffT( ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")); + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: case LengthModifier::AsWide: @@ -618,6 +626,8 @@ case LengthModifier::AsInt3264: case LengthModifier::AsInt64: case LengthModifier::AsWide: + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: return ArgType::Invalid(); case LengthModifier::AsShortLong: llvm_unreachable("only used for OpenCL which doesn not handle nArg"); @@ -632,9 +642,15 @@ "const unichar *"); return ArgType(ArgType::WCStrTy, "wchar_t *"); } - if (LM.getKind() == LengthModifier::AsWide) + if (LM.getKind() == LengthModifier::AsWide) { return ArgType(ArgType::WCStrTy, "wchar_t *"); - return ArgType::CStrTy; + } + if (LM.getKind() == LengthModifier::AsUTF16) + return ArgType(ArgType::Char16Ty, "char16_t *"); + if (LM.getKind() == LengthModifier::AsUTF32) + return ArgType(ArgType::Char32Ty, "char32_t *"); + else + return ArgType::CStrTy; case ConversionSpecifier::SArg: if (IsObjCLiteral) return ArgType(Ctx.getPointerType(Ctx.UnsignedShortTy.withConst()), @@ -642,13 +658,22 @@ if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() && LM.getKind() == LengthModifier::AsShort) return ArgType::CStrTy; + if (LM.getKind() == LengthModifier::AsUTF16) + return ArgType(ArgType::Char16Ty, "char16_t *"); + if (LM.getKind() == LengthModifier::AsUTF32) + return ArgType(ArgType::Char32Ty, "char32_t *"); return ArgType(ArgType::WCStrTy, "wchar_t *"); + case ConversionSpecifier::cArg: case ConversionSpecifier::CArg: if (IsObjCLiteral) return ArgType(Ctx.UnsignedShortTy, "unichar"); if (Ctx.getTargetInfo().getTriple().isOSMSVCRT() && LM.getKind() == LengthModifier::AsShort) return Ctx.IntTy; + if (LM.getKind() == LengthModifier::AsUTF16) + return ArgType(ArgType::Char16Ty, "char16_t"); + if (LM.getKind() == 
LengthModifier::AsUTF32) + return ArgType(ArgType::Char32Ty, "char32_t"); return ArgType(Ctx.WideCharTy, "wchar_t"); case ConversionSpecifier::pArg: case ConversionSpecifier::PArg: @@ -706,17 +731,21 @@ return true; } - // Handle strings next (char *, wchar_t *) - if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) { + // Handle strings next (char *, wchar_t *, char16_t *, char32_t *) + if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType(LangOpt))) { CS.setKind(ConversionSpecifier::sArg); // Disable irrelevant flags HasAlternativeForm = 0; HasLeadingZeroes = 0; - // Set the long length modifier for wide characters + // Set the length modifier for characters if (QT->getPointeeType()->isWideCharType()) LM.setKind(LengthModifier::AsWideChar); + else if (QT->getPointeeType()->isChar16Type(LangOpt)) + LM.setKind(LengthModifier::AsUTF16); + else if (QT->getPointeeType()->isChar32Type(LangOpt)) + LM.setKind(LengthModifier::AsUTF32); else LM.setKind(LengthModifier::None); @@ -736,19 +765,10 @@ VectorNumElts = OptionalAmount(VT->getNumElements()); } } - - // We can only work with builtin types. - if (!BT) - return false; - + // Set length modifier switch (BT->getKind()) { case BuiltinType::Bool: - case BuiltinType::WChar_U: - case BuiltinType::WChar_S: - case BuiltinType::Char8: // FIXME: Treat like 'char'? 
- case BuiltinType::Char16: - case BuiltinType::Char32: case BuiltinType::UInt128: case BuiltinType::Int128: case BuiltinType::Half: @@ -819,6 +839,7 @@ case BuiltinType::UChar: case BuiltinType::Char_S: case BuiltinType::SChar: + case BuiltinType::Char8: LM.setKind(LengthModifier::AsChar); break; @@ -840,6 +861,19 @@ case BuiltinType::LongDouble: LM.setKind(LengthModifier::AsLongDouble); break; + + case BuiltinType::Char16: + LM.setKind(LengthModifier::AsUTF16); + break; + + case BuiltinType::Char32: + LM.setKind(LengthModifier::AsUTF32); + break; + + case BuiltinType::WChar_S: + case BuiltinType::WChar_U: + LM.setKind(LengthModifier::AsWide); + break; } // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. @@ -873,7 +907,7 @@ // Set conversion specifier and disable any flags which do not apply to it. // Let typedefs to char fall through to int, as %c is silly for uint8_t. - if (!isa<TypedefType>(QT) && QT->isCharType()) { + if (!isa<TypedefType>(QT) && QT->isAnyCharacterType(LangOpt)) { CS.setKind(ConversionSpecifier::cArg); LM.setKind(LengthModifier::None); Precision.setHowSpecified(OptionalAmount::NotSpecified); Index: clang/lib/AST/FormatString.cpp =================================================================== --- clang/lib/AST/FormatString.cpp +++ clang/lib/AST/FormatString.cpp @@ -232,7 +232,17 @@ break; case 'l': ++I; - if (I != E && *I == 'l') { + if (I + 1 != E && I[0] == '1' && I[1] == '6') { + ++I; + ++I; + lmKind = LengthModifier::AsUTF16; + break; + } else if (I + 1 != E && I[0] == '3' && I[1] == '2') { + ++I; + ++I; + lmKind = LengthModifier::AsUTF32; + break; + } else if (I + 1 != E && I[0] == 'l') { ++I; lmKind = LengthModifier::AsLongLong; } else { @@ -459,6 +469,24 @@ return WInt == PromoArg ? 
Match : NoMatch; } + + case Char16Ty: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return NoMatch; + QualType pointeeTy = + C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); + return pointeeTy == C.getChar16Type() ? Match : NoMatch; + } + + case Char32Ty: { + const PointerType *PT = argTy->getAs<PointerType>(); + if (!PT) + return NoMatch; + QualType pointeeTy = + C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType(); + return pointeeTy == C.getChar32Type() ? Match : NoMatch; + } case CPointerTy: if (argTy->isVoidPointerType()) { @@ -520,6 +548,12 @@ case WCStrTy: Res = C.getPointerType(C.getWideCharType()); break; + case Char16Ty: + Res = C.getPointerType(C.getChar16Type()); + break; + case Char32Ty: + Res = C.getPointerType(C.getChar32Type()); + break; case ObjCPointerTy: Res = C.ObjCBuiltinIdTy; break; @@ -607,6 +641,10 @@ return "m"; case AsWide: return "w"; + case AsUTF16: + return "l16"; + case AsUTF32: + return "l32"; case None: return ""; } @@ -860,6 +898,17 @@ default: return false; } + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: + switch (CS.getKind()) { + case ConversionSpecifier::cArg: + case ConversionSpecifier::CArg: + case ConversionSpecifier::sArg: + case ConversionSpecifier::SArg: + return true; + default: + return false; + } case LengthModifier::AsWide: switch (CS.getKind()) { case ConversionSpecifier::cArg: @@ -886,6 +935,9 @@ case LengthModifier::AsSizeT: case LengthModifier::AsPtrDiff: case LengthModifier::AsLongDouble: + case LengthModifier::AsWide: + case LengthModifier::AsUTF16: + case LengthModifier::AsUTF32: return true; case LengthModifier::AsAllocate: case LengthModifier::AsMAllocate: @@ -893,7 +945,6 @@ case LengthModifier::AsInt32: case LengthModifier::AsInt3264: case LengthModifier::AsInt64: - case LengthModifier::AsWide: case LengthModifier::AsShortLong: // ??? 
return false; } @@ -997,6 +1048,12 @@ } else if (Identifier->getName() == "ptrdiff_t") { LM.setKind(LengthModifier::AsPtrDiff); return true; + } else if (Identifier->getName() == "char16_t") { + LM.setKind(LengthModifier::AsUTF16); + return true; + } else if (Identifier->getName() == "char32_t") { + LM.setKind(LengthModifier::AsUTF32); + return true; } QualType T = Typedef->getUnderlyingType(); Index: clang/include/clang/AST/FormatString.h =================================================================== --- clang/include/clang/AST/FormatString.h +++ clang/include/clang/AST/FormatString.h @@ -80,6 +80,8 @@ AsLongDouble, // 'L' AsAllocate, // for '%as', GNU extension to C90 scanf AsMAllocate, // for '%ms', GNU extension to scanf + AsUTF16, // for '%l16(c|s)', Clang extension + AsUTF32, // for '%l32(c|s)', Clang extension AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z AsWideChar = AsLong // for '%ls', only makes sense for printf };
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits