hans created this revision. hans added reviewers: thakis, majnemer. A Chromium developer reported a bug which turned out to be a mangling collision between these two literals:
  char s[] = "foo";
  char t[32] = "foo";

The two literals may look the same, but for the initialization of t we will (under some circumstances) use a literal that's extended with zeros, and both the length and those zeros should be accounted for by the mangling. This actually makes the mangling code simpler: where it previously had special logic for null terminators, which are not part of the StringLiteral, that is now covered by the general algorithm.

https://reviews.llvm.org/D48928

Files:
  lib/AST/MicrosoftMangle.cpp
  test/CodeGen/mangle-ms-string-literals.c
Index: test/CodeGen/mangle-ms-string-literals.c =================================================================== --- /dev/null +++ test/CodeGen/mangle-ms-string-literals.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s +// RUN: %clang_cc1 -x c -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s + +void crbug857442(int x) { + // Make sure to handle truncated or padded literals. The truncation is only valid in C. + struct {int x; char s[2]; } truncatedAscii = {x, "hello"}; + // CHECK: "??_C@_01CONKJJHI@he@" + struct {int x; char s[16]; } paddedAscii = {x, "hello"}; + // CHECK: "??_C@_0BA@EAAINDNC@hello?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA?$AA@" +} Index: lib/AST/MicrosoftMangle.cpp =================================================================== --- lib/AST/MicrosoftMangle.cpp +++ lib/AST/MicrosoftMangle.cpp @@ -3171,7 +3171,7 @@ // <literal-length> ::= <non-negative integer> # the length of the literal // // <encoded-crc> ::= <hex digit>+ @ # crc of the literal including - // # null-terminator + // # trailing null bytes // // <encoded-string> ::= <simple character> # uninteresting character // ::= '?$' <hex digit> <hex digit> # these two nibbles @@ -3186,44 +3186,50 @@ MicrosoftCXXNameMangler Mangler(*this, Out); Mangler.getStream() << "??_C@_"; + // The actual string length might be different from that of the string literal + // in cases like: + // char foo[3] = "foobar"; + // char bar[42] = "foobar"; + // Where it is truncated or zero-padded to fit the array. This is the length + // used for mangling, and any trailing null-bytes also need to be mangled. + unsigned StringLength = getASTContext() + .getAsConstantArrayType(SL->getType()) + ->getSize() + .getZExtValue(); + // <char-type>: The "kind" of string literal is encoded into the mangled name. 
if (SL->isWide()) Mangler.getStream() << '1'; else Mangler.getStream() << '0'; // <literal-length>: The next part of the mangled name consists of the length - // of the string. - // The StringLiteral does not consider the NUL terminator byte(s) but the - // mangling does. - // N.B. The length is in terms of bytes, not characters. - Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); + // of the string in bytes. + Mangler.mangleNumber(StringLength * SL->getCharByteWidth()); - auto GetLittleEndianByte = [&SL](unsigned Index) { + auto GetLittleEndianByte = [&SL, StringLength](unsigned Index) { unsigned CharByteWidth = SL->getCharByteWidth(); + if (Index / CharByteWidth >= SL->getLength()) + return static_cast<char>(0); uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth); unsigned OffsetInCodeUnit = Index % CharByteWidth; return static_cast<char>((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff); }; - auto GetBigEndianByte = [&SL](unsigned Index) { + auto GetBigEndianByte = [&SL, StringLength](unsigned Index) { unsigned CharByteWidth = SL->getCharByteWidth(); + if (Index / CharByteWidth >= SL->getLength()) + return static_cast<char>(0); uint32_t CodeUnit = SL->getCodeUnit(Index / CharByteWidth); unsigned OffsetInCodeUnit = (CharByteWidth - 1) - (Index % CharByteWidth); return static_cast<char>((CodeUnit >> (8 * OffsetInCodeUnit)) & 0xff); }; // CRC all the bytes of the StringLiteral. llvm::JamCRC JC; - for (unsigned I = 0, E = SL->getByteLength(); I != E; ++I) + for (unsigned I = 0, E = StringLength * SL->getCharByteWidth(); I != E; ++I) JC.update(GetLittleEndianByte(I)); - // The NUL terminator byte(s) were not present earlier, - // we need to manually process those bytes into the CRC. - for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth(); - ++NullTerminator) - JC.update('\x00'); - // <encoded-crc>: The CRC is encoded utilizing the standard number mangling // scheme. 
Mangler.mangleNumber(JC.getCRC()); @@ -3260,18 +3266,14 @@ // Enforce our 32 bytes max, except wchar_t which gets 32 chars instead. unsigned MaxBytesToMangle = SL->isWide() ? 64U : 32U; - unsigned NumBytesToMangle = std::min(MaxBytesToMangle, SL->getByteLength()); - for (unsigned I = 0; I != NumBytesToMangle; ++I) + unsigned NumBytesToMangle = + std::min(MaxBytesToMangle, StringLength * SL->getCharByteWidth()); + for (unsigned I = 0; I != NumBytesToMangle; ++I) { if (SL->isWide()) MangleByte(GetBigEndianByte(I)); else MangleByte(GetLittleEndianByte(I)); - - // Encode the NUL terminator if there is room. - if (NumBytesToMangle < MaxBytesToMangle) - for (unsigned NullTerminator = 0; NullTerminator < SL->getCharByteWidth(); - ++NullTerminator) - MangleByte(0); + } Mangler.getStream() << '@'; }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits