Issue |
160435
|
Summary |
Unexpected result for FormattedStream
|
Labels |
new issue
|
Assignees |
|
Reporter |
yuyuyu-atri
|
```
//llvm/lib/Support/Unicode.cpp:479
// Returns true when c is strictly greater than 31 and strictly less than 127,
// i.e. one of the ASCII characters from space (32) through '~' (126).
// Control characters such as '\t' (9), '\n' (10) and '\r' (13) yield false.
static bool isprintableascii(char c) { return c > 31 && c < 127; }
// Computes the total column width of the UTF-8 text in Text by summing the
// width of each code point.
//
// Returns:
//   - the accumulated width when every code point is accepted;
//   - ErrorNonPrintableCharacter as soon as a single-byte character fails
//     isprintableascii(), or charWidth() reports a negative width;
//   - ErrorInvalidUTF8 when a sequence length is 0, a sequence would run past
//     the end of Text, or ConvertUTF8toUTF32 does not report conversionOK.
//
// Processing stops at the first error; no partial width is returned.
int columnWidthUTF8(StringRef Text) {
unsigned ColumnWidth = 0;
unsigned Length;
// i advances by the byte length of the code point handled in each iteration.
for (size_t i = 0, e = Text.size(); i < e; i += Length) {
// Presumably the encoded sequence length derived from the lead byte --
// confirm against getNumBytesForUTF8's definition.
Length = getNumBytesForUTF8(Text[i]);
// fast path for ASCII characters
if (Length == 1) {
// Single-byte control characters ('\t', '\n', '\r', ...) are rejected here,
// so callers that want to handle them must do so themselves.
if (!isprintableascii(Text[i]))
return ErrorNonPrintableCharacter;
ColumnWidth += 1;
continue;
}
// Length is unsigned, so `Length <= 0` can only be true for Length == 0.
// The second test rejects a sequence truncated by the end of Text.
if (Length <= 0 || i + Length > Text.size())
return ErrorInvalidUTF8;
// Decode exactly one code point into a one-element UTF-32 buffer.
UTF32 buf[1];
const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i);
UTF32 *Target = &buf[0];
if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target,
Target + 1, strictConversion))
return ErrorInvalidUTF8;
// A negative width from charWidth() is treated as "non-printable".
int Width = charWidth(buf[0]);
if (Width < 0)
return ErrorNonPrintableCharacter;
ColumnWidth += Width;
}
// The unsigned accumulator is converted to the int return type here.
return ColumnWidth;
}
```
```
//llvm/lib/Support/FormattedStream.cpp:30
// Updates the captured Line and Column counters for a single code point CP.
// First the display width of CP is added to Column, then the single-byte
// whitespace characters '\n', '\r' and '\t' are given special treatment.
// NOTE(review): CP[0] is read below without an emptiness check -- assumes the
// caller never passes an empty CP; confirm.
auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
int Width = sys::unicode::columnWidthUTF8(CP);
// Only ErrorNonPrintableCharacter is filtered out; any other return value is
// added to Column as-is.
if (Width != sys::unicode::ErrorNonPrintableCharacter)
Column += Width;
// The only special whitespace characters we care about are single-byte.
if (CP.size() > 1)
return;
switch (CP[0]) {
case '\n':
Line += 1;
[[fallthrough]];
case '\r':
// Both '\n' (via fallthrough) and '\r' reset the column.
Column = 0;
break;
case '\t':
// Assumes tab stop = 8 characters.
// NOTE(review): '\t' is not printable ASCII, so columnWidthUTF8 as quoted in
// this report returns ErrorNonPrintableCharacter and nothing was added to
// Column above. When Column is already a multiple of 8, (Column & 0x7) is 0
// and (8 - 0) & 0x7 is 0, so the tab does not advance Column at all. The
// outer `& 0x7` looks like it expects Column to have been incremented for
// the tab beforehand -- confirm intent; `8 - (Column & 0x7)` would always
// move to the next tab stop.
Column += (8 - (Column & 0x7)) & 0x7;
break;
}
};
```
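If a '\t' is written while Column is 0 (or any other multiple of 8), Column does not advance: columnWidthUTF8 returns ErrorNonPrintableCharacter for '\t', so no width is added, and (8 - (Column & 0x7)) & 0x7 then evaluates to 0.
Is this the intended behavior of this code?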
I found that this code was changed by the following commit:
https://reviews.llvm.org/D76291
Versions prior to that commit do not have this issue.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs