compilerplugins/clang/narrow.cxx              |  108 ++++++++++++++++++++++++++
 compilerplugins/clang/test/narrow.cxx         |   30 +++++++
 cppcanvas/source/mtfrenderer/implrenderer.cxx |    3 
 editeng/source/editeng/impedit3.cxx           |    3 
 solenv/CompilerTest_compilerplugins_clang.mk  |    1 
 svtools/source/misc/sampletext.cxx            |    4 
 svx/source/dialog/charmap.cxx                 |    6 -
 sw/source/core/unocore/unosett.cxx            |    3 
 vcl/qa/cppunit/fontcharmap.cxx                |   12 +-
 vcl/source/outdev/text.cxx                    |    2 
 10 files changed, 157 insertions(+), 15 deletions(-)

New commits:
commit 82f055e5cb35501db06b4996909759f686de9631
Author:     Noel Grandin <[email protected]>
AuthorDate: Tue Nov 4 10:12:18 2025 +0200
Commit:     Noel Grandin <[email protected]>
CommitDate: Wed Nov 5 15:02:41 2025 +0100

    new loplugin:narrow
    
    inspired by
        commit 4888f6f3f59785d0e661b58060bd8458f866bf10
        Author: Neil Roberts <[email protected]>
        Date:   Sun Oct 26 16:26:47 2025 +0100
        tdf#166488 Fix local variable used to store char from SvxCharacterMap
    
    Look for places narrowing a sal_UCS4 value and thus losing information.
    
    I dont actually fix anything here, just change some types to use the 
sal_UCS4 typedef,
    and add some TODO comments.
    
    Change-Id: Iaea7ba647889efcdaafe5138d4e4806849514f02
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/193384
    Tested-by: Jenkins
    Reviewed-by: Neil Roberts <[email protected]>
    Reviewed-by: Noel Grandin <[email protected]>

diff --git a/compilerplugins/clang/narrow.cxx b/compilerplugins/clang/narrow.cxx
new file mode 100644
index 000000000000..b08fb2979c3b
--- /dev/null
+++ b/compilerplugins/clang/narrow.cxx
@@ -0,0 +1,108 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * Based on LLVM/Clang.
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ */
+
+#include <cassert>
+#include <string>
+#include <iostream>
+#include <fstream>
+#include "config_clang.h"
+#include "plugin.hxx"
+#include "check.hxx"
+#include <unordered_set>
+#include <unordered_map>
+
+/*
+Look for places where we are throwing away useful information by converting 
sal_UCS4 to sal_Unicode.
+*/
+
+namespace
+{
+class Narrow : public loplugin::FilteringPlugin<Narrow>
+{
+public:
+    explicit Narrow(loplugin::InstantiationData const& data)
+        : FilteringPlugin(data)
+    {
+    }
+
+    virtual bool preRun() override { return true; }
+
+    virtual void run() override
+    {
+        if (preRun())
+            TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
+    }
+
+    bool VisitBinaryOperator(const BinaryOperator*);
+    bool VisitVarDecl(const VarDecl*);
+};
+
+static const Expr* IgnoreImplicitAndConversionOperator(const Expr* expr)
+{
+    expr = expr->IgnoreImplicit();
+    if (auto memberCall = dyn_cast<CXXMemberCallExpr>(expr))
+    {
+        if (auto conversionDecl = 
dyn_cast_or_null<CXXConversionDecl>(memberCall->getMethodDecl()))
+        {
+            if (!conversionDecl->isExplicit())
+                expr = 
memberCall->getImplicitObjectArgument()->IgnoreImplicit();
+        }
+    }
+    return expr;
+}
+
+bool Narrow::VisitBinaryOperator(const BinaryOperator* binaryOp)
+{
+    if (ignoreLocation(binaryOp))
+        return true;
+    if (binaryOp->getBeginLoc().isMacroID())
+        return true;
+    if (binaryOp->getOpcode() != BO_Assign)
+        return true;
+    auto rhsExpr = IgnoreImplicitAndConversionOperator(binaryOp->getRHS());
+    if 
(!loplugin::TypeCheck(rhsExpr->getType()).Typedef("sal_UCS4").GlobalNamespace())
+        return true;
+    auto lhsType = binaryOp->getLHS()->getType();
+    if (auto lhsTemplateType = 
dyn_cast<clang::SubstTemplateTypeParmType>(lhsType))
+        lhsType = lhsTemplateType->getReplacementType();
+    auto tc = loplugin::TypeCheck(lhsType);
+    // std::unique_ptr<sal_UCS4[]> will decompose to UInt.
+    if (tc.Typedef("sal_UCS4").GlobalNamespace()
+        || lhsType->isSpecificBuiltinType(BuiltinType::UInt))
+        return true;
+    report(DiagnosticsEngine::Warning,
+           "loosing information assigning a sal_UCS4 value to a %0 type", 
binaryOp->getBeginLoc())
+        << lhsType << binaryOp->getSourceRange();
+    return true;
+}
+
+bool Narrow::VisitVarDecl(const VarDecl* decl)
+{
+    if (ignoreLocation(decl))
+        return true;
+    if (!decl->getInit())
+        return true;
+    auto expr = IgnoreImplicitAndConversionOperator(decl->getInit());
+    if 
(!loplugin::TypeCheck(expr->getType()).Typedef("sal_UCS4").GlobalNamespace())
+        return true;
+    if 
(loplugin::TypeCheck(decl->getType()).Typedef("sal_UCS4").GlobalNamespace())
+        return true;
+    report(DiagnosticsEngine::Warning,
+           "loosing information assigning a sal_UCS4 value to a %0 type", 
decl->getBeginLoc())
+        << decl->getType() << decl->getSourceRange();
+    return true;
+}
+
+loplugin::Plugin::Registration<Narrow> narrow("narrow");
+
+} // namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/compilerplugins/clang/test/narrow.cxx 
b/compilerplugins/clang/test/narrow.cxx
new file mode 100644
index 000000000000..9c3127fab127
--- /dev/null
+++ b/compilerplugins/clang/test/narrow.cxx
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; 
fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/config.h>
+#include <sal/types.h>
+#include <vcl/vclenum.hxx>
+
+namespace test1
+{
+sal_UCS4 GetChar();
+void f()
+{
+    // expected-error@+1 {{loosing information assigning a sal_UCS4 value to a 
'sal_Unicode' (aka 'char16_t') type [loplugin:narrow]}}
+    sal_Unicode cChar = GetChar();
+    (void)cChar;
+    // no error expected
+    sal_UCS4 cChar2 = GetChar();
+    (void)cChar2;
+    // expected-error@+1 {{loosing information assigning a sal_UCS4 value to a 
'sal_Unicode' (aka 'char16_t') type [loplugin:narrow]}}
+    cChar = GetChar();
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s 
cinkeys+=0=break: */
diff --git a/cppcanvas/source/mtfrenderer/implrenderer.cxx 
b/cppcanvas/source/mtfrenderer/implrenderer.cxx
index 448d30204b24..d11f9cdada40 100644
--- a/cppcanvas/source/mtfrenderer/implrenderer.cxx
+++ b/cppcanvas/source/mtfrenderer/implrenderer.cxx
@@ -224,7 +224,8 @@ namespace
         {
             sal_Unicode nChar = aBuf[i];
             if (nChar >= '0' && nChar <= '9')
-                aBuf[i] = GetLocalizedChar(nChar, eTextLanguage);
+                // TODO: are the localized digit surrogates?
+                aBuf[i] = static_cast<sal_Unicode>(GetLocalizedChar(nChar, 
eTextLanguage));
         }
         return aBuf.makeStringAndClear();
     }
diff --git a/editeng/source/editeng/impedit3.cxx 
b/editeng/source/editeng/impedit3.cxx
index 0b3ebae8def7..1298eb314b91 100644
--- a/editeng/source/editeng/impedit3.cxx
+++ b/editeng/source/editeng/impedit3.cxx
@@ -4675,7 +4675,8 @@ OUString ImpEditEngine::convertDigits(std::u16string_view 
rString, sal_Int32 nSt
     {
         sal_Unicode cChar = aBuf[nIdx];
         if (cChar >= '0' && cChar <= '9')
-            aBuf[nIdx] = GetLocalizedChar(cChar, eDigitLang);
+            // TODO: are the localized digit surrogates?
+            aBuf[nIdx] = static_cast<sal_Unicode>(GetLocalizedChar(cChar, 
eDigitLang));
     }
     return aBuf.makeStringAndClear();
 }
diff --git a/solenv/CompilerTest_compilerplugins_clang.mk 
b/solenv/CompilerTest_compilerplugins_clang.mk
index d4eb9db80e3c..75433f14bdef 100644
--- a/solenv/CompilerTest_compilerplugins_clang.mk
+++ b/solenv/CompilerTest_compilerplugins_clang.mk
@@ -52,6 +52,7 @@ $(eval $(call 
gb_CompilerTest_add_exception_objects,compilerplugins_clang, \
     compilerplugins/clang/test/moveit \
     compilerplugins/clang/test/moveparam \
     compilerplugins/clang/test/mustoverride \
+    compilerplugins/clang/test/narrow \
     compilerplugins/clang/test/noexcept \
     compilerplugins/clang/test/noexceptmove \
     compilerplugins/clang/test/nullptr \
diff --git a/svtools/source/misc/sampletext.cxx 
b/svtools/source/misc/sampletext.cxx
index 12e98cd769d7..9fabb15e5baf 100644
--- a/svtools/source/misc/sampletext.cxx
+++ b/svtools/source/misc/sampletext.cxx
@@ -174,7 +174,7 @@ OUString 
makeShortRepresentativeSymbolTextForSelectedFont(OutputDevice const &rD
             sal_Unicode aText[8];
 
             // start just above the PUA used by most symbol fonts
-            sal_uInt32 cNewChar = 0xFF00;
+            sal_UCS4 cNewChar = 0xFF00;
 
             const int nMaxCount = SAL_N_ELEMENTS(aText) - 1;
             int nSkip = xFontCharMap->GetCharCount() / nMaxCount;
@@ -184,7 +184,7 @@ OUString 
makeShortRepresentativeSymbolTextForSelectedFont(OutputDevice const &rD
                 nSkip = 1;
             for( int i = 0; i < nMaxCount; ++i )
             {
-                sal_uInt32 cOldChar = cNewChar;
+                sal_UCS4 cOldChar = cNewChar;
                 for( int j = nSkip; --j >= 0; )
                     cNewChar = xFontCharMap->GetPrevChar( cNewChar );
                 if( cOldChar == cNewChar )
diff --git a/svx/source/dialog/charmap.cxx b/svx/source/dialog/charmap.cxx
index 628c7750f342..d82f465de835 100644
--- a/svx/source/dialog/charmap.cxx
+++ b/svx/source/dialog/charmap.cxx
@@ -731,7 +731,7 @@ void SvxShowCharSet::SelectIndex(int nNewIndex, bool bFocus)
     if( nNewIndex < 0 )
     {
         // need to scroll see closest unicode
-        sal_uInt32 cPrev = mxFontCharMap->GetPrevChar( getSelectedChar() );
+        sal_UCS4 cPrev = mxFontCharMap->GetPrevChar( getSelectedChar() );
         int nMapIndex = mxFontCharMap->GetIndexFromChar( cPrev );
         int nNewPos = nMapIndex / COLUMN_COUNT;
         mxScrollArea->vadjustment_set_value(nNewPos);
@@ -2019,8 +2019,8 @@ void SubsetMap::ApplyCharMap( const FontCharMapRef& 
rxFontCharMap )
     // remove subsets that are not matched in any range
     std::erase_if(maSubsets,
         [&rxFontCharMap](const Subset& rSubset) {
-            sal_uInt32 cMin = rSubset.GetRangeMin();
-            sal_uInt32 cMax = rSubset.GetRangeMax();
+            sal_UCS4 cMin = rSubset.GetRangeMin();
+            sal_UCS4 cMax = rSubset.GetRangeMax();
             int nCount = rxFontCharMap->CountCharsInRange( cMin, cMax );
             return nCount <= 0;
         });
diff --git a/sw/source/core/unocore/unosett.cxx 
b/sw/source/core/unocore/unosett.cxx
index cd4d5020f13f..78e0bfce281d 100644
--- a/sw/source/core/unocore/unosett.cxx
+++ b/sw/source/core/unocore/unosett.cxx
@@ -1460,7 +1460,8 @@ uno::Sequence<beans::PropertyValue> 
SwXNumberingRules::GetPropertiesForNumFormat
             sal_UCS4 cBullet = rFormat.GetBulletChar();
 
             //BulletId
-            nINT16 = cBullet;
+            // TODO what happens if this is a surrogate pair?
+            nINT16 = static_cast<sal_uInt16>(cBullet);
             
aPropertyValues.push_back(comphelper::makePropertyValue(u"BulletId"_ustr, 
nINT16));
 
             const std::optional<vcl::Font>& pFont = rFormat.GetBulletFont();
diff --git a/vcl/qa/cppunit/fontcharmap.cxx b/vcl/qa/cppunit/fontcharmap.cxx
index 26a2fe9a0a0d..9d23cdc3c17a 100644
--- a/vcl/qa/cppunit/fontcharmap.cxx
+++ b/vcl/qa/cppunit/fontcharmap.cxx
@@ -32,13 +32,13 @@ void VclFontCharMapTest::testDefaultFontCharMap()
 
     CPPUNIT_ASSERT(xfcmap->IsDefaultMap());
 
-    sal_uInt32 nStartBMPPlane = xfcmap->GetFirstChar();
-    sal_uInt32 nStartSupBMPPlane = xfcmap->GetNextChar(0xD800);
-    sal_uInt32 nEndBMPPlane = xfcmap->GetLastChar();
+    sal_UCS4 nStartBMPPlane = xfcmap->GetFirstChar();
+    sal_UCS4 nStartSupBMPPlane = xfcmap->GetNextChar(0xD800);
+    sal_UCS4 nEndBMPPlane = xfcmap->GetLastChar();
 
-    CPPUNIT_ASSERT_EQUAL(static_cast<sal_uInt32>(0x0020), nStartBMPPlane);
-    CPPUNIT_ASSERT_EQUAL(static_cast<sal_uInt32>(0xE000), nStartSupBMPPlane);
-    CPPUNIT_ASSERT_EQUAL(static_cast<sal_uInt32>(0xFFF0 - 1), nEndBMPPlane);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_UCS4>(0x0020), nStartBMPPlane);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_UCS4>(0xE000), nStartSupBMPPlane);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_UCS4>(0xFFF0 - 1), nEndBMPPlane);
 }
 
 CPPUNIT_TEST_SUITE_REGISTRATION(VclFontCharMapTest);
diff --git a/vcl/source/outdev/text.cxx b/vcl/source/outdev/text.cxx
index 4cd9cdbe5b8c..ba190b047cfe 100644
--- a/vcl/source/outdev/text.cxx
+++ b/vcl/source/outdev/text.cxx
@@ -1011,7 +1011,7 @@ vcl::text::ImplLayoutArgs 
OutputDevice::ImplPrepareLayoutArgs( OUString& rStr,
                     if (!xTmpStr)
                         xTmpStr = OUStringBuffer(rStr);
                     // TODO: are the localized digit surrogates?
-                    (*xTmpStr)[pStr - pBase] = cChar;
+                    (*xTmpStr)[pStr - pBase] = static_cast<sal_Unicode>(cChar);
                 }
             }
         }

Reply via email to