external/clucene/UnpackedTarball_clucene.mk | 1 + external/clucene/patches/wcsncpy.patch | 11 +++++++++++ 2 files changed, 12 insertions(+)
New commits: commit 87974ca9847994d6fa290d411974eb92b36b1d31 Author: Stephan Bergmann <[email protected]> AuthorDate: Wed Oct 8 14:05:52 2025 +0200 Commit: Stephan Bergmann <[email protected]> CommitDate: Wed Oct 8 16:57:48 2025 +0200 external/clucene: Avoid ASan wcsncpy-param-overlap ...as seen after <https://github.com/llvm/llvm-project/commit/6ca835b7f4349ad55c8e8afdf0669927b6b284b4> "[compiler-rt][asan] Add wcscpy/wcsncpy; enable wcscat/wcsncat on Windows (#160493)" when building Extension_nlpsolver, > ==1084736==ERROR: AddressSanitizer: wcsncpy-param-overlap: memory ranges [0x7cacc15e1480,0x7cacc15e1498) and [0x7cacc15e1480, 0x7cacc15e1498) overlap > #0 in wcsncpy at ~/github.com/llvm/llvm-project/compiler-rt/lib/asan/asan_interceptors.cpp:664:5 > #1 in lucene::util::StringBuffer::prepend(wchar_t const*, unsigned long) at workdir/UnpackedTarball/clucene/src/shared/CLucene/util/StringBuffer.cpp:233:7 > #2 in lucene::util::StringBuffer::prepend(wchar_t const*) at workdir/UnpackedTarball/clucene/src/shared/CLucene/util/StringBuffer.cpp:214:7 > #3 in lucene::analysis::standard::StandardTokenizer::ReadNumber(wchar_t const*, wchar_t, lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardTokenizer.cpp:181:11 > #4 in lucene::analysis::standard::StandardTokenizer::ReadNumber(wchar_t const*, wchar_t, lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardTokenizer.cpp:238:16 > #5 in lucene::analysis::standard::StandardTokenizer::next(lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardTokenizer.cpp:160:13 > #6 in lucene::analysis::standard::StandardFilter::next(lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardFilter.cpp:27:16 > #7 in lucene::analysis::LowerCaseFilter::next(lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/Analyzers.cpp:151:13 > #8 in lucene::analysis::StopFilter::next(lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/Analyzers.cpp:218:16 > #9 in lucene::index::DocumentsWriter::ThreadState::FieldData::invertField(lucene::document::Field*, lucene::analysis::Analyzer*, int) at workdir/UnpackedTarball/clucene/src/core/CLucene/index/DocumentsWriterThreadState.cpp:901:32 > #10 in lucene::index::DocumentsWriter::ThreadState::FieldData::processField(lucene::analysis::Analyzer*) at workdir/UnpackedTarball/clucene/src/core/CLucene/index/DocumentsWriterThreadState.cpp:798:9 > #11 in lucene::index::DocumentsWriter::ThreadState::processDocument(lucene::analysis::Analyzer*) at workdir/UnpackedTarball/clucene/src/core/CLucene/index/DocumentsWriterThreadState.cpp:557:24 > #12 in lucene::index::DocumentsWriter::updateDocument(lucene::document::Document*, lucene::analysis::Analyzer*, lucene::index::Term*) at workdir/UnpackedTarball/clucene/src/core/CLucene/index/DocumentsWriter.cpp:946:16 > #13 in lucene::index::DocumentsWriter::addDocument(lucene::document::Document*, lucene::analysis::Analyzer*) at workdir/UnpackedTarball/clucene/src/core/CLucene/index/DocumentsWriter.cpp:930:10 > #14 in lucene::index::IndexWriter::addDocument(lucene::document::Document*, lucene::analysis::Analyzer*) at workdir/UnpackedTarball/clucene/src/core/CLucene/index/IndexWriter.cpp:685:28 > #15 in HelpIndexer::indexDocuments() at helpcompiler/source/HelpIndexer.cxx:125:21 > #16 in main at helpcompiler/source/HelpIndexer_main.cxx:79:22 > > 0x7cacc15e1480 is located 0 bytes inside of 1024-byte region [0x7cacc15e1480,0x7cacc15e1880) > allocated by thread T0 here: > #0 in malloc at ~/github.com/llvm/llvm-project/compiler-rt/lib/asan/asan_malloc_linux.cpp:67:3 > #1 in lucene::analysis::Token::growBuffer(unsigned long) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/AnalysisHeader.cpp:162:21 > #2 in lucene::analysis::standard::StandardTokenizer::ReadAlphaNum(wchar_t, lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardTokenizer.cpp:260:8 > #3 in lucene::analysis::standard::StandardTokenizer::next(lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardTokenizer.cpp:154:13 > #4 in lucene::analysis::standard::StandardFilter::next(lucene::analysis::Token*) at workdir/UnpackedTarball/clucene/src/core/CLucene/analysis/standard/StandardFilter.cpp:27:16 > > SUMMARY: AddressSanitizer: wcsncpy-param-overlap workdir/UnpackedTarball/clucene/src/shared/CLucene/util/StringBuffer.cpp:233:7 in lucene::util::StringBuffer::prepend(wchar_t const*, unsigned long) > ==1084736==ABORTING > make[1]: *** [nlpsolver/Extension_nlpsolver.mk:14: workdir/Extension/nlpsolver/root/help/de.done] Error 1 That wcsncpy move of a memory range onto itself looks a bit scary, but appears to be by design of how recursive invocations of StandardTokenizer::ReadNumber work. Change-Id: I3ce9cc8343140ee287fac7db46b28e5adb7b67f9 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/192069 Reviewed-by: Stephan Bergmann <[email protected]> Tested-by: Jenkins diff --git a/external/clucene/UnpackedTarball_clucene.mk b/external/clucene/UnpackedTarball_clucene.mk index 8f0a2ea07f52..a2d4b1c69d0a 100644 --- a/external/clucene/UnpackedTarball_clucene.mk +++ b/external/clucene/UnpackedTarball_clucene.mk @@ -57,6 +57,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,clucene,\ external/clucene/patches/enumarith.patch \ external/clucene/patches/clucene-reprobuild.patch.1 \ external/clucene/patches/clucene-const.patch.1 \ + external/clucene/patches/wcsncpy.patch \ )) ifneq ($(OS),WNT) diff --git a/external/clucene/patches/wcsncpy.patch b/external/clucene/patches/wcsncpy.patch new file mode 100644 index 000000000000..e3594805044c --- /dev/null +++ b/external/clucene/patches/wcsncpy.patch @@ -0,0 +1,11 @@ +--- src/shared/CLucene/util/StringBuffer.cpp ++++ src/shared/CLucene/util/StringBuffer.cpp +@@ -230,7 +230,7 @@ + } + + //Copy the string value into the buffer at postion 0 +- _tcsncpy(buffer, value, prependedLength); ++ if (value != buffer) _tcsncpy(buffer, value, prependedLength); + //Add the length of the copied string to len to reflect the new length of the string in + //the buffer (Note: len is not the bufferlength!) + len += prependedLength;
