This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch revert-203-clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git

commit b8c1ef4de76b3fc3c5c0dbcdd3eaba43a226bb7f
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Tue Mar 19 12:05:51 2024 +0800

    Revert "[opt](chinese) chinese tokenizer lowercase interface (#203)"
    
    This reverts commit cf210eaaadc3ad5d7b27ff2e7b9635ad45cf227b.
---
 src/core/CLucene/index/IndexWriter.cpp | 18 +++---------------
 src/core/CLucene/index/IndexWriter.h   |  4 ++--
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 6b52e047f5..0d770182ba 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1255,7 +1255,7 @@ void IndexWriter::resetMergeExceptions() {
 void IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_dirs,
                                   std::vector<lucene::store::Directory *> dest_dirs,
                                   std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec,
-                                  std::vector<uint32_t> dest_index_docs, bool maybe_skip) {
+                                  std::vector<uint32_t> dest_index_docs) {
     CND_CONDITION(src_dirs.size() > 0, "Source directory not found.");
     CND_CONDITION(dest_dirs.size() > 0, "Destination directory not found.");
     this->_trans_vec = std::move(trans_vec);
@@ -1387,7 +1387,7 @@ void IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
         }
 
         /// merge terms
-        mergeTerms(hasProx, maybe_skip);
+        mergeTerms(hasProx);
 
         /// merge null_bitmap
         mergeNullBitmap(srcNullBitmapValues, nullBitmapIndexOutputList);
@@ -1613,7 +1613,7 @@ protected:
 
 };
 
-void IndexWriter::mergeTerms(bool hasProx, bool maybe_skip) {
+void IndexWriter::mergeTerms(bool hasProx) {
     auto queue = _CLNEW SegmentMergeQueue(readers.size());
     auto numSrcIndexes = readers.size();
     //std::vector<TermPositions *> postingsList(numSrcIndexes);
@@ -1664,18 +1664,6 @@ void IndexWriter::mergeTerms(bool hasProx, bool maybe_skip) {
             top = queue->top();
         }
 
-        if (maybe_skip && smallestTerm) {
-            auto containsUpperCase = [](const std::wstring_view& ws_term) {
-                return std::any_of(ws_term.begin(), ws_term.end(),
-                                   [](wchar_t ch) { return std::iswupper(ch) != 0; });
-            };
-
-            std::wstring_view ws_term(smallestTerm->text(), smallestTerm->textLength());
-            if (containsUpperCase(ws_term)) {
-                _CLTHROWA(CL_ERR_InvalidState, "need rewrite, skip index compaction");
-            }
-        }
-
         std::vector<std::vector<uint32_t>> docDeltaBuffers(numDestIndexes);
         std::vector<std::vector<uint32_t>> freqBuffers(numDestIndexes);
         auto destPostingQueues = _CLNEW postingQueue(matchSize);
diff --git a/src/core/CLucene/index/IndexWriter.h b/src/core/CLucene/index/IndexWriter.h
index 0e8d40d8cc..7cfb67d2ca 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -317,14 +317,14 @@ public:
     void indexCompaction(std::vector<lucene::store::Directory*>& src_dirs,
                             std::vector<lucene::store::Directory*> dest_dirs,
                             std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec,
-                            std::vector<uint32_t> dest_index_docs, bool maybe_skip = false);
+                            std::vector<uint32_t> dest_index_docs);
 
     // create new fields info
     void mergeFields(bool hasProx);
     // write fields info file
     void writeFields(lucene::store::Directory* d, std::string segment);
     // merge terms and write files
-    void mergeTerms(bool hasProx, bool maybe_skip = false);
+    void mergeTerms(bool hasProx);
     // merge null_bitmap
     void mergeNullBitmap(std::vector<std::vector<uint32_t>> srcBitmapValues, std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to