source

László Németh (via logerrit) Sun, 28 Dec 2025 05:02:13 -0800

 i18npool/qa/cppunit/test_breakiterator.cxx          |   34 ++++++++++++++++++++
 i18npool/source/breakiterator/data/dict_word_hu.txt |   21 +++++++++++-
 2 files changed, 53 insertions(+), 2 deletions(-)


New commits:
commit be49bc3f64eb8bfcf54628e907c8bd6a5c50dae3
Author:     László Németh <[email protected]>
AuthorDate: Fri Dec 26 01:07:56 2025 +0100
Commit:     Xisco Fauli <[email protected]>
CommitDate: Sun Dec 28 14:01:44 2025 +0100

    tdf#162514 i18npool: apply fix for Hungarian abbreviations, too
    
    Restores Hungarian abbreviation handling to spell checking by
    applying the fix for dict_word_hu.txt.
    
    Regression from commit 44699b3de37f07090ac6fee1cd97aa76036e9700
    "tdf#49885 BreakIterator rule upgrades".
    
    Follow-up to commit f4fe6df6aa92573368c3fa0edb9fd03e64d9d059
    "tdf#162514 i18npool: Handle abbreviations in dictionary breakiterator".
    
    Change-Id: I83e30c831759ae896f1db2da697287b8c4dcd26b
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/196224
    Tested-by: Jenkins
    Reviewed-by: László Németh <[email protected]>
    (cherry picked from commit 167bbe31c0620d6ca1c4640a81f2e759f8f65e6a)
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/196231
    Reviewed-by: Xisco Fauli <[email protected]>

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
index 658481bd4381..5e443bec8fdc 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -46,6 +46,7 @@ public:
     void testKorean();
 
     void testDictWordAbbreviation();
+    void testDictWordAbbreviationHU();
     void testDictWordPrepostDash();
     void testHebrewGereshGershaim();
     void testLegacySurrogatePairs();
@@ -69,6 +70,7 @@ public:
     CPPUNIT_TEST(testChinese);
     CPPUNIT_TEST(testKorean);
     CPPUNIT_TEST(testDictWordAbbreviation);
+    CPPUNIT_TEST(testDictWordAbbreviationHU);
     CPPUNIT_TEST(testDictWordPrepostDash);
     CPPUNIT_TEST(testHebrewGereshGershaim);
     CPPUNIT_TEST(testLegacySurrogatePairs);
@@ -1834,6 +1836,38 @@ void TestBreakIterator::testDictWordAbbreviation()
     }
 }
 
+void TestBreakIterator::testDictWordAbbreviationHU()
+{
+    std::vector<lang::Locale> aLocale{
+        { "hu", "HU", "" } // dict_word_hu locale
+    };
+
+    for (const auto& rLocale : aLocale)
+    {
+        auto aTest = u"Pl. stb. dr.-ral Mo.-gal"_ustr;
+
+        i18n::Boundary aBounds
+            = m_xBreak->getWordBoundary(aTest, 1, rLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.endPos);
+
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 4, rLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(8), aBounds.endPos);
+
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 9, rLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.endPos);
+
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 17, rLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(17), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(24), aBounds.endPos);
+    }
+}
+
 void TestBreakIterator::testHebrewGereshGershaim()
 {
     // In Hebrew documents, there are multiple valid ways to represent the geresh and gershaim
diff --git a/i18npool/source/breakiterator/data/dict_word_hu.txt b/i18npool/source/breakiterator/data/dict_word_hu.txt
index 4ba426c8c7db..df28b2fbd679 100644
--- a/i18npool/source/breakiterator/data/dict_word_hu.txt
+++ b/i18npool/source/breakiterator/data/dict_word_hu.txt
@@ -94,6 +94,9 @@ $IncludedML_hu      = [[:name = RIGHT DOUBLE QUOTATION MARK:]
                        [:name = QUESTION MARK:]
                        $Symbols_hu];
 
+### tdf#162514: For spell checking, abbreviations may end with a period.
+$PostPeriod         = [:name = FULL STOP:];
+
 # $MidLetter          = [\p{Word_Break = MidLetter}];
 $MidLetter          = [[\p{Word_Break = MidLetter}]-$ExcludedML $IncludedML $IncludedML_hu];
 
@@ -160,10 +163,24 @@ $Ideographic $ExFm* {400};          #
 # rule 5
 #    Do not break between most letters.
 #
-($ALetterPlus | $Hebrew_Letter) $ExFm* ($ALetterPlus | $Hebrew_Letter);
+
+### BEGIN CUSTOMIZATION
+### tdf#162514: For spell checking, abbreviations may end with a period.
+
+# ($ALetterPlus | $Hebrew_Letter) $ExFm* ($ALetterPlus | $Hebrew_Letter);
+($ALetterPlus | $Hebrew_Letter) $ExFm* ($ALetterPlus | $Hebrew_Letter) ($PostPeriod)?;
+
+### END CUSTOMIZATION
 
 # rule 6 and 7
-($ALetterPlus | $Hebrew_Letter)  $ExFm* ($MidLetter | $MidNumLet | $Single_Quote) $ExFm* ($ALetterPlus | $Hebrew_Letter) {200};
+
+### BEGIN CUSTOMIZATION
+### tdf#162514: For spell checking, abbreviations may end with a period.
+
+# ($ALetterPlus | $Hebrew_Letter)  $ExFm* ($MidLetter | $MidNumLet | $Single_Quote) $ExFm* ($ALetterPlus | $Hebrew_Letter) {200};
+($ALetterPlus | $Hebrew_Letter)  $ExFm* ($MidLetter | $MidNumLet | $Single_Quote) $ExFm* ($ALetterPlus | $Hebrew_Letter) ($PostPeriod)? {200};
+
+### END CUSTOMIZATION
 
 # rule 7a
 $Hebrew_Letter $ExFm* $Single_Quote {200};

core.git: Branch 'libreoffice-25-8' - i18npool/qa i18npool/source

Reply via email to