i18npool/qa/cppunit/test_breakiterator.cxx          |   30 ++++++++++++++++++++
 i18npool/source/breakiterator/data/dict_word_hu.txt |    2 -
 i18npool/source/breakiterator/data/edit_word.txt    |    8 +++--
 i18npool/source/breakiterator/data/edit_word_hu.txt |    8 +++--
 4 files changed, 41 insertions(+), 7 deletions(-)

New commits:
commit 2b9fee5a3e9d1eae65932fb0f08f0216f8a30cf7
Author:     László Németh <nem...@numbertext.org>
AuthorDate: Thu Jun 27 11:06:35 2024 +0200
Commit:     László Németh <nem...@numbertext.org>
CommitDate: Thu Jun 27 16:49:51 2024 +0200

    tdf#161737 i18npool: fix bad word selection with NNBSP
    
    Fix the word-breaking rules for editing as well. Previously
    a word was selected together with the narrow no-break space
    following it, e.g. in French words before exclamation and
    question marks (where a narrow no-break space gives correct
    typography if the OpenType/Graphite font doesn't provide
    this feature).
    
    Add this and the previous fixes for Hungarian, which is
    handled by extra word-breaking rule files.
    
    Follow-up to commit 6e002da1615b52cda4e9331e87878458b1fe9677
    "tdf#161737 i18npool: fix fake spelling alarms with NNBSP".
    
    Change-Id: I7230bd356e5f0360172b652e615a61d96131d336
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169624
    Tested-by: Jenkins
    Reviewed-by: László Németh <nem...@numbertext.org>

diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx 
b/i18npool/qa/cppunit/test_breakiterator.cxx
index 6fbde026f565..7e9f47ad22f1 100644
--- a/i18npool/qa/cppunit/test_breakiterator.cxx
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -1022,6 +1022,36 @@ void TestBreakIterator::testWordBoundaries()
         // This was 8 (word + NNBSP)
         CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
     }
+
+    //  tdf#161737: narrow no-break space at the end of words resulted 
spelling mistakes
+    {
+        aLocale.Language = "hu";
+        aLocale.Country = "HU";
+
+        OUString aTest(u"L’espace fine insécable\u202F!"_ustr);
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 14, aLocale, 
i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.startPos);
+        // This was 24 (word + NNBSP)
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(23), aBounds.endPos);
+    }
+
+    //  tdf#161737: narrow no-break space between digits resulted spelling 
mistakes
+    //  as a quick fix, limit NBSP as word-part character only for editing, 
and not for spell checking
+    //  TODO: remove NBSP by the linguistic module or by the spell checking 
dictionaries to allow
+    //  to check numbers with thousand separators and with correct suffix
+    {
+        aLocale.Language = "hu";
+        aLocale.Country = "HU";
+
+        OUString aTest(u"1\u202F000\u202F000"_ustr);
+        aBounds
+            = m_xBreak->getWordBoundary(aTest, 2, aLocale, 
i18n::WordType::DICTIONARY_WORD, false);
+        // This was 0 (word + NNBSP)
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos);
+        // This was 8 (word + NNBSP)
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+    }
 }
 
 void TestBreakIterator::testSentenceBoundaries()
diff --git a/i18npool/source/breakiterator/data/dict_word_hu.txt 
b/i18npool/source/breakiterator/data/dict_word_hu.txt
index 88648e6e5716..4ba426c8c7db 100644
--- a/i18npool/source/breakiterator/data/dict_word_hu.txt
+++ b/i18npool/source/breakiterator/data/dict_word_hu.txt
@@ -53,7 +53,7 @@ $Double_Quote       = [\p{Word_Break = Double_Quote}];
 $MidNumLet          = [\p{Word_Break = MidNumLet}];
 $MidNum             = [\p{Word_Break = MidNum}];
 $Numeric            = [\p{Word_Break = Numeric}];
-$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
+$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}-[:name = NARROW NO-BREAK 
SPACE:]];
 $WSegSpace          = [\p{Word_Break = WSegSpace}];
 $Extended_Pict      = [\p{Extended_Pictographic}];
 
diff --git a/i18npool/source/breakiterator/data/edit_word.txt 
b/i18npool/source/breakiterator/data/edit_word.txt
index 14fc221aa96e..1e3bcd15b20d 100644
--- a/i18npool/source/breakiterator/data/edit_word.txt
+++ b/i18npool/source/breakiterator/data/edit_word.txt
@@ -65,7 +65,7 @@ $Extended_Pict      = [\p{Extended_Pictographic}];
 $MidNumLet          = [\p{Word_Break = MidNumLet}-[:name= FULL STOP:]];
 
 # $ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
-$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}-[:name= LOW LINE:]];
+$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}-[:name= LOW LINE:]-[:name 
= NARROW NO-BREAK SPACE:]];
 
 ### END CUSTOMIZATION
 
@@ -164,16 +164,18 @@ $Numeric $ExFm* ($MidNum | $MidNumLet | $Single_Quote) 
$ExFm* $Numeric;
 $Katakana $ExFm*  $Katakana {400};
 
 # rule 13a/b
+# allow to select numbers with narrow no-break spaces as thousand separators
+$ExtendNumLetNNBSP = [\p{Word_Break = ExtendNumLet}];
 
 $ALetterPlus   $ExFm* $ExtendNumLet {200};    #  (13a)
 $Hebrew_Letter $ExFm* $ExtendNumLet {200};    #  (13a)
-$Numeric       $ExFm* $ExtendNumLet {100};    #  (13a)
+$Numeric       $ExFm* $ExtendNumLetNNBSP {100};    #  (13a)
 $Katakana      $ExFm* $ExtendNumLet {400};    #  (13a)
 $ExtendNumLet  $ExFm* $ExtendNumLet {200};    #  (13a)
 
 $ExtendNumLet  $ExFm* $ALetterPlus  {200};    #  (13b)
 $ExtendNumLet  $ExFm* $Hebrew_Letter {200};    #  (13b)
-$ExtendNumLet  $ExFm* $Numeric      {100};    #  (13b)
+$ExtendNumLetNNBSP  $ExFm* $Numeric      {100};    #  (13b)
 $ExtendNumLet  $ExFm* $Katakana     {400};    #  (13b)
 
 # rules 15 - 17
diff --git a/i18npool/source/breakiterator/data/edit_word_hu.txt 
b/i18npool/source/breakiterator/data/edit_word_hu.txt
index 389ad2bacc13..a5e44d2732d9 100644
--- a/i18npool/source/breakiterator/data/edit_word_hu.txt
+++ b/i18npool/source/breakiterator/data/edit_word_hu.txt
@@ -81,7 +81,7 @@ $MidLetter          = [\p{Word_Break = MidLetter} 
$Symbols_hu];
 $MidNumLet          = [\p{Word_Break = MidNumLet}-[:name= FULL STOP:]];
 
 # $ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
-$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}-[:name= LOW LINE:]];
+$ExtendNumLet       = [\p{Word_Break = ExtendNumLet}-[:name= LOW LINE:]-[:name 
= NARROW NO-BREAK SPACE:]];
 
 ### END CUSTOMIZATION
 
@@ -180,16 +180,18 @@ $Numeric $ExFm* ($MidNum | $MidNumLet | $Single_Quote) 
$ExFm* $Numeric;
 $Katakana $ExFm*  $Katakana {400};
 
 # rule 13a/b
+# allow to select numbers with narrow no-break spaces as thousand separators
+$ExtendNumLetNNBSP = [\p{Word_Break = ExtendNumLet}];
 
 $ALetterPlus   $ExFm* $ExtendNumLet {200};    #  (13a)
 $Hebrew_Letter $ExFm* $ExtendNumLet {200};    #  (13a)
-$Numeric       $ExFm* $ExtendNumLet {100};    #  (13a)
+$Numeric       $ExFm* $ExtendNumLetNNBSP {100};    #  (13a)
 $Katakana      $ExFm* $ExtendNumLet {400};    #  (13a)
 $ExtendNumLet  $ExFm* $ExtendNumLet {200};    #  (13a)
 
 $ExtendNumLet  $ExFm* $ALetterPlus  {200};    #  (13b)
 $ExtendNumLet  $ExFm* $Hebrew_Letter {200};    #  (13b)
-$ExtendNumLet  $ExFm* $Numeric      {100};    #  (13b)
+$ExtendNumLetNNBSP  $ExFm* $Numeric      {100};    #  (13b)
 $ExtendNumLet  $ExFm* $Katakana     {400};    #  (13b)
 
 # rules 15 - 17

Reply via email to