Hello all,

While trying to fix the eternal brokenness of the Kashida justification code,
I found some low-hanging cleanups. See the attached patches.

Regards,
 Khaled

-- 
 Khaled Hosny
 Egyptian
 Arab
From 7c237af83055a9b892e7349ee08af92b7202b09c Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaledho...@eglug.org>
Date: Wed, 31 Aug 2011 23:58:51 +0200
Subject: [PATCH 1/3] Don't hard code joining type of Arabic characters

The joining type is defined in the Unicode Character Database, so we
should query that property instead of hard-coding a list of code points.
---
 sw/source/core/text/porlay.cxx |   30 +++++-------------------------
 1 files changed, 5 insertions(+), 25 deletions(-)

diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 1ce9da3..95e638a 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -147,15 +147,10 @@ sal_Bool isFeChar ( xub_Unicode cCh )
 {
    return ( cCh == 0x641 || ( cCh >= 0x6A1 && cCh <= 0x6A6 ) );
 }
+
 sal_Bool isTransparentChar ( xub_Unicode cCh )
 {
-    return ( ( cCh >= 0x610 && cCh <= 0x61A ) ||
-            ( cCh >= 0x64B && cCh <= 0x65E ) ||
-            ( cCh == 0x670 ) ||
-            ( cCh >= 0x6D6 && cCh <= 0x6DC ) ||
-            ( cCh >= 0x6DF && cCh <= 0x6E4 ) ||
-            ( cCh >= 0x6E7 && cCh <= 0x6E8 ) ||
-            ( cCh >= 0x6EA && cCh <= 0x6ED ));
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT;
 }
 
 /*************************************************************************
@@ -178,28 +173,13 @@ sal_Bool lcl_IsLigature( xub_Unicode cCh, xub_Unicode cNextCh )
 
 sal_Bool lcl_ConnectToPrev( xub_Unicode cCh, xub_Unicode cPrevCh )
 {
-    // Alef, Dal, Thal, Reh, Zain, and Waw do not connect to the left
-    // Uh, there seem to be some more characters that are not connectable
-    // to the left. So we look for the characters that are actually connectable
-    // to the left. Here is the complete list of WH:
-
-    // (hennerdrewes):
-    // added lam forms 0x06B5..0x06B8
-    // added 0x6FA..0x6FC, according to unicode documentation, although not present in my fonts
-    // added heh goal 0x6C1
-    sal_Bool bRet = 0x628 == cPrevCh ||
-                    ( 0x62A <= cPrevCh && cPrevCh <= 0x62E ) ||
-                  ( 0x633 <= cPrevCh && cPrevCh <= 0x647 ) ||
-                      0x649 == cPrevCh || // Alef Maksura does connect !!!
-                      0x64A == cPrevCh ||
-                    ( 0x678 <= cPrevCh && cPrevCh <= 0x687 ) ||
-                  ( 0x69A <= cPrevCh && cPrevCh <= 0x6C1 ) ||
-                  ( 0x6C3 <= cPrevCh && cPrevCh <= 0x6D3 ) ||
-                  ( 0x6FA <= cPrevCh && cPrevCh <= 0x6FC )  ;
+    const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE );
+    sal_Bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
 
     // check for ligatures cPrevChar + cChar
     if( bRet )
         bRet = !lcl_IsLigature( cPrevCh, cCh );
+
     return bRet;
 }
 
-- 
1.7.0.4

From 1bbbe902cfddd0a71683534a8bfec9947e419d37 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaledho...@eglug.org>
Date: Thu, 1 Sep 2011 00:25:51 +0200
Subject: [PATCH 2/3] Use Unicode Joining_Group

Instead of hard-coding code points for character groups, we can use the
Unicode Joining_Group property, which provides the same categorization.
---
 sw/source/core/text/porlay.cxx |   37 +++++++++++++++++--------------------
 1 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 95e638a..4b8e2ad 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -70,82 +70,79 @@ using namespace i18n::ScriptType;
 
 sal_Bool isAlefChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x622 || cCh == 0x623 || cCh == 0x625 || cCh == 0x627 ||
-           cCh == 0x622 || cCh == 0x671 || cCh == 0x672 || cCh == 0x673 || cCh == 0x675 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_ALEF;
 }
 
 sal_Bool isWawChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x624 || cCh == 0x648 || cCh == 0x676 || cCh == 0x677 ||
-           ( cCh >= 0x6C4 &&  cCh <= 0x6CB ) || cCh == 0x6CF );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_WAW;
 }
 
 sal_Bool isDalChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x62F || cCh == 0x630 || cCh == 0x688 || cCh == 0x689 || cCh == 0x690 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_DAL;
 }
 
 sal_Bool isRehChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x631 || cCh == 0x632 || ( cCh >= 0x691 && cCh <= 0x699 ));
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_REH;
 }
 
 sal_Bool isTehMarbutaChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x629 || cCh == 0x6C0 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_TEH_MARBUTA;
 }
 
 sal_Bool isBaaChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x628 || cCh == 0x62A || cCh == 0x62B || cCh == 0x679 || cCh == 0x680 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_BEH;
 }
 
 sal_Bool isYehChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x626 || cCh == 0x649 || cCh == 0x64A || cCh == 0x678 || cCh == 0x6CC ||
-       cCh == 0x6CE || cCh == 0x6D0 || cCh == 0x6D1 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_YEH ||
+           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FARSI_YEH;
 }
 
 sal_Bool isSeenOrSadChar ( xub_Unicode cCh )
 {
-   return ( ( cCh >= 0x633 && cCh <= 0x636 ) || ( cCh >= 0x69A && cCh <= 0x69E )
-           || cCh == 0x6FA || cCh == 0x6FB );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SEEN ||
+           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SAD;
 }
 
 sal_Bool isHahChar ( xub_Unicode cCh )
 {
-   return ( ( cCh >= 0x62C && cCh <= 0x62E ) || ( cCh >= 0x681 && cCh <= 0x687 )
-           || cCh == 0x6BF );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_HAH;
 }
 
 sal_Bool isAinChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x639 || cCh == 0x63A || cCh == 0x6A0 || cCh == 0x6FC );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_AIN;
 }
 
 sal_Bool isKafChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x643 || ( cCh >= 0x6AC && cCh <= 0x6AE ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_KAF;
 }
 
 sal_Bool isLamChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x644 || ( cCh >= 0x6B5 && cCh <= 0x6B8 ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_LAM;
 }
 
 sal_Bool isGafChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x6A9 || cCh == 0x6AB ||( cCh >= 0x6AF && cCh <= 0x6B4 ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_GAF;
 }
 
 sal_Bool isQafChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x642 || cCh == 0x6A7 || cCh == 0x6A8  );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_QAF;
 }
 
 sal_Bool isFeChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x641 || ( cCh >= 0x6A1 && cCh <= 0x6A6 ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FEH;
 }
 
 sal_Bool isTransparentChar ( xub_Unicode cCh )
-- 
1.7.0.4

From b5620697ae271154edb7216f1c619c7006930e50 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaledho...@eglug.org>
Date: Thu, 1 Sep 2011 01:21:12 +0200
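Subject: [PATCH 3/3] Replace simple one line functions with macros

The joining group predicates are now one-line wrappers around
u_getIntPropertyValue(), so define them as macros on top of a common
IS_JOINING_GROUP helper. isFeChar is renamed to isFehChar to match the
name of the Unicode joining group (FEH), and its only caller is updated.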
---
 sw/source/core/text/porlay.cxx |   94 +++++++--------------------------------
 1 files changed, 17 insertions(+), 77 deletions(-)

diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 4b8e2ad..05e48f5 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -68,82 +68,22 @@ using namespace i18n::ScriptType;
 #include <unicode/ubidi.h>
 #include <i18nutil/unicode.hxx>  //unicode::getUnicodeScriptType
 
-sal_Bool isAlefChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_ALEF;
-}
-
-sal_Bool isWawChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_WAW;
-}
-
-sal_Bool isDalChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_DAL;
-}
-
-sal_Bool isRehChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_REH;
-}
-
-sal_Bool isTehMarbutaChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_TEH_MARBUTA;
-}
-
-sal_Bool isBaaChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_BEH;
-}
-
-sal_Bool isYehChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_YEH ||
-           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FARSI_YEH;
-}
-
-sal_Bool isSeenOrSadChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SEEN ||
-           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SAD;
-}
-
-sal_Bool isHahChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_HAH;
-}
-
-sal_Bool isAinChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_AIN;
-}
-
-sal_Bool isKafChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_KAF;
-}
-
-sal_Bool isLamChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_LAM;
-}
-
-sal_Bool isGafChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_GAF;
-}
-
-sal_Bool isQafChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_QAF;
-}
-
-sal_Bool isFeChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FEH;
-}
+#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( c, UCHAR_JOINING_GROUP ) == U_JG_##g )
+#define isAinChar(c)        IS_JOINING_GROUP(c, AIN)
+#define isAlefChar(c)       IS_JOINING_GROUP(c, ALEF)
+#define isBaaChar(c)        IS_JOINING_GROUP(c, BEH)
+#define isDalChar(c)        IS_JOINING_GROUP(c, DAL)
+#define isFehChar(c)        IS_JOINING_GROUP(c, FEH)
+#define isGafChar(c)        IS_JOINING_GROUP(c, GAF)
+#define isHahChar(c)        IS_JOINING_GROUP(c, HAH)
+#define isKafChar(c)        IS_JOINING_GROUP(c, KAF)
+#define isLamChar(c)        IS_JOINING_GROUP(c, LAM)
+#define isQafChar(c)        IS_JOINING_GROUP(c, QAF)
+#define isRehChar(c)        IS_JOINING_GROUP(c, REH)
+#define isTehMarbutaChar(c) IS_JOINING_GROUP(c, TEH_MARBUTA)
+#define isWawChar(c)        IS_JOINING_GROUP(c, WAW)
+#define isYehChar(c)        (IS_JOINING_GROUP(c, YEH) || IS_JOINING_GROUP(c, FARSI_YEH))
+#define isSeenOrSadChar(c)  (IS_JOINING_GROUP(c, SAD) || IS_JOINING_GROUP(c, SEEN))
 
 sal_Bool isTransparentChar ( xub_Unicode cCh )
 {
@@ -1185,7 +1125,7 @@ void SwScriptInfo::InitScriptInfo( const SwTxtNode& rNode, sal_Bool bRTL )
                                                     // final form may appear in the middle of word
                              (( isAinChar ( cCh ) ||  // Ain (dual joining)
                                 isQafChar ( cCh ) ||  // Qaf (dual joining)
-                                isFeChar  ( cCh ) )   // Feh (dual joining)
+                                isFehChar ( cCh ) )   // Feh (dual joining)
                                 && nIdx == nWordLen - 1))  // only at end of word
                         {
                             OSL_ENSURE( 0 != cPrevCh, "No previous character" );
-- 
1.7.0.4

_______________________________________________
LibreOffice mailing list
LibreOffice@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice

Reply via email to