Hello all, While trying to fix the eternal brokenness of the Kashida justification code, I found some low-hanging cleanups. See the attached patches.
Regards, Khaled -- Khaled Hosny Egyptian Arab
>From 7c237af83055a9b892e7349ee08af92b7202b09c Mon Sep 17 00:00:00 2001 From: Khaled Hosny <khaledho...@eglug.org> Date: Wed, 31 Aug 2011 23:58:51 +0200 Subject: [PATCH 1/3] Don't hard code joining type of Arabic characters The joining type is defined in the Unicode character database, so we should query that property instead of hard coding some code points. --- sw/source/core/text/porlay.cxx | 30 +++++------------------------- 1 files changed, 5 insertions(+), 25 deletions(-) diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx index 1ce9da3..95e638a 100644 --- a/sw/source/core/text/porlay.cxx +++ b/sw/source/core/text/porlay.cxx @@ -147,15 +147,10 @@ sal_Bool isFeChar ( xub_Unicode cCh ) { return ( cCh == 0x641 || ( cCh >= 0x6A1 && cCh <= 0x6A6 ) ); } + sal_Bool isTransparentChar ( xub_Unicode cCh ) { - return ( ( cCh >= 0x610 && cCh <= 0x61A ) || - ( cCh >= 0x64B && cCh <= 0x65E ) || - ( cCh == 0x670 ) || - ( cCh >= 0x6D6 && cCh <= 0x6DC ) || - ( cCh >= 0x6DF && cCh <= 0x6E4 ) || - ( cCh >= 0x6E7 && cCh <= 0x6E8 ) || - ( cCh >= 0x6EA && cCh <= 0x6ED )); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT; } /************************************************************************* @@ -178,28 +173,13 @@ sal_Bool lcl_IsLigature( xub_Unicode cCh, xub_Unicode cNextCh ) sal_Bool lcl_ConnectToPrev( xub_Unicode cCh, xub_Unicode cPrevCh ) { - // Alef, Dal, Thal, Reh, Zain, and Waw do not connect to the left - // Uh, there seem to be some more characters that are not connectable - // to the left. So we look for the characters that are actually connectable - // to the left. 
Here is the complete list of WH: - - // (hennerdrewes): - // added lam forms 0x06B5..0x06B8 - // added 0x6FA..0x6FC, according to unicode documentation, although not present in my fonts - // added heh goal 0x6C1 - sal_Bool bRet = 0x628 == cPrevCh || - ( 0x62A <= cPrevCh && cPrevCh <= 0x62E ) || - ( 0x633 <= cPrevCh && cPrevCh <= 0x647 ) || - 0x649 == cPrevCh || // Alef Maksura does connect !!! - 0x64A == cPrevCh || - ( 0x678 <= cPrevCh && cPrevCh <= 0x687 ) || - ( 0x69A <= cPrevCh && cPrevCh <= 0x6C1 ) || - ( 0x6C3 <= cPrevCh && cPrevCh <= 0x6D3 ) || - ( 0x6FA <= cPrevCh && cPrevCh <= 0x6FC ) ; + const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE ); + sal_Bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING; // check for ligatures cPrevChar + cChar if( bRet ) bRet = !lcl_IsLigature( cPrevCh, cCh ); + return bRet; } -- 1.7.0.4
>From 1bbbe902cfddd0a71683534a8bfec9947e419d37 Mon Sep 17 00:00:00 2001 From: Khaled Hosny <khaledho...@eglug.org> Date: Thu, 1 Sep 2011 00:25:51 +0200 Subject: [PATCH 2/3] Use Unicode Joining_Group Instead of hard coding code points for character groups, we can use Unicode Joining_Group that provide the same categorization. --- sw/source/core/text/porlay.cxx | 37 +++++++++++++++++-------------------- 1 files changed, 17 insertions(+), 20 deletions(-) diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx index 95e638a..4b8e2ad 100644 --- a/sw/source/core/text/porlay.cxx +++ b/sw/source/core/text/porlay.cxx @@ -70,82 +70,79 @@ using namespace i18n::ScriptType; sal_Bool isAlefChar ( xub_Unicode cCh ) { - return ( cCh == 0x622 || cCh == 0x623 || cCh == 0x625 || cCh == 0x627 || - cCh == 0x622 || cCh == 0x671 || cCh == 0x672 || cCh == 0x673 || cCh == 0x675 ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_ALEF; } sal_Bool isWawChar ( xub_Unicode cCh ) { - return ( cCh == 0x624 || cCh == 0x648 || cCh == 0x676 || cCh == 0x677 || - ( cCh >= 0x6C4 && cCh <= 0x6CB ) || cCh == 0x6CF ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_WAW; } sal_Bool isDalChar ( xub_Unicode cCh ) { - return ( cCh == 0x62F || cCh == 0x630 || cCh == 0x688 || cCh == 0x689 || cCh == 0x690 ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_DAL; } sal_Bool isRehChar ( xub_Unicode cCh ) { - return ( cCh == 0x631 || cCh == 0x632 || ( cCh >= 0x691 && cCh <= 0x699 )); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_REH; } sal_Bool isTehMarbutaChar ( xub_Unicode cCh ) { - return ( cCh == 0x629 || cCh == 0x6C0 ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_TEH_MARBUTA; } sal_Bool isBaaChar ( xub_Unicode cCh ) { - return ( cCh == 0x628 || cCh == 0x62A || cCh == 0x62B || cCh == 0x679 || cCh == 0x680 ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_BEH; } sal_Bool 
isYehChar ( xub_Unicode cCh ) { - return ( cCh == 0x626 || cCh == 0x649 || cCh == 0x64A || cCh == 0x678 || cCh == 0x6CC || - cCh == 0x6CE || cCh == 0x6D0 || cCh == 0x6D1 ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_YEH || + u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FARSI_YEH; } sal_Bool isSeenOrSadChar ( xub_Unicode cCh ) { - return ( ( cCh >= 0x633 && cCh <= 0x636 ) || ( cCh >= 0x69A && cCh <= 0x69E ) - || cCh == 0x6FA || cCh == 0x6FB ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SEEN || + u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SAD; } sal_Bool isHahChar ( xub_Unicode cCh ) { - return ( ( cCh >= 0x62C && cCh <= 0x62E ) || ( cCh >= 0x681 && cCh <= 0x687 ) - || cCh == 0x6BF ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_HAH; } sal_Bool isAinChar ( xub_Unicode cCh ) { - return ( cCh == 0x639 || cCh == 0x63A || cCh == 0x6A0 || cCh == 0x6FC ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_AIN; } sal_Bool isKafChar ( xub_Unicode cCh ) { - return ( cCh == 0x643 || ( cCh >= 0x6AC && cCh <= 0x6AE ) ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_KAF; } sal_Bool isLamChar ( xub_Unicode cCh ) { - return ( cCh == 0x644 || ( cCh >= 0x6B5 && cCh <= 0x6B8 ) ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_LAM; } sal_Bool isGafChar ( xub_Unicode cCh ) { - return ( cCh == 0x6A9 || cCh == 0x6AB ||( cCh >= 0x6AF && cCh <= 0x6B4 ) ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_GAF; } sal_Bool isQafChar ( xub_Unicode cCh ) { - return ( cCh == 0x642 || cCh == 0x6A7 || cCh == 0x6A8 ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_QAF; } sal_Bool isFeChar ( xub_Unicode cCh ) { - return ( cCh == 0x641 || ( cCh >= 0x6A1 && cCh <= 0x6A6 ) ); + return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FEH; } sal_Bool isTransparentChar ( xub_Unicode cCh ) -- 1.7.0.4
>From b5620697ae271154edb7216f1c619c7006930e50 Mon Sep 17 00:00:00 2001 From: Khaled Hosny <khaledho...@eglug.org> Date: Thu, 1 Sep 2011 01:21:12 +0200 Subject: [PATCH 3/3] Replace simple one line functions with macros --- sw/source/core/text/porlay.cxx | 94 +++++++-------------------------------- 1 files changed, 17 insertions(+), 77 deletions(-) diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx index 4b8e2ad..05e48f5 100644 --- a/sw/source/core/text/porlay.cxx +++ b/sw/source/core/text/porlay.cxx @@ -68,82 +68,22 @@ using namespace i18n::ScriptType; #include <unicode/ubidi.h> #include <i18nutil/unicode.hxx> //unicode::getUnicodeScriptType -sal_Bool isAlefChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_ALEF; -} - -sal_Bool isWawChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_WAW; -} - -sal_Bool isDalChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_DAL; -} - -sal_Bool isRehChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_REH; -} - -sal_Bool isTehMarbutaChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_TEH_MARBUTA; -} - -sal_Bool isBaaChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_BEH; -} - -sal_Bool isYehChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_YEH || - u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FARSI_YEH; -} - -sal_Bool isSeenOrSadChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SEEN || - u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SAD; -} - -sal_Bool isHahChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_HAH; -} - -sal_Bool isAinChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, 
UCHAR_JOINING_GROUP ) == U_JG_AIN; -} - -sal_Bool isKafChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_KAF; -} - -sal_Bool isLamChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_LAM; -} - -sal_Bool isGafChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_GAF; -} - -sal_Bool isQafChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_QAF; -} - -sal_Bool isFeChar ( xub_Unicode cCh ) -{ - return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FEH; -} +#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( c, UCHAR_JOINING_GROUP ) == U_JG_##g ) +#define isAinChar(c) IS_JOINING_GROUP(c, AIN) +#define isAlefChar(c) IS_JOINING_GROUP(c, ALEF) +#define isBaaChar(c) IS_JOINING_GROUP(c, BEH) +#define isDalChar(c) IS_JOINING_GROUP(c, DAL) +#define isFehChar(c) IS_JOINING_GROUP(c, FEH) +#define isGafChar(c) IS_JOINING_GROUP(c, GAF) +#define isHahChar(c) IS_JOINING_GROUP(c, HAH) +#define isKafChar(c) IS_JOINING_GROUP(c, KAF) +#define isLamChar(c) IS_JOINING_GROUP(c, LAM) +#define isQafChar(c) IS_JOINING_GROUP(c, QAF) +#define isRehChar(c) IS_JOINING_GROUP(c, REH) +#define isTehMarbutaChar(c) IS_JOINING_GROUP(c, TEH_MARBUTA) +#define isWawChar(c) IS_JOINING_GROUP(c, WAW) +#define isYehChar(c) (IS_JOINING_GROUP(c, YEH) || IS_JOINING_GROUP(c, FARSI_YEH)) +#define isSeenOrSadChar(c) (IS_JOINING_GROUP(c, SAD) || IS_JOINING_GROUP(c, SEEN)) sal_Bool isTransparentChar ( xub_Unicode cCh ) { @@ -1185,7 +1125,7 @@ void SwScriptInfo::InitScriptInfo( const SwTxtNode& rNode, sal_Bool bRTL ) // final form may appear in the middle of word (( isAinChar ( cCh ) || // Ain (dual joining) isQafChar ( cCh ) || // Qaf (dual joining) - isFeChar ( cCh ) ) // Feh (dual joining) + isFehChar ( cCh ) ) // Feh (dual joining) && nIdx == nWordLen - 1)) // only at end of word { OSL_ENSURE( 0 != cPrevCh, "No previous character" 
); -- 1.7.0.4
_______________________________________________ LibreOffice mailing list LibreOffice@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice