Hello all,
While trying to fix the eternal brokenness of the Kashida justification code,
I found some low-hanging cleanups. See the attached patches.
Regards,
 Khaled
-- 
 Khaled Hosny
 Egyptian
 Arab
From 7c237af83055a9b892e7349ee08af92b7202b09c Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaledhosny@eglug.org>
Date: Wed, 31 Aug 2011 23:58:51 +0200
Subject: [PATCH 1/3] Don't hard code joining type of Arabic characters
The joining type is defined in the Unicode character database, so we
should query that property instead of hard coding some code points.
---
 sw/source/core/text/porlay.cxx |   30 +++++-------------------------
 1 files changed, 5 insertions(+), 25 deletions(-)
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 1ce9da3..95e638a 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -147,15 +147,10 @@ sal_Bool isFeChar ( xub_Unicode cCh )
 {
    return ( cCh == 0x641 || ( cCh >= 0x6A1 && cCh <= 0x6A6 ) );
 }
+
 sal_Bool isTransparentChar ( xub_Unicode cCh )
 {
-    return ( ( cCh >= 0x610 && cCh <= 0x61A ) ||
-            ( cCh >= 0x64B && cCh <= 0x65E ) ||
-            ( cCh == 0x670 ) ||
-            ( cCh >= 0x6D6 && cCh <= 0x6DC ) ||
-            ( cCh >= 0x6DF && cCh <= 0x6E4 ) ||
-            ( cCh >= 0x6E7 && cCh <= 0x6E8 ) ||
-            ( cCh >= 0x6EA && cCh <= 0x6ED ));
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_TYPE ) == U_JT_TRANSPARENT;
 }
 
 /*************************************************************************
@@ -178,28 +173,13 @@ sal_Bool lcl_IsLigature( xub_Unicode cCh, xub_Unicode cNextCh )
 
 sal_Bool lcl_ConnectToPrev( xub_Unicode cCh, xub_Unicode cPrevCh )
 {
-    // Alef, Dal, Thal, Reh, Zain, and Waw do not connect to the left
-    // Uh, there seem to be some more characters that are not connectable
-    // to the left. So we look for the characters that are actually connectable
-    // to the left. Here is the complete list of WH:
-
-    // (hennerdrewes):
-    // added lam forms 0x06B5..0x06B8
-    // added 0x6FA..0x6FC, according to unicode documentation, although not present in my fonts
-    // added heh goal 0x6C1
-    sal_Bool bRet = 0x628 == cPrevCh ||
-                    ( 0x62A <= cPrevCh && cPrevCh <= 0x62E ) ||
-                  ( 0x633 <= cPrevCh && cPrevCh <= 0x647 ) ||
-                      0x649 == cPrevCh || // Alef Maksura does connect !!!
-                      0x64A == cPrevCh ||
-                    ( 0x678 <= cPrevCh && cPrevCh <= 0x687 ) ||
-                  ( 0x69A <= cPrevCh && cPrevCh <= 0x6C1 ) ||
-                  ( 0x6C3 <= cPrevCh && cPrevCh <= 0x6D3 ) ||
-                  ( 0x6FA <= cPrevCh && cPrevCh <= 0x6FC )  ;
+    const int32_t nJoiningType = u_getIntPropertyValue( cPrevCh, UCHAR_JOINING_TYPE );
+    sal_Bool bRet = nJoiningType != U_JT_RIGHT_JOINING && nJoiningType != U_JT_NON_JOINING;
 
     // check for ligatures cPrevChar + cChar
     if( bRet )
         bRet = !lcl_IsLigature( cPrevCh, cCh );
+
     return bRet;
 }
 
-- 
1.7.0.4
From 1bbbe902cfddd0a71683534a8bfec9947e419d37 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaledhosny@eglug.org>
Date: Thu, 1 Sep 2011 00:25:51 +0200
Subject: [PATCH 2/3] Use Unicode Joining_Group
Instead of hard-coding code points for character groups, we can use the
Unicode Joining_Group property, which provides the same categorization.
---
 sw/source/core/text/porlay.cxx |   37 +++++++++++++++++--------------------
 1 files changed, 17 insertions(+), 20 deletions(-)
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 95e638a..4b8e2ad 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -70,82 +70,79 @@ using namespace i18n::ScriptType;
 
 sal_Bool isAlefChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x622 || cCh == 0x623 || cCh == 0x625 || cCh == 0x627 ||
-           cCh == 0x622 || cCh == 0x671 || cCh == 0x672 || cCh == 0x673 || cCh == 0x675 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_ALEF;
 }
 
 sal_Bool isWawChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x624 || cCh == 0x648 || cCh == 0x676 || cCh == 0x677 ||
-           ( cCh >= 0x6C4 &&  cCh <= 0x6CB ) || cCh == 0x6CF );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_WAW;
 }
 
 sal_Bool isDalChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x62F || cCh == 0x630 || cCh == 0x688 || cCh == 0x689 || cCh == 0x690 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_DAL;
 }
 
 sal_Bool isRehChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x631 || cCh == 0x632 || ( cCh >= 0x691 && cCh <= 0x699 ));
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_REH;
 }
 
 sal_Bool isTehMarbutaChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x629 || cCh == 0x6C0 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_TEH_MARBUTA;
 }
 
 sal_Bool isBaaChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x628 || cCh == 0x62A || cCh == 0x62B || cCh == 0x679 || cCh == 0x680 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_BEH;
 }
 
 sal_Bool isYehChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x626 || cCh == 0x649 || cCh == 0x64A || cCh == 0x678 || cCh == 0x6CC ||
-       cCh == 0x6CE || cCh == 0x6D0 || cCh == 0x6D1 );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_YEH ||
+           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FARSI_YEH;
 }
 
 sal_Bool isSeenOrSadChar ( xub_Unicode cCh )
 {
-   return ( ( cCh >= 0x633 && cCh <= 0x636 ) || ( cCh >= 0x69A && cCh <= 0x69E )
-           || cCh == 0x6FA || cCh == 0x6FB );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SEEN ||
+           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SAD;
 }
 
 sal_Bool isHahChar ( xub_Unicode cCh )
 {
-   return ( ( cCh >= 0x62C && cCh <= 0x62E ) || ( cCh >= 0x681 && cCh <= 0x687 )
-           || cCh == 0x6BF );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_HAH;
 }
 
 sal_Bool isAinChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x639 || cCh == 0x63A || cCh == 0x6A0 || cCh == 0x6FC );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_AIN;
 }
 
 sal_Bool isKafChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x643 || ( cCh >= 0x6AC && cCh <= 0x6AE ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_KAF;
 }
 
 sal_Bool isLamChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x644 || ( cCh >= 0x6B5 && cCh <= 0x6B8 ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_LAM;
 }
 
 sal_Bool isGafChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x6A9 || cCh == 0x6AB ||( cCh >= 0x6AF && cCh <= 0x6B4 ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_GAF;
 }
 
 sal_Bool isQafChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x642 || cCh == 0x6A7 || cCh == 0x6A8  );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_QAF;
 }
 
 sal_Bool isFeChar ( xub_Unicode cCh )
 {
-   return ( cCh == 0x641 || ( cCh >= 0x6A1 && cCh <= 0x6A6 ) );
+    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FEH;
 }
 
 sal_Bool isTransparentChar ( xub_Unicode cCh )
-- 
1.7.0.4
From b5620697ae271154edb7216f1c619c7006930e50 Mon Sep 17 00:00:00 2001
From: Khaled Hosny <khaledhosny@eglug.org>
Date: Thu, 1 Sep 2011 01:21:12 +0200
Subject: [PATCH 3/3] Replace simple one line functions with macros
---
 sw/source/core/text/porlay.cxx |   94 +++++++--------------------------------
 1 files changed, 17 insertions(+), 77 deletions(-)
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 4b8e2ad..05e48f5 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -68,82 +68,22 @@ using namespace i18n::ScriptType;
 #include <unicode/ubidi.h>
 #include <i18nutil/unicode.hxx>  //unicode::getUnicodeScriptType
 
-sal_Bool isAlefChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_ALEF;
-}
-
-sal_Bool isWawChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_WAW;
-}
-
-sal_Bool isDalChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_DAL;
-}
-
-sal_Bool isRehChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_REH;
-}
-
-sal_Bool isTehMarbutaChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_TEH_MARBUTA;
-}
-
-sal_Bool isBaaChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_BEH;
-}
-
-sal_Bool isYehChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_YEH ||
-           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FARSI_YEH;
-}
-
-sal_Bool isSeenOrSadChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SEEN ||
-           u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_SAD;
-}
-
-sal_Bool isHahChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_HAH;
-}
-
-sal_Bool isAinChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_AIN;
-}
-
-sal_Bool isKafChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_KAF;
-}
-
-sal_Bool isLamChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_LAM;
-}
-
-sal_Bool isGafChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_GAF;
-}
-
-sal_Bool isQafChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_QAF;
-}
-
-sal_Bool isFeChar ( xub_Unicode cCh )
-{
-    return u_getIntPropertyValue( cCh, UCHAR_JOINING_GROUP ) == U_JG_FEH;
-}
+#define IS_JOINING_GROUP(c, g) ( u_getIntPropertyValue( c, UCHAR_JOINING_GROUP ) == U_JG_##g )
+#define isAinChar(c)        IS_JOINING_GROUP(c, AIN)
+#define isAlefChar(c)       IS_JOINING_GROUP(c, ALEF)
+#define isBaaChar(c)        IS_JOINING_GROUP(c, BEH)
+#define isDalChar(c)        IS_JOINING_GROUP(c, DAL)
+#define isFehChar(c)        IS_JOINING_GROUP(c, FEH)
+#define isGafChar(c)        IS_JOINING_GROUP(c, GAF)
+#define isHahChar(c)        IS_JOINING_GROUP(c, HAH)
+#define isKafChar(c)        IS_JOINING_GROUP(c, KAF)
+#define isLamChar(c)        IS_JOINING_GROUP(c, LAM)
+#define isQafChar(c)        IS_JOINING_GROUP(c, QAF)
+#define isRehChar(c)        IS_JOINING_GROUP(c, REH)
+#define isTehMarbutaChar(c) IS_JOINING_GROUP(c, TEH_MARBUTA)
+#define isWawChar(c)        IS_JOINING_GROUP(c, WAW)
+#define isYehChar(c)        (IS_JOINING_GROUP(c, YEH) || IS_JOINING_GROUP(c, FARSI_YEH))
+#define isSeenOrSadChar(c)  (IS_JOINING_GROUP(c, SAD) || IS_JOINING_GROUP(c, SEEN))
 
 sal_Bool isTransparentChar ( xub_Unicode cCh )
 {
@@ -1185,7 +1125,7 @@ void SwScriptInfo::InitScriptInfo( const SwTxtNode& rNode, sal_Bool bRTL )
                                                     // final form may appear in the middle of word
                              (( isAinChar ( cCh ) ||  // Ain (dual joining)
                                 isQafChar ( cCh ) ||  // Qaf (dual joining)
-                                isFeChar  ( cCh ) )   // Feh (dual joining)
+                                isFehChar ( cCh ) )   // Feh (dual joining)
                                 && nIdx == nWordLen - 1))  // only at end of word
                         {
                             OSL_ENSURE( 0 != cPrevCh, "No previous character" );
-- 
1.7.0.4
Context
- [Libreoffice] [PATCH] some cleanup of Kashida justification code · Khaled Hosny
 
  Privacy Policy |
  
Impressum (Legal Info) |
  
Copyright information: Unless otherwise specified, all text and images
  on this website are licensed under the
  Creative Commons Attribution-Share Alike 3.0 License.
  This does not include the source code of LibreOffice, which is
  licensed under the Mozilla Public License (MPLv2).
  "LibreOffice" and "The Document Foundation" are
  registered trademarks of their corresponding registered owners or are
  in actual use as trademarks in one or more countries. Their respective
  logos and icons are also subject to international copyright laws. Use
  thereof is explained in our trademark policy.