Hi all, I am Isam Bayazidi, from the Arabeyes team ( www.arabeyes.org ). We work on Arabic language support issues in Open Source programs and systems. We have had our eye on LyX's Arabic support for some time. We are glad that LyX supports Arabic; nevertheless, the faulty Arabic shaping has been a disappointment.
There is some faulty shaping, and there are some special cases for letters that should be considered when doing the shaping for Arabic. I have been working on a small patch to fix this issue. The patch does the following: - fix shape values in src/encoding.C - add a new function to identify special characters (those that cannot be connected from the left) (function in src/encoding.C, prototype in encoding.h) - use is_arabic_special in src/text.C to fix shaping faults - fix lib/kbd/arabic.kmap to make it compatible with the most commonly used keyboard layout in the Arab world - fix the Arabic language code in lib/languages I really hope that the developers check this patch; no code unrelated to Arabic has been touched. We would be glad to have this patch merged into the main LyX tree. It was made against 1.2.1; I can make it against CVS if needed. Please CC " developer at arabeyes dot com ", it is the mailing list of developers in the Arabeyes project. Yours, Isam Bayazidi
--- lyx-1.2.1/src/encoding.C Mon Jun 17 13:35:12 2002 +++ lyx-1.2.1-Arabic/src/encoding.C Sat Nov 16 11:55:11 2002 @@ -102,24 +102,24 @@ unsigned char arabic_table2[63][4] = { {0x41, 0x41, 0x41, 0x41}, // 0xc1 = hamza - {0x42, 0xa1, 0x42, 0x42}, // 0xc2 = ligature madda on alef - {0x43, 0xa2, 0x43, 0x43}, // 0xc3 = ligature hamza on alef - {0x44, 0xa3, 0x44, 0x44}, // 0xc4 = ligature hamza on waw - {0x45, 0xa4, 0x45, 0x45}, // 0xc5 = ligature hamza under alef - {0xf9, 0xf9, 0xf8, 0xa0}, // 0xc6 = ligature hamza on ya - {0x47, 0xa5, 0xa5, 0xa5}, // 0xc7 = alef + {0x42, 0xa1, 0x42, 0xa1}, // 0xc2 = ligature madda on alef + {0x43, 0xa2, 0x43, 0xa2}, // 0xc3 = ligature hamza on alef + {0x44, 0xa3, 0x44, 0xa3}, // 0xc4 = ligature hamza on waw + {0x45, 0xa4, 0x45, 0xa4}, // 0xc5 = ligature hamza under alef + {0x46, 0xf9, 0xf8, 0xa0}, // 0xc6 = ligature hamza on ya + {0x47, 0xa5, 0x47, 0xa5}, // 0xc7 = alef {0x48, 0xae, 0xac, 0xad}, // 0xc8 = baa - {0x49, 0xb1, 0xaf, 0xb0}, // 0xc9 = taa marbuta + {0x49, 0xb1, 0x49, 0xb1}, // 0xc9 = taa marbuta {0x4a, 0xb4, 0xb2, 0xb3}, // 0xca = taa {0x4b, 0xb7, 0xb5, 0xb6}, // 0xcb = thaa {0x4c, 0xba, 0xb8, 0xb9}, // 0xcc = jeem {0x4d, 0xbd, 0xbb, 0xbc}, // 0xcd = haa {0x4e, 0xc0, 0xbe, 0xbf}, // 0xce = khaa - {0x4f, 0xa6, 0xa6, 0xa6}, // 0xcf = dal + {0x4f, 0xa6, 0x4f, 0xa6}, // 0xcf = dal - {0x50, 0xa7, 0xa7, 0xa7}, // 0xd0 = thal - {0x51, 0xa8, 0xa8, 0xa8}, // 0xd1 = ra - {0x52, 0xa9, 0xa9, 0xa9}, // 0xd2 = zain + {0x50, 0xa7, 0x50, 0xa7}, // 0xd0 = thal + {0x51, 0xa8, 0x51, 0xa8}, // 0xd1 = ra + {0x52, 0xa9, 0x52, 0xa9}, // 0xd2 = zain {0x53, 0xc3, 0xc1, 0xc2}, // 0xd3 = seen {0x54, 0xc6, 0xc4, 0xc5}, // 0xd4 = sheen {0x55, 0xc9, 0xc7, 0xc8}, // 0xd5 = sad @@ -142,8 +142,8 @@ {0x65, 0xe7, 0xe5, 0xe6}, // 0xe5 = meem {0x66, 0xea, 0xe8, 0xe9}, // 0xe6 = noon {0x67, 0xed, 0xeb, 0xec}, // 0xe7 = ha - {0x68, 0xaa, 0xaa, 0xaa}, // 0xe8 = waw - {0x69, 0xab, 0xab, 0xab}, // 0xe9 = alef maksura + {0x68, 0xaa, 0x68, 0xaa}, // 0xe8 = waw + 
{0x69, 0xab, 0x69, 0xab}, // 0xe9 = alef maksura {0x6a, 0xf0, 0xee, 0xef}, // 0xea = ya {0x6b, 0x6b, 0x6b, 0x6b}, // 0xeb = fathatan {0x6c, 0x6c, 0x6c, 0x6c}, // 0xec = dammatan @@ -252,6 +252,19 @@ } +// Special Arabic letters are ones that do not get connected from left +// they are hamza, alef_madda, alef_hamza, waw_hamza, alef_hamza_under, +// alef, tah_marbota, dal, thal, rah, zai, wow, alef_maksoura + +bool Encodings::is_arabic_special(unsigned char c) +{ + return (c >= 0xc1 && c <= 0xc5) || + c == 0xc7 || c == 0xc9 || + c == 0xcf || c == 0xe8 || + (c >= 0xd0 && c <= 0xd2) || + c == 0xe9; +} + bool Encodings::IsComposeChar_arabic(unsigned char c) { return c >= 0xeb && c <= 0xf2; --- lyx-1.2.1/src/encoding.h Thu Mar 21 19:25:09 2002 +++ lyx-1.2.1-Arabic/src/encoding.h Sat Nov 16 11:28:21 2002 @@ -90,6 +90,9 @@ bool IsComposeChar_arabic(unsigned char c); /// static + bool is_arabic_special(unsigned char c); + /// + static bool is_arabic(unsigned char c); /// static --- lyx-1.2.1/src/text.C Fri Jul 19 19:46:47 2002 +++ lyx-1.2.1-Arabic/src/text.C Sat Nov 16 11:51:08 2002 @@ -150,12 +150,14 @@ } if (Encodings::is_arabic(next_char)) { - if (Encodings::is_arabic(prev_char)) + if (Encodings::is_arabic(prev_char) && + !Encodings::is_arabic_special(prev_char)) return Encodings::TransformChar(c, Encodings::FORM_MEDIAL); else return Encodings::TransformChar(c, Encodings::FORM_INITIAL); } else { - if (Encodings::is_arabic(prev_char)) + if (Encodings::is_arabic(prev_char) && + !Encodings::is_arabic_special(prev_char)) return Encodings::TransformChar(c, Encodings::FORM_FINAL); else return Encodings::TransformChar(c, Encodings::FORM_ISOLATED); --- lyx-1.2.1/lib/languages Wed Jul 3 15:52:51 2002 +++ lyx-1.2.1-Arabic/lib/languages Sat Nov 16 11:36:41 2002 @@ -1,7 +1,7 @@ # name babel name GUI name RTL? 
encoding code latex options afrikaans afrikaans "Afrikaans" false iso8859-1 af_ZA "" american american "American" false iso8859-1 en_US "" -arabic arabic "Arabic" true iso8859-6 ar_SA "" +arabic arabic "Arabic" true iso8859-6 ar "" austrian austrian "Austrian" false iso8859-1 de_AU "" bahasa bahasa "Bahasa" false iso8859-1 in_ID "" belarusian belarusian "Belarusian" false cp1251 be "" --- lyx-1.2.1/lib/kbd/arabic.kmap Mon Jul 17 16:41:20 2000 +++ lyx-1.2.1-Arabic/lib/kbd/arabic.kmap Mon Apr 1 14:24:35 2002 @@ -3,6 +3,7 @@ # # Generated automatically from kikbd map by Adil Alsaid <[EMAIL PROTECTED]> # +# reviewed and fixed by Isam Bayazidi <[EMAIL PROTECTED]>, Mohamed Kebdani <[EMAIL PROTECTED]> \kmap q Ö \kmap w Õ @@ -27,7 +28,7 @@ \kmap x Á \kmap c Ä \kmap v Ñ -\kmap b Ð +\kmap b äÇ \kmap n é \kmap m É \kmap ; ã @@ -35,44 +36,41 @@ \kmap "," è \kmap . Ò \kmap / Ø -\kmap ` ; +\kmap ` Ð \kmap [ Ì \kmap ] Ï -\kmap Q î -\kmap W ë -\kmap E ï -\kmap R ì -#\kmap T ¤ -\kmap T ~ +# shifted keyboard + +\kmap Q ? +\kmap W ? +\kmap E ? +\kmap R ? +\kmap T äÅ \kmap Y Å -#\kmap U ~ -\kmap U Ù +\kmap U ` \kmap I ç -\kmap O Î +\kmap O ? \kmap P » -\kmap A ð -\kmap S í +\kmap A ? +\kmap S ? \kmap D [ \kmap F ] -#\kmap G £ -\kmap G ~ +\kmap G äà \kmap H à \kmap J à -#\kmap K º -\kmap K ~ +\kmap K ¬ \kmap L / -\kmap Z ñ -\kmap X ò +\kmap Z ~ +\kmap X ? \kmap C { \kmap V } -#\kmap B ¢ -\kmap B ~ +\kmap B ä \kmap N  -#\kmap M º -\kmap M ~ +\kmap M ' \kmap < "," \kmap > . \kmap ? ¿ \kmap { < \kmap } > +\kmap ~ ?