Dear all, attached is a patch to libsword that adds an Arabic vowel filter.
Essentially this is a monkey patch - I aped everything relating to the hebrewpoints filter without necessarily understanding it all. After all, I have little or no clue about programming. The filter itself - yes, I think I knew what I was doing there: right now this filter is only geared up to catch two ranges which apply to Farsi and Arabic. I find the wider Arabic script scene quite infuriatingly complicated, and I also struggle to get full coverage in my fonts despite installing everything I can get. So Central Asian ranges, Urdu etc. are left out. I guess it is a matter of slowly adding what becomes relevant. But then I thought the filter alone is not good enough; it needs to get integrated into the general source, into build environments etc. So I grepped the source for all mentions of hebrewpoints (to catch the files) and HebrewPoints (to catch the classes etc.) and inserted the same for arabicpoints wherever it seemed to apply. I was not sure how much of all this is automatically created. Patch attached. Peter
Index: src/mgr/swmgr.cpp
===================================================================
--- src/mgr/swmgr.cpp	(revision 2211)
+++ src/mgr/swmgr.cpp	(working copy)
@@ -76,6 +76,7 @@
 #include <utf8greekaccents.h>
 #include <utf8cantillation.h>
 #include <utf8hebrewpoints.h>
+#include <utf8arabicpoints.h>
 #include <greeklexattribs.h>
 #include <swfiltermgr.h>
 #include <swcipher.h>
@@ -214,6 +215,10 @@
 	optionFilters.insert(OptionFilterMap::value_type("UTF8HebrewPoints", tmpFilter));
 	cleanupFilters.push_back(tmpFilter);
 
+	tmpFilter = new UTF8ArabicPoints();
+	optionFilters.insert(OptionFilterMap::value_type("UTF8ArabicPoints", tmpFilter));
+	cleanupFilters.push_back(tmpFilter);
+
 	tmpFilter = new UTF8Cantillation();
 	optionFilters.insert(OptionFilterMap::value_type("UTF8Cantillation", tmpFilter));
 	cleanupFilters.push_back(tmpFilter);
Index: src/modules/filters/utf8arabicpoints.cpp
===================================================================
--- src/modules/filters/utf8arabicpoints.cpp	(revision 0)
+++ src/modules/filters/utf8arabicpoints.cpp	(revision 0)
@@ -0,0 +1,71 @@
+/******************************************************************************
+ *
+ * UTF8ArabicPoints - SWFilter descendant to remove UTF-8 Arabic vowel points
+ *
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <utf8arabicpoints.h>
+
+SWORD_NAMESPACE_START
+
+const char oName[] = "Arabic Vowel Points";
+const char oTip[] = "Toggles Arabic Vowel Points";
+
+const SWBuf choices[3] = {"On", "Off", ""};
+const StringList oValues(&choices[0], &choices[2]);
+
+/**
+ * Returns the length in bytes of the Arabic vowel mark whose UTF-8 encoding
+ * starts at p, or 0 if no such mark starts there.  p must point into a
+ * NUL-terminated buffer; the terminator stops every look-ahead.
+ */
+static int arabicPointLength(const unsigned char *p) {
+	// U+064B..U+0652 (fathatan..sukun) encode as D9 8B..92.
+	if (p[0] == 0xD9) {
+		return (p[1] >= 0x8B && p[1] <= 0x92) ? 2 : 0;
+	}
+	if (p[0] == 0xEF) {
+		// U+FC5E..U+FC63 (presentation forms A) encode as EF B1 9E..A3.
+		if (p[1] == 0xB1 && p[2] >= 0x9E && p[2] <= 0xA3) return 3;
+		// U+FE70..U+FE7F (presentation forms B) encode as EF B9 B0..BF.
+		if (p[1] == 0xB9 && p[2] >= 0xB0 && p[2] <= 0xBF) return 3;
+	}
+	return 0;
+}
+
+/** Constructs the filter and selects the "On" option value by default. */
+UTF8ArabicPoints::UTF8ArabicPoints() : SWOptionFilter(oName, oTip, &oValues) {
+	setOptionValue("On");
+}
+
+UTF8ArabicPoints::~UTF8ArabicPoints() {}
+
+
+/**
+ * Rebuilds text without Arabic vowel points when option is false; leaves
+ * text untouched otherwise.  key and module are not used.  Always returns 0.
+ */
+char UTF8ArabicPoints::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+	if (!option) {
+		// Work from a copy so that text can be rebuilt in place.
+		SWBuf orig = text;
+		const unsigned char *from = (const unsigned char *)orig.c_str();
+		text = "";
+		while (*from) {
+			const int skip = arabicPointLength(from);
+			if (skip) {
+				from += skip;
+			}
+			else {
+				text += *from;
+				from++;
+			}
+		}
+	}
+	return 0;
+}
+
+SWORD_NAMESPACE_END
Index: bindings/swig/vstudio/libsword_csharpsword.vcproj
===================================================================
--- bindings/swig/vstudio/libsword_csharpsword.vcproj	(revision 2211)
+++ bindings/swig/vstudio/libsword_csharpsword.vcproj	(working copy)
@@ -892,6 +892,10 @@
 			>
 		</File>
 		<File
+			RelativePath="..\..\..\include\utf8arabicpoints.h"
+			>
+		</File>
+		<File
 			RelativePath="..\..\..\include\utf8html.h"
 			>
 		</File>
Index: lib/vcppmake/vc8/libsword.vcproj
===================================================================
--- lib/vcppmake/vc8/libsword.vcproj	(revision 2211)
+++ lib/vcppmake/vc8/libsword.vcproj	(working copy)
@@ -5513,6 +5513,53 @@
 				</FileConfiguration>
 			</File>
 			<File
+				RelativePath="..\..\..\src\modules\filters\utf8arabicpoints.cpp"
+				>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug with ICU|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+						BrowseInformation="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release with ICU|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
RelativePath="..\..\..\src\modules\filters\utf8html.cpp" > <FileConfiguration Index: lib/vcppmake/vc7.1/libsword.vcproj =================================================================== --- lib/vcppmake/vc7.1/libsword.vcproj (revision 2211) +++ lib/vcppmake/vc7.1/libsword.vcproj (working copy) @@ -4537,6 +4537,44 @@ </FileConfiguration> </File> <File + RelativePath="..\..\..\src\modules\filters\utf8arabicpoints.cpp"> + <FileConfiguration + Name="Release|Win32"> + <Tool + Name="VCCLCompilerTool" + Optimization="2" + AdditionalIncludeDirectories="" + PreprocessorDefinitions=""/> + </FileConfiguration> + <FileConfiguration + Name="Debug with ICU|Win32"> + <Tool + Name="VCCLCompilerTool" + Optimization="0" + AdditionalIncludeDirectories="" + PreprocessorDefinitions="" + BasicRuntimeChecks="3"/> + </FileConfiguration> + <FileConfiguration + Name="Debug|Win32"> + <Tool + Name="VCCLCompilerTool" + Optimization="0" + AdditionalIncludeDirectories="" + PreprocessorDefinitions="" + BasicRuntimeChecks="3" + BrowseInformation="1"/> + </FileConfiguration> + <FileConfiguration + Name="Release with ICU|Win32"> + <Tool + Name="VCCLCompilerTool" + Optimization="2" + AdditionalIncludeDirectories="" + PreprocessorDefinitions=""/> + </FileConfiguration> + </File> + <File RelativePath="..\..\..\src\modules\filters\utf8html.cpp"> <FileConfiguration Name="Release|Win32"> Index: include/Makefile.am =================================================================== --- include/Makefile.am (revision 2211) +++ include/Makefile.am (working copy) @@ -139,6 +139,7 @@ pkginclude_HEADERS += $(swincludedir)/utf8cantillation.h pkginclude_HEADERS += $(swincludedir)/utf8greekaccents.h pkginclude_HEADERS += $(swincludedir)/utf8hebrewpoints.h +pkginclude_HEADERS += $(swincludedir)/utf8arabicpoints.h pkginclude_HEADERS += $(swincludedir)/utf8html.h pkginclude_HEADERS += $(swincludedir)/utf8latin1.h pkginclude_HEADERS += $(swincludedir)/utf8nfc.h Index: include/utf8arabicpoints.h 
===================================================================
--- include/utf8arabicpoints.h	(revision 0)
+++ include/utf8arabicpoints.h	(revision 0)
@@ -0,0 +1,39 @@
+/******************************************************************************
+ *
+ * $Id: utf8arabicpoints.h 1688 2008-11-30 04:42:26Z refdoc $
+ *
+ * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org)
+ *	CrossWire Bible Society
+ *	P. O. Box 2528
+ *	Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#ifndef UTF8ARABICPOINTS_H
+#define UTF8ARABICPOINTS_H
+
+#include <swoptfilter.h>
+
+SWORD_NAMESPACE_START
+
+/** This Filter shows/hides Arabic vowel points in UTF8 text
+ */
+class SWDLLEXPORT UTF8ArabicPoints : public SWOptionFilter {
+public:
+	UTF8ArabicPoints();
+	virtual ~UTF8ArabicPoints();
+	/** Filters text in place; key and module are optional and default to 0. */
+	virtual char processText(SWBuf &text, const SWKey *key = 0, const SWModule *module = 0);
+};
+
+SWORD_NAMESPACE_END
+#endif
Index: sword.kdevprj
===================================================================
--- sword.kdevprj	(revision 2211)
+++ sword.kdevprj	(working copy)
@@ -50,7 +50,7 @@
 type=HEADER
 
 [include/Makefile.am]
-files=include/canon.h,include/cipherfil.h,include/defs.h,include/echomod.h,include/entriesblk.h,include/femain.h,include/filemgr.h,include/gbffootnotes.h,include/gbfheadings.h,include/gbfhtml.h,include/swfilter.h,include/gbfhtmlhref.h,include/gbfmorph.h,include/gbfplain.h,include/gbfrtf.h,include/gbfstrongs.h,include/gbfthml.h,include/Greek2Greek.h,include/GreekChars.h,include/hrefcom.h,include/latin1utf16.h,include/latin1utf8.h,include/listkey.h,include/localemgr.h,include/lzsscomprs.h,include/plainfootnotes.h,include/plainhtml.h,include/rawcom.h,include/rawfiles.h,include/rawld.h,include/rawld4.h,include/rawstr.h,include/rawstr4.h,include/rawtext.h,include/rawverse.h,include/regex.h,include/roman.h,include/rtfhtml.h,include/rwphtml.h,include/rwprtf.h,include/sapphire.h,include/scsuutf8.h,include/strkey.h,include/swbasicfilter.h,include/swbyteswap.h,include/swcipher.h,include/swcom.h,include/swcomprs.h,include/swconfig.h,include/swdisp.h,include/swdisprtf.h,include/swdisprtfchap.h,include/swencodingmgr.h,include/swkey.h,include/swld.h,include/swlocale.h,include/swlog.h,include/swmacs.h,include/swmarkupmgr.h,include/swmgr.h,include/swmodule.h,include/swobject.h,include/swtext.h,include/swunicod.h,include/swwinlog.h,include/tbdisp.h,include/thmlgbf.h,include/thmlhtml.h,include/thmlhtmlhref.h,include/thmlplain.h,include/thmlrtf.h,include/unicodertf.h,include/unixstr.h,include/untgz.h,include/utf16utf8.h,include/utf8cantillation.h,include/utf8greekaccents.h,include/utf8hebrewpoints.h,include/utf8html.h,include/utf8latin1.h,include/utf8transliterator.h,include/utf8utf16.h,include/utilconf.h,include/utilfuns.h,include/utilstr.h,include/versekey.h,include/zcom.h,include/zconf.h,include/zipcomprs.h,include/zlib.h,include/ztext.h,include/zverse.h,include/thmlfootnotes.h,include/thmlheadings.h,include/thmllemma.h,include/thmlmorph.h,include/thmlscripref.h,include/thmlstrongs.h,include/utf8cnormalizer.h,include/utf8arshaping.h,include/utf8bidireorder.h,include/utf8nfc.h,incl
ude/utf8nfkd.h,include/encfiltmgr.h,include/swversion.h,include/swfiltermgr.h,include/markupfiltmgr.h,include/rawgenbook.h,include/swgenbook.h,include/treekey.h,include/thmlvariants.h,include/treekeyidx.h,include/zld.h,include/zstr.h,include/swinputmeth.h,include/nullim.h,include/hebrewmcim.h,include/flatapi.h,include/cdsourcemgr.h,include/ftpparse.h,include/gbfosis.h,include/gbfredletterwords.h,include/greeklexattribs.h,include/installmgr.h,include/osismorph.h,include/osisstrongs.h,include/remotesourcemgr.h,include/multimapwdef.h,include/swcacher.h,include/swinstallmgr.h,include/swremotemgr.h,include/swsourcemgr.h,include/sysdata.h,include/thmlosis.h,include/swbuf.h,include/osisplain.h,include/osisrtf.h +files=include/canon.h,include/cipherfil.h,include/defs.h,include/echomod.h,include/entriesblk.h,include/femain.h,include/filemgr.h,include/gbffootnotes.h,include/gbfheadings.h,include/gbfhtml.h,include/swfilter.h,include/gbfhtmlhref.h,include/gbfmorph.h,include/gbfplain.h,include/gbfrtf.h,include/gbfstrongs.h,include/gbfthml.h,include/Greek2Greek.h,include/GreekChars.h,include/hrefcom.h,include/latin1utf16.h,include/latin1utf8.h,include/listkey.h,include/localemgr.h,include/lzsscomprs.h,include/plainfootnotes.h,include/plainhtml.h,include/rawcom.h,include/rawfiles.h,include/rawld.h,include/rawld4.h,include/rawstr.h,include/rawstr4.h,include/rawtext.h,include/rawverse.h,include/regex.h,include/roman.h,include/rtfhtml.h,include/rwphtml.h,include/rwprtf.h,include/sapphire.h,include/scsuutf8.h,include/strkey.h,include/swbasicfilter.h,include/swbyteswap.h,include/swcipher.h,include/swcom.h,include/swcomprs.h,include/swconfig.h,include/swdisp.h,include/swdisprtf.h,include/swdisprtfchap.h,include/swencodingmgr.h,include/swkey.h,include/swld.h,include/swlocale.h,include/swlog.h,include/swmacs.h,include/swmarkupmgr.h,include/swmgr.h,include/swmodule.h,include/swobject.h,include/swtext.h,include/swunicod.h,include/swwinlog.h,include/tbdisp.h,include/thmlgbf.h,include/thmlhtm
l.h,include/thmlhtmlhref.h,include/thmlplain.h,include/thmlrtf.h,include/unicodertf.h,include/unixstr.h,include/untgz.h,include/utf16utf8.h,include/utf8cantillation.h,include/utf8greekaccents.h,include/utf8arabicpoints.h,include/utf8hebrewpoints.h,include/utf8html.h,include/utf8latin1.h,include/utf8transliterator.h,include/utf8utf16.h,include/utilconf.h,include/utilfuns.h,include/utilstr.h,include/versekey.h,include/zcom.h,include/zconf.h,include/zipcomprs.h,include/zlib.h,include/ztext.h,include/zverse.h,include/thmlfootnotes.h,include/thmlheadings.h,include/thmllemma.h,include/thmlmorph.h,include/thmlscripref.h,include/thmlstrongs.h,include/utf8cnormalizer.h,include/utf8arshaping.h,include/utf8bidireorder.h,include/utf8nfc.h,include/utf8nfkd.h,include/encfiltmgr.h,include/swversion.h,include/swfiltermgr.h,include/markupfiltmgr.h,include/rawgenbook.h,include/swgenbook.h,include/treekey.h,include/thmlvariants.h,include/treekeyidx.h,include/zld.h,include/zstr.h,include/swinputmeth.h,include/nullim.h,include/hebrewmcim.h,include/flatapi.h,include/cdsourcemgr.h,include/ftpparse.h,include/gbfosis.h,include/gbfredletterwords.h,include/greeklexattribs.h,include/installmgr.h,include/osismorph.h,include/osisstrongs.h,include/remotesourcemgr.h,include/multimapwdef.h,include/swcacher.h,include/swinstallmgr.h,include/swremotemgr.h,include/swsourcemgr.h,include/sysdata.h,include/thmlosis.h,include/swbuf.h,include/osisplain.h,include/osisrtf.h sub_dirs= type=normal @@ -756,6 +756,12 @@ install_location= type=HEADER +[include/utf8arabicpoints.h] +dist=true +install=false +install_location= +type=HEADER + [include/utf8html.h] dist=true install=false @@ -1208,7 +1214,7 @@ type=SOURCE [src/modules/filters/Makefile.am] 
-files=src/modules/filters/cipherfil.cpp,src/modules/filters/gbffootnotes.cpp,src/modules/filters/gbfheadings.cpp,src/modules/filters/gbfhtml.cpp,src/modules/filters/gbfhtmlhref.cpp,src/modules/filters/gbfmorph.cpp,src/modules/filters/gbfplain.cpp,src/modules/filters/gbfrtf.cpp,src/modules/filters/gbfstrongs.cpp,src/modules/filters/gbfthml.cpp,src/modules/filters/latin1utf16.cpp,src/modules/filters/latin1utf8.cpp,src/modules/filters/plainfootnotes.cpp,src/modules/filters/plainhtml.cpp,src/modules/filters/rtfhtml.cpp,src/modules/filters/rwphtml.cpp,src/modules/filters/rwprtf.cpp,src/modules/filters/scsuutf8.cpp,src/modules/filters/swbasicfilter.cpp,src/modules/filters/thmlgbf.cpp,src/modules/filters/thmlhtml.cpp,src/modules/filters/thmlhtmlhref.cpp,src/modules/filters/thmlplain.cpp,src/modules/filters/thmlrtf.cpp,src/modules/filters/unicodertf.cpp,src/modules/filters/utf16utf8.cpp,src/modules/filters/utf8cantillation.cpp,src/modules/filters/utf8greekaccents.cpp,src/modules/filters/utf8hebrewpoints.cpp,src/modules/filters/utf8html.cpp,src/modules/filters/utf8latin1.cpp,src/modules/filters/utf8transliterator.cpp,src/modules/filters/utf8utf16.cpp,src/modules/filters/thmlfootnotes.cpp,src/modules/filters/thmlheadings.cpp,src/modules/filters/thmllemma.cpp,src/modules/filters/thmlmorph.cpp,src/modules/filters/thmlscripref.cpp,src/modules/filters/thmlstrongs.cpp,src/modules/filters/utf8nfc.cpp,src/modules/filters/utf8nfkd.cpp,src/modules/filters/utf8arshaping.cpp,src/modules/filters/utf8bidireorder.cpp,src/modules/filters/thmlvariants.cpp,src/modules/filters/gbfosis.cpp,src/modules/filters/gbfredletterwords.cpp,src/modules/filters/greeklexattribs.cpp,src/modules/filters/osismorph.cpp,src/modules/filters/osisstrongs.cpp,src/modules/filters/thmlosis.cpp,src/modules/filters/osisplain.cpp,src/modules/filters/osisrtf.cpp,src/modules/filters/gbfwebif.cpp,src/modules/filters/osisfootnotes.cpp,src/modules/filters/osisheadings.cpp,src/modules/filters/osishtmlhref.cpp,src/modules/fil
ters/osislemma.cpp,src/modules/filters/osisredletterwords.cpp,src/modules/filters/osisscripref.cpp,src/modules/filters/osiswebif.cpp,src/modules/filters/swoptfilter.cpp,src/modules/filters/thmlwebif.cpp +files=src/modules/filters/cipherfil.cpp,src/modules/filters/gbffootnotes.cpp,src/modules/filters/gbfheadings.cpp,src/modules/filters/gbfhtml.cpp,src/modules/filters/gbfhtmlhref.cpp,src/modules/filters/gbfmorph.cpp,src/modules/filters/gbfplain.cpp,src/modules/filters/gbfrtf.cpp,src/modules/filters/gbfstrongs.cpp,src/modules/filters/gbfthml.cpp,src/modules/filters/latin1utf16.cpp,src/modules/filters/latin1utf8.cpp,src/modules/filters/plainfootnotes.cpp,src/modules/filters/plainhtml.cpp,src/modules/filters/rtfhtml.cpp,src/modules/filters/rwphtml.cpp,src/modules/filters/rwprtf.cpp,src/modules/filters/scsuutf8.cpp,src/modules/filters/swbasicfilter.cpp,src/modules/filters/thmlgbf.cpp,src/modules/filters/thmlhtml.cpp,src/modules/filters/thmlhtmlhref.cpp,src/modules/filters/thmlplain.cpp,src/modules/filters/thmlrtf.cpp,src/modules/filters/unicodertf.cpp,src/modules/filters/utf16utf8.cpp,src/modules/filters/utf8cantillation.cpp,src/modules/filters/utf8greekaccents.cpp,src/modules/filters/utf8hebrewpoints.cpp,src/modules/filters/utf8arabicpoints.cpp,src/modules/filters/utf8html.cpp,src/modules/filters/utf8latin1.cpp,src/modules/filters/utf8transliterator.cpp,src/modules/filters/utf8utf16.cpp,src/modules/filters/thmlfootnotes.cpp,src/modules/filters/thmlheadings.cpp,src/modules/filters/thmllemma.cpp,src/modules/filters/thmlmorph.cpp,src/modules/filters/thmlscripref.cpp,src/modules/filters/thmlstrongs.cpp,src/modules/filters/utf8nfc.cpp,src/modules/filters/utf8nfkd.cpp,src/modules/filters/utf8arshaping.cpp,src/modules/filters/utf8bidireorder.cpp,src/modules/filters/thmlvariants.cpp,src/modules/filters/gbfosis.cpp,src/modules/filters/gbfredletterwords.cpp,src/modules/filters/greeklexattribs.cpp,src/modules/filters/osismorph.cpp,src/modules/filters/osisstrongs.cpp,src/modules/fil
ters/thmlosis.cpp,src/modules/filters/osisplain.cpp,src/modules/filters/osisrtf.cpp,src/modules/filters/gbfwebif.cpp,src/modules/filters/osisfootnotes.cpp,src/modules/filters/osisheadings.cpp,src/modules/filters/osishtmlhref.cpp,src/modules/filters/osislemma.cpp,src/modules/filters/osisredletterwords.cpp,src/modules/filters/osisscripref.cpp,src/modules/filters/osiswebif.cpp,src/modules/filters/swoptfilter.cpp,src/modules/filters/thmlwebif.cpp sharedlib_LDFLAGS=-version-info 0:0:1 sharedlib_rootname=filters sub_dirs= @@ -1550,6 +1556,12 @@ install_location= type=SOURCE +[src/modules/filters/utf8arabicpoints.cpp] +dist=true +install=false +install_location= +type=SOURCE + [src/modules/filters/utf8html.cpp] dist=true install=false
_______________________________________________ sword-devel mailing list: sword-devel@crosswire.org http://www.crosswire.org/mailman/listinfo/sword-devel Instructions to unsubscribe/change your settings at above page