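Use ICU’s u_charType to get the general category of the code point, and after
that you can use the function below to map it to its abbreviation and name: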

// Maps an ICU general-category value to its Unicode names.
//
// Category reference:
// http://www.fileformat.info/info/unicode/category/index.htm
//
// @param c  General category value (e.g. the result of u_charType()).
// @return   A tuple of (two-letter abbreviation, long description), for
//           example ("Sm", "Symbol, Math"). A value that matches none of the
//           cases below yields a tuple of two empty strings.
std::tuple<std::string, std::string> u_charTypeName(UCharCategory c) {
    switch (c) {
        // U_UNASSIGNED shares its numeric value with U_GENERAL_OTHER_TYPES in
        // ICU's enum, so it cannot be listed as a separate case label.
        /*case U_UNASSIGNED:*/
        case U_GENERAL_OTHER_TYPES:
            return std::make_tuple("Cn","Other, Not Assigned (no characters in the file have this property) ");
        case U_UPPERCASE_LETTER:
            return std::make_tuple("Lu","Letter, Uppercase");
        case U_LOWERCASE_LETTER:
            return std::make_tuple("Ll","Letter, Lowercase");
        case U_TITLECASE_LETTER:
            return std::make_tuple("Lt","Letter, Titlecase");
        case U_MODIFIER_LETTER:
            return std::make_tuple("Lm","Letter, Modifier");
        case U_OTHER_LETTER:
            return std::make_tuple("Lo","Letter, Other");
        case U_NON_SPACING_MARK:
            return std::make_tuple("Mn","Mark, Nonspacing");
        case U_ENCLOSING_MARK:
            return std::make_tuple("Me","Mark, Enclosing");
        case U_COMBINING_SPACING_MARK:
            return std::make_tuple("Mc","Mark, Spacing Combining");
        case U_DECIMAL_DIGIT_NUMBER:
            return std::make_tuple("Nd","Number, Decimal Digit");
        case U_LETTER_NUMBER:
            return std::make_tuple("Nl","Number, Letter");
        case U_OTHER_NUMBER:
            return std::make_tuple("No","Number, Other");
        case U_SPACE_SEPARATOR:
            return std::make_tuple("Zs","Separator, Space");
        case U_LINE_SEPARATOR:
            return std::make_tuple("Zl","Separator, Line");
        case U_PARAGRAPH_SEPARATOR:
            return std::make_tuple("Zp","Separator, Paragraph");
        case U_CONTROL_CHAR:
            return std::make_tuple("Cc","Other, Control");
        case U_FORMAT_CHAR:
            return std::make_tuple("Cf","Other, Format");
        case U_PRIVATE_USE_CHAR:
            return std::make_tuple("Co","Other, Private Use");
        case U_SURROGATE:
            return std::make_tuple("Cs","Other, Surrogate");
        case U_DASH_PUNCTUATION:
            return std::make_tuple("Pd","Punctuation, Dash");
        case U_START_PUNCTUATION:
            return std::make_tuple("Ps","Punctuation, Open");
        case U_END_PUNCTUATION:
            return std::make_tuple("Pe","Punctuation, Close");
        case U_CONNECTOR_PUNCTUATION:
            return std::make_tuple("Pc","Punctuation, Connector");
        case U_OTHER_PUNCTUATION:
            return std::make_tuple("Po","Punctuation, Other");
        case U_MATH_SYMBOL:
            return std::make_tuple("Sm","Symbol, Math");
        case U_CURRENCY_SYMBOL:
            return std::make_tuple("Sc","Symbol, Currency");
        case U_MODIFIER_SYMBOL:
            return std::make_tuple("Sk","Symbol, Modifier");
        case U_OTHER_SYMBOL:
            return std::make_tuple("So","Symbol, Other");
        case U_INITIAL_PUNCTUATION:
            return std::make_tuple("Pi","Punctuation, Initial quote (may behave like Ps or Pe depending on usage)");
        case U_FINAL_PUNCTUATION:
            return std::make_tuple("Pf","Punctuation, Final quote (may behave like Ps or Pe depending on usage)");
        default:
            // Any value without a case above: no known name.
            return std::make_tuple("","");
    }
}




> On Jul 7, 2015, at 8:03 AM, Gerriet M. Denkmann <gerr...@mdenkmann.de> wrote:
> 
> Given a character (a Unicode code point, to be exact) like U+FF0B (FULLWIDTH 
> PLUS SIGN), I want to know the General Category of this.
> For this example it would be "Sm" (a.k.a. Math_Symbol or "Symbol, Math").
> 
> I could download the current version of UnicodeData.txt and parse it.
> But this does not look very efficient.
> 
> For punctuation one could use NSCharacterSet punctuationCharacterSet.
> 
> But for Math Symbols?
> 
> I did look at CFStringTransform, which can give the Character name via 
> kCFStringTransformToUnicodeName.
> 
> But I cannot find anything for "General Category".
> 
> NSRegularExpression can match for [\p{General_Category = Math_Symbol}]; not 
> quite what I want, but better than nothing.
> 
> 
> Any ideas?
> 
> Gerriet.
> 
> 
> _______________________________________________
> 
> Cocoa-dev mailing list (Cocoa-dev@lists.apple.com)
> 
> Please do not post admin requests or moderator comments to the list.
> Contact the moderators at cocoa-dev-admins(at)lists.apple.com
> 
> Help/Unsubscribe/Update your Subscription:
> https://lists.apple.com/mailman/options/cocoa-dev/dmarkman%40mac.com
> 
> This email sent to dmark...@mac.com

Dmitry Markman


_______________________________________________

Cocoa-dev mailing list (Cocoa-dev@lists.apple.com)

Please do not post admin requests or moderator comments to the list.
Contact the moderators at cocoa-dev-admins(at)lists.apple.com

Help/Unsubscribe/Update your Subscription:
https://lists.apple.com/mailman/options/cocoa-dev/archive%40mail-archive.com

This email sent to arch...@mail-archive.com

Reply via email to