ICU’s u_charType
and after that you can use // http://www.fileformat.info/info/unicode/category/index.htm std::tuple<std::string, std::string> u_charTypeName(UCharCategory c) { switch (c) { /*case U_UNASSIGNED:*/ case U_GENERAL_OTHER_TYPES: return std::make_tuple("Cn","Other, Not Assigned (no characters in the file have this property) "); case U_UPPERCASE_LETTER: return std::make_tuple("Lu","Letter, Uppercase"); case U_LOWERCASE_LETTER: return std::make_tuple("Ll","Letter, Lowercase"); case U_TITLECASE_LETTER: return std::make_tuple("Lt","Letter, Titlecase"); case U_MODIFIER_LETTER: return std::make_tuple("Lm","Letter, Modifier"); case U_OTHER_LETTER: return std::make_tuple("Lo","Letter, Other"); case U_NON_SPACING_MARK: return std::make_tuple("Mn","Mark, Nonspacing"); case U_ENCLOSING_MARK: return std::make_tuple("Me","Mark, Enclosing"); case U_COMBINING_SPACING_MARK: return std::make_tuple("Mc","Mark, Spacing Combining"); case U_DECIMAL_DIGIT_NUMBER: return std::make_tuple("Nd","Number, Decimal Digit"); case U_LETTER_NUMBER: return std::make_tuple("Nl","Number, Letter"); case U_OTHER_NUMBER: return std::make_tuple("No","Number, Other"); case U_SPACE_SEPARATOR: return std::make_tuple("Zs","Separator, Space"); case U_LINE_SEPARATOR: return std::make_tuple("Zl","Separator, Line"); case U_PARAGRAPH_SEPARATOR: return std::make_tuple("Zp","Separator, Paragraph"); case U_CONTROL_CHAR: return std::make_tuple("Cc","Other, Control"); case U_FORMAT_CHAR: return std::make_tuple("Cf","Other, Format"); case U_PRIVATE_USE_CHAR: return std::make_tuple("Co","Other, Private Use"); case U_SURROGATE: return std::make_tuple("Cs","Other, Surrogate"); case U_DASH_PUNCTUATION: return std::make_tuple("Pd","Punctuation, Dash"); case U_START_PUNCTUATION: return std::make_tuple("Ps","Punctuation, Open"); case U_END_PUNCTUATION: return std::make_tuple("Pe","Punctuation, Close"); case U_CONNECTOR_PUNCTUATION: return std::make_tuple("Pc","Punctuation, Connector"); case U_OTHER_PUNCTUATION: return std::make_tuple("Po","Punctuation, Other"); case U_MATH_SYMBOL: return std::make_tuple("Sm","Symbol, Math"); case U_CURRENCY_SYMBOL: return std::make_tuple("Sc","Symbol, Currency"); case U_MODIFIER_SYMBOL: return std::make_tuple("Sk","Symbol, Modifier"); case U_OTHER_SYMBOL: return std::make_tuple("So","Symbol, Other"); case U_INITIAL_PUNCTUATION: return std::make_tuple("Pi","Punctuation, Initial quote (may behave like Ps or Pe depending on usage)"); case U_FINAL_PUNCTUATION: return std::make_tuple("Pf","Punctuation, Final quote (may behave like Ps or Pe depending on usage)"); default: return std::make_tuple("",""); } } > On Jul 7, 2015, at 8:03 AM, Gerriet M. Denkmann <gerr...@mdenkmann.de> wrote: > > Given a character (a Unicode code point, to be exact) like U+FF0B (FULLWIDTH > PLUS SIGN), I want to know the General Category of this. > For this example it would be “Sm" (aka. Math_Symbol or Symbol, Math). > > I could download the current version of UnicodeData.txt and parse it. > But this looks not very efficient. > > For punctuation one could use NSCharacterSet punctuationCharacterSet. > > But for Math Symbols? > > I did look at CFStringTransform, which can give the Character name via > kCFStringTransformToUnicodeName. > > But I cannot find anything for “General Category" > > NSRegularExpression can match for [\p{General_Category = Math_Symbol}]; not > quite what I want, but better than nothing. > > > Any ideas? > > Gerriet. > > > _______________________________________________ > > Cocoa-dev mailing list (Cocoa-dev@lists.apple.com) > > Please do not post admin requests or moderator comments to the list. > Contact the moderators at cocoa-dev-admins(at)lists.apple.com > > Help/Unsubscribe/Update your Subscription: > https://lists.apple.com/mailman/options/cocoa-dev/dmarkman%40mac.com > > This email sent to dmark...@mac.com Dmitry Markman _______________________________________________ Cocoa-dev mailing list (Cocoa-dev@lists.apple.com) Please do not post admin requests or moderator comments to the list. Contact the moderators at cocoa-dev-admins(at)lists.apple.com Help/Unsubscribe/Update your Subscription: https://lists.apple.com/mailman/options/cocoa-dev/archive%40mail-archive.com This email sent to arch...@mail-archive.com