Tal Einat added the comment: Alright, so I'm going to use the equivalent of the following code, unless someone can tell me that something is wrong:
from keyword import iskeyword from unicodedata import category, normalize _ID_FIRST_CATEGORIES = {"Lu", "Ll", "Lt", "Lm", "Lo", "Nl", "Other_ID_Start"} _ID_CATEGORIES = _ID_FIRST_CATEGORIES | {"Mn", "Mc", "Nd", "Pc", "Other_ID_Continue"} _ASCII_ID_CHARS = set(string.ascii_letters + string.digits + "_") _ID_KEYWORDS = {"True", "False", "None"} def is_id_char(char): return char in _ASCII_ID_CHARS or ( ord(char) >= 128 and category(normalize(char)[0]) in _ID_CATEGORIES ) def is_identifier(id_candidate): return id_candidate.isidentifier() and ( (not iskeyword(id_candidate)) or id_candidate in _ID_KEYWORDS ) def _eat_identifier(str, limit, pos): i = pos while i > limit and is_id_char(str[pos - i]): i -= 1 if i < pos and not is_identifier(str[i:pos]): return 0 return pos - i ---------- _______________________________________ Python tracker <rep...@bugs.python.org> <http://bugs.python.org/issue21765> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com