Alexander Belopolsky added the comment: I did a little more investigation, and it looks like the information separators have been included in whitespace since the unicode type was first implemented in Python:
guido 11967 Fri Mar 10 22:52:46 2000 +0000: /* Returns 1 for Unicode characters having the type 'WS', 'B' or 'S',
guido 11967 Fri Mar 10 22:52:46 2000 +0000:    0 otherwise. */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:
guido 11967 Fri Mar 10 22:52:46 2000 +0000: int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
guido 11967 Fri Mar 10 22:52:46 2000 +0000: {
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     switch (ch) {
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x0009: /* HORIZONTAL TABULATION */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x000A: /* LINE FEED */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x000B: /* VERTICAL TABULATION */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x000C: /* FORM FEED */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x000D: /* CARRIAGE RETURN */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x001C: /* FILE SEPARATOR */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x001D: /* GROUP SEPARATOR */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x001E: /* RECORD SEPARATOR */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x001F: /* UNIT SEPARATOR */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x0020: /* SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x1680: /* OGHAM SPACE MARK */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2000: /* EN QUAD */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2001: /* EM QUAD */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2002: /* EN SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2003: /* EM SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2004: /* THREE-PER-EM SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2005: /* FOUR-PER-EM SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2006: /* SIX-PER-EM SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2007: /* FIGURE SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2008: /* PUNCTUATION SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2009: /* THIN SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x200A: /* HAIR SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x2028: /* LINE SEPARATOR */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x202F: /* NARROW NO-BREAK SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     case 0x3000: /* IDEOGRAPHIC SPACE */
guido 11967 Fri Mar 10 22:52:46 2000 +0000:         return 1;
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     default:
guido 11967 Fri Mar 10 22:52:46 2000 +0000:         return 0;
guido 11967 Fri Mar 10 22:52:46 2000 +0000:     }
guido 11967 Fri Mar 10 22:52:46 2000 +0000: }
guido 11967 Fri Mar 10 22:52:46 2000 +0000:

(hg blame -u -d -n -r 11967 Objects/unicodectype.c)

----------

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue18236>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com