New submission from Christian Heimes:
I'm sending the patch in for review.
----------
components: Interpreter Core
files: py3k_file_fsenc2.patch
messages: 56374
nosy: tiran
severity: normal
status: open
title: Decode __file__ and co_filename to unicode using fs default
versions: Python 3.0
__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1272>
__________________________________
Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c (revision 58412)
+++ Python/pythonrun.c (working copy)
@@ -867,7 +867,8 @@
return -1;
d = PyModule_GetDict(m);
if (PyDict_GetItemString(d, "__file__") == NULL) {
- PyObject *f = PyString_FromString(filename);
+ PyObject *f;
+ f = PyUnicode_DecodeFSDefault(filename, 0, NULL);
if (f == NULL)
return -1;
if (PyDict_SetItemString(d, "__file__", f) < 0) {
Index: Python/import.c
===================================================================
--- Python/import.c (revision 58412)
+++ Python/import.c (working copy)
@@ -652,7 +652,7 @@
/* Remember the filename as the __file__ attribute */
v = NULL;
if (pathname != NULL) {
- v = PyString_FromString(pathname);
+ v = PyUnicode_DecodeFSDefault(pathname, 0, NULL);
if (v == NULL)
PyErr_Clear();
}
@@ -983,7 +983,7 @@
PySys_WriteStderr("import %s # directory %s\n",
name, pathname);
d = PyModule_GetDict(m);
- file = PyString_FromString(pathname);
+ file = PyUnicode_DecodeFSDefault(pathname, 0, NULL);
if (file == NULL)
goto error;
path = Py_BuildValue("[O]", file);
Index: Python/compile.c
===================================================================
--- Python/compile.c (revision 58412)
+++ Python/compile.c (working copy)
@@ -4001,7 +4001,7 @@
freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
if (!freevars)
goto error;
- filename = PyString_FromString(c->c_filename);
+ filename = PyUnicode_DecodeFSDefault(c->c_filename, 0, NULL);
if (!filename)
goto error;
Index: Python/importdl.c
===================================================================
--- Python/importdl.c (revision 58412)
+++ Python/importdl.c (working copy)
@@ -62,7 +62,9 @@
return NULL;
}
/* Remember the filename as the __file__ attribute */
- if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
+ PyObject *path;
+ path = PyUnicode_DecodeFSDefault(pathname, 0, NULL);
+ if (PyModule_AddObject(m, "__file__", path) < 0)
PyErr_Clear(); /* Not important enough to report */
if (_PyImport_FixupExtension(name, pathname) == NULL)
Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h (revision 58412)
+++ Include/unicodeobject.h (working copy)
@@ -154,6 +154,7 @@
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
@@ -245,6 +246,7 @@
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
@@ -641,6 +643,25 @@
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
PyObject *, const char *);
+/* Decode a string to a Python unicode object using either
+ Py_FileSystemDefaultEncoding or UTF-8 if the default encoding isn't given.
+
+ The function is intended to be used for paths and file names only. It
+ doesn't use the codecs module and PyUnicode_Decode() since it is required
+ during bootstrapping, before the codecs are set up. For that reason
+ the default fs encoding should be UTF-8, UTF-16, UTF-32, Latin-1, ASCII
+ or (on Windows) MBCS. With any other encoding, or if decoding fails, the
+ string is decoded as UTF-8 with the "replace" error handler.
+
+ If the length parameter is 0, the length of string is determined
+ with strlen(string). errors must be set to NULL.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+ const char *string, /* encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors /* error handling */
+ );
+
+
/* Return a char* holding the UTF-8 encoded value of the
Unicode object.
Index: setup.py
===================================================================
--- setup.py (revision 58412)
+++ setup.py (working copy)
@@ -414,7 +414,6 @@
# Python C API test module
exts.append( Extension('_testcapi', ['_testcapimodule.c']) )
# profilers (_lsprof is for cProfile.py)
- exts.append( Extension('_hotshot', ['_hotshot.c']) )
exts.append( Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']) )
# static Unicode character database
exts.append( Extension('unicodedata', ['unicodedata.c']) )
Index: Objects/codeobject.c
===================================================================
--- Objects/codeobject.c (revision 58412)
+++ Objects/codeobject.c (working copy)
@@ -59,7 +59,7 @@
freevars == NULL || !PyTuple_Check(freevars) ||
cellvars == NULL || !PyTuple_Check(cellvars) ||
name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
- filename == NULL || !PyString_Check(filename) ||
+ filename == NULL || (!PyString_Check(filename) && !PyUnicode_Check(filename)) ||
lnotab == NULL || !PyString_Check(lnotab) ||
!PyObject_CheckReadBuffer(code)) {
PyErr_BadInternalCall();
@@ -72,6 +72,13 @@
} else {
Py_INCREF(name);
}
+ if (PyString_Check(filename)) {
+ filename = PyUnicode_DecodeFSDefault(PyString_AS_STRING(filename), 0, NULL);
+ if (filename == NULL)
+ return NULL;
+ } else {
+ Py_INCREF(filename);
+ }
intern_strings(names);
intern_strings(varnames);
intern_strings(freevars);
@@ -260,6 +267,8 @@
ourcellvars = PyTuple_New(0);
if (ourcellvars == NULL)
goto cleanup;
+ filename = PyUnicode_DecodeFSDefault(PyString_AS_STRING(filename),
+ 0, NULL);
co = (PyObject *)PyCode_New(argcount, kwonlyargcount,
nlocals, stacksize, flags,
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (revision 58412)
+++ Objects/unicodeobject.c (working copy)
@@ -1231,6 +1231,61 @@
return v;
}
+/* Decode a path or file name to a unicode object without going through
+   the codecs machinery.
+
+   The encoding is Py_FileSystemDefaultEncoding, or UTF-8 when that is NULL.
+   Its name is normalized (lower-cased, non-alphanumeric characters dropped,
+   at most 31 characters considered) and dispatched directly to the built-in
+   UTF-8, UTF-16, UTF-32, Latin-1, ASCII and (under MS_WIN32) MBCS decoders.
+   An unrecognized encoding, or a failure of the selected decoder, falls
+   back to UTF-8 with the "replace" error handler.
+
+   string -- encoded bytes; must not be NULL (it is dereferenced).
+   length -- number of bytes; 0 means "use strlen(string)".
+   errors -- must be NULL; any other value is a fatal error.
+
+   Returns the decoded object, or NULL if the fallback decoder fails too.
+*/
+PyObject*
+PyUnicode_DecodeFSDefault(const char *string, Py_ssize_t length,
+                          const char *errors)
+{
+    PyObject *v = NULL;
+    const char *fsenc;
+    char mangled[32];
+    size_t seen, used;
+    unsigned char ch;
+
+    if (errors != NULL)
+        Py_FatalError("non-NULL errors in PyUnicode_DecodeFSDefault");
+    if ((length == 0) && *string)
+        length = (Py_ssize_t)strlen(string);
+
+    fsenc = Py_FileSystemDefaultEncoding ?
+            Py_FileSystemDefaultEncoding : "UTF-8";
+
+    /* Lower-case the encoding name and drop non-alphanumeric characters
+       such as '-' and '_'.  Only the first sizeof(mangled) - 1 characters
+       of the name are looked at.  The <ctype.h> functions need a value
+       representable as unsigned char, hence the cast. */
+    used = 0;
+    for (seen = 0; seen < sizeof(mangled) - 1 && fsenc[seen] != '\0'; seen++) {
+        ch = (unsigned char)fsenc[seen];
+        if (isalnum(ch))
+            mangled[used++] = (char)tolower(ch);
+    }
+    mangled[used] = '\0';
+
+    if (strcmp(mangled, "utf8") == 0)
+        v = PyUnicode_DecodeUTF8(string, length, NULL);
+    else if (strcmp(mangled, "utf16") == 0)
+        v = PyUnicode_DecodeUTF16(string, length, NULL, 0);
+    else if (strcmp(mangled, "utf32") == 0)
+        v = PyUnicode_DecodeUTF32(string, length, NULL, 0);
+    /* NOTE(review): ISO-8859-15 is not identical to Latin-1 (a handful of
+       code points differ, e.g. the euro sign); decoding it as Latin-1 is
+       an approximation.  Kept as is -- confirm this is intended. */
+    else if ((strcmp(mangled, "latin1") == 0) ||
+             (strcmp(mangled, "iso88591") == 0) ||
+             (strcmp(mangled, "iso885915") == 0))
+        v = PyUnicode_DecodeLatin1(string, length, NULL);
+    else if (strcmp(mangled, "ascii") == 0)
+        v = PyUnicode_DecodeASCII(string, length, NULL);
+#ifdef MS_WIN32
+    else if (strcmp(mangled, "mbcs") == 0)
+        v = PyUnicode_DecodeMBCS(string, length, NULL);
+#endif
+
+    if (v == NULL) {
+        /* Either the encoding is unknown or the strict decoder failed.  In
+           the latter case an exception is pending; drop it so that the
+           lenient fallback does not return a result with an error set. */
+        PyErr_Clear();
+        v = PyUnicode_DecodeUTF8(string, length, "replace");
+    }
+
+    return v;
+}
+
+
char*
PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
Index: Objects/moduleobject.c
===================================================================
--- Objects/moduleobject.c (revision 58412)
+++ Objects/moduleobject.c (working copy)
@@ -86,12 +86,12 @@
d = ((PyModuleObject *)m)->md_dict;
if (d == NULL ||
(fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
- !PyString_Check(fileobj))
+ !PyUnicode_Check(fileobj))
{
PyErr_SetString(PyExc_SystemError, "module filename missing");
return NULL;
}
- return PyString_AsString(fileobj);
+ return PyUnicode_AsString(fileobj);
}
void
Index: Modules/pyexpat.c
===================================================================
--- Modules/pyexpat.c (revision 58412)
+++ Modules/pyexpat.c (working copy)
@@ -238,7 +238,7 @@
nulltuple = PyTuple_New(0);
if (nulltuple == NULL)
goto failed;
- filename = PyString_FromString(__FILE__);
+ filename = PyUnicode_DecodeFSDefault(__FILE__, 0, NULL);
handler_info[slot].tb_code =
PyCode_New(0, /* argcount */
0, /* kwonlyargcount */
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com