abael added the comment:
I have added my implementation (with some enhancements); it gives better
performance, at least for my applications.
test result:
With small string chunks it roughly doubles the performance,
and it reaches about 111% for big chunks.
## Utility function used to define the tests:
def pf(f, n):
    """Call ``f()`` ``n`` times and print the achieved calls per second.

    ``time`` is not defined in this snippet; it is presumably
    ``time.time`` imported earlier in the session (TODO confirm).
    ``xrange`` marks this as Python 2 code.
    """
    a = time()
    for i in xrange(n):
        f()
    b = time()
    # Parenthesised single-argument form prints the same under Python 2.
    print(n/(b-a))
#############################################################################
>>> s=['abcd',u'\u548c\u6613\u541b'] * 1000
>>> pf(lambda:''.join(s), 10000)
2289.28293164
>>> pf(lambda:text.join('',s), 10000)
4457.27947763
>>> s=['abcd'*1000,u'\u548c\u6613\u541b'*1000] * 1000
>>> pf(lambda:''.join(s), 100)
15.2374868484
>>> pf(lambda:text.join('',s), 100)
16.939913023
##############################################################################
2012/8/1 Antoine Pitrou <rep...@bugs.python.org>
>
> Antoine Pitrou added the comment:
>
> Hi, several points:
>
> - Python 2.7 is in bugfix mode; you need to work from the default
> Mercurial branch as explained in http://docs.python.org/devguide/ . In
> practice, this means your patch will target the future Python 3.4, and
> therefore either PyUnicode_Join or _PyBytes_Join.
>
> - please provide an actual patch (a Mercurial diff, probably)
>
> - please provide benchmarks (using e.g. timeit) which demonstrate the
> performance improvement you are proposing
>
> ----------
> nosy: +pitrou
> versions: +Python 3.4 -Python 2.7
>
> _______________________________________
> Python tracker <rep...@bugs.python.org>
> <http://bugs.python.org/issue15522>
> _______________________________________
>
----------
Added file: http://bugs.python.org/file26681/stringjoin.c
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue15522>
_______________________________________
# Author: Abael Heyijun<hyj...@gmail.com>
# Date: 2012-08-02
static PyObject *py_join(PyObject *self, PyObject *args){
PyObject *seprator=NULL; PyObject *sequence=NULL;
PyArg_ParseTuple(args, "OO:join", &seprator, &sequence);
register PyObject *seq =PySequence_Fast(sequence, "join() only accept list/tuple arguments ");
const Py_ssize_t seqlen = PySequence_Fast_GET_SIZE(sequence);
if (seq == NULL || !PyString_Check(seprator) || seqlen < 0)return NULL;
if (seqlen == 0){
Py_DECREF(seq);
return PyString_FromString("");
}
register PyObject * item=NULL;
item = PySequence_Fast_GET_ITEM(seq, 0);
if (!PyString_Check(item)){
if (PyUnicode_Check(item)){
item=pyEncodeUTF8(self, item);
if (!PyString_Check(item))return PyErr_Format(PyExc_TypeError,"sequence item %zd: expected string, %.80s found",0, Py_TYPE(item)->tp_name);
}else{
PyErr_Format(PyExc_TypeError,"sequence item %zd: expected string, %.80s found",0, Py_TYPE(item)->tp_name);
Py_DECREF(seq);
return NULL;
}
}
if(seqlen == 1){
Py_INCREF(item);
return item;
}
const Py_ssize_t seplen = PyString_GET_SIZE(seprator);
const char *sep = PyString_AS_STRING(seprator);
register size_t isz=PyString_GET_SIZE(item);
size_t usz=isz, alloc=isz>8192?isz:8192;
PyObject *res=PyString_FromStringAndSize(NULL, alloc);
register char *p =(char *)PyString_AS_STRING(res);
if(p==NULL)return NULL;
if (isz){
Py_MEMCPY(p, (char *)PyString_AS_STRING(item),isz);
p+=isz;
}
register Py_ssize_t i;
for (i=1; i<seqlen; i++){
item = PySequence_Fast_GET_ITEM(seq,i);
if (!PyString_Check(item)){
if (PyUnicode_Check(item)){
item=pyEncodeUTF8(self, item);
if (!PyString_Check(item)){
PyErr_Format(PyExc_TypeError,"sequence item %zd: transfer unicode to string, %.80s found",i, Py_TYPE(item)->tp_name);
return NULL;
}
}else{
PyErr_Format(PyExc_TypeError,"sequence item %zd: expected string, %.80s found",i, Py_TYPE(item)->tp_name);
return NULL;
}
}
isz=PyString_GET_SIZE(item);
if(isz){
if (usz + seplen +isz>alloc){
do {
alloc+=8192;
if (alloc<1){
PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
return NULL;
}
}while (usz + seplen +isz>alloc);
if (_PyString_Resize(&res, alloc)<0){
PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
return NULL;
}
p=(char *)PyString_AS_STRING(res)+usz;
}
if(seplen){
Py_MEMCPY(p, sep, seplen);
p+=seplen;
usz+=seplen;
}
Py_MEMCPY(p, (char *)PyString_AS_STRING(item),isz);
p +=isz;
usz +=isz;
}
}
Py_DECREF(seq);
if (_PyString_Resize(&res, usz)< 0){
PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
return NULL;
}
return res;
}
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com