abael added the comment:

I have added my implementation (with some enhancements; it gets better
performance, at least for my apps).

test result:
With small chunks of str, it gets double the performance,
and 111% for big chunks.

## Utility function for text definition:
def pf(f,n):
    a=time()
    for i in xrange(n):
        f()
    b=time()
    print  n/(b-a)

#############################################################################
>>> s=['abcd',u'\u548c\u6613\u541b'] * 1000
>>> pf(lambda:''.join(s), 10000)
2289.28293164
>>> pf(lambda:text.join('',s), 10000)
4457.27947763
>>> s=['abcd'*1000,u'\u548c\u6613\u541b'*1000] * 1000
>>> pf(lambda:''.join(s), 100)
15.2374868484
>>> pf(lambda:text.join('',s), 100)
16.939913023

##############################################################################

2012/8/1 Antoine Pitrou <rep...@bugs.python.org>

>
> Antoine Pitrou added the comment:
>
> Hi, several points:
>
> - Python 2.7 is in bugfix mode; you need to work from the default
> Mercurial branch as explained in http://docs.python.org/devguide/ . In
> practice, this means your patch will target the future Python 3.4, and
> therefore either PyUnicode_Join or _PyBytes_Join.
>
> - please provide an actual patch (a Mercurial diff, probably)
>
> - please provide benchmarks (using e.g. timeit) which demonstrate the
> performance improvement you are proposing
>
> ----------
> nosy: +pitrou
> versions: +Python 3.4 -Python 2.7
>
> _______________________________________
> Python tracker <rep...@bugs.python.org>
> <http://bugs.python.org/issue15522>
> _______________________________________
>

----------
Added file: http://bugs.python.org/file26681/stringjoin.c

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue15522>
_______________________________________
# Author: Abael Heyijun<hyj...@gmail.com>
# Date: 2012-08-02

static PyObject *py_join(PyObject *self, PyObject *args){
  PyObject *seprator=NULL; PyObject *sequence=NULL;
  PyArg_ParseTuple(args, "OO:join", &seprator, &sequence);
  register PyObject *seq =PySequence_Fast(sequence, "join() only accept list/tuple arguments ");
  const Py_ssize_t seqlen = PySequence_Fast_GET_SIZE(sequence);
  if (seq == NULL || !PyString_Check(seprator) || seqlen < 0)return NULL;

  if (seqlen == 0){
	  Py_DECREF(seq);
	  return PyString_FromString("");
  }

  register PyObject * item=NULL;
  item = PySequence_Fast_GET_ITEM(seq, 0);

	if (!PyString_Check(item)){
	  if (PyUnicode_Check(item)){
		  item=pyEncodeUTF8(self, item);
		  if (!PyString_Check(item))return PyErr_Format(PyExc_TypeError,"sequence item %zd: expected string, %.80s found",0, Py_TYPE(item)->tp_name);
	  }else{
		  PyErr_Format(PyExc_TypeError,"sequence item %zd: expected string, %.80s found",0, Py_TYPE(item)->tp_name);
		  Py_DECREF(seq);
		  return NULL;
	  }
	}

  if(seqlen == 1){
	  Py_INCREF(item);
	  return item;
  }

  const Py_ssize_t seplen = PyString_GET_SIZE(seprator);
  const char *sep = PyString_AS_STRING(seprator);
  register size_t isz=PyString_GET_SIZE(item);
  size_t usz=isz, alloc=isz>8192?isz:8192;
  PyObject *res=PyString_FromStringAndSize(NULL, alloc);
  register char *p =(char *)PyString_AS_STRING(res);
  if(p==NULL)return NULL;
  if (isz){
	  Py_MEMCPY(p, (char *)PyString_AS_STRING(item),isz);
	  p+=isz;
  }
  register Py_ssize_t i;
  for (i=1; i<seqlen; i++){
	item = PySequence_Fast_GET_ITEM(seq,i);
	if (!PyString_Check(item)){
	  if (PyUnicode_Check(item)){
		  item=pyEncodeUTF8(self, item);
		  if (!PyString_Check(item)){
			  PyErr_Format(PyExc_TypeError,"sequence item %zd: transfer unicode to string, %.80s found",i, Py_TYPE(item)->tp_name);
			  return NULL;
		  }
	  }else{
		  PyErr_Format(PyExc_TypeError,"sequence item %zd: expected string, %.80s found",i, Py_TYPE(item)->tp_name);
		  return NULL;
	  }
	}
	isz=PyString_GET_SIZE(item);
	if(isz){
		if (usz + seplen +isz>alloc){
			do {
				alloc+=8192;
				if (alloc<1){
					PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
					return NULL;
				}
			}while (usz + seplen +isz>alloc);
			if (_PyString_Resize(&res, alloc)<0){
				PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
				return NULL;
			}
			p=(char *)PyString_AS_STRING(res)+usz;
		}
		if(seplen){
			Py_MEMCPY(p, sep, seplen);
			p+=seplen;
	        usz+=seplen;
		}
		Py_MEMCPY(p, (char *)PyString_AS_STRING(item),isz);
		p +=isz;
		usz +=isz;
	}
  }
  Py_DECREF(seq);
  if (_PyString_Resize(&res, usz)< 0){
	PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
	return NULL;
  }
  return res;
}
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to