The implode() function uses very bad algorithm, which uses memory reallocation very often. The speed of functions falls quickly with increase of length of imploded array & data in it.
My version of implode() uses much better algorithm: 1) counts the amount of memory needed to result string 2) allocates memory 3) implodes array to the allocated memory
Below is standalone test application and unified diff:
the test: =========cut======== #include <stdio.h> #include <stdlib.h> #include <string.h>
/* faked PHP API, which has been used in php_implode() */ #define PHPAPI #define HashPosition unsigned int #define Z_ARRVAL_P(a) (a) #define Z_STRLEN_P(a) ((a)->len) #define Z_STRVAL_P(a) ((a)->p) #define Z_STRLEN_PP(pp) Z_STRLEN_P(*(pp)) #define Z_STRVAL_PP(pp) Z_STRVAL_P(*(pp)) #define RETURN_STRINGL(str, str_len, foo) \ { \ zval *q = return_value; \ q->p = (str); \ q->len = (str_len); \ return; \ } #define RETURN_EMPTY_STRING() \ { \ zval *q = return_value; \ q->p = emalloc(1); \ *(q->p) = '\0'; \ q->len = 0; \ return; \ } #define RETURN_FALSE RETURN_EMPTY_STRING() #define zend_hash_internal_pointer_reset_ex(foo, pos) *(pos) = 0 #define SUCCESS 1 #define FAILED 0 #define SEPARATE_ZVAL(foo) #define convert_to_string(foo) #define zend_hash_move_forward_ex(foo, pos) (*pos)++ #define emalloc(len) malloc(len) #define efree(p) free(p) #define TSRMLS_FETCH() #define TSRMLS_CC #define php_error_docref(foo, bar, s) printf("Error: [%s]\n", s)
typedef struct { char *p; /* pointer to a data */ size_t len; /* length of a data */ } zval;
int zend_hash_num_elements(zval *a) { int i = 0; while (a[i].p != NULL) i++; return i; }
int zend_hash_get_current_data_ex(zval *arr, void **z_ptr, HashPosition *pos) {
static zval *z, **zp;
zp = &z;
z = arr + *pos;
*z_ptr = zp;
return z->p == NULL ? FAILED : SUCCESS;
}
/******************************************/ /* test array */ zval arr[] = { {"foo", 3}, {"bar", 3}, {"baz", 3}, {NULL, 0} };
/* delimiter */ zval delim = {":", 1}; /******************************************/
PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value) { zval **tmp; HashPosition pos; int numelems, i; size_t result_len, delim_len, tmp_len; char *result_str, *delim_str, *ptr; char **str_arr; size_t *len_arr;
numelems = zend_hash_num_elements(Z_ARRVAL_P(arr)); if (numelems < 1) RETURN_EMPTY_STRING(); str_arr = (char **) emalloc(numelems * sizeof(char *)); len_arr = (size_t *) emalloc(numelems * sizeof(size_t)); if (str_arr == NULL || len_arr == NULL) { TSRMLS_FETCH(); php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); RETURN_FALSE; } delim_str = Z_STRVAL_P(delim); delim_len = Z_STRLEN_P(delim);
/* the first pass: calculate size of memory to allocate */ i = 0; result_len = 0; zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos); while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr), (void **) &tmp, &pos) == SUCCESS) { SEPARATE_ZVAL(tmp); convert_to_string(*tmp); str_arr[i] = Z_STRVAL_PP(tmp); /* pointer to string */ len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */ result_len += len_arr[i]; i++; if (delim_len && i < numelems) result_len += delim_len; zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos); } if (result_len == 0) { efree(len_arr); efree(str_arr); RETURN_EMPTY_STRING(); } /* allocate memory */ ptr = result_str = emalloc(result_len + 1); if (ptr == NULL) { efree(len_arr); efree(str_arr); TSRMLS_FETCH(); php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); RETURN_FALSE; } /* the second pass: implode array into allocated memory */ for (i = 0; i < numelems - 1; i++) { if (tmp_len = len_arr[i]) { memcpy(ptr, str_arr[i], tmp_len); ptr += tmp_len; } if (delim_len) { memcpy(ptr, delim_str, delim_len); ptr += delim_len; } } /* copy the last element of the array */ if (tmp_len = len_arr[i]) { memcpy(ptr, str_arr[i], tmp_len); ptr += tmp_len; } *ptr = '\0';
efree(len_arr); efree(str_arr); RETURN_STRINGL(result_str, result_len, 0); }
/***********************************************/ int main(int argc,char *argv[]) { zval result; php_implode(&delim, arr, &result); printf("str=[%s], len=%lu", Z_STRVAL_P(&result), Z_STRLEN_P(&result)); efree(result.p); return 0; } =========cut========
unified diff: =========cut========= --- string.c Thu May 13 20:44:32 2004 +++ string_implode.c Mon Jun 14 13:28:19 2004 @@ -827,31 +827,75 @@ { zval **tmp; HashPosition pos; - smart_str implstr = {0}; - int numelems, i = 0; + int numelems, i; + size_t result_len, delim_len, tmp_len; + char *result_str, *delim_str, *ptr; + char **str_arr; + size_t *len_arr;
numelems = zend_hash_num_elements(Z_ARRVAL_P(arr)); - - if(numelems == 0) { - RETURN_EMPTY_STRING(); + if (numelems < 1) RETURN_EMPTY_STRING(); + str_arr = (char **) emalloc(numelems * sizeof(char *)); + len_arr = (size_t *) emalloc(numelems * sizeof(size_t)); + if (str_arr == NULL || len_arr == NULL) { + TSRMLS_FETCH(); + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory"); + RETURN_FALSE; } + delim_str = Z_STRVAL_P(delim); + delim_len = Z_STRLEN_P(delim);
+ /* the first pass: calculate size of memory to allocate */
+ i = 0;
+ result_len = 0;
zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(arr), &pos);
while (zend_hash_get_current_data_ex(Z_ARRVAL_P(arr),
(void **) &tmp,
&pos) == SUCCESS) {
SEPARATE_ZVAL(tmp);
convert_to_string(*tmp);
-
- smart_str_appendl(&implstr, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp));
- if (++i != numelems) {
- smart_str_appendl(&implstr, Z_STRVAL_P(delim), Z_STRLEN_P(delim));
- }
+ str_arr[i] = Z_STRVAL_PP(tmp); /* pointer to string */
+ len_arr[i] = Z_STRLEN_PP(tmp); /* length of the string */
+ result_len += len_arr[i];
+ i++;
+ if (delim_len && i < numelems) result_len += delim_len;
zend_hash_move_forward_ex(Z_ARRVAL_P(arr), &pos);
}
- smart_str_0(&implstr);
-
- RETURN_STRINGL(implstr.c, implstr.len, 0);
+ if (result_len == 0) {
+ efree(len_arr);
+ efree(str_arr);
+ RETURN_EMPTY_STRING();
+ }
+ /* allocate memory */
+ ptr = result_str = emalloc(result_len + 1);
+ if (ptr == NULL) {
+ efree(len_arr);
+ efree(str_arr);
+ TSRMLS_FETCH();
+ php_error_docref(NULL TSRMLS_CC, E_ERROR, "Out of memory");
+ RETURN_FALSE;
+ }
+ /* the second pass: implode array into allocated memory */
+ for (i = 0; i < numelems - 1; i++) {
+ if (tmp_len = len_arr[i]) {
+ memcpy(ptr, str_arr[i], tmp_len);
+ ptr += tmp_len;
+ }
+ if (delim_len) {
+ memcpy(ptr, delim_str, delim_len);
+ ptr += delim_len;
+ }
+ }
+ /* copy the last element of the array */
+ if (tmp_len = len_arr[i]) {
+ memcpy(ptr, str_arr[i], tmp_len);
+ ptr += tmp_len;
+ }
+ *ptr = '\0';
+
+ efree(len_arr);
+ efree(str_arr);
+ RETURN_STRINGL(result_str, result_len, 0);
}
/* }}} */
=========cut=========
-- Using Opera's revolutionary e-mail client: http://www.opera.com/m2/
-- PHP Internals - PHP Runtime Development Mailing List To unsubscribe, visit: http://www.php.net/unsub.php