Hello curl team! I've found that curl_multi slows down quadratically with the number of running requests.
For example, with 10,000 concurrent requests and each request taking 10 seconds for the server to respond, curl should be able to complete 1,000 requests per second. Instead, curl_multi_perform() spins at 100% CPU for several seconds at a time, making almost no forward progress.

Profiling shows that curl_multi_perform() spends all of its time in Curl_multi_process_pending_handles(). This function is called every time a request completes, and it iterates over every running request. I can completely eliminate the performance problem by commenting out the body of Curl_multi_process_pending_handles(). It appears this code is only needed when CURLMOPT_MAX_TOTAL_CONNECTIONS is set.

I've attached a minimal demonstration of the problem (two source files):

  mock_http_server.c (60 lines): creates a mock HTTP server on port 8080
    with an average 10 second request delay (uses libevent)

  test_curl_throughput.c (99 lines): performs requests using curl_multi,
    with 10,000 handles running concurrently

To run the demonstration:

  gcc mock_http_server.c -o mock_http_server -levent
  gcc test_curl_throughput.c -o test_curl_throughput -lcurl
  ulimit -n 100000                              # requires root
  ./mock_http_server | ./test_curl_throughput   # the pipe runs them concurrently

Would it make sense to store the pending handles in a separate linked list, so that completing a request doesn't have to iterate through every easy_handle? A rough sketch of what I mean follows below.

Thanks!

David
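Here is a rough sketch of the idea. The struct and function names are made up purely for illustration (this is not curl's actual internal API); the point is just that finishing a transfer only walks the short list of handles that are actually blocked waiting for a connection, instead of every easy handle added to the multi:

#include <stdio.h>

struct easy {               /* stand-in for an easy handle */
  int id;
  struct easy *all_next;    /* link in the multi's list of every handle */
  struct easy *pend_next;   /* link in the separate "pending" list */
};

struct multi {              /* stand-in for the multi handle */
  struct easy *all;         /* every added handle (possibly 10,000+) */
  struct easy *pending;     /* only handles waiting for a connection slot */
};

/* Called when a handle cannot get a connection yet: O(1) prepend. */
static void mark_pending(struct multi *m, struct easy *e)
{
  e->pend_next = m->pending;
  m->pending = e;
}

/* Called when a transfer finishes and a connection slot frees up.
 * Walks only the (usually short or empty) pending list instead of
 * every handle owned by the multi. */
static void process_pending(struct multi *m)
{
  struct easy *e = m->pending;
  m->pending = NULL;                        /* detach the whole list */
  while(e) {
    struct easy *next = e->pend_next;
    e->pend_next = NULL;
    printf("retrying handle %d\n", e->id);  /* kick its state machine */
    e = next;
  }
}

int main(void)
{
  struct multi m = { NULL, NULL };
  struct easy a = { 1, NULL, NULL };
  struct easy b = { 2, NULL, NULL };

  process_pending(&m);   /* pending list empty: no work, no matter how
                            many handles the multi owns */

  mark_pending(&m, &a);
  mark_pending(&m, &b);
  process_pending(&m);   /* visits only a and b */
  return 0;
}

With a layout like this, marking a handle as pending stays O(1), and process_pending() does no work at all in the common case where no connection limit is being hit.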
/* mock_http_server
 *
 * Creates a mock high-concurrency webserver, which
 * simulates time-consuming requests.
 *
 * Every request pauses for between 0 and 20 seconds (average 10 seconds),
 * before returning 200 OK.
 */
#include <assert.h>
#include <stdlib.h>
#include <time.h>

#include <evhttp.h>
#include <event.h>

#define HTTP_SERVER_ADDR "127.0.0.1"
#define HTTP_SERVER_PORT 8080
#define HTTP_DELAY_MS 10000

struct event_base *evb;
struct evhttp *evh;

struct request_info {
  struct evhttp_request *request;
  struct event *timer;
};

void request_finish(int fd, short which, void *arg)
{
  struct request_info *ri = (struct request_info*)arg;
  event_free(ri->timer);
  evhttp_send_reply(ri->request, 200, "OK", NULL);
  free(ri);
}

void request_callback(struct evhttp_request *request, void *arg)
{
  double jitter;
  long delay_ms;
  struct timeval tv;
  struct request_info *ri;

  ri = (struct request_info*)malloc(sizeof(struct request_info));
  ri->request = request;
  ri->timer = evtimer_new(evb, request_finish, (void*)ri);

  /* jitter is between -1.0 and 1.0 */
  jitter = 2.0*((double)rand())/(1.0 + RAND_MAX) - 1.0;
  delay_ms = 1 + (long)((1.0 + jitter)*HTTP_DELAY_MS);
  tv.tv_sec = delay_ms/1000;
  tv.tv_usec = (delay_ms % 1000)*1000;
  evtimer_add(ri->timer, &tv);
}

int main()
{
  int rc;

  srand(time(NULL));

  evb = event_base_new();
  evh = evhttp_new(evb);
  rc = evhttp_bind_socket(evh, HTTP_SERVER_ADDR, HTTP_SERVER_PORT);
  assert(rc == 0);
  evhttp_set_gencb(evh, request_callback, NULL);

  event_base_dispatch(evb);
  return 0;
}
/*
 * Perform HTTP requests as fast as we can, using 10000 handles
 * concurrently in a single curl_multi.
 */
#include <sys/time.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>

#include <curl/curl.h>

#define HTTP_HOST "127.0.0.1"
#define HTTP_PORT 8080
#define CONCURRENCY 10000

CURLM *multi_handle = NULL;
volatile int completed = 0;
struct curl_slist *curl_headers = NULL;

long get_time_ms()
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return tv.tv_sec*1000 + tv.tv_usec/1000;
}

void launch_request()
{
  char url[256];
  CURL *handle;

  sprintf(url, "http://%s:%d/", HTTP_HOST, HTTP_PORT);

  handle = curl_easy_init();
  curl_easy_setopt(handle, CURLOPT_NOSIGNAL, 1L);
  curl_easy_setopt(handle, CURLOPT_URL, url);
  curl_easy_setopt(handle, CURLOPT_NOBODY, 1L);
  curl_easy_setopt(handle, CURLOPT_HTTPHEADER, curl_headers);
  curl_multi_add_handle(multi_handle, handle);
}

void finish_request(CURL *handle, CURLcode result)
{
  long http_code;

  assert(result == CURLE_OK);
  curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &http_code);
  assert(http_code == 200);

  curl_multi_remove_handle(multi_handle, handle);
  curl_easy_cleanup(handle);
  ++completed;
}

void *status_thread(void *arg)
{
  /* Print out a status report every 2 seconds. */
  int last_completed = 0;
  long last_time = 0;
  for (;;) {
    int completed_now = completed;
    long time_now = get_time_ms();
    long requests_per_sec =
      (1000*(completed_now - last_completed))/(time_now - last_time);
    printf("Completed requests: %d \t Requests/sec: %ld\n",
           completed_now, requests_per_sec);
    last_completed = completed_now;
    last_time = time_now;
    sleep(2);
  }
}

int main()
{
  int i;
  pthread_t t;

  /* Sleep to let the mock_http_server startup. */
  sleep(1);

  pthread_create(&t, NULL, status_thread, NULL);

  multi_handle = curl_multi_init();
  curl_headers = curl_slist_append(curl_headers, "Expect:");

  for (i = 0; i < CONCURRENCY; ++i) {
    launch_request();
  }

  for (;;) {
    CURLMcode cc;
    CURLMsg *msg;
    int msgs_left;
    int rc;

    do {
      int unused;
      long s, e;
      s = get_time_ms();
      rc = curl_multi_perform(multi_handle, &unused);
      e = get_time_ms();
      printf("curl_multi_perform took %ld ms\n", e - s);
    } while (rc == CURLM_CALL_MULTI_PERFORM);

    while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
      assert(msg->msg == CURLMSG_DONE);
      /* Launch a new request every time we finish a request,
       * to keep the number of running requests equal to CONCURRENCY. */
      launch_request();
      finish_request(msg->easy_handle, msg->data.result);
    }

    cc = curl_multi_wait(multi_handle, NULL, 0, 1000000, NULL);
    assert(cc == CURLM_OK);
  }

  /* Unreachable */
}