Hello curl team!

I've found that curl_multi slows down quadratically with the number of
running requests.

For example, with 10,000 concurrent requests, each taking 10 seconds for
the server to respond, curl should be able to complete about 1000 requests
per second (10,000 requests / 10 seconds). Instead, curl_multi_perform()
spins at 100% CPU for several seconds at a time, making almost no forward
progress.

Profiling shows that curl_multi_perform() is spending all its time in
Curl_multi_process_pending_handles(). This function is called every time a
request completes, and it iterates over every running request.

I am able to completely eliminate the performance problem by commenting out
the body of Curl_multi_process_pending_handles(). It appears this code is
only needed when CURLMOPT_MAX_TOTAL_CONNECTIONS is set.

I've attached a minimal demonstration of the problem (two source files).

mock_http_server.c: (60 lines)
  Creates a mock http server (on port 8080) with an average 10 second
request delay (uses libevent)

test_curl_throughput.c: (99 lines)
  Performs requests using curl_multi (with 10,000 handles running
concurrently)

To run the demonstration:

gcc mock_http_server.c -o mock_http_server -levent
gcc test_curl_throughput.c  -o test_curl_throughput -lcurl
ulimit -n 100000   # requires root
./mock_http_server | ./test_curl_throughput   # the pipe is only there to run both programs concurrently

Would it make sense to store the list of pending handles as a separate
linked list, to avoid iterating through every easy_handle?

Thanks!
  David
/* mock_http_server
 *
 * Creates a mock high-concurrency webserver, which
 * simulates time-consuming requests.
 *
 * Every request pauses for between 0 and 20 seconds (average 10 seconds),
 * before returning 200 OK.
 *
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <evhttp.h>
#include <event.h>

/* Address and port the mock server binds to. */
#define HTTP_SERVER_ADDR   "127.0.0.1"
#define HTTP_SERVER_PORT   8080
/* Mean artificial delay per request, in milliseconds; the actual delay is
 * randomized around this value in request_callback(). */
#define HTTP_DELAY_MS      10000

/* Event base created in main(); request_callback() uses it to create the
 * per-request delay timers. */
struct event_base *evb;
/* HTTP server object created in main() on top of evb. */
struct evhttp *evh;

/* Per-request state, allocated in request_callback() and handed to
 * request_finish() as the timer callback argument. */
struct request_info {
    /* The HTTP request that will be answered once the timer fires. */
    struct evhttp_request *request;
    /* The delay timer created for this request. */
    struct event *timer;
};

/*
 * Timer callback: the artificial delay for one request has elapsed, so
 * answer it with "200 OK" and release everything allocated for it.
 *
 * fd, which: supplied by libevent for every event callback; unused here.
 * arg:       the struct request_info allocated in request_callback().
 */
void request_finish(int fd, short which, void *arg) {
    struct request_info *ri = (struct request_info*)arg;
    (void)fd;
    (void)which;
    evhttp_send_reply(ri->request, 200, "OK", NULL);
    /* evtimer_new() heap-allocates the event, so it has to be released with
     * event_free(); merely deleting it would leak one event per request.
     * event_free() also makes the event non-pending first. */
    event_free(ri->timer);
    free(ri);
}

/*
 * Generic HTTP handler: instead of answering right away, arm a one-shot
 * timer with a random delay; request_finish() sends the reply when the
 * timer fires.
 *
 * request: the incoming HTTP request.
 * arg:     user pointer registered with evhttp_set_gencb(); unused.
 *
 * If the per-request state or its timer cannot be set up, the request is
 * answered immediately with a 503 error so it is never left hanging.
 */
void request_callback(struct evhttp_request *request, void *arg) {
    double jitter;
    long delay_ms;
    struct timeval tv;
    struct request_info *ri;

    (void)arg;
    ri = malloc(sizeof *ri);
    if (ri == NULL) {
        evhttp_send_error(request, 503, "Service Unavailable");
        return;
    }
    ri->request = request;
    ri->timer = evtimer_new(evb, request_finish, ri);
    if (ri->timer == NULL) {
        free(ri);
        evhttp_send_error(request, 503, "Service Unavailable");
        return;
    }
    /* jitter is in [-1.0, 1.0), so the delay below ranges from 1 ms up to
     * 2*HTTP_DELAY_MS and averages roughly HTTP_DELAY_MS. */
    jitter = 2.0*((double)rand())/(1.0 + RAND_MAX) - 1.0;
    delay_ms = 1 + (long)((1.0 + jitter)*HTTP_DELAY_MS);
    tv.tv_sec = delay_ms/1000;
    tv.tv_usec = (delay_ms % 1000)*1000;
    if (evtimer_add(ri->timer, &tv) != 0) {
        /* The timer will never fire, so nobody else would clean up. */
        event_free(ri->timer);
        free(ri);
        evhttp_send_error(request, 503, "Service Unavailable");
    }
}
/*
 * Entry point of the mock server: seed the jitter generator, create the
 * event base and HTTP server, bind to HTTP_SERVER_ADDR:HTTP_SERVER_PORT and
 * run the event loop.
 *
 * Returns EXIT_FAILURE with a message on stderr if setup fails (for example
 * when the port is already in use), 0 once the event loop ends.
 */
int main(void) {
    srand((unsigned)time(NULL));
    evb = event_base_new();
    if (evb == NULL) {
        fprintf(stderr, "mock_http_server: event_base_new() failed\n");
        return EXIT_FAILURE;
    }
    evh = evhttp_new(evb);
    if (evh == NULL) {
        fprintf(stderr, "mock_http_server: evhttp_new() failed\n");
        return EXIT_FAILURE;
    }
    /* Checked explicitly rather than with assert(): this is an ordinary
     * runtime failure and must still be caught when NDEBUG is defined. */
    if (evhttp_bind_socket(evh, HTTP_SERVER_ADDR, HTTP_SERVER_PORT) != 0) {
        fprintf(stderr, "mock_http_server: cannot bind to %s:%d\n",
                HTTP_SERVER_ADDR, HTTP_SERVER_PORT);
        return EXIT_FAILURE;
    }
    evhttp_set_gencb(evh, request_callback, NULL);
    event_base_dispatch(evb);
    return 0;
}
/*
 * Perform HTTP requests as fast as we can, using 10000 handles
 * concurrently in a single curl_multi.
 */
#include <sys/time.h>
#include <assert.h>
#include <unistd.h>
#include <pthread.h>
#include <curl/curl.h>


/* Server to hit; matches the address and port of the mock server. */
#define HTTP_HOST       "127.0.0.1"
#define HTTP_PORT       8080
/* Number of easy handles kept in the multi handle at the same time. */
#define CONCURRENCY     10000

/* The single multi handle driving all transfers; created in main(). */
CURLM *multi_handle = NULL;
/* Count of finished requests: incremented in finish_request() and read by
 * status_thread(). NOTE(review): volatile does not synchronize threads;
 * this is presumably tolerated because the value is only used for
 * approximate status output - confirm. */
volatile int completed = 0;
/* Header list shared by every request; populated in main(). */
struct curl_slist *curl_headers = NULL;

/*
 * Return the current wall-clock time from gettimeofday(), expressed in
 * milliseconds.
 */
long get_time_ms() {
    struct timeval now;

    gettimeofday(&now, NULL);
    /* whole seconds scaled up to ms, plus the sub-second part scaled down */
    return now.tv_sec*1000 + now.tv_usec/1000;
}

void launch_request() {
    char url[256];
    CURL *handle;
    sprintf(url, "http://%s:%d/";, HTTP_HOST, HTTP_PORT);
    handle = curl_easy_init();
    curl_easy_setopt(handle, CURLOPT_NOSIGNAL, 1);
    curl_easy_setopt(handle, CURLOPT_URL, url);
    curl_easy_setopt(handle, CURLOPT_NOBODY, 1);
    curl_easy_setopt(handle, CURLOPT_HTTPHEADER, curl_headers);
    curl_multi_add_handle(multi_handle, handle);
}

/*
 * Verify a completed transfer and dispose of its easy handle.
 *
 * handle: the easy handle reported as done by curl_multi_info_read().
 * result: the transfer result taken from the same message.
 *
 * Asserts that the transfer succeeded with HTTP status 200, detaches the
 * handle from the global multi handle, cleans it up and bumps the global
 * 'completed' counter.
 */
void finish_request(CURL *handle, CURLcode result) {
    /* Initialized so a failed getinfo call can never leave an indeterminate
     * value to be compared below. */
    long http_code = 0;
    CURLcode info_rc;
    CURLMcode mc;

    assert(result == CURLE_OK);
    info_rc = curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &http_code);
    assert(info_rc == CURLE_OK);
    assert(http_code == 200);
    /* The handle is removed from the multi handle before it is cleaned up. */
    mc = curl_multi_remove_handle(multi_handle, handle);
    assert(mc == CURLM_OK);
    curl_easy_cleanup(handle);
    ++completed;
    /* silence unused-variable warnings when NDEBUG is defined */
    (void)info_rc;
    (void)mc;
}

/*
 * Thread body: print a status report every 2 seconds, forever.
 *
 * Each report shows the total number of completed requests and the
 * completion rate since the previous report. The first report is measured
 * against time 0, so its rate is not meaningful.
 *
 * arg: unused. The function never returns.
 */
void *status_thread(void *arg) {
    int last_completed = 0;
    long last_time = 0;

    (void)arg;
    for (;;) {
        int completed_now = completed;
        long time_now = get_time_ms();
        long elapsed_ms = time_now - last_time;
        long requests_per_sec = 0;
        /* Guard the division: two samples can land in the same millisecond
         * (sleep() may return early) and the wall clock can step backwards.
         * 1000L keeps the multiplication in long rather than int. */
        if (elapsed_ms > 0) {
            requests_per_sec = (1000L*(completed_now - last_completed))/elapsed_ms;
        }
        printf("Completed requests: %d \t Requests/sec: %ld\n", completed_now, requests_per_sec);
        last_completed = completed_now;
        last_time = time_now;
        sleep(2);
    }
}

int main() {
    int i;
    pthread_t t;
    /* Sleep to let the mock_http_server startup. */
    sleep(1);
    pthread_create(&t, NULL, status_thread, NULL);
    multi_handle = curl_multi_init();
    curl_headers = curl_slist_append(curl_headers, "Expect:");
    for (i = 0; i < CONCURRENCY; ++i) {
        launch_request();
    }
    for (;;) {
        CURLMcode cc;
        CURLMsg *msg;
        int msgs_left;
        int rc;
        do {
            int unused;
            long s, e;
            s = get_time_ms();
            rc = curl_multi_perform(multi_handle, &unused);
            e = get_time_ms();
            printf("curl_multi_perform took %ld ms\n", e - s);
        } while (rc == CURLM_CALL_MULTI_PERFORM);

        while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
            assert(msg->msg == CURLMSG_DONE);
            /* Launch a new request every time we finish a request,
             * to keep the number of running requests equal to CONCURRENCY. */
            launch_request();
            finish_request(msg->easy_handle, msg->data.result);
        }
        cc = curl_multi_wait(multi_handle, NULL, 0, 1000000, NULL);
        assert(cc == CURLM_OK);
    }
    /* Unreachable */
}
-------------------------------------------------------------------
List admin: http://cool.haxx.se/list/listinfo/curl-library
Etiquette:  http://curl.haxx.se/mail/etiquette.html

Reply via email to