It seems that a lot of CPU resources are spent when trying to get an exclusive 
lock on a file from multiple processes concurrently. By multiple I mean hundreds.
It seems that there's an initial cost to the fcntl() call. Each process that tries 
to lock the file consumes some amount of CPU and then cools down.
However, each repeated fcntl() call consumes the same amount of resources 
again. It seems as if entering the "wait queue" is expensive.

Environment:
#uname -a                        
FreeBSD test.com 11.1-STABLE FreeBSD 11.1-STABLE #0 r322650: Thu Aug 31 
19:49:49 EEST 2017     r...@test.com:/usr/obj/usr/src/sys/SERVER  amd64

Test case:

test.c:

#include <sys/fcntl.h>
#include <sys/time.h>
#include <sys/wait.h>

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int child_count = 0;

static void schild_handler(int sig)
{
    --child_count;
}

// Intentionally empty SIGALRM handler: it exists only so that the signal is
// caught instead of being handled by the default action.
//
// sig is the delivered signal number; it is not used.
static void alarm_handler(int sig)
{
    (void)sig;
}

// Acquire an exclusive (write) lock on the first byte of the file referred to
// by fd, blocking until the lock is granted.
//
// A one-second alarm is armed before every attempt. When SIGALRM interrupts
// the blocked fcntl() the call fails with EINTR and is issued again, so a
// waiting process re-enters the lock wait queue roughly once per second.
// This relies on the caller having installed a SIGALRM handler without
// SA_RESTART.
//
// Any error other than EINTR cannot be fixed by retrying; it is reported with
// perror() and the function returns WITHOUT holding the lock instead of
// spinning forever. The alarm is always disarmed before returning.
void lock_write(int fd)
{
    // Designated initializer zeroes the members that are not set explicitly.
    struct flock fl = {
        .l_type   = F_WRLCK,
        .l_whence = SEEK_SET,
        .l_start  = 0,
        .l_len    = 1,
    };

    for (;;) {
        // Simulate interruption with alarm to re-enter the wait queue.
        alarm(1);

        if (fcntl(fd, F_SETLKW, &fl) >= 0) {
            break;
        }

        if (errno != EINTR) {
            // e.g. EBADF, ENOLCK, EDEADLK: retrying would only busy-loop.
            perror("fcntl(F_SETLKW)");
            break;
        }
    }

    // Disarm the timer so a stray SIGALRM cannot cut short whatever the
    // caller does next (such as a sleep()).
    alarm(0);
}

// Reproducer entry point.
//
// Opens (creating if necessary) the lock file named by argv[1], forks 300
// children that each take an exclusive lock on it via lock_write(), hold it
// for one second and exit, and prints the number of outstanding children once
// per second until none are left.
//
// Returns 0 on success and 1 on a usage error or when the lock file or the
// signal handlers cannot be set up.
int main(int argc, char ** argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <lockfile>\n", argc > 0 ? argv[0] : "test");
        return 1;
    }

    // SIGCHLD: SA_RESTART keeps the restart semantics signal() provides on
    // BSD-style systems; SA_NOCLDSTOP avoids notifications for children that
    // are merely stopped, which must not be counted as exits.
    struct sigaction chld_action;

    memset(&chld_action, 0, sizeof chld_action);
    chld_action.sa_handler = schild_handler;
    chld_action.sa_flags = SA_RESTART | SA_NOCLDSTOP;
    sigemptyset(&chld_action.sa_mask);
    if (sigaction(SIGCHLD, &chld_action, NULL) < 0) {
        perror("sigaction(SIGCHLD)");
        return 1;
    }

    // SIGALRM: sa_flags is deliberately left at 0 (no SA_RESTART) so that the
    // alarm makes a blocked fcntl() in lock_write() fail with EINTR.
    struct sigaction sig_action;

    memset(&sig_action, 0, sizeof sig_action);
    sig_action.sa_handler = alarm_handler;
    sigemptyset(&sig_action.sa_mask);
    if (sigaction(SIGALRM, &sig_action, NULL) < 0) {
        perror("sigaction(SIGALRM)");
        return 1;
    }

    int fd = open(argv[1], O_CREAT|O_RDWR, 0777);

    if (fd < 0) {
        perror("open");
        return 1;
    }

    for (int i = 0; i < 300; ++i) {
        pid_t child_pid = fork();

        if (child_pid < 0) {
            // No child was created: do not count it, otherwise the wait loop
            // below would never terminate. Stop spawning and wait for the
            // children that were started.
            perror("fork");
            break;
        }

        if (child_pid == 0) {
            // Lock the descriptor.
            lock_write(fd);

            // Simulate some work.
            sleep(1);

            // Leave without running the parent's exit-time processing.
            _exit(0);
        }

        ++child_count;
    }

    // The parent never locks the file itself; only the children use fd.
    close(fd);

    do {
        printf("\rchild count: %5d\n", child_count);
        sleep(1);
    } while (child_count > 0);

    return 0;
}

Commands:

# cd /tmp
# ~~~~~ Create test.c
# clang -o test test.c
# ./test 11111


Note that on Linux, even if 1000 children are spawned instead of 300, none of 
them ever appears in top.

This is a huge problem, because our current software uses lock files for 
synchronization. At times when a lot of processes of said software are spawned 
(prime time), the system becomes totally unresponsive, with a load average (LA) 
of over 1000.
_______________________________________________
freebsd-stable@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-stable
To unsubscribe, send any mail to "freebsd-stable-unsubscr...@freebsd.org"

Reply via email to