I have detected an issue with pthread_join when the target thread is blocked in the TCP "accept" function.
If a thread is blocked in an accept TCP call and the parent calls "pthread_cancel" followed by "pthread_join", then in Cygwin the "pthread_join" blocks until "accept" returns (which may never happen), so the program hangs forever. In Linux and Solaris it works fine (see below). The problem occurs in one of the tools of the Varnish Cache program. I have extracted part of the code and created a test program. The program does the following: 1. calls socket(AF_INET, SOCK_STREAM, 0), bind(sd, (struct sockaddr *) &saddr, sizeof(saddr)) and listen(sock, 1) 2. executes rc = pthread_create(&thr, NULL, server_thread, (void *)t); 3. server_thread => executes "accept" and blocks until somebody connects to the port. 4. server_thread => if it receives an incoming connection, closes the connection and exits. 5. main thread => sleeps x seconds (program argument) 6. main thread => executes pthread_cancel and pthread_join 7. main thread => closes the socket I have run tests on Linux 2.6.16.60, Solaris 10, Cygwin 1.7.7 and Cygwin 1.7.8s(0.235/5/3) 20110114. * In Linux, when "pthread_cancel"+"pthread_join" are executed while the thread is blocked in the accept call, the thread is destroyed immediately, "pthread_join" returns 0, and the thread's return value is "0xffffffffffffffff". * In Solaris, when "pthread_cancel"+"pthread_join" are executed while the thread is blocked in the accept call, accept is aborted and returns an error, but the rest of the thread executes normally, and pthread_join returns when the thread returns.
* In Cygwin (both versions), when "pthread_cancel"+"pthread_join" are executed while the thread is blocked in the accept call, pthread_join also blocks forever (or until accept is unblocked). ==== SOLARIS ==== $ /tmp/thread_accept_test 10 Start main Opening socket on 0.0.0.0 61002 Main: Creating thread Start sleep Starting server thread, executing "accept" End sleep pthread_cancel Waiting for server Accepted failed: Error 0 Ending thread Server returned "NULL" End main $ ==== LINUX ==== $ /tmp/thread_accept_test 10 Start main Opening socket on 0.0.0.0 50636 Main: Creating thread Start sleep Starting server thread, executing "accept" End sleep pthread_cancel Waiting for server Server returned "0xffffffffffffffff" End main $ ==== CYGWIN ==== $ /tmp/thread_accept_test.exe 10 Start main Opening socket on 0.0.0.0 3940 Main: Creating thread Starting server thread, executing "accept" Start sleep End sleep pthread_cancel Waiting for server <it never ends> After searching for solutions on Google, I found a workaround for Cygwin: if I send a signal to the thread, it unblocks the "accept" call: (void)pthread_kill(thr,SIGUSR1);
#include <errno.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <assert.h> #include <string.h> #include <netdb.h> #include <poll.h> int seconds; int sock; /********************************************************************** * Listen to port */ int listen_port() { struct addrinfo hints, *res0, *res; int i, ret; int sd, val; sd = socket(AF_INET, SOCK_STREAM, 0); if (sd < 0) { perror("socket()"); return (-1); } val = 1; if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val) != 0) { perror("setsockopt(SO_REUSEADDR, 1)"); (void)close(sd); return (-1); } struct sockaddr_in saddr; bzero((char *) &saddr, sizeof(saddr)); saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = htonl(INADDR_ANY); saddr.sin_port = htons(0); if (bind(sd, (struct sockaddr *) &saddr, sizeof(saddr)) < 0) { perror("bind()"); (void)close(sd); return (-1); } return (sd); } /********************************************************************** * Server Thread */ static void * server_thread(void *priv) { struct vtclog *vl; int i, j, fd; struct sockaddr_storage addr_s; struct sockaddr *addr; socklen_t l; assert(sock >= 0); printf("Starting server thread, executing \"accept\"\n"); addr = (void*)&addr_s; l = sizeof addr_s; fd = accept(sock, addr, &l); if (fd < 0) { printf( "Accepted failed: %s\n", strerror(errno)); } else { printf( "accepted fd %d\n", fd); //http_process (removed) printf( "shutting fd %d\n", fd); j = shutdown(fd, SHUT_WR); if (!((j) == 0 || errno == ECONNRESET || errno == ENOTCONN)) printf( "Shutdown failed: %s\n", strerror(errno)); i = close(fd); printf( "closed fd %d\n", fd); assert (((i) == 0 || errno == ECONNRESET || errno == ENOTCONN)); } printf("Ending thread\n"); return (NULL); } /********************************************************************** * Main */ int main(int argc, char *argv[]) { if(argc!=2) { printf("%s <seconds>\n",argv[0]); return; } else { seconds=atoi(argv[1]); } printf( "Start main\n"); sock = listen_port(); 
if (sock >= 0) { if (listen(sock, 1) != 0) { perror("listen()"); (void)close(sock); return (-1); } } struct sockaddr_storage addr_s; socklen_t l; l = sizeof addr_s; getsockname(sock, (void *)&addr_s, &l); int i; char aaddr[32]; char aport[32]; i = getnameinfo((const void *)&addr_s, l, aaddr, sizeof aaddr, aport, sizeof aport,NI_NUMERICHOST | NI_NUMERICSERV); if (i) { printf("getnameinfo = %d %s\n", i, gai_strerror(i)); return; } printf( "Opening socket on %s %s\n", aaddr, aport); pthread_t thr; int rc; long t; printf("Main: Creating thread\n", t); rc = pthread_create(&thr, NULL, server_thread, (void *)t); if (rc) { printf("ERROR; return code from pthread_create() is %d\n", rc); exit(-1); } printf("Start sleep\n"); sleep(seconds); printf("End sleep\n"); printf("pthread_cancel\n"); (void)pthread_cancel(thr); //(void)pthread_kill(thr,SIGUSR1); void *res; printf("Waiting for server\n"); rc=pthread_join(thr, &res); if (rc) { printf("ERROR; return code from pthread_join() is %d\n", rc); } if (res != NULL) printf( "Server returned \"%p\"\n",(char *)res); int j; j = close(sock); assert (((j) == 0 || errno == ECONNRESET || errno == ENOTCONN)); printf("End main\n"); return; }
-- Problem reports: http://cygwin.com/problems.html FAQ: http://cygwin.com/faq/ Documentation: http://cygwin.com/docs.html Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple