Charles-François Natali added the comment: wget trace:
Here a 404 is treated correctly: """ socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 connect(5, {sa_family=AF_INET, sin_port=htons(4242), sin_addr=inet_addr("127.0.0.1")}, 16) = 0 select(6, NULL, [5], NULL, {900, 0}) = 1 (out [5], left {900, 0}) write(5, "GET /MISSING-8276 HTTP/1.1\r\nUser"..., 126) = 126 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {899, 999000}) recvfrom(5, "HTTP/1.0 404 Not Found\r\n", 511, MSG_PEEK, NULL, NULL) = 24 read(5, "HTTP/1.0 404 Not Found\r\n", 24) = 24 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Server: BaseHTTP/0.3 Python/2.4."..., 487, MSG_PEEK, NULL, NULL) = 35 read(5, "Server: BaseHTTP/0.3 Python/2.4."..., 35) = 35 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Date: Tue, 09 Oct 2012 22:09:59 "..., 452, MSG_PEEK, NULL, NULL) = 37 read(5, "Date: Tue, 09 Oct 2012 22:09:59 "..., 37) = 37 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Content-Type: text/html\r\n", 415, MSG_PEEK, NULL, NULL) = 25 read(5, "Content-Type: text/html\r\n", 25) = 25 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Connection: close\r\n", 390, MSG_PEEK, NULL, NULL) = 19 read(5, "Connection: close\r\n", 19) = 19 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "\r\n", 371, MSG_PEEK, NULL, NULL) = 2 read(5, "\r\n", 2) = 2 select(6, [5], NULL, NULL, {0, 1}) = 1 (in [5], left {0, 1}) close(5) = 0 """ And here a 404 is treated incorrectly: """ socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 connect(5, {sa_family=AF_INET, sin_port=htons(4242), sin_addr=inet_addr("127.0.0.1")}, 16) = 0 select(6, NULL, [5], NULL, {900, 0}) = 1 (out [5], left {900, 0}) write(5, "GET /MISSING-8153 HTTP/1.1\r\nUser"..., 126) = 126 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {899, 998000}) recvfrom(5, "HTTP/1.0 404 Not Found\r\n", 511, MSG_PEEK, NULL, NULL) = 24 read(5, "HTTP/1.0 404 Not Found\r\n", 24) = 24 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Server: BaseHTTP/0.3 Python/2.4."..., 487, MSG_PEEK, NULL, NULL) = 35 read(5, "Server: BaseHTTP/0.3 Python/2.4."..., 35) = 35 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Date: Tue, 09 Oct 2012 22:10:01 "..., 452, MSG_PEEK, NULL, NULL) = 37 read(5, "Date: Tue, 09 Oct 2012 22:10:01 "..., 37) = 37 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Content-Type: text/html\r\n", 415, MSG_PEEK, NULL, NULL) = 25 read(5, "Content-Type: text/html\r\n", 25) = 25 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "Connection: close\r\n", 390, MSG_PEEK, NULL, NULL) = 19 read(5, "Connection: close\r\n", 19) = 19 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "\r\n", 371, MSG_PEEK, NULL, NULL) = 2 read(5, "\r\n", 2) = 2 HERE=> select(6, [5], NULL, NULL, {0, 1}) = 0 (Timeout) select(6, NULL, [5], NULL, {900, 0}) = 1 (out [5], left {900, 0}) write(5, "GET /FOUND-8154 HTTP/1.1\r\nUser-A"..., 124) = 124 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) recvfrom(5, "<head>\n<title>Error response</ti"..., 511, MSG_PEEK, NULL, NULL) = 190 stat("FOUND-8154", 0x7fffcc530d60) = -1 ENOENT (No such file or directory) open("FOUND-8154", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7 clock_gettime(CLOCK_MONOTONIC, {130079, 24215187}) = 0 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) read(5, "<head>\n<title>Error response</ti"..., 8192) = 190 clock_gettime(CLOCK_MONOTONIC, {130079, 24258187}) = 0 fstat(7, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b0004bbc000 write(7, "<head>\n<title>Error response</ti"..., 190) = 190 write(4, "\n 0K "..., 777) = 777 select(6, [5], NULL, NULL, {900, 0}) = 1 (in [5], left {900, 0}) read(5, 0x1becc8f0, 8192) = -1 ECONNRESET (Connection reset by peer) close(5) = 0 """ You can se that after 404 and "Connection: close" is returned, wget calls select() with a 1us timeout, to check if the socket has been closed by the other end. That's /src/connect.c:test_socket_open() in wget source code. This is used by persistent_available_p() in src/http.c, with this comment: """ Finally, check whether the connection is still open. This is important because most servers implement liberal (short) timeout on persistent connections. Wget can of course always reconnect if the connection doesn't work out, but it's nicer to know in advance. """ So it deliberately reuses the same connection (so-called persistent connection) if the http server doesn't close it right away. This explains probably why you see this mainly when many threads are in use. I'm not an http expert, so I'm not sure whether it's legal... ---------- _______________________________________ Python tracker <rep...@bugs.python.org> <http://bugs.python.org/issue15991> _______________________________________ _______________________________________________ Python-bugs-list mailing list Unsubscribe: http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com