Fujii Masao wrote: > Hi, > > When the replication connection is closed unexpectedly, walsender might emit > the following unfit messages. IOW, the loss of the connection might be wrongly > regarded as an arrival of invalid message by the walsender. This looks messy. > We should get rid of that unfit FATAL message, emit a COMMERROR message and > just call proc_exit() when the loss of the connection is found? > >> [2460]: LOG: could not receive data from client: No connection could be >> made because the target machine actively refused it. >> [2460]: FATAL: invalid standby closing message type 4 >> [2460]: LOG: could not send data to client: No connection could be made >> because the target machine actively refused it. > > Also the walsender wrongly tries to send the FATAL message to the standby even > though the connection has already been closed, and then gets the following LOG > message after the FATAL one. This FATAL message is suitable, but output of the > LOG message looks messy, too. We should use COMMERROR instead of FATAL and > then > just call proc_exit() in order to prevent a message from being sent? > >> [12586] FATAL: unexpected EOF on standby connection >> [12586] LOG: could not send data to client: Broken pipe > > The attached patch fixes those unfit messages.
Actually the pg_getbyte_if_available() function is a bit confused. The comment above it claims that it returns 0 if no data is available without blocking, but it actually returns -1 with errno==EWOULDBLOCK. That stems from confusion in secure_read(); with SSL it returns 0 if it would block, but without SSL it returns -1 and EWOULDBLOCK. We should fix that so that secure_read() behaves consistently and so that pq_getbyte_if_available() behaves like e.g pq_getbytes(). Patch attached for that. pq_getbyte_if_available() now reports any errors with COMMERROR level, and returns EOF if the connection is closed cleanly. If no data is available without blocking it now really returns 0 as the comment said. Walsender reports any unexpected EOF to the log at COMMERROR level, similar to what normal backends do. -- Heikki Linnakangas EnterpriseDB http://www.enterprisedb.com
? aaa.patch ? postgres ? access/common/.deps ? access/gin/.deps ? access/gist/.deps ? access/hash/.deps ? access/heap/.deps ? access/index/.deps ? access/nbtree/.deps ? access/transam/.deps ? bootstrap/.deps ? catalog/.deps ? commands/.deps ? executor/.deps ? foreign/.deps ? foreign/dummy/.deps ? foreign/postgresql/.deps ? lib/.deps ? libpq/.deps ? main/.deps ? nodes/.deps ? optimizer/geqo/.deps ? optimizer/path/.deps ? optimizer/plan/.deps ? optimizer/prep/.deps ? optimizer/util/.deps ? parser/.deps ? po/af.mo ? po/cs.mo ? po/hr.mo ? po/hu.mo ? po/it.mo ? po/ko.mo ? po/nb.mo ? po/nl.mo ? po/pl.mo ? po/ro.mo ? po/ru.mo ? po/sk.mo ? po/sl.mo ? po/sv.mo ? po/zh_CN.mo ? po/zh_TW.mo ? port/.deps ? postmaster/.deps ? regex/.deps ? replication/.deps ? replication/libpqwalreceiver/.deps ? rewrite/.deps ? snowball/.deps ? snowball/snowball_create.sql ? storage/buffer/.deps ? storage/file/.deps ? storage/freespace/.deps ? storage/ipc/.deps ? storage/large_object/.deps ? storage/lmgr/.deps ? storage/page/.deps ? storage/smgr/.deps ? tcop/.deps ? tsearch/.deps ? utils/.deps ? utils/probes.h ? utils/adt/.deps ? utils/cache/.deps ? utils/error/.deps ? utils/fmgr/.deps ? utils/hash/.deps ? utils/init/.deps ? utils/mb/.deps ? utils/mb/Unicode/BIG5.TXT ? utils/mb/Unicode/CP950.TXT ? utils/mb/conversion_procs/conversion_create.sql ? utils/mb/conversion_procs/ascii_and_mic/.deps ? utils/mb/conversion_procs/cyrillic_and_mic/.deps ? utils/mb/conversion_procs/euc2004_sjis2004/.deps ? utils/mb/conversion_procs/euc_cn_and_mic/.deps ? utils/mb/conversion_procs/euc_jis_2004_and_shift_jis_2004/.deps ? utils/mb/conversion_procs/euc_jp_and_sjis/.deps ? utils/mb/conversion_procs/euc_kr_and_mic/.deps ? utils/mb/conversion_procs/euc_tw_and_big5/.deps ? utils/mb/conversion_procs/latin2_and_win1250/.deps ? utils/mb/conversion_procs/latin_and_mic/.deps ? utils/mb/conversion_procs/utf8_and_ascii/.deps ? utils/mb/conversion_procs/utf8_and_big5/.deps ? utils/mb/conversion_procs/utf8_and_cyrillic/.deps ? utils/mb/conversion_procs/utf8_and_euc2004/.deps ? utils/mb/conversion_procs/utf8_and_euc_cn/.deps ? utils/mb/conversion_procs/utf8_and_euc_jis_2004/.deps ? utils/mb/conversion_procs/utf8_and_euc_jp/.deps ? utils/mb/conversion_procs/utf8_and_euc_kr/.deps ? utils/mb/conversion_procs/utf8_and_euc_tw/.deps ? utils/mb/conversion_procs/utf8_and_gb18030/.deps ? utils/mb/conversion_procs/utf8_and_gbk/.deps ? utils/mb/conversion_procs/utf8_and_iso8859/.deps ? utils/mb/conversion_procs/utf8_and_iso8859_1/.deps ? utils/mb/conversion_procs/utf8_and_johab/.deps ? utils/mb/conversion_procs/utf8_and_shift_jis_2004/.deps ? utils/mb/conversion_procs/utf8_and_sjis/.deps ? utils/mb/conversion_procs/utf8_and_sjis2004/.deps ? utils/mb/conversion_procs/utf8_and_uhc/.deps ? utils/mb/conversion_procs/utf8_and_win/.deps ? utils/misc/.deps ? utils/mmgr/.deps ? utils/resowner/.deps ? utils/sort/.deps ? utils/time/.deps Index: libpq/be-secure.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/libpq/be-secure.c,v retrieving revision 1.96 diff -c -r1.96 be-secure.c *** libpq/be-secure.c 15 Jan 2010 09:19:02 -0000 1.96 --- libpq/be-secure.c 18 Feb 2010 09:42:30 -0000 *************** *** 256,262 **** case SSL_ERROR_WANT_READ: case SSL_ERROR_WANT_WRITE: if (port->noblock) ! return 0; #ifdef WIN32 pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl), (err == SSL_ERROR_WANT_READ) ? --- 256,266 ---- case SSL_ERROR_WANT_READ: case SSL_ERROR_WANT_WRITE: if (port->noblock) ! { ! errno = EWOULDBLOCK; ! n = -1; ! break; ! } #ifdef WIN32 pgwin32_waitforsinglesocket(SSL_get_fd(port->ssl), (err == SSL_ERROR_WANT_READ) ? Index: libpq/pqcomm.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/libpq/pqcomm.c,v retrieving revision 1.203 diff -c -r1.203 pqcomm.c *** libpq/pqcomm.c 16 Feb 2010 19:26:02 -0000 1.203 --- libpq/pqcomm.c 18 Feb 2010 09:42:30 -0000 *************** *** 822,828 **** * if available * * The received byte is stored in *c. Returns 1 if a byte was read, 0 if ! * if no data was available, or EOF. * -------------------------------- */ int --- 822,828 ---- * if available * * The received byte is stored in *c. Returns 1 if a byte was read, 0 if ! * if no data was available, or EOF if trouble. * -------------------------------- */ int *************** *** 848,853 **** --- 848,879 ---- PG_TRY(); { r = secure_read(MyProcPort, c, 1); + if (r < 0) + { + /* + * Ok if would block or interrupted (though it really shouldn't + * happen with a non-blocking operation). + */ + if (errno == EAGAIN || EWOULDBLOCK || errno == EINTR) + r = 0; + else + { + /* + * Careful: an ereport() that tries to write to the client would + * cause recursion to here, leading to stack overflow and core + * dump! This message must go *only* to the postmaster log. + */ + ereport(COMMERROR, + (errcode_for_socket_access(), + errmsg("could not receive data from client: %m"))); + return EOF; + } + } + else if (r == 0) + { + /* EOF detected */ + r = EOF; + } } PG_CATCH(); { Index: replication/walsender.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/replication/walsender.c,v retrieving revision 1.6 diff -c -r1.6 walsender.c *** replication/walsender.c 17 Feb 2010 04:19:39 -0000 1.6 --- replication/walsender.c 18 Feb 2010 09:42:30 -0000 *************** *** 286,300 **** break; } ! /* 'X' means that the standby is closing the connection */ case 'X': proc_exit(0); - case EOF: - ereport(ERROR, - (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("unexpected EOF on standby connection"))); - default: ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), --- 286,304 ---- break; } ! /* ! * 'X' means that the standby is closing the connection. EOF ! * means unexpected loss of standby connection. Either way, ! * perform normal shutdown. ! */ ! case EOF: ! ereport(COMMERROR, ! (errcode(ERRCODE_PROTOCOL_VIOLATION), ! errmsg("unexpected EOF on standby connection"))); ! /* fall through */ case 'X': proc_exit(0); default: ereport(FATAL, (errcode(ERRCODE_PROTOCOL_VIOLATION), *************** *** 315,350 **** r = pq_getbyte_if_available(&firstchar); if (r < 0) { ! /* no data available */ ! if (errno == EAGAIN || errno == EWOULDBLOCK) ! return; ! ! /* ! * Ok if interrupted, though it shouldn't really happen with ! * a non-blocking operation. ! */ ! if (errno == EINTR) ! return; ! ereport(COMMERROR, ! (errcode_for_socket_access(), ! errmsg("could not receive data from client: %m"))); } if (r == 0) { ! /* standby disconnected unexpectedly */ ! ereport(ERROR, ! (errcode(ERRCODE_PROTOCOL_VIOLATION), ! errmsg("unexpected EOF on standby connection"))); } /* Handle the very limited subset of commands expected in this phase */ switch (firstchar) { /* ! * 'X' means that the standby is closing down the socket. EOF means ! * unexpected loss of standby connection. Either way, perform normal ! * shutdown. */ case 'X': proc_exit(0); --- 319,341 ---- r = pq_getbyte_if_available(&firstchar); if (r < 0) { ! /* unexpected error */ ereport(COMMERROR, ! (errcode(ERRCODE_PROTOCOL_VIOLATION), ! errmsg("unexpected EOF on standby connection"))); ! proc_exit(0); } if (r == 0) { ! /* no data available without blocking */ ! return; } /* Handle the very limited subset of commands expected in this phase */ switch (firstchar) { /* ! * 'X' means that the standby is closing down the socket. */ case 'X': proc_exit(0);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers