Hi. My company have servers that run postgres for storing some logs and
serving django web interfaces for management of the server itself. In the
last days some servers stopped serving the web interface, and syslog show
this error:

Jun 23 04:40:19 django-1 postgres[8790]: [3-1] FATAL:  remaining connection
slots are reserved for non-replication superuser connections



So I started looking for the cause:


$ ps auxf
...
postgres  4580  0.7  0.1 305156 19184 ?        S    Jun01 251:46
/usr/local/pgsql/bin/postmaster -D /usr/local/pgsql/data
postgres  4746  0.0  1.6 305608 274656 ?       Ss   Jun01   4:09  \_
postgres: checkpointer process
postgres  4747  0.0  1.2 305288 208708 ?       Ss   Jun01   0:33  \_
postgres: writer process
postgres  4748  0.0  0.0 305288  9300 ?        Ss   Jun01   0:04  \_
postgres: wal writer process
postgres  4750  0.0  0.0 305972  2376 ?        Ss   Jun01   0:00  \_
postgres: autovacuum launcher process
postgres  4752  0.1  0.0  23852  1396 ?        Ss   Jun01  50:54  \_
postgres: stats collector process
postgres 63615  0.0  0.1 307036 22208 ?        Ss   Jun02   0:00  \_
postgres: db db [local] idle
postgres 22476  0.0  0.0 306368  2576 ?        Ss   Jun22   0:00  \_
postgres: db db [local] idle
postgres  4521  0.0  0.0 306512  6408 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4522  0.0  0.0 306512  6412 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4523  0.0  0.0 306512  6416 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4524  0.0  0.0 306512  6412 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4534  0.0  0.0 306512  6152 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4544  0.0  0.0 306512  6420 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4552  0.0  0.0 306512  6408 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4742  0.0  0.0 306512  6400 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4743  0.0  0.0 306512  6400 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4766  0.0  0.0 306512  6408 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4770  0.0  0.0 306512  6144 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4774  0.0  0.0 306512  6396 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4783  0.0  0.0 306512  6400 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4786  0.0  0.0 306512  6376 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4804  0.0  0.0 306512  6376 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4812  0.0  0.0 306512  6376 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4860  0.0  0.0 306512  6356 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4862  0.0  0.0 306516  6672 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4868  0.0  0.0 306516  6408 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4878  0.0  0.0 306516  6684 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4881  0.0  0.0 306516  6164 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4882  0.0  0.0 306516  6168 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4886  0.0  0.0 306500  6524 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4887  0.0  0.0 306500  6272 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4889  0.0  0.0 306500  6272 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4890  0.0  0.0 306500  6276 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  4907  0.0  0.0 306500  6796 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5131  0.0  0.0 306500  6268 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5138  0.0  0.0 306512  6116 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5142  0.0  0.0 306512  6116 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5143  0.0  0.0 306512  6644 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5151  0.0  0.0 306512  6120 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5154  0.0  0.0 306512  6904 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5155  0.0  0.0 306512  6128 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5156  0.0  0.0 306512  6120 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5157  0.0  0.0 306512  6380 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5162  0.0  0.0 306512  6120 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5165  0.0  0.0 306512  6384 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5172  0.0  0.0 306512  6128 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5174  0.0  0.0 306512  6120 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5188  0.0  0.0 306512  6124 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5189  0.0  0.0 306512  6636 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres  5190  0.0  0.0 306512  6104 ?        Ss   00:13   0:00  \_
postgres: db db [local] idle
postgres 26621  0.0  0.0 306512  6116 ?        Ss   12:54   0:00  \_
postgres: db db [local] idle
postgres  9600  0.0  0.0 306512  6120 ?        Ss   13:30   0:00  \_
postgres: db db [local] idle



50 connections in idle status, but looking further:

$ cat /proc/4521/stack
[<ffffffff81b59a8a>] unix_stream_recvmsg+0x2b9/0x633
[<ffffffff81acbf27>] __sock_recvmsg_nosec+0x29/0x2b
[<ffffffff81ace30c>] sock_recvmsg+0x65/0x88
[<ffffffff81ace409>] SYSC_recvfrom+0xda/0x134
[<ffffffff81acf4ef>] SyS_recvfrom+0x9/0xb
[<ffffffff81bd4062>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff


All connections have stucked in this stack.


The postmaster process:


$ cat /proc/4580/stack
[<ffffffff810ec9fc>] poll_schedule_timeout+0x3e/0x61
[<ffffffff810ed32f>] do_select+0x5ea/0x629
[<ffffffff810ed4f7>] core_sys_select+0x189/0x245
[<ffffffff810ed632>] SyS_select+0x7f/0xb5
[<ffffffff81bd4062>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff




Some information about the server:



$ df -lh
Filesystem        Size  Used Avail Use% Mounted on
/dev/mapper/root  437G   13G  403G   3% /
/dev/sda1         2.0G  204M  1.6G  12% /boot
tmpfs             7.9G  224K  7.9G   1% /run
devtmpfs          7.8G     0  7.8G   0% /dev
shm               7.9G     0  7.9G   0% /dev/shm


$ free -m
             total       used       free     shared    buffers     cached
Mem:         16030      13221       2808          0         10       3486
-/+ buffers/cache:       9725       6304
Swap:            0          0          0


$ cat /proc/cpuinfo
...
processor : 3
vendor_id : GenuineIntel
cpu family : 6
model : 58
model name : Intel(R) Xeon(R) CPU E3-1220 V2 @ 3.10GHz
stepping : 9
microcode : 0x15
cpu MHz : 3092.951
cache size : 8192 KB
...


# postgresql.conf #

external_pid_file = '/tmp/postgres.pid'
listen_addresses = ''
port = 5432
max_connections = 50
unix_socket_directories = '/tmp'
unix_socket_permissions = 0777
bonjour = off
authentication_timeout = 10min
ssl = on
ssl_ciphers = 'DEFAULT:!LOW:!EXP:!MD5:@STRENGTH'
ssl_renegotiation_limit = 512MB
ssl_cert_file = 'server.crt'
ssl_key_file = 'server.key'

shared_buffers = 256MB
temp_buffers = 128MB
work_mem = 256MB
maintenance_work_mem = 256MB
max_stack_depth = 1MB
max_files_per_process = 1000
max_locks_per_transaction = 128
effective_cache_size = 512MB

wal_level = minimal
wal_writer_delay = 500ms
checkpoint_segments = 100
enable_indexscan = on
enable_sort = on

log_destination = 'syslog'
syslog_facility = 'LOCAL0'
syslog_ident = 'postgres'
client_min_messages = error
log_min_messages = error
log_min_error_statement = fatal
log_min_duration_statement = -1
log_timezone = 'Brazil/East'
track_activities = on
track_counts = on
track_io_timing = off

autovacuum = on
autovacuum_max_workers = 1
autovacuum_naptime = 300min
autovacuum_vacuum_threshold = 50
autovacuum_analyze_threshold = 50

search_path = 'public'

datestyle = 'iso, mdy'
intervalstyle = 'postgres'
timezone = 'Brazil/East'

lc_messages = 'C'
lc_monetary = 'C'
lc_numeric = 'C'
lc_time = 'C'

default_text_search_config = 'pg_catalog.english'
restart_after_crash = on

# end conf #








*I really don't know what is happening, why postgres hang and didn't close
the connections. This django web interface is used only for management and
viewing logs, most server have two users only.*

*I've sent this email just to know if one of yours have seen this, or
something like this, before.*


*Thank you!*




--
-----

Bruno Hass
(51) 9280-3627

Reply via email to