We are unfortunately still seeing a lot of these errors once the machine reaches a high number of concurrent users/logins (just below 20k simultaneous IMAP connections on a powerful 24 core machine with 128GB RAM):

2020-01-14T09:18:58.661349+01:00 dovecot: imap-login: Warning: net_connect_unix(imap) succeeded only after retrying - took 140330 us 2020-01-14T09:18:58.854692+01:00 dovecot: imap-login: Error: master(imap): net_connect_unix(imap) failed: Resource temporarily unavailable - http://wiki2.dovecot.org/SocketUnavailable (client-pid=107932, client-id=780262, rip=x, created 500 msecs ago, received 0/4 bytes) 2020-01-14T09:18:58.888228+01:00 dovecot: imap-login: Warning: net_connect_unix(imap) succeeded only after retrying - took 440762 us

The machine is at insignificant load numbers, and the dovecot process is somewhere near 25% CPU usage when the problem occurs. It is not close to saturating its core from what I can tell.

To make sure the issues are not task/fd-limit related, I have set this in /etc/systemd/system/dovecot.service.d/service.conf:

[Service]
LimitNOFILE=infinity
TasksMax=infinity

~# egrep "processes|files" /proc/`pidof dovecot`/limits
Max processes             514051               514051
Max open files            1048576              1048576
~# cat /proc/sys/net/core/somaxconn
65536
~# cat /proc/sys/kernel/pid_max
278528

Dovecot is configured with NFS backed storage, and locally stored passwdfile userdb/passdb are used for authentication.

Backends are (now) behind directors. The directors/proxies are having no issues dealing with the traffic whatsoever.

At the time of failure, the process list looks like this:

~# ps -f --ppid `pidof dovecot` | egrep -v "dovecot/(imap|pop3) "
UID         PID   PPID  C STIME TTY          TIME CMD
274264    44753 138506  0 12:43 ?        00:00:00 [imap]
308665   104852 138506  0 13:01 ?        00:00:00 [imap]
308665   104853 138506  0 13:01 ?        00:00:00 [imap]
dovenull 138508 138506 1 10:31 ? 00:03:00 dovecot/pop3-login [6 pre-login + 36 TLS proxies]
dovenull 138509 138506  0 10:31 ?        00:00:07 dovecot/imap-login
dovecot 138510 138506 0 10:31 ? 00:01:10 dovecot/anvil [20 connections]
root     138511 138506  1 10:31 ?        00:02:14 dovecot/log
dovenull 138512 138506 1 10:31 ? 00:01:48 dovecot/pop3-login [1 pre-login + 15 TLS proxies] dovenull 138513 138506 0 10:31 ? 00:00:08 dovecot/imap-login [redacted TLS proxy] dovenull 138514 138506 0 10:31 ? 00:00:07 dovecot/imap-login [0 pre-login + 3 TLS proxies] dovenull 138515 138506 0 10:31 ? 00:00:10 dovecot/imap-login [redacted TLS proxy] dovenull 138516 138506 0 10:31 ? 00:01:14 dovecot/imap-login [27 pre-login + 12 TLS proxies] dovenull 138517 138506 0 10:31 ? 00:00:31 dovecot/imap-login [2 pre-login + 2 TLS proxies] dovenull 138518 138506 0 10:31 ? 00:01:28 dovecot/imap-login [56 pre-login + 20 TLS proxies] dovenull 138519 138506 0 10:31 ? 00:00:09 dovecot/imap-login [0 pre-login + 4 TLS proxies] dovenull 138520 138506 0 10:31 ? 00:00:06 dovecot/imap-login [redacted TLS proxy] dovenull 138521 138506 0 10:31 ? 00:00:11 dovecot/imap-login [0 pre-login + 3 TLS proxies] dovenull 138522 138506 0 10:31 ? 00:00:16 dovecot/imap-login [2 pre-login + 2 TLS proxies] dovenull 138523 138506 0 10:31 ? 00:00:13 dovecot/imap-login [1 pre-login + 2 TLS proxies] dovenull 138524 138506 0 10:31 ? 00:00:24 dovecot/imap-login [1 pre-login + 3 TLS proxies] dovenull 138525 138506 0 10:31 ? 00:01:13 dovecot/imap-login [36 pre-login + 23 TLS proxies] dovenull 138526 138506 0 10:31 ? 00:00:41 dovecot/imap-login [10 pre-login + 12 TLS proxies] dovenull 138527 138506 0 10:31 ? 00:00:20 dovecot/imap-login [1 pre-login + 7 TLS proxies]
root     138528 138506  2 10:31 ?        00:04:45 dovecot/config
dovecot 138529 138506 1 10:31 ? 00:02:22 dovecot/stats [19389 connections] dovecot 138530 138506 3 10:31 ? 00:05:53 dovecot/auth [137 wait, 0 passdb, 0 userdb] root 148675 138506 0 13:13 ? 00:00:00 dovecot/doveadm-server [redacted]

Other stats:

~# ps -f --ppid `pidof dovecot` | grep "dovecot/imap " | wc -l
19328
~# doveadm process status | grep "^imap " | wc -l
19307
~# ss -ntp "( sport = :143 or sport = :993 )" | grep "\"imap\"" | wc -l
19141
~# ss -ntp "( sport = :143 or sport = :993 )" | grep "\"imap-login\"" | wc -l
333
~# ss -ntp "( sport = :143 or sport = :993 )" | wc -l
19530

~# doveconf -n

# 2.3.9.2 (cf2918cac): /etc/dovecot/dovecot.conf
# OS: Linux 4.9.0-9-amd64 x86_64 Debian 9.11
# Hostname: [redacted]
default_vsz_limit = 768 M
disable_plaintext_auth = no
imap_id_log = *
log_timestamp = "%F %T %z "
login_trusted_networks = [redacted]
mail_fsync = always
mail_location = maildir:~/Maildir
mail_nfs_index = yes
mail_nfs_storage = yes
mmap_disable = yes
namespace inbox {
  inbox = yes
  location =
  mailbox Drafts {
    auto = subscribe
    special_use = \Drafts
  }
  mailbox Sent {
    auto = subscribe
    special_use = \Sent
  }
  mailbox "Sent Messages" {
    auto = no
    special_use = \Sent
  }
  mailbox Spam {
    auto = create
    special_use = \Junk
  }
  mailbox Trash {
    auto = subscribe
    special_use = \Trash
  }
  prefix =
  separator = /
}
passdb {
  args = username_format=%Lu /etc/dovecot/aliases
  default_fields = noauthenticate
  driver = passwd-file
}
passdb {
  args = /etc/dovecot/passwd
  driver = passwd-file
}
protocols = imap pop3
service doveadm {
  inet_listener {
    port = 24245
  }
  inet_listener http {
    port = 8080
  }
}
service imap-login {
  client_limit = 4096
  inet_listener imap {
    port = 143
  }
  inet_listener imaps {
    port = 993
    ssl = yes
  }
  process_limit = 16
  process_min_avail = 16
  service_count = 0
  vsz_limit = 768 M
}
service imap {
  client_limit = 1
  process_limit = 65536
}
service pop3-login {
  client_limit = 4096
  inet_listener pop3 {
    port = 110
  }
  inet_listener pop3s {
    port = 995
    ssl = yes
  }
  process_limit = 8
  process_min_avail = 2
  service_count = 0
}
service pop3 {
  process_limit = 16384
}
service stats {
  client_limit = 65536
}
ssl_cert = <[redacted]
ssl_cipher_list = ALL:!LOW:!SSLv2:!EXP:!aNULL
ssl_dh = # hidden, use -P to show it
ssl_key = # hidden, use -P to show it
userdb {
  args = /etc/dovecot/passwd
  driver = passwd-file
}
verbose_proctitle = yes
protocol imap {
  mail_max_userip_connections = 20
  rawlog_dir = [redacted]
}

Are there any other tunables either in Dovecot or in the kernel that may relate to this issue that we may have missed?

--
Eirik

On 06/01/2020 11:28, Eirik Rye wrote:
Hi,

After upgrading Dovecot from version 2.2.27 to 2.3.9.2, we are sporadically seeing lots of these errors in the error log on many of our servers:

  imap-login: Error: master(imap): net_connect_unix(imap) failed: Resource temporarily unavailable - http://wiki2.dovecot.org/SocketUnavailable

The issue is causing significant delays and/or timeouts on login.

From what I can tell, this happens because the imap-login service is unable to connect to the imap service socket, however I am unable to identify the root cause of the issue.

We have increased the client_limit for the stats service, as described in the upgrade document at (https://wiki2.dovecot.org/Upgrading/2.3). It is set to the same value as the process_limit for imap service.

System CPU usage does not appear to be saturated. The CPU usage sits at around 50% when these errors appear.

We have doubled the number of file descriptors after upgrading from 2.2.27. It was previously set to 16392 (which worked fine):

~# cat /proc/`pidof dovecot`/limits | grep "open files"
Max open files            30000                30000                files

We have tried increasing default_process_limit and default_client_limit from the default 1000 to 3000, but this has no effect.

Current configuration (with irrelevant parts removed):

~# doveconf -n

default_client_limit = 3000 # these were raised after upgrading in attempt to remedy
default_process_limit = 3000
default_vsz_limit = 512 M
[...]
service imap-login {
   client_limit = 1250
   inet_listener imap {
     port = 143
   }
   inet_listener imaps {
     port = 993
     ssl = yes
   }
   process_limit = 16
   process_min_avail = 16
   service_count = 0
   vsz_limit = 512 M
}
service imap {
   client_limit = 1
   process_limit = 20000
}
service stats {
   client_limit = 20000
}
[...]

We appear to be nowhere near the 20000 process_limit set for the imap service. We are also not seeing any warnings being logged with regards to the process_limit.

~# doveadm who -1 | wc -l
8765

~# doveadm process status | grep "^imap " | wc -l
7583

~# ps aux | grep "dovecot/imap" | wc -l
7449

~# doveadm process status | grep -v "^imap "
name       pid   available_count total_count idle_start last_status_update last_kill_sent
stats      19875 12384           582387      0          1578305489   0
pop3-login 19868 1000            0           0          1578061504   0
pop3-login 19867 1000            0           0          1578061503   0
pop3-login 19866 1000            0           0          1578061502   0
pop3-login 19865 1000            0           0          1578061503   0
pop3-login 19864 1000            0           0          1578061503   0
pop3-login 19853 1000            0           0          1578061504   0
pop3-login 19852 1000            0           0          1578061504   0
pop3-login 19851 1000            0           0          1578061504   0
pop3-login 19849 1000            0           0          1578061503   0
pop3-login 19847 1000            0           0          1578061503   0
pop3-login 19846 1000            0           0          1578061503   0
pop3-login 19845 1000            0           0          1578061503   0
pop3-login 19844 1000            0           0          1578061503   0
pop3-login 19843 1000            0           0          1578061503   0
pop3-login 19842 1000            0           0          1578061503   0
pop3-login 19839 1000            0           0          1578061502   0
log        19841 2973            27          0          1578061502   0
imap-login 19873 666             14942       0          1578305488   0
imap-login 19872 523             32792       0          1578305489   0
imap-login 19871 316             62876       0          1578305489   0
imap-login 19870 167             78332       0          1578305489   0
imap-login 19869 118             97989       0          1578305489   0
imap-login 19863 0               184726      0          1578305489   0
imap-login 19862 0               193094      0          1578305489   0
imap-login 19861 0               186800      0          1578305487   0
imap-login 19860 675             17806       0          1578305487   0
imap-login 19859 0               169446      0          1578305489   0
imap-login 19858 0               143517      0          1578305489   0
imap-login 19857 0               119215      0          1578305488   0
imap-login 19856 0               151958      0          1578305489   0
imap-login 19855 483             47036       0          1578305488   0
imap-login 19854 1               185240      0          1578305489   0
imap-login 19840 567             23859       0          1578305485   0
config     19874 2967            124482      0          1578305489   0
auth       19885 2967            124185      0          1578305489   0
anvil      10017 967             7567        0          1578305463   0


Reply via email to