Hi,

some of our customers have discovered a replication issue after
upgrading from 2.3.7.2 to 2.3.8.

When running 2.3.8, several replication connections hang until the defined
timeout. So after some seconds there are $replication_max_conns hanging
connections.
Other replications are running fast and successful.

Also running a doveadm sync tcp:... is working fine for all users.

I can't tell for certain, but I haven't seen the same mailboxes timing out
again and again. So I would assume it's not related to specific mailboxes.

From the logs:

server1:
Oct 16 08:29:25 server1 dovecot[5715]:
dsync-local(userna...@domain.com)<FXnVDW22pl0tGAAA1cwDxA>: Error:
dsync(172.16.0.1): I/O has stalled, no activity for 600 seconds (version
not received)
Oct 16 08:29:25 server1 dovecot[5715]:
dsync-local(userna...@domain.com)<FXnVDW22pl0tGAAA1cwDxA>: Error:
Timeout during state=master_recv_handshake

server2:

Oct 16 08:29:25 server2 dovecot[8113]: doveadm: Error: read(server1)
failed: EOF (last sent=handshake, last recv=handshake)

There aren't any additional logs regarding the replication.

I have tried increasing vsz_limit or reducing replication_max_conns.
Nothing changed.

--

Both customers have 10k+ users. Currently I couldn't reproduce this on
smaller test systems.

Both installations were downgraded to 2.3.7.2 to fix the issue for now.

--

I've attached a tcpdump showing that the client stops
sending any data after the mailbox_guid table headers.



Any idea what could be wrong here or how to debug this issue?

Thanks.

Carsten Rosenberg
root@server1:~# doveconf -n
# 2.3.7.2 (3c910f64b): /etc/dovecot/dovecot.conf
# Pigeonhole version 0.5.7.2 (7372921a)
# OS: Linux 4.15.0-65-generic x86_64 Ubuntu 18.04.3 LTS
# Hostname: server1
auth_cache_negative_ttl = 0
auth_cache_size = 10 M
auth_master_user_separator = *
auth_worker_max_count = 1024
base_dir = /var/run/dovecot/
default_client_limit = 10000
default_vsz_limit = 1 G
doveadm_password = # hidden, use -P to show it
doveadm_port = 12345
first_valid_gid = 10000
first_valid_uid = 10000
imap_max_line_length = 640 k
last_valid_gid = 10000
last_valid_uid = 10000
mail_gid = 10000
mail_location = mdbox:%h/mdbox
mail_plugins = " mail_log notify zlib notify replication"
mail_privileged_group = mail
mail_uid = 10000
managesieve_notify_capability = mailto
managesieve_sieve_capability = fileinto reject envelope encoded-character 
vacation subaddress comparator-i;ascii-numeric relational regex imap4flags copy 
include variables body enotify environment mailbox date index ihave duplicate 
mime foreverypart extracttext
namespace inbox {
  hidden = no
  inbox = yes
  list = yes
  location =
  prefix =
  separator = /
  subscriptions = yes
  type = private
}
passdb {
  args = /etc/dovecot.deny
  deny = yes
  driver = passwd-file
}
passdb {
  args = /etc/dovecot/private/passwd.masterusers
  driver = passwd-file
  master = yes
}
passdb {
  args = /etc/dovecot/dovecot-ldap-passdb.conf.ext
  driver = ldap
}
plugin {
  mail_replica = tcp:server2
  sieve = file:~/sieve;active=~/.dovecot.sieve
  sieve_default = /var/lib/dovecot/default.sieve
  sieve_max_actions = 55
  sieve_max_redirects = 50
}
pop3_uidl_format = %08Xv%08Xu
protocols = imap pop3 lmtp sieve
replication_dsync_parameters = -d -n INBOX -l 30 -U
replication_max_conns = 20
service aggregator {
  fifo_listener replication-notify-fifo {
    user = vmail
  }
  unix_listener replication-notify {
    user = vmail
  }
}
service auth-worker {
  user = $default_internal_user
}
service auth {
  client_limit = 10000
}
service config {
  process_min_avail = 8
}
service doveadm {
  inet_listener {
    port = 12345
  }
  vsz_limit = 1 G
}
service imap-login {
  process_min_avail = 64
  service_count = 0
}
service imap {
  process_limit = 8192
}
service lmtp {
  inet_listener lmtp {
    port = 24
  }
}
service managesieve-login {
  inet_listener sieve {
    port = 4190
  }
  process_min_avail = 8
  service_count = 0
}
service pop3-login {
  process_min_avail = 8
  service_count = 0
}
service replicator {
  process_min_avail = 1
  unix_listener replicator-doveadm {
    mode = 0600
    user = vmail
  }
}
service submission-login {
  service_count = 0
}
ssl = required
ssl_ca = </etc/ssl/certs/chain.pem
ssl_cert = </etc/ssl/certs/cert.pem
ssl_client_ca_dir = /etc/ssl/certs
ssl_dh = # hidden, use -P to show it
ssl_key = # hidden, use -P to show it
ssl_require_crl = no
userdb {
  args = /etc/dovecot/dovecot-ldap-userdb.conf.ext
  driver = ldap
  name = userdb_ldap
}
protocol imap {
  mail_max_userip_connections = 25
  mail_plugins = " mail_log notify zlib notify replication imap_zlib"
}
protocol lmtp {
  mail_plugins = " mail_log notify zlib notify replication sieve"
}
VERSION doveadm-server  1       1
VERSION doveadm-client  1       1
-
PLAIN   xxxx...
+
        username1       dsync-server    -uusername1     -U
.....
+
VERSION dsync   3       5
Hhostname       sync_ns_prefix  sync_box        sync_box_guid   sync_type       
debug
sync_visible_namespaces exclude_mailboxes       send_mail_requests
backup_send     backup_recv     lock_timeout    no_mail_sync    
no_mailbox_renames
no_backup_overwrite     purge_remote    no_notify       sync_since_timestamp
sync_max_size   sync_flags      sync_until_timestamp    virtual_all_box
empty_hdr_workaround    import_commit_msgs_interval     hashed_headers
Smailbox_guid   last_uidvalidity        last_common_uid last_common_modseq
last_common_pvt_modseq  last_messages_count     changes_during_sync
Nname   existence       mailbox_guid    uid_validity    uid_next
last_renamed_or_created subscribed      last_subscription_change
Dhierarchy_sep  mailboxes       dirs    unsubscribes
Bmailbox_guid   uid_validity    uid_next        messages_count  first_recent_uid
highest_modseq  highest_pvt_modseq      mailbox_lost    mailbox_ignore
cache_fields    have_guids      have_save_guids have_only_guid128
Atype   key     value   stream  deleted last_change     modseq
Ctype   uid     guid    hdr_hash        modseq  pvt_modseq      add_flags       
remove_flags
final_flags     keywords_reset  keyword_changes received_timestamp      
virtual_size
Rguid   uid
Mguid   uid     pop3_uidl       pop3_order      received_date   saved_date      
stream
Ferror  mail_error      require_full_resync
cname   decision        last_used
.
....JHserver2   .       .       .       .       .       .       .       .       
.       .       .       .       .       .       .       .       .       .       
.       .       .       .
.
VERSION dsync   3       5
Hhostname       sync_ns_prefix  sync_box        sync_box_guid   sync_type       
debug
sync_visible_namespaces exclude_mailboxes       send_mail_requests
backup_send     backup_recv     lock_timeout    no_mail_sync    
no_mailbox_renames
no_backup_overwrite     purge_remote    no_notify       sync_since_timestamp
sync_max_size   sync_flags      sync_until_timestamp    virtual_all_box
empty_hdr_workaround    import_commit_msgs_interval     hashed_headers
Smailbox_guid   last_uidvalidity        last_common_uid last_common_modseq
last_common_pvt_modseq  last_messages_count     changes_during_sync
Nname   existence       mailbox_guid    uid_validity    uid_next
last_renamed_or_created subscribed      last_subscription_change
Dhierarchy_sep  mailboxes       dirs    unsubscribes
Bmailbox_guid   uid_validity    uid_next        messages_count  first_recent_uid
highest_modseq  highest_pvt_modseq      mailbox_lost    mailbox_ignore
cache_fields    have_guids      have_save_guids have_only_guid128
Atype   key     value   stream  deleted last_change     modseq
Ctype   uid     guid    hdr_hash        modseq  pvt_modseq      add_flags       
remove_flags
final_flags     keywords_reset  keyword_changes received_timestamp      
virtual_size
Rguid   uid
Mguid   uid     pop3_uidl       pop3_order      received_date   saved_date      
stream
Ferror  mail_error      require_full_resync
cname   decision        last_used
.
Hserver1                .       .       s       .       .       .               
.       .       20      .       .       .       .       .       .       .       
.       .       .       .       100
Date.tMessage-ID.t
L...Z.read(server1) failed: EOF (last
sent=handshake, last recv=handshake)

Reply via email to