On Tue, May 21, 2019 at 05:59:18PM +0200, Claudio Jeker wrote: > Hi, > > The last refactor of the pfkey handling uncovered an issue deep down in > the pfkey handling. Some long time ago henning@ added a sleep(1) into > pfkey_md5sig_establish(). This is now hunting us back because bgpd ends up > with no tcp md5sig flow for 1 sec while a connection is established. > Because of that writes failed with "Operation not permitted" > semi-frequently. Now a lot of the dance around pfkey is rather annoying, > it is not possible to do atomic updates of SADB_X_SATYPE_TCPSIGNATURE > entries. So instead I decided to insert new flows and then remove the old > ones in a second step. Since this is done now all the time it is no longer > needed to pfkey_remove(p) before calling pfkey_establish(p). > pfkey_establish(p) will do that now itself when needed. > > While checking the code I noticed that we leave keys behind when removing > peers in the config. I added the needed pfkey_remove(p) call in > merge_config() for that. This could be commited independently. > > With this in the regress test works a lot better and my basic testing > seems to be good as well. Please test if you are using tcp md5sum or ipsec > in your bgpd config. >
Guess nobody is using and suffering of tcp md5sum. Should I just commit and wait for bug reports? -- :wq Claudio Index: bgpd.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v retrieving revision 1.217 diff -u -p -r1.217 bgpd.c --- bgpd.c 8 May 2019 18:48:34 -0000 1.217 +++ bgpd.c 21 May 2019 12:30:00 -0000 @@ -799,11 +799,9 @@ dispatch_imsg(struct imsgbuf *ibuf, int log_warnx("pfkey reload: no such peer: id=%u", imsg.hdr.peerid); else { - pfkey_remove(p); - if (pfkey_establish(p) == -1) { + if (pfkey_establish(p) == -1) log_peer_warnx(&p->conf, "pfkey setup failed"); - } } break; case IMSG_CTL_RELOAD: Index: config.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/config.c,v retrieving revision 1.88 diff -u -p -r1.88 config.c --- config.c 8 May 2019 12:41:55 -0000 1.88 +++ config.c 21 May 2019 13:44:50 -0000 @@ -322,6 +322,9 @@ merge_config(struct bgpd_config *xconf, np->reconf_action = RECONF_KEEP; /* copy the auth state since parent uses it */ np->auth = p->auth; + } else { + /* peer no longer exists, clear pfkey state */ + pfkey_remove(p); } TAILQ_REMOVE(&xconf->peers, p, entry); Index: pfkey.c =================================================================== RCS file: /cvs/src/usr.sbin/bgpd/pfkey.c,v retrieving revision 1.55 diff -u -p -r1.55 pfkey.c --- pfkey.c 8 May 2019 12:41:55 -0000 1.55 +++ pfkey.c 21 May 2019 15:27:53 -0000 @@ -50,13 +50,6 @@ int pfkey_send(int, uint8_t, uint8_t, ui struct bgpd_addr *, struct bgpd_addr *, u_int32_t, uint8_t, int, char *, uint8_t, int, char *, uint16_t, uint16_t); -int pfkey_sa_add(struct bgpd_addr *, struct bgpd_addr *, u_int8_t, char *, - u_int32_t *); -int pfkey_sa_remove(struct bgpd_addr *, struct bgpd_addr *, u_int32_t *); -int pfkey_md5sig_establish(struct peer *); -int pfkey_md5sig_remove(struct peer *); -int pfkey_ipsec_establish(struct peer *); -int pfkey_ipsec_remove(struct peer *); #define pfkey_flow(fd, satype, cmd, dir, from, to, sport, dport) \ pfkey_send(fd, satype, cmd, dir, from, to, \ @@ -404,7 +397,7 @@ pfkey_send(int sd, uint8_t satype, uint8 } while (n == -1 && (errno == EAGAIN || errno == EINTR)); if (n == -1) { - log_warn("writev (%d/%d)", iov_cnt, len); + log_warn("%s: writev (%d/%d)", __func__, iov_cnt, len); return (-1); } @@ -502,7 +495,7 @@ pfkey_reply(int sd, u_int32_t *spi) return (0); } -int +static int pfkey_sa_add(struct bgpd_addr *src, struct bgpd_addr *dst, u_int8_t keylen, char *key, u_int32_t *spi) { @@ -519,7 +512,7 @@ pfkey_sa_add(struct bgpd_addr *src, stru return (0); } -int +static int pfkey_sa_remove(struct bgpd_addr *src, struct bgpd_addr *dst, u_int32_t *spi) { if (pfkey_send(pfkey_fd, SADB_X_SATYPE_TCPSIGNATURE, SADB_DELETE, 0, @@ -531,47 +524,71 @@ pfkey_sa_remove(struct bgpd_addr *src, s return (0); } -int +static int pfkey_md5sig_establish(struct peer *p) { - sleep(1); + u_int32_t spi_out = 0; + u_int32_t spi_in = 0; - if (!p->auth.spi_out) - if (pfkey_sa_add(&p->auth.local_addr, &p->conf.remote_addr, - p->conf.auth.md5key_len, p->conf.auth.md5key, - &p->auth.spi_out) == -1) - return (-1); - if (!p->auth.spi_in) - if (pfkey_sa_add(&p->conf.remote_addr, &p->auth.local_addr, - p->conf.auth.md5key_len, p->conf.auth.md5key, - &p->auth.spi_in) == -1) + if (pfkey_sa_add(&p->conf.local_addr, &p->conf.remote_addr, + p->conf.auth.md5key_len, p->conf.auth.md5key, + &spi_out) == -1) + goto fail; + + if (pfkey_sa_add(&p->conf.remote_addr, &p->conf.local_addr, + p->conf.auth.md5key_len, p->conf.auth.md5key, + &spi_in) == -1) + goto fail; + + /* cleanup old flow if one was present */ + if (p->auth.established) { + if (pfkey_remove(p) == -1) return (-1); + } p->auth.established = 1; + p->auth.spi_out = spi_out; + p->auth.spi_in = spi_in; return (0); + +fail: + log_peer_warn(&p->conf, "%s: failed to insert md5sig", __func__); + return (-1); } -int +static int pfkey_md5sig_remove(struct peer *p) { if (p->auth.spi_out) if (pfkey_sa_remove(&p->auth.local_addr, &p->conf.remote_addr, &p->auth.spi_out) == -1) - return (-1); + goto fail; if (p->auth.spi_in) if (pfkey_sa_remove(&p->conf.remote_addr, &p->auth.local_addr, &p->auth.spi_in) == -1) - return (-1); + goto fail; p->auth.established = 0; + p->auth.spi_out = 0; + p->auth.spi_in = 0; return (0); + +fail: + log_peer_warn(&p->conf, "%s: failed to remove md5sig", __func__); + return (-1); } -int +static int pfkey_ipsec_establish(struct peer *p) { uint8_t satype = SADB_SATYPE_ESP; + /* cleanup first, unlike in the TCP MD5 case */ + if (p->auth.established) { + if (pfkey_remove(p) == -1) + return (-1); + } + switch (p->auth.method) { case AUTH_IPSEC_IKE_ESP: satype = SADB_SATYPE_ESP; @@ -584,8 +601,8 @@ pfkey_ipsec_establish(struct peer *p) satype = p->auth.method == AUTH_IPSEC_MANUAL_ESP ? SADB_SATYPE_ESP : SADB_SATYPE_AH; if (pfkey_send(pfkey_fd, satype, SADB_ADD, 0, - &p->auth.local_addr, &p->conf.remote_addr, - p->auth.spi_out, + &p->conf.local_addr, &p->conf.remote_addr, + p->conf.auth.spi_out, p->conf.auth.auth_alg_out, p->conf.auth.auth_keylen_out, p->conf.auth.auth_key_out, @@ -593,12 +610,12 @@ pfkey_ipsec_establish(struct peer *p) p->conf.auth.enc_keylen_out, p->conf.auth.enc_key_out, 0, 0) < 0) - return (-1); + goto fail_key; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_key; if (pfkey_send(pfkey_fd, satype, SADB_ADD, 0, - &p->conf.remote_addr, &p->auth.local_addr, - p->auth.spi_in, + &p->conf.remote_addr, &p->conf.local_addr, + p->conf.auth.spi_in, p->conf.auth.auth_alg_in, p->conf.auth.auth_keylen_in, p->conf.auth.auth_key_in, @@ -606,44 +623,53 @@ pfkey_ipsec_establish(struct peer *p) p->conf.auth.enc_keylen_in, p->conf.auth.enc_key_in, 0, 0) < 0) - return (-1); + goto fail_key; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_key; break; default: return (-1); - break; } if (pfkey_flow(pfkey_fd, satype, SADB_X_ADDFLOW, IPSP_DIRECTION_OUT, - &p->auth.local_addr, &p->conf.remote_addr, 0, BGP_PORT) < 0) - return (-1); + &p->conf.local_addr, &p->conf.remote_addr, 0, BGP_PORT) < 0) + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; if (pfkey_flow(pfkey_fd, satype, SADB_X_ADDFLOW, IPSP_DIRECTION_OUT, - &p->auth.local_addr, &p->conf.remote_addr, BGP_PORT, 0) < 0) - return (-1); + &p->conf.local_addr, &p->conf.remote_addr, BGP_PORT, 0) < 0) + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; if (pfkey_flow(pfkey_fd, satype, SADB_X_ADDFLOW, IPSP_DIRECTION_IN, - &p->conf.remote_addr, &p->auth.local_addr, 0, BGP_PORT) < 0) - return (-1); + &p->conf.remote_addr, &p->conf.local_addr, 0, BGP_PORT) < 0) + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; if (pfkey_flow(pfkey_fd, satype, SADB_X_ADDFLOW, IPSP_DIRECTION_IN, - &p->conf.remote_addr, &p->auth.local_addr, BGP_PORT, 0) < 0) - return (-1); + &p->conf.remote_addr, &p->conf.local_addr, BGP_PORT, 0) < 0) + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; + /* save SPI so that they can be removed later on */ + p->auth.spi_in = p->conf.auth.spi_in; + p->auth.spi_out = p->conf.auth.spi_out; p->auth.established = 1; return (0); + +fail_key: + log_peer_warn(&p->conf, "%s: failed to insert ipsec key", __func__); + return (-1); +fail_flow: + log_peer_warn(&p->conf, "%s: failed to insert flow", __func__); + return (-1); } -int +static int pfkey_ipsec_remove(struct peer *p) { uint8_t satype; @@ -663,84 +689,106 @@ pfkey_ipsec_remove(struct peer *p) &p->auth.local_addr, &p->conf.remote_addr, p->auth.spi_out, 0, 0, NULL, 0, 0, NULL, 0, 0) < 0) - return (-1); + goto fail_key; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_key; if (pfkey_send(pfkey_fd, satype, SADB_DELETE, 0, &p->conf.remote_addr, &p->auth.local_addr, p->auth.spi_in, 0, 0, NULL, 0, 0, NULL, 0, 0) < 0) - return (-1); + goto fail_key; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_key; break; default: return (-1); - break; } if (pfkey_flow(pfkey_fd, satype, SADB_X_DELFLOW, IPSP_DIRECTION_OUT, &p->auth.local_addr, &p->conf.remote_addr, 0, BGP_PORT) < 0) - return (-1); + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; if (pfkey_flow(pfkey_fd, satype, SADB_X_DELFLOW, IPSP_DIRECTION_OUT, &p->auth.local_addr, &p->conf.remote_addr, BGP_PORT, 0) < 0) - return (-1); + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; if (pfkey_flow(pfkey_fd, satype, SADB_X_DELFLOW, IPSP_DIRECTION_IN, &p->conf.remote_addr, &p->auth.local_addr, 0, BGP_PORT) < 0) - return (-1); + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; if (pfkey_flow(pfkey_fd, satype, SADB_X_DELFLOW, IPSP_DIRECTION_IN, &p->conf.remote_addr, &p->auth.local_addr, BGP_PORT, 0) < 0) - return (-1); + goto fail_flow; if (pfkey_reply(pfkey_fd, NULL) < 0) - return (-1); + goto fail_flow; p->auth.established = 0; + p->auth.spi_out = 0; + p->auth.spi_in = 0; return (0); + +fail_key: + log_peer_warn(&p->conf, "%s: failed to remove ipsec key", __func__); + return (-1); +fail_flow: + log_peer_warn(&p->conf, "%s: failed to remove flow", __func__); + return (-1); } int pfkey_establish(struct peer *p) { + int rv; + + switch (p->conf.auth.method) { + case AUTH_NONE: + rv = 0; + if (p->auth.established) + rv = pfkey_remove(p); + break; + case AUTH_MD5SIG: + rv = pfkey_md5sig_establish(p); + break; + default: + rv = pfkey_ipsec_establish(p); + break; + } /* * make sure we keep copies of everything we need to * remove SAs and flows later again, even if the * info in p->conf changed due to reload. * We need: SPIs, method, local_addr, remote_addr. - * remote_addr cannot change, so no copy. + * remote_addr cannot change, so no copy, SPI are + * handled by the method specific functions. */ memcpy(&p->auth.local_addr, &p->conf.local_addr, sizeof(p->auth.local_addr)); p->auth.method = p->conf.auth.method; - p->auth.spi_in = p->conf.auth.spi_in; - p->auth.spi_out = p->conf.auth.spi_out; - if (!p->auth.method) - return (0); - else if (p->auth.method == AUTH_MD5SIG) - return (pfkey_md5sig_establish(p)); - else - return (pfkey_ipsec_establish(p)); + return (rv); } int pfkey_remove(struct peer *p) { - if (!p->auth.established) + if (p->auth.established == 0) + return (0); + + switch (p->auth.method) { + case AUTH_NONE: return (0); - else if (p->auth.method == AUTH_MD5SIG) + case AUTH_MD5SIG: return (pfkey_md5sig_remove(p)); - else + default: return (pfkey_ipsec_remove(p)); + } } int