On Tue, Jun 27, 2023 at 12:10:21PM +0200, Claudio Jeker wrote: > Sorry this diff is a monster but it kind of turned into a all or nothing > situation.
Frankly, it's not that bad. > For a long time bgpd used a static 4k buffer plus a len argument to build > updates. The result was ok but was also very fragile. > With the new ibuf API all of this can be rewritten and the result is IMO > already a lot cleaner (mainly because a lot of code duplication could be > removed). > > Some bits I'm not super happy with are community_writebuf() and > pt_writebuf(). Both functions are a bit too complex to my taste but for > different reasons. pt_writebuf() needs to be transactional (either all or > nothing) since we call pt_writebuf() until there is no more space. > community_writebuf() does maybe too much at once. It is a big step in the right direction. It's great. There's always time for more passes and cleanup. I agree that both these functions are a bit on the complex side, but compared to the code they replace... > As mentioned this is a major rewrite, I did run this through regress and > also on a few personal systems but I'm unable to test all possible cases. > Please try this out and report back. I need to make more passes over this, but here's a first round of feedback. Some small suggestions and an (already existing) leak in up_generate_attr() inline. (There's the usual bit of "return (foo)" vs "return foo" being mixed in the same function. I refrained from making comments.) > -- > :wq Claudio > > Index: mrt.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v > retrieving revision 1.114 > diff -u -p -r1.114 mrt.c > --- mrt.c 19 Apr 2023 09:03:00 -0000 1.114 > +++ mrt.c 27 Jun 2023 08:33:54 -0000 > @@ -309,7 +309,9 @@ mrt_attr_dump(struct ibuf *buf, struct r > return (-1); > > /* communities */ > - if (community_writebuf(buf, c) == -1) > + if (community_writebuf(c, ATTR_COMMUNITIES, 0, buf) == -1 || > + community_writebuf(c, ATTR_EXT_COMMUNITIES, 0, buf) == -1 || > + community_writebuf(c, ATTR_LARGE_COMMUNITIES, 0, buf) == -1) > return (-1); > > /* dump all other path attributes without modification */ > @@ -502,7 +504,7 @@ mrt_dump_entry_mp(struct mrt *mrt, struc > goto fail; > } > > - if (pt_writebuf(h2buf, p->pt) == -1) { > + if (pt_writebuf(h2buf, p->pt, 0, 0, 0) == -1) { > log_warnx("%s: pt_writebuf error", __func__); > goto fail; > } > @@ -678,7 +680,7 @@ mrt_dump_entry_v2_rib(struct rib_entry * > } > len = ibuf_size(tbuf); > DUMP_SHORT(buf, (uint16_t)len); I assume the DUMP_* macros will be dealt with in a subsequent step? > - if (ibuf_add(buf, tbuf->buf, len) == -1) { > + if (ibuf_add_buf(buf, tbuf) == -1) { > log_warn("%s: ibuf_add error", __func__); ibuf_add_buf error > ibuf_free(tbuf); > goto fail; > @@ -731,7 +733,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc > break; > } > > - if (pt_writebuf(pbuf, re->prefix) == -1) { > + if (pt_writebuf(pbuf, re->prefix, 0, 0, 0) == -1) { > log_warnx("%s: pt_writebuf error", __func__); > goto fail; > } > @@ -748,7 +750,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc > goto fail; > > DUMP_LONG(hbuf, snum); > - if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) { > + if (ibuf_add_buf(hbuf, pbuf) == -1) { > log_warn("%s: ibuf_add error", __func__); ibuf_add_buf error > goto fail; > } > @@ -767,7 +769,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc > goto fail; > > DUMP_LONG(hbuf, snum); > - if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) { > + if (ibuf_add_buf(hbuf, pbuf) == -1) { > log_warn("%s: ibuf_add error", __func__); ibuf_add_buf error > goto fail; > } > @@ -833,8 +835,8 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct > } > > off = ibuf_size(buf); > - if (ibuf_reserve(buf, sizeof(nump)) == NULL) { > - log_warn("%s: ibuf_reserve error", __func__); > + if (ibuf_add_zero(buf, sizeof(nump)) == -1) { > + log_warn("%s: ibuf_add_zero error", __func__); > goto fail; > } > arg.nump = 0; > @@ -843,8 +845,10 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct > if (arg.nump == -1) > goto fail; > > - nump = htons(arg.nump); > - memcpy(ibuf_seek(buf, off, sizeof(nump)), &nump, sizeof(nump)); > + if (ibuf_set_n16(buf, off, arg.nump) == -1) { > + log_warn("%s: ibuf_set_n16 error", __func__); > + goto fail; > + } > > len = ibuf_size(buf); > if (mrt_dump_hdr_rde(&hbuf, MSG_TABLE_DUMP_V2, > @@ -1099,14 +1103,8 @@ mrt_write(struct mrt *mrt) > void > mrt_clean(struct mrt *mrt) > { > - struct ibuf *b; > - > close(mrt->wbuf.fd); > - while ((b = TAILQ_FIRST(&mrt->wbuf.bufs))) { > - TAILQ_REMOVE(&mrt->wbuf.bufs, b, entry); > - ibuf_free(b); > - } > - mrt->wbuf.queued = 0; > + msgbuf_clear(&mrt->wbuf); > } > > static struct imsgbuf *mrt_imsgbuf[2]; > Index: rde.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v > retrieving revision 1.606 > diff -u -p -r1.606 rde.c > --- rde.c 12 Jun 2023 12:48:07 -0000 1.606 > +++ rde.c 27 Jun 2023 08:33:54 -0000 > @@ -86,8 +86,7 @@ static void rde_rpki_reload(void); > static int rde_roa_reload(void); > static int rde_aspa_reload(void); > int rde_update_queue_pending(void); > -void rde_update_queue_runner(void); > -void rde_update6_queue_runner(uint8_t); > +void rde_update_queue_runner(uint8_t); > struct rde_prefixset *rde_find_prefixset(char *, struct rde_prefixset_head > *); > void rde_mark_prefixsets_dirty(struct rde_prefixset_head *, > struct rde_prefixset_head *); > @@ -310,9 +309,8 @@ rde_main(int debug, int verbose) > rib_dump_runner(); > nexthop_runner(); > if (ibuf_se && ibuf_se->w.queued < SESS_MSG_HIGH_MARK) { > - rde_update_queue_runner(); > - for (aid = AID_INET6; aid < AID_MAX; aid++) > - rde_update6_queue_runner(aid); > + for (aid = AID_MIN; aid < AID_MAX; aid++) > + rde_update_queue_runner(aid); > } > /* commit pftable once per poll loop */ > rde_commit_pftable(); > @@ -3456,13 +3454,13 @@ rde_update_queue_pending(void) > } > > void > -rde_update_queue_runner(void) > +rde_update_queue_runner(uint8_t aid) > { > struct rde_peer *peer; > - int r, sent, max = RDE_RUNNER_ROUNDS, eor; > - uint16_t len, wpos; > + struct ibuf *buf; > + int sent, max = RDE_RUNNER_ROUNDS; > > - len = sizeof(queue_buf) - MSGSIZE_HEADER; > + /* first withdraws ... */ > do { > sent = 0; > RB_FOREACH(peer, peer_tree, &peertable) { > @@ -3472,80 +3470,26 @@ rde_update_queue_runner(void) > continue; > if (peer->throttled) > continue; > - eor = 0; > - wpos = 0; > - /* first withdraws, save 2 bytes for path attributes */ > - if ((r = up_dump_withdraws(queue_buf, len - 2, peer, > - AID_INET)) == -1) > + if (RB_EMPTY(&peer->withdraws[aid])) > continue; > - wpos += r; > > - /* now bgp path attributes unless it is the EoR mark */ > - if (up_is_eor(peer, AID_INET)) { > - eor = 1; > - memset(queue_buf + wpos, 0, 2); > - wpos += 2; > - } else { > - r = up_dump_attrnlri(queue_buf + wpos, > - len - wpos, peer); > - wpos += r; > - } > - > - /* finally send message to SE */ > - if (wpos > 4) { > - if (imsg_compose(ibuf_se, IMSG_UPDATE, > - peer->conf.id, 0, -1, queue_buf, > - wpos) == -1) > - fatal("%s %d imsg_compose error", > - __func__, __LINE__); > - sent++; > - } > - if (eor) { > - int sent_eor = peer->sent_eor & (1 << AID_INET); > - if (peer->capa.grestart.restart && !sent_eor) > - rde_peer_send_eor(peer, AID_INET); > - if (peer->capa.enhanced_rr && sent_eor) > - rde_peer_send_rrefresh(peer, AID_INET, > - ROUTE_REFRESH_END_RR); > - } > - } > - max -= sent; > - } while (sent != 0 && max > 0); > -} > - > -void > -rde_update6_queue_runner(uint8_t aid) > -{ > - struct rde_peer *peer; > - int r, sent, max = RDE_RUNNER_ROUNDS / 2; > - uint16_t len; > - > - /* first withdraws ... */ > - do { > - sent = 0; > - RB_FOREACH(peer, peer_tree, &peertable) { > - if (peer->conf.id == 0) > - continue; > - if (peer->state != PEER_UP) > + if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == > + NULL) > + fatal("%s", __func__); > + if (up_dump_withdraws(buf, peer, aid) == -1) { > + ibuf_free(buf); > continue; > - if (peer->throttled) > - continue; > - len = sizeof(queue_buf) - MSGSIZE_HEADER; > - r = up_dump_mp_unreach(queue_buf, len, peer, aid); > - if (r == -1) > - continue; > - /* finally send message to SE */ > - if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, > - 0, -1, queue_buf, r) == -1) > - fatal("%s %d imsg_compose error", __func__, > - __LINE__); > + } > + if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE, > + peer->conf.id, 0, buf) == -1) > + fatal("%s: imsg_create error", __func__); > sent++; > } > max -= sent; > } while (sent != 0 && max > 0); > > /* ... then updates */ > - max = RDE_RUNNER_ROUNDS / 2; > + max = RDE_RUNNER_ROUNDS; > do { > sent = 0; > RB_FOREACH(peer, peer_tree, &peertable) { > @@ -3555,7 +3499,9 @@ rde_update6_queue_runner(uint8_t aid) > continue; > if (peer->throttled) > continue; > - len = sizeof(queue_buf) - MSGSIZE_HEADER; > + if (RB_EMPTY(&peer->updates[aid])) > + continue; > + > if (up_is_eor(peer, aid)) { > int sent_eor = peer->sent_eor & (1 << aid); > if (peer->capa.grestart.restart && !sent_eor) > @@ -3565,15 +3511,17 @@ rde_update6_queue_runner(uint8_t aid) > ROUTE_REFRESH_END_RR); > continue; > } > - r = up_dump_mp_reach(queue_buf, len, peer, aid); > - if (r == 0) > - continue; > > - /* finally send message to SE */ > - if (imsg_compose(ibuf_se, IMSG_UPDATE, peer->conf.id, > - 0, -1, queue_buf, r) == -1) > - fatal("%s %d imsg_compose error", __func__, > - __LINE__); > + if ((buf = ibuf_dynamic(4, 4096 - MSGSIZE_HEADER)) == > + NULL) > + fatal("%s", __func__); > + if (up_dump_update(buf, peer, aid) == -1) { > + ibuf_free(buf); > + continue; > + } > + if (imsg_compose_ibuf(ibuf_se, IMSG_UPDATE, > + peer->conf.id, 0, buf) == -1) > + fatal("%s: imsg_compose_ibuf error", __func__); > sent++; > } > max -= sent; > Index: rde.h > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v > retrieving revision 1.294 > diff -u -p -r1.294 rde.h > --- rde.h 12 Jun 2023 12:48:07 -0000 1.294 > +++ rde.h 27 Jun 2023 08:33:54 -0000 > @@ -447,10 +447,7 @@ int community_add(struct rde_community * > int community_large_add(struct rde_community *, int, void *, size_t); > int community_ext_add(struct rde_community *, int, int, void *, size_t); > > -int community_write(struct rde_community *, void *, uint16_t); > -int community_large_write(struct rde_community *, void *, uint16_t); > -int community_ext_write(struct rde_community *, int, void *, uint16_t); > -int community_writebuf(struct ibuf *, struct rde_community *); > +int community_writebuf(struct rde_community *, uint8_t, int, struct ibuf *); > > void communities_shutdown(void); > struct rde_community *communities_lookup(struct rde_community *); > @@ -519,8 +516,7 @@ struct pt_entry *pt_add_flow(struct flow > void pt_remove(struct pt_entry *); > struct pt_entry *pt_lookup(struct bgpd_addr *); > int pt_prefix_cmp(const struct pt_entry *, const struct pt_entry *); > -int pt_write(u_char *, int, struct pt_entry *, int); > -int pt_writebuf(struct ibuf *, struct pt_entry *); > +int pt_writebuf(struct ibuf *, struct pt_entry *, int, int, uint32_t); > > static inline struct pt_entry * > pt_ref(struct pt_entry *pt) > @@ -710,10 +706,8 @@ void up_generate_addpath_all(struct rd > struct prefix *, struct prefix *); > void up_generate_default(struct rde_peer *, uint8_t); > int up_is_eor(struct rde_peer *, uint8_t); > -int up_dump_withdraws(u_char *, int, struct rde_peer *, uint8_t); > -int up_dump_mp_unreach(u_char *, int, struct rde_peer *, uint8_t); > -int up_dump_attrnlri(u_char *, int, struct rde_peer *); > -int up_dump_mp_reach(u_char *, int, struct rde_peer *, uint8_t); > +int up_dump_withdraws(struct ibuf *, struct rde_peer *, uint8_t); > +int up_dump_update(struct ibuf *, struct rde_peer *, uint8_t); > > /* rde_aspa.c */ > void aspa_validation(struct rde_aspa *, struct aspath *, > Index: rde_attr.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_attr.c,v > retrieving revision 1.133 > diff -u -p -r1.133 rde_attr.c > --- rde_attr.c 12 Jun 2023 12:10:17 -0000 1.133 > +++ rde_attr.c 27 Jun 2023 08:33:54 -0000 > @@ -30,42 +30,6 @@ > #include "log.h" > > int > -attr_write(void *p, uint16_t p_len, uint8_t flags, uint8_t type, > - void *data, uint16_t data_len) > -{ > - u_char *b = p; > - uint16_t tmp, tot_len = 2; /* attribute header (without len) */ > - > - flags &= ~ATTR_DEFMASK; > - if (data_len > 255) { > - tot_len += 2 + data_len; > - flags |= ATTR_EXTLEN; > - } else { > - tot_len += 1 + data_len; > - } > - > - if (tot_len > p_len) > - return (-1); > - > - *b++ = flags; > - *b++ = type; > - if (data_len > 255) { > - tmp = htons(data_len); > - memcpy(b, &tmp, sizeof(tmp)); > - b += 2; > - } else > - *b++ = (u_char)data_len; > - > - if (data == NULL) > - return (tot_len - data_len); > - > - if (data_len != 0) > - memcpy(b, data, data_len); > - > - return (tot_len); > -} > - > -int > attr_writebuf(struct ibuf *buf, uint8_t flags, uint8_t type, void *data, > uint16_t data_len) > { > Index: rde_community.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v > retrieving revision 1.12 > diff -u -p -r1.12 rde_community.c > --- rde_community.c 17 Jun 2023 08:05:48 -0000 1.12 > +++ rde_community.c 27 Jun 2023 09:37:35 -0000 > @@ -225,10 +225,9 @@ insert_community(struct rde_community *c > struct community *new; > int newsize = comm->size + 8; > > - if ((new = reallocarray(comm->communities, newsize, > - sizeof(struct community))) == NULL) > + if ((new = recallocarray(comm->communities, comm->size, > + newsize, sizeof(struct community))) == NULL) > fatal(__func__); > - memset(new + comm->size, 0, 8 * sizeof(struct community)); Since you fatal on failure, new doesn't really add anything > comm->communities = new; > comm->size = newsize; > } > @@ -256,6 +255,8 @@ insert_community(struct rde_community *c > static int > non_transitive_ext_community(struct community *c) > { > + if ((uint8_t)c->flags != COMMUNITY_TYPE_EXT) > + return 0; > if ((c->data3 >> 8) & EXT_COMMUNITY_NON_TRANSITIVE) > return 1; > return 0; > @@ -514,228 +515,79 @@ community_ext_add(struct rde_community * > > /* > * Convert communities back to the wireformat. > - * This function needs to make sure that the attribute buffer is overflowed > - * while writing out the communities. > - * - community_write for ATTR_COMMUNITUES > - * - community_large_write for ATTR_LARGE_COMMUNITIES > - * - community_ext_write for ATTR_EXT_COMMUNITIES > - * When writing ATTR_EXT_COMMUNITIES non-transitive communities need to > - * be skipped if it is sent to an ebgp peer. > + * When writing ATTR_EXT_COMMUNITIES non-transitive communities need to > + * be skipped if they are sent to an ebgp peer. > */ > int > -community_write(struct rde_community *comm, void *buf, uint16_t len) > +community_writebuf(struct rde_community *comm, uint8_t type, int ebgp, > + struct ibuf *buf) > { > - uint8_t *b = buf; > - uint16_t c; > - size_t n = 0; > - int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > - > - if (comm->flags & PARTIAL_COMMUNITIES) > - flags |= ATTR_PARTIAL; > - > - /* first count how many communities will be written */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_BASIC) > - n++; > - > - if (n == 0) > - return 0; > + struct community *cp; > + uint64_t ext; > + int l, size, start = -1, end, num = 0; > + int flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > + uint8_t t; > > - /* write attribute header */ > - r = attr_write(b, len, flags, ATTR_COMMUNITIES, NULL, n * 4); > - if (r == -1) > + switch (type) { > + case ATTR_COMMUNITIES: > + if (comm->flags & PARTIAL_COMMUNITIES) > + flags |= ATTR_PARTIAL; > + size = 4; > + t = COMMUNITY_TYPE_BASIC; > + break; > + case ATTR_EXT_COMMUNITIES: > + if (comm->flags & PARTIAL_EXT_COMMUNITIES) > + flags |= ATTR_PARTIAL; > + size = 8; > + t = COMMUNITY_TYPE_EXT; > + break; > + case ATTR_LARGE_COMMUNITIES: > + if (comm->flags & PARTIAL_LARGE_COMMUNITIES) > + flags |= ATTR_PARTIAL; > + size = 12; > + t = COMMUNITY_TYPE_LARGE; > + break; > + default: > return -1; I wonder if it is worth pulling the flags handling out of the switch by adding int is_partial = PARTIAL_COMMUNITIES | PARTIAL_EXT_COMMUNITIES | PARTIAL_LARGE_COMMUNITIES; and doing flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; if (comm->flags & is_partial) flags |= ATTR_PARTIAL; right before the attr_writebuf() call. > - b += r; > - > - /* write out the communities */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_BASIC) { > - c = htons(comm->communities[l].data1); > - memcpy(b, &c, sizeof(c)); > - b += sizeof(c); > - r += sizeof(c); > - > - c = htons(comm->communities[l].data2); > - memcpy(b, &c, sizeof(c)); > - b += sizeof(c); > - r += sizeof(c); > - } > - > - return r; > -} > - > -int > -community_large_write(struct rde_community *comm, void *buf, uint16_t len) > -{ > - uint8_t *b = buf; > - uint32_t c; > - size_t n = 0; > - int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > - > - if (comm->flags & PARTIAL_LARGE_COMMUNITIES) > - flags |= ATTR_PARTIAL; > + } > > /* first count how many communities will be written */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_LARGE) > - n++; > - > - if (n == 0) > - return 0; > - > - /* write attribute header */ > - r = attr_write(b, len, flags, ATTR_LARGE_COMMUNITIES, NULL, n * 12); > - if (r == -1) > - return -1; > - b += r; As this is a long function, I would probably initialize these right before the for loop. num = 0; start = -1; > + for (l = 0; l < comm->nentries; l++) { > + cp = comm->communities + l; I tend to prefer cp = &comm->communities[l]; > > - /* write out the communities */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_LARGE) { > - c = htonl(comm->communities[l].data1); > - memcpy(b, &c, sizeof(c)); > - b += sizeof(c); > - r += sizeof(c); > - > - c = htonl(comm->communities[l].data2); > - memcpy(b, &c, sizeof(c)); > - b += sizeof(c); > - r += sizeof(c); > - > - c = htonl(comm->communities[l].data3); > - memcpy(b, &c, sizeof(c)); > - b += sizeof(c); > - r += sizeof(c); > + if (ebgp && non_transitive_ext_community(cp)) > + continue; > + if ((uint8_t)cp->flags == t) { > + num++; > + if (start == -1) > + start = l; > } > + if ((uint8_t)cp->flags > t) > + break; > + } > + end = l; > > - return r; > -} > - > -int > -community_ext_write(struct rde_community *comm, int ebgp, void *buf, > - uint16_t len) > -{ > - struct community *cp; > - uint8_t *b = buf; > - uint64_t ext; > - size_t n = 0; > - int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > - > - if (comm->flags & PARTIAL_EXT_COMMUNITIES) > - flags |= ATTR_PARTIAL; > - > - /* first count how many communities will be written */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_EXT && !(ebgp && > - non_transitive_ext_community(&comm->communities[l]))) > - n++; > - > - if (n == 0) > + /* no communities for this type present */ > + if (num == 0) > return 0; Should this overflow check before passing num * size to attr_writebuf()? if (num > INT16_MAX / size) return -1; > > /* write attribute header */ > - r = attr_write(b, len, flags, ATTR_EXT_COMMUNITIES, NULL, n * 8); > - if (r == -1) > + if (attr_writebuf(buf, flags, type, NULL, num * size) == -1) > return -1; > - b += r; > > /* write out the communities */ > - for (l = 0; l < comm->nentries; l++) { > + for (l = start; l < end; l++) { > cp = comm->communities + l; again, i'd prefer cp = &comm->communities[l]; > - if ((uint8_t)cp->flags == COMMUNITY_TYPE_EXT && !(ebgp && > - non_transitive_ext_community(cp))) { > - ext = (uint64_t)cp->data3 << 48; > - switch ((cp->data3 >> 8) & EXT_COMMUNITY_VALUE) { > - case EXT_COMMUNITY_TRANS_TWO_AS: > - case EXT_COMMUNITY_TRANS_OPAQUE: > - case EXT_COMMUNITY_TRANS_EVPN: > - ext |= ((uint64_t)cp->data1 & 0xffff) << 32; > - ext |= (uint64_t)cp->data2; > - break; > - case EXT_COMMUNITY_TRANS_FOUR_AS: > - case EXT_COMMUNITY_TRANS_IPV4: > - ext |= (uint64_t)cp->data1 << 16; > - ext |= (uint64_t)cp->data2 & 0xffff; > - break; > - } > - ext = htobe64(ext); > - memcpy(b, &ext, sizeof(ext)); > - b += sizeof(ext); > - r += sizeof(ext); > - } > - } > - > - return r; > -} > - > -/* > - * Convert communities back to the wireformat and dump them into the ibuf > buf. > - * This function is used by the mrt dump code. > - */ > -int > -community_writebuf(struct ibuf *buf, struct rde_community *comm) > -{ > - size_t basic_n = 0, large_n = 0, ext_n = 0; > - int l, flags; > - > - /* first count how many communities will be written */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_BASIC) > - basic_n++; > - else if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_EXT) > - ext_n++; > - else if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_LARGE) > - large_n++; > > - > - if (basic_n != 0) { > - /* write attribute header */ > - flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > - if (comm->flags & PARTIAL_COMMUNITIES) > - flags |= ATTR_PARTIAL; > - > - if (attr_writebuf(buf, flags, ATTR_COMMUNITIES, NULL, > - basic_n * 4) == -1) > - return -1; > - > - /* write out the communities */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_BASIC) { > - uint16_t c; > - c = htons(comm->communities[l].data1); > - if (ibuf_add(buf, &c, sizeof(c)) == -1) > - return (-1); > - c = htons(comm->communities[l].data2); > - if (ibuf_add(buf, &c, sizeof(c)) == -1) > - return (-1); > - } > - } > - if (ext_n != 0) { > - /* write attribute header */ > - flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > - if (comm->flags & PARTIAL_COMMUNITIES) > - flags |= ATTR_PARTIAL; > - > - if (attr_writebuf(buf, flags, ATTR_EXT_COMMUNITIES, NULL, > - ext_n * 8) == -1) > - return -1; > - > - /* write out the communities */ > - for (l = 0; l < comm->nentries; l++) { > - struct community *cp; > - uint64_t ext; > - > - cp = comm->communities + l; > - if ((uint8_t)cp->flags != COMMUNITY_TYPE_EXT) > + switch (type) { > + case ATTR_COMMUNITIES: > + if (ibuf_add_n16(buf, cp->data1) == -1) > + return (-1); > + if (ibuf_add_n16(buf, cp->data2) == -1) > + return (-1); > + break; > + case ATTR_EXT_COMMUNITIES: > + if (ebgp && non_transitive_ext_community(cp)) > continue; > > ext = (uint64_t)cp->data3 << 48; > @@ -752,38 +604,19 @@ community_writebuf(struct ibuf *buf, str > ext |= (uint64_t)cp->data2 & 0xffff; > break; > } > - ext = htobe64(ext); > - if (ibuf_add(buf, &ext, sizeof(ext)) == -1) > + if (ibuf_add_n64(buf, ext) == -1) > + return (-1); > + break; > + case ATTR_LARGE_COMMUNITIES: > + if (ibuf_add_n32(buf, cp->data1) == -1) > return (-1); > + if (ibuf_add_n32(buf, cp->data2) == -1) > + return (-1); > + if (ibuf_add_n32(buf, cp->data3) == -1) > + return (-1); > + break; > } > } > - if (large_n != 0) { > - /* write attribute header */ > - flags = ATTR_OPTIONAL | ATTR_TRANSITIVE; > - if (comm->flags & PARTIAL_COMMUNITIES) > - flags |= ATTR_PARTIAL; > - > - if (attr_writebuf(buf, flags, ATTR_LARGE_COMMUNITIES, NULL, > - large_n * 12) == -1) > - return -1; > - > - /* write out the communities */ > - for (l = 0; l < comm->nentries; l++) > - if ((uint8_t)comm->communities[l].flags == > - COMMUNITY_TYPE_LARGE) { > - uint32_t c; > - c = htonl(comm->communities[l].data1); > - if (ibuf_add(buf, &c, sizeof(c)) == -1) > - return (-1); > - c = htonl(comm->communities[l].data2); > - if (ibuf_add(buf, &c, sizeof(c)) == -1) > - return (-1); > - c = htonl(comm->communities[l].data3); > - if (ibuf_add(buf, &c, sizeof(c)) == -1) > - return (-1); > - } > - } > - > return 0; > } > > Index: rde_prefix.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v > retrieving revision 1.49 > diff -u -p -r1.49 rde_prefix.c > --- rde_prefix.c 19 Apr 2023 07:09:47 -0000 1.49 > +++ rde_prefix.c 27 Jun 2023 08:33:54 -0000 > @@ -468,143 +468,111 @@ pt_free(struct pt_entry *pte) > > /* dump a prefix into specified buffer */ > int > -pt_write(u_char *buf, int len, struct pt_entry *pte, int withdraw) > +pt_writebuf(struct ibuf *buf, struct pt_entry *pte, int withdraw, > + int add_path, uint32_t pathid) > { > struct pt_entry_vpn4 *pvpn4 = (struct pt_entry_vpn4 *)pte; > struct pt_entry_vpn6 *pvpn6 = (struct pt_entry_vpn6 *)pte; > struct pt_entry_flow *pflow = (struct pt_entry_flow *)pte; > - int totlen, flowlen, psize; > + struct ibuf *tmp; > + int flowlen, psize; > uint8_t plen; > > + if ((tmp = ibuf_dynamic(32, UINT16_MAX)) == NULL) > + goto fail; > + > + if (add_path) { > + if (ibuf_add_n32(tmp, pathid) == -1) > + goto fail; > + } > + > switch (pte->aid) { > case AID_INET: > case AID_INET6: > plen = pte->prefixlen; > - totlen = PREFIX_SIZE(plen); > - > - if (totlen > len) > - return (-1); > - *buf++ = plen; > - memcpy(buf, pte->data, totlen - 1); > - return (totlen); > + if (ibuf_add_n8(tmp, plen) == -1) > + goto fail; > + if (ibuf_add(tmp, pte->data, PREFIX_SIZE(plen) - 1) == -1) > + goto fail; > + break; > case AID_VPN_IPv4: > plen = pvpn4->prefixlen; > - totlen = PREFIX_SIZE(plen) + sizeof(pvpn4->rd); > psize = PREFIX_SIZE(plen) - 1; > plen += sizeof(pvpn4->rd) * 8; > if (withdraw) { > /* withdraw have one compat label as placeholder */ > - totlen += 3; > plen += 3 * 8; > } else { > - totlen += pvpn4->labellen; > plen += pvpn4->labellen * 8; > } > > - if (totlen > len) > - return (-1); > - *buf++ = plen; > + if (ibuf_add_n8(tmp, plen) == -1) > + goto fail; > if (withdraw) { > /* magic compatibility label as per rfc8277 */ > - *buf++ = 0x80; > - *buf++ = 0x0; > - *buf++ = 0x0; > + if (ibuf_add_n8(tmp, 0x80) == -1 || > + ibuf_add_zero(tmp, 2) == -1) > + goto fail; > } else { > - memcpy(buf, &pvpn4->labelstack, pvpn4->labellen); > - buf += pvpn4->labellen; > + if (ibuf_add(tmp, &pvpn4->labelstack, > + pvpn4->labellen) == -1) > + goto fail; > } > - memcpy(buf, &pvpn4->rd, sizeof(pvpn4->rd)); > - buf += sizeof(pvpn4->rd); > - memcpy(buf, &pvpn4->prefix4, psize); > - return (totlen); > + if (ibuf_add(tmp, &pvpn4->rd, sizeof(pvpn4->rd)) == -1 || > + ibuf_add(tmp, &pvpn4->prefix4, psize) == -1) > + goto fail; > + break; > case AID_VPN_IPv6: > plen = pvpn6->prefixlen; > - totlen = PREFIX_SIZE(plen) + sizeof(pvpn6->rd); > psize = PREFIX_SIZE(plen) - 1; > plen += sizeof(pvpn6->rd) * 8; > if (withdraw) { > /* withdraw have one compat label as placeholder */ > - totlen += 3; > plen += 3 * 8; > } else { > - totlen += pvpn6->labellen; > plen += pvpn6->labellen * 8; > } > > - if (totlen > len) > - return (-1); > - *buf++ = plen; > + if (ibuf_add_n8(tmp, plen) == -1) > + goto fail; > if (withdraw) { > /* magic compatibility label as per rfc8277 */ > - *buf++ = 0x80; > - *buf++ = 0x0; > - *buf++ = 0x0; > + if (ibuf_add_n8(tmp, 0x80) == -1 || > + ibuf_add_zero(tmp, 2) == -1) > + goto fail; > } else { > - memcpy(buf, &pvpn6->labelstack, pvpn6->labellen); > - buf += pvpn6->labellen; > + if (ibuf_add(tmp, &pvpn6->labelstack, > + pvpn6->labellen) == -1) > + goto fail; > } > - memcpy(buf, &pvpn6->rd, sizeof(pvpn6->rd)); > - buf += sizeof(pvpn6->rd); > - memcpy(buf, &pvpn6->prefix6, psize); > - return (totlen); > + if (ibuf_add(tmp, &pvpn6->rd, sizeof(pvpn6->rd)) == -1 || > + ibuf_add(tmp, &pvpn6->prefix6, psize) == -1) > + goto fail; > + break; > case AID_FLOWSPECv4: > case AID_FLOWSPECv6: > flowlen = pflow->len - PT_FLOW_SIZE; > - totlen = flowlen < FLOWSPEC_LEN_LIMIT ? 1 : 2; > - totlen += flowlen; > - > - if (totlen > len) > - return (-1); > - > if (flowlen < FLOWSPEC_LEN_LIMIT) { > - *buf++ = flowlen; > + if (ibuf_add_n8(tmp, flowlen) == -1) > + goto fail; > } else { > - *buf++ = 0xf0 | (flowlen >> 8); > - *buf++ = flowlen & 0xff; > + if (ibuf_add_n8(tmp, 0xf0 | (flowlen >> 8)) == -1 || > + ibuf_add_n8(tmp, flowlen) == -1) > + goto fail; > } > - memcpy(buf, &pflow->flow, flowlen); > - return (totlen); > - default: > - return (-1); > - } > -} > - > -/* dump a prefix into specified ibuf, allocating space for it if needed */ > -int > -pt_writebuf(struct ibuf *buf, struct pt_entry *pte) > -{ > - struct pt_entry_vpn4 *pvpn4 = (struct pt_entry_vpn4 *)pte; > - struct pt_entry_vpn6 *pvpn6 = (struct pt_entry_vpn6 *)pte; > - struct pt_entry_flow *pflow = (struct pt_entry_flow *)pte; > - int totlen, flowlen; > - void *bptr; > - > - switch (pte->aid) { > - case AID_INET: > - case AID_INET6: > - totlen = PREFIX_SIZE(pte->prefixlen); > - break; > - case AID_VPN_IPv4: > - totlen = PREFIX_SIZE(pte->prefixlen) + sizeof(pvpn4->rd) + > - pvpn4->labellen; > - break; > - case AID_VPN_IPv6: > - totlen = PREFIX_SIZE(pte->prefixlen) + sizeof(pvpn6->rd) + > - pvpn6->labellen; > - break; > - case AID_FLOWSPECv4: > - case AID_FLOWSPECv6: > - flowlen = pflow->len - PT_FLOW_SIZE; > - totlen = flowlen < FLOWSPEC_LEN_LIMIT ? 1 : 2; > - totlen += flowlen; > + if (ibuf_add(tmp, &pflow->flow, flowlen) == -1) > + goto fail; > break; > default: > - return (-1); > + goto fail; > } > > - if ((bptr = ibuf_reserve(buf, totlen)) == NULL) > - return (-1); > - if (pt_write(bptr, totlen, pte, 0) == -1) > - return (-1); > - return (0); > + if (ibuf_add_buf(buf, tmp) == -1) > + goto fail; > + ibuf_free(tmp); > + return 0; > + > + fail: > + ibuf_free(tmp); > + return -1; > } > Index: rde_update.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v > retrieving revision 1.162 > diff -u -p -r1.162 rde_update.c > --- rde_update.c 19 Apr 2023 08:30:37 -0000 1.162 > +++ rde_update.c 27 Jun 2023 08:33:54 -0000 > @@ -558,17 +558,15 @@ up_prep_adjout(struct rde_peer *peer, st > > > static int > -up_generate_attr(u_char *buf, int len, struct rde_peer *peer, > - struct filterstate *state, uint8_t aid) > +up_generate_attr(struct ibuf *buf, struct rde_peer *peer, > + struct rde_aspath *asp, struct rde_community *comm, struct nexthop *nh, > + uint8_t aid) > { > - struct rde_aspath *asp = &state->aspath; > - struct rde_community *comm = &state->communities; > struct attr *oa = NULL, *newaggr = NULL; > u_char *pdata; > uint32_t tmp32; > - struct bgpd_addr *nexthop; > - int flags, r, neednewpath = 0; > - uint16_t wlen = 0, plen; > + int flags, neednewpath = 0; > + uint16_t plen; > uint8_t oalen = 0, type; > > if (asp->others_len > 0) > @@ -576,8 +574,6 @@ up_generate_attr(u_char *buf, int len, s > > /* dump attributes in ascending order */ > for (type = ATTR_ORIGIN; type < 255; type++) { > - r = 0; > - > while (oa && oa->type < type) { > if (oalen < asp->others_len) > oa = asp->others[oalen++]; > @@ -590,9 +586,9 @@ up_generate_attr(u_char *buf, int len, s > * Attributes stored in rde_aspath > */ > case ATTR_ORIGIN: > - if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, > - ATTR_ORIGIN, &asp->origin, 1)) == -1) > - return (-1); > + if (attr_writebuf(buf, ATTR_WELL_KNOWN, > + ATTR_ORIGIN, &asp->origin, 1) == -1) > + return -1; > break; > case ATTR_ASPATH: > plen = aspath_length(asp->aspath); > @@ -602,20 +598,19 @@ up_generate_attr(u_char *buf, int len, s > pdata = aspath_deflate(pdata, &plen, > &neednewpath); > > - if ((r = attr_write(buf + wlen, len, ATTR_WELL_KNOWN, > - ATTR_ASPATH, pdata, plen)) == -1) > - return (-1); > + if (attr_writebuf(buf, ATTR_WELL_KNOWN, > + ATTR_ASPATH, pdata, plen) == -1) Not new, but this return leaks pdata in the !peer_has_as4byte() case. > + return -1; > if (!peer_has_as4byte(peer)) > free(pdata); > break; > case ATTR_NEXTHOP: > switch (aid) { > case AID_INET: > - nexthop = &state->nexthop->exit_nexthop; > - if ((r = attr_write(buf + wlen, len, > - ATTR_WELL_KNOWN, ATTR_NEXTHOP, > - &nexthop->v4.s_addr, 4)) == -1) > - return (-1); > + if (attr_writebuf(buf, ATTR_WELL_KNOWN, > + ATTR_NEXTHOP, &nh->exit_nexthop.v4, > + sizeof(nh->exit_nexthop.v4)) == -1) > + return -1; > break; > default: > break; > @@ -632,37 +627,29 @@ up_generate_attr(u_char *buf, int len, s > asp->flags & F_ATTR_MED_ANNOUNCE || > peer->flags & PEERFLAG_TRANS_AS)) { > tmp32 = htonl(asp->med); > - if ((r = attr_write(buf + wlen, len, > - ATTR_OPTIONAL, ATTR_MED, &tmp32, 4)) == -1) > - return (-1); > + if (attr_writebuf(buf, ATTR_OPTIONAL, > + ATTR_MED, &tmp32, 4) == -1) > + return -1; > } > break; > case ATTR_LOCALPREF: > if (!peer->conf.ebgp) { > /* local preference, only valid for ibgp */ > tmp32 = htonl(asp->lpref); > - if ((r = attr_write(buf + wlen, len, > - ATTR_WELL_KNOWN, ATTR_LOCALPREF, &tmp32, > - 4)) == -1) > - return (-1); > + if (attr_writebuf(buf, ATTR_WELL_KNOWN, > + ATTR_LOCALPREF, &tmp32, 4) == -1) > + return -1; > } > break; > /* > * Communities are stored in struct rde_community > */ > case ATTR_COMMUNITIES: > - if ((r = community_write(comm, buf + wlen, len)) == -1) > - return (-1); > - break; > case ATTR_EXT_COMMUNITIES: > - if ((r = community_ext_write(comm, peer->conf.ebgp, > - buf + wlen, len)) == -1) > - return (-1); > - break; > case ATTR_LARGE_COMMUNITIES: > - if ((r = community_large_write(comm, buf + wlen, > - len)) == -1) > - return (-1); > + if (community_writebuf(comm, type, peer->conf.ebgp, > + buf) == -1) > + return -1; > break; > /* > * NEW to OLD conversion when sending stuff to a 2byte AS peer > @@ -675,11 +662,10 @@ up_generate_attr(u_char *buf, int len, s > flags = ATTR_OPTIONAL|ATTR_TRANSITIVE; > if (!(asp->flags & F_PREFIX_ANNOUNCED)) > flags |= ATTR_PARTIAL; > - if (plen == 0) > - r = 0; > - else if ((r = attr_write(buf + wlen, len, flags, > - ATTR_AS4_PATH, pdata, plen)) == -1) > - return (-1); > + if (plen != 0) > + if (attr_writebuf(buf, flags, > + ATTR_AS4_PATH, pdata, plen) == -1) > + return -1; > } > break; > case ATTR_AS4_AGGREGATOR: > @@ -687,10 +673,10 @@ up_generate_attr(u_char *buf, int len, s > flags = ATTR_OPTIONAL|ATTR_TRANSITIVE; > if (!(asp->flags & F_PREFIX_ANNOUNCED)) > flags |= ATTR_PARTIAL; > - if ((r = attr_write(buf + wlen, len, flags, > + if (attr_writebuf(buf, flags, > ATTR_AS4_AGGREGATOR, newaggr->data, > - newaggr->len)) == -1) > - return (-1); > + newaggr->len) == -1) > + return -1; > } > break; > /* > @@ -712,24 +698,24 @@ up_generate_attr(u_char *buf, int len, s > case ATTR_ATOMIC_AGGREGATE: > if (oa == NULL || oa->type != type) > break; > - if ((r = attr_write(buf + wlen, len, > - ATTR_WELL_KNOWN, ATTR_ATOMIC_AGGREGATE, > - NULL, 0)) == -1) > - return (-1); > + if (attr_writebuf(buf, ATTR_WELL_KNOWN, > + ATTR_ATOMIC_AGGREGATE, NULL, 0) == -1) > + return -1; > break; > case ATTR_AGGREGATOR: > if (oa == NULL || oa->type != type) > break; > + if ((!(oa->flags & ATTR_TRANSITIVE)) && > + peer->conf.ebgp) > + break; > if (!peer_has_as4byte(peer)) { > /* need to deflate the aggregator */ > - uint8_t t[6]; > + uint8_t t[6]; > uint16_t tas; > > if ((!(oa->flags & ATTR_TRANSITIVE)) && > - peer->conf.ebgp) { > - r = 0; > + peer->conf.ebgp) > break; > - } > > memcpy(&tmp32, oa->data, sizeof(tmp32)); > if (ntohl(tmp32) > USHRT_MAX) { > @@ -742,30 +728,31 @@ up_generate_attr(u_char *buf, int len, s > memcpy(t + sizeof(tas), > oa->data + sizeof(tmp32), > oa->len - sizeof(tmp32)); > - if ((r = attr_write(buf + wlen, len, > - oa->flags, oa->type, &t, sizeof(t))) == -1) > - return (-1); > - break; > + if (attr_writebuf(buf, oa->flags, > + oa->type, &t, sizeof(t)) == -1) > + return -1; > + } else { > + if (attr_writebuf(buf, oa->flags, oa->type, > + oa->data, oa->len) == -1) > + return -1; > } > - /* FALLTHROUGH */ > + break; > case ATTR_ORIGINATOR_ID: > case ATTR_CLUSTER_LIST: > case ATTR_OTC: > if (oa == NULL || oa->type != type) > break; > if ((!(oa->flags & ATTR_TRANSITIVE)) && > - peer->conf.ebgp) { > - r = 0; > + peer->conf.ebgp) > break; > - } > - if ((r = attr_write(buf + wlen, len, > - oa->flags, oa->type, oa->data, oa->len)) == -1) > - return (-1); > + if (attr_writebuf(buf, oa->flags, oa->type, > + oa->data, oa->len) == -1) > + return -1; > break; > default: > if (oa == NULL && type >= ATTR_FIRST_UNKNOWN) > /* there is no attribute left to dump */ > - goto done; > + return (0); > > if (oa == NULL || oa->type != type) > break; > @@ -779,16 +766,12 @@ up_generate_attr(u_char *buf, int len, s > */ > break; > } > - if ((r = attr_write(buf + wlen, len, > - oa->flags | ATTR_PARTIAL, oa->type, > - oa->data, oa->len)) == -1) > - return (-1); > + if (attr_writebuf(buf, oa->flags | ATTR_PARTIAL, > + oa->type, oa->data, oa->len) == -1) > + return -1; > } > - wlen += r; > - len -= r; > } > -done: > - return (wlen); > + return 0; > } > > /* > @@ -817,33 +800,42 @@ up_is_eor(struct rde_peer *peer, uint8_t > /* minimal buffer size > withdraw len + attr len + attr hdr + afi/safi */ > #define MIN_UPDATE_LEN 16 > > +static void > +up_prefix_free(struct prefix_tree *prefix_head, struct prefix *p, > + struct rde_peer *peer, int withdraw) > +{ > + if (withdraw) { > + /* prefix no longer needed, remove it */ > + prefix_adjout_destroy(p); > + peer->stats.prefix_sent_withdraw++; > + } else { > + /* prefix still in Adj-RIB-Out, keep it */ > + RB_REMOVE(prefix_tree, prefix_head, p); > + p->flags &= ~PREFIX_FLAG_UPDATE; > + peer->stats.pending_update--; > + peer->stats.prefix_sent_update++; > + } > +} > + > /* > * Write prefixes to buffer until either there is no more space or > * the next prefix has no longer the same ASPATH attributes. > + * Returns -1 if no prefix was written else 0. > */ > static int > -up_dump_prefix(u_char *buf, int len, struct prefix_tree *prefix_head, > +up_dump_prefix(struct ibuf *buf, struct prefix_tree *prefix_head, > struct rde_peer *peer, int withdraw) > { > struct prefix *p, *np; > - uint32_t pathid; > - int r, wpos = 0, done = 0; > + int done = 0, has_ap = -1, rv = -1; > > RB_FOREACH_SAFE(p, prefix_tree, prefix_head, np) { > - if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND)) { > - if (len <= wpos + (int)sizeof(pathid)) > - break; > - pathid = htonl(p->path_id_tx); > - memcpy(buf + wpos, &pathid, sizeof(pathid)); > - wpos += sizeof(pathid); > - } > - if ((r = pt_write(buf + wpos, len - wpos, p->pt, > - withdraw)) == -1) { > - if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND)) > - wpos -= sizeof(pathid); > + if (has_ap == -1) > + has_ap = peer_has_add_path(peer, p->pt->aid, > + CAPA_AP_SEND); > + if (pt_writebuf(buf, p->pt, withdraw, has_ap, p->path_id_tx) == > + -1) > break; > - } > - wpos += r; > > /* make sure we only dump prefixes which belong together */ > if (np == NULL || > @@ -854,276 +846,249 @@ up_dump_prefix(u_char *buf, int len, str > (np->flags & PREFIX_FLAG_EOR)) > done = 1; > > - if (withdraw) { > - /* prefix no longer needed, remove it */ > - prefix_adjout_destroy(p); > - peer->stats.prefix_sent_withdraw++; > - } else { > - /* prefix still in Adj-RIB-Out, keep it */ > - RB_REMOVE(prefix_tree, prefix_head, p); > - p->flags &= ~PREFIX_FLAG_UPDATE; > - peer->stats.pending_update--; > - peer->stats.prefix_sent_update++; > - } > - > + rv = 0; > + up_prefix_free(prefix_head, p, peer, withdraw); > if (done) > break; > } > - return (wpos); > -} > - > -int > -up_dump_withdraws(u_char *buf, int len, struct rde_peer *peer, uint8_t aid) > -{ > - uint16_t wpos, wd_len; > - int r; > - > - if (len < MIN_UPDATE_LEN) > - return (-1); > - > - /* reserve space for the length field */ > - wpos = 2; > - r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid], > - peer, 1); > - wd_len = htons(r); > - memcpy(buf, &wd_len, 2); > - > - return (wpos + r); > -} > - > -int > -up_dump_mp_unreach(u_char *buf, int len, struct rde_peer *peer, uint8_t aid) > -{ > - u_char *attrbuf; > - int wpos, r; > - uint16_t attr_len, tmp; > - > - if (len < MIN_UPDATE_LEN || RB_EMPTY(&peer->withdraws[aid])) > - return (-1); > - > - /* reserve space for withdraw len, attr len */ > - wpos = 2 + 2; > - attrbuf = buf + wpos; > - > - /* attribute header, defaulting to extended length one */ > - attrbuf[0] = ATTR_OPTIONAL | ATTR_EXTLEN; > - attrbuf[1] = ATTR_MP_UNREACH_NLRI; > - wpos += 4; > - > - /* afi & safi */ > - if (aid2afi(aid, &tmp, buf + wpos + 2)) > - fatalx("up_dump_mp_unreach: bad AID"); > - tmp = htons(tmp); > - memcpy(buf + wpos, &tmp, sizeof(uint16_t)); > - wpos += 3; > - > - r = up_dump_prefix(buf + wpos, len - wpos, &peer->withdraws[aid], > - peer, 1); > - if (r == 0) > - return (-1); > - wpos += r; > - attr_len = r + 3; /* prefixes + afi & safi */ > - > - /* attribute length */ > - attr_len = htons(attr_len); > - memcpy(attrbuf + 2, &attr_len, sizeof(attr_len)); > - > - /* write length fields */ > - memset(buf, 0, sizeof(uint16_t)); /* withdrawn routes len */ > - attr_len = htons(wpos - 4); > - memcpy(buf + 2, &attr_len, sizeof(attr_len)); > - > - return (wpos); > -} > - > -int > -up_dump_attrnlri(u_char *buf, int len, struct rde_peer *peer) > -{ > - struct filterstate state; > - struct prefix *p; > - int r, wpos; > - uint16_t attr_len; > - > - if (len < 2) > - fatalx("up_dump_attrnlri: buffer way too small"); > - if (len < MIN_UPDATE_LEN) > - goto done; > - > - p = RB_MIN(prefix_tree, &peer->updates[AID_INET]); > - if (p == NULL) > - goto done; > - > - rde_filterstate_prep(&state, p); > - r = up_generate_attr(buf + 2, len - 2, peer, &state, AID_INET); > - rde_filterstate_clean(&state); > - if (r == -1) { > - /* > - * either no packet or not enough space. > - * The length field needs to be set to zero else it would be > - * an invalid bgp update. > - */ > -done: > - memset(buf, 0, 2); > - return (2); > - } > - > - /* first dump the 2-byte path attribute length */ > - attr_len = htons(r); > - memcpy(buf, &attr_len, 2); > - wpos = 2; > - /* then skip over the already dumped path attributes themselves */ > - wpos += r; > - > - /* last but not least dump the nlri */ > - r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[AID_INET], > - peer, 0); > - wpos += r; > - > - return (wpos); > + return rv; > } > > static int > -up_generate_mp_reach(u_char *buf, int len, struct rde_peer *peer, > - struct filterstate *state, uint8_t aid) > +up_generate_mp_reach(struct ibuf *buf, struct rde_peer *peer, > + struct nexthop *nh, uint8_t aid) > { > - struct bgpd_addr *nexthop; > - u_char *attrbuf; > - int r, wpos, attrlen; > - uint16_t tmp; > + struct bgpd_addr *nexthop; > + size_t off; > + uint16_t len, afi; > + uint8_t safi; > > - if (len < 4) > - return (-1); > /* attribute header, defaulting to extended length one */ > - buf[0] = ATTR_OPTIONAL | ATTR_EXTLEN; > - buf[1] = ATTR_MP_REACH_NLRI; > - wpos = 4; > - attrbuf = buf + wpos; > + if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1) > + return -1; > + if (ibuf_add_n8(buf, ATTR_MP_REACH_NLRI) == -1) > + return -1; > + off = ibuf_size(buf); > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + > + if (aid2afi(aid, &afi, &safi)) > + fatalx("up_generate_mp_reach: bad AID"); > + > + /* AFI + SAFI + NH LEN + NH + Reserved */ > + if (ibuf_add_n16(buf, afi) == -1) > + return -1; > + if (ibuf_add_n8(buf, safi) == -1) > + return -1; > > switch (aid) { > case AID_INET6: > - attrlen = 21; /* AFI + SAFI + NH LEN + NH + Reserved */ > - if (len < wpos + attrlen) > - return (-1); > - wpos += attrlen; > - if (aid2afi(aid, &tmp, &attrbuf[2])) > - fatalx("up_generate_mp_reach: bad AID"); > - tmp = htons(tmp); > - memcpy(attrbuf, &tmp, sizeof(tmp)); > - attrbuf[3] = sizeof(struct in6_addr); > - attrbuf[20] = 0; /* Reserved must be 0 */ > - > + /* NH LEN */ > + if (ibuf_add_n8(buf, sizeof(struct in6_addr)) == -1) > + return -1; > /* write nexthop */ > - attrbuf += 4; > - nexthop = &state->nexthop->exit_nexthop; > - memcpy(attrbuf, &nexthop->v6, sizeof(struct in6_addr)); > + nexthop = &nh->exit_nexthop; > + if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1) > + return -1; > break; > case AID_VPN_IPv4: > - attrlen = 17; /* AFI + SAFI + NH LEN + NH + Reserved */ > - if (len < wpos + attrlen) > - return (-1); > - wpos += attrlen; > - if (aid2afi(aid, &tmp, &attrbuf[2])) > - fatalx("up_generate_mp_reachi: bad AID"); > - tmp = htons(tmp); > - memcpy(attrbuf, &tmp, sizeof(tmp)); > - attrbuf[3] = sizeof(uint64_t) + sizeof(struct in_addr); > - memset(attrbuf + 4, 0, sizeof(uint64_t)); > - attrbuf[16] = 0; /* Reserved must be 0 */ > - > + /* NH LEN */ > + if (ibuf_add_n8(buf, > + sizeof(uint64_t) + sizeof(struct in_addr)) == -1) > + return -1; > + /* write zero rd */ > + if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1) > + return -1; > /* write nexthop */ > - attrbuf += 12; > - nexthop = &state->nexthop->exit_nexthop; > - memcpy(attrbuf, &nexthop->v4, sizeof(struct in_addr)); > + nexthop = &nh->exit_nexthop; > + if (ibuf_add(buf, &nexthop->v4, sizeof(struct in_addr)) == -1) > + return -1; > break; > case AID_VPN_IPv6: > - attrlen = 29; /* AFI + SAFI + NH LEN + NH + Reserved */ > - if (len < wpos + attrlen) > - return (-1); > - wpos += attrlen; > - if (aid2afi(aid, &tmp, &attrbuf[2])) > - fatalx("up_generate_mp_reachi: bad AID"); > - tmp = htons(tmp); > - memcpy(attrbuf, &tmp, sizeof(tmp)); > - attrbuf[3] = sizeof(uint64_t) + sizeof(struct in6_addr); > - memset(attrbuf + 4, 0, sizeof(uint64_t)); > - attrbuf[28] = 0; /* Reserved must be 0 */ > - > + /* NH LEN */ > + if (ibuf_add_n8(buf, > + sizeof(uint64_t) + sizeof(struct in6_addr)) == -1) > + return -1; > + /* write zero rd */ > + if (ibuf_add_zero(buf, sizeof(uint64_t)) == -1) > + return -1; > /* write nexthop */ > - attrbuf += 12; > - nexthop = &state->nexthop->exit_nexthop; > - memcpy(attrbuf, &nexthop->v6, sizeof(struct in6_addr)); > + nexthop = &nh->exit_nexthop; > + if (ibuf_add(buf, &nexthop->v6, sizeof(struct in6_addr)) == -1) > + return -1; > break; > case AID_FLOWSPECv4: > case AID_FLOWSPECv6: > - attrlen = 5; /* AFI + SAFI + NH LEN + NH + Reserved */ > - if (len < wpos + attrlen) > - return (-1); > - wpos += attrlen; > - if (aid2afi(aid, &tmp, &attrbuf[2])) > - fatalx("up_generate_mp_reachi: bad AID"); > - tmp = htons(tmp); > - memcpy(attrbuf, &tmp, sizeof(tmp)); > - attrbuf[3] = 0; /* nh len MUST be 0 */ > - attrbuf[4] = 0; /* Reserved must be 0 */ > + if (ibuf_add_zero(buf, 1) == -1) /* NH LEN MUST be 0 */ > + return -1; > + /* no NH */ > break; > default: > fatalx("up_generate_mp_reach: unknown AID"); > } > > - r = up_dump_prefix(buf + wpos, len - wpos, &peer->updates[aid], > - peer, 0); > - if (r == 0) { > - /* no prefixes written ... */ > + if (ibuf_add_zero(buf, 1) == -1) /* Reserved must be 0 */ > + return -1; > + > + if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1) > + /* no prefixes written, fail update */ > return (-1); > - } > - attrlen += r; > - wpos += r; > - /* update attribute length field */ > - tmp = htons(attrlen); > - memcpy(buf + 2, &tmp, sizeof(tmp)); > > - return (wpos); > + /* update MP_REACH attribute length field */ > + len = ibuf_size(buf) - off - sizeof(len); > + if (ibuf_set_n16(buf, off, len) == -1) > + return -1; > + > + return 0; > } > > +/* > + * Generate UPDATE message containing either just withdraws or updates. > + * UPDATE messages are contructed like this: > + * > + * +-----------------------------------------------------+ > + * | Withdrawn Routes Length (2 octets) | > + * +-----------------------------------------------------+ > + * | Withdrawn Routes (variable) | > + * +-----------------------------------------------------+ > + * | Total Path Attribute Length (2 octets) | > + * +-----------------------------------------------------+ > + * | Path Attributes (variable) | > + * +-----------------------------------------------------+ > + * | Network Layer Reachability Information (variable) | > + * +-----------------------------------------------------+ > + * > + * Multiprotocol messages use MP_REACH_NLRI and MP_UNREACH_NLRI > + * the latter will be the only path attribute in a message. > + */ > + > +/* > + * Write UPDATE message for withdrawn routes. The size of buf limits > + * how may routes can be added. Return 0 on success -1 on error which > + * includes generating an empty withdraw message. > + */ > int > -up_dump_mp_reach(u_char *buf, int len, struct rde_peer *peer, uint8_t aid) > +up_dump_withdraws(struct ibuf *buf, struct rde_peer *peer, uint8_t aid) > { > - struct filterstate state; > - struct prefix *p; > - int r, wpos; > - uint16_t attr_len; > + size_t off; > + uint16_t afi, len; > + uint8_t safi; > + > + /* reserve space for the withdrawn routes length field */ > + off = ibuf_size(buf); > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + > + if (aid != AID_INET) { > + /* reserve space for 2-byte path attribute length */ > + off = ibuf_size(buf); > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + > + /* attribute header, defaulting to extended length one */ > + if (ibuf_add_n8(buf, ATTR_OPTIONAL | ATTR_EXTLEN) == -1) > + return -1; > + if (ibuf_add_n8(buf, ATTR_MP_UNREACH_NLRI) == -1) > + return -1; > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + > + /* afi & safi */ > + if (aid2afi(aid, &afi, &safi)) > + fatalx("up_dump_mp_unreach: bad AID"); > + if (ibuf_add_n16(buf, afi) == -1) > + return -1; > + if (ibuf_add_n8(buf, safi) == -1) > + return -1; > + } > + > + if (up_dump_prefix(buf, &peer->withdraws[aid], peer, 1) == -1) > + return -1; > > - if (len < MIN_UPDATE_LEN) > - return 0; > + /* update length field (either withdrawn routes or attribute length) */ > + len = ibuf_size(buf) - off - sizeof(len); > + if (ibuf_set_n16(buf, off, len) == -1) > + return -1; > + > + if (aid != AID_INET) { > + /* write MP_UNREACH_NLRI attribute length (always extended) */ > + len -= 4; /* skip attribute header */ > + if (ibuf_set_n16(buf, off + sizeof(len) + 2, len) == -1) > + return -1; > + } else { > + /* no extra attributes so set attribute len to 0 */ > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + } > + > + return 0; > +} > + > +/* > + * Write UPDATE message for changed and added routes. The size of buf limits > + * how may routes can be added. The function first dumps the path attributes > + * and then tries to add as many prefixes using these attributes. > + * Return 0 on success -1 on error which includes producing an empty message. > + */ > +int > +up_dump_update(struct ibuf *buf, struct rde_peer *peer, uint8_t aid) > +{ > + struct bgpd_addr addr; > + struct prefix *p; > + size_t off; > + uint16_t len; > > - /* get starting point */ > p = RB_MIN(prefix_tree, &peer->updates[aid]); > if (p == NULL) > - return 0; > + return -1; > + > + /* withdrawn routes length field is 0 */ > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + > + /* reserve space for 2-byte path attribute length */ > + off = ibuf_size(buf); > + if (ibuf_add_zero(buf, sizeof(len)) == -1) > + return -1; > + > + if (up_generate_attr(buf, peer, prefix_aspath(p), > + prefix_communities(p), prefix_nexthop(p), aid) == -1) > + goto fail; > > - wpos = 4; /* reserve space for length fields */ > + if (aid != AID_INET) { > + /* write mp attribute including nlri */ > > - rde_filterstate_prep(&state, p); > + /* > + * RFC 7606 wants this to be first but then we need > + * to use multiple buffers with adjusted length to > + * merge the attributes together in reverse order of > + * creation. > + */ > + if (up_generate_mp_reach(buf, peer, prefix_nexthop(p), aid) == > + -1) > + goto fail; > + } > > - /* write regular path attributes */ > - r = up_generate_attr(buf + wpos, len - wpos, peer, &state, aid); > - if (r == -1) { > - rde_filterstate_clean(&state); > - return 0; > + /* update attribute length field */ > + len = ibuf_size(buf) - off - sizeof(len); > + if (ibuf_set_n16(buf, off, len) == -1) > + return -1; > + > + if (aid == AID_INET) { > + /* last but not least dump the IPv4 nlri */ > + if (up_dump_prefix(buf, &peer->updates[aid], peer, 0) == -1) > + goto fail; > } > - wpos += r; > > - /* write mp attribute */ > - r = up_generate_mp_reach(buf + wpos, len - wpos, peer, &state, aid); > - rde_filterstate_clean(&state); > - if (r == -1) > - return 0; > - wpos += r; > - > - /* write length fields */ > - memset(buf, 0, sizeof(uint16_t)); /* withdrawn routes len */ > - attr_len = htons(wpos - 4); > - memcpy(buf + 2, &attr_len, sizeof(attr_len)); > + return 0; > > - return (wpos); > +fail: > + /* Not enough space. Drop prefix, it will never fit. */ > + pt_getaddr(p->pt, &addr); > + log_peer_warnx(&peer->conf, "path attributes to large, " > + "prefix %s/%d dropped", log_addr(&addr), p->pt->prefixlen); > + > + up_prefix_free(&peer->updates[AID_INET], p, peer, 0); > + /* XXX should probably send a withdraw for this prefix */ > + return -1; > } > Index: session.c > =================================================================== > RCS file: /cvs/src/usr.sbin/bgpd/session.c,v > retrieving revision 1.445 > diff -u -p -r1.445 session.c > --- session.c 25 May 2023 14:20:25 -0000 1.445 > +++ session.c 27 Jun 2023 08:33:54 -0000 > @@ -1383,7 +1383,7 @@ session_sendmsg(struct bgp_msg *msg, str > if ((mrt->peer_id == 0 && mrt->group_id == 0) || > mrt->peer_id == p->conf.id || (mrt->group_id != 0 && > mrt->group_id == p->conf.groupid)) > - mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p, > + mrt_dump_bgp_msg(mrt, ibuf_data(msg->buf), msg->len, p, > msg->type); > } > > @@ -1589,7 +1589,7 @@ session_open(struct peer *p) > uint8_t op_len = optparamlen; > errs += ibuf_add(buf->buf, &op_len, 1); > } > - errs += ibuf_add(buf->buf, opb->buf, ibuf_size(opb)); > + errs += ibuf_add_buf(buf->buf, opb); > } > > ibuf_free(opb); >