When there is no incoming data traffic at the interface for a period, BFD decay allows the BFD session to increase its min_rx. This is helpful because some interfaces are usually idle for long periods of time, and CPU consumption can be reduced by processing fewer BFD control packets.
Signed-off-by: Alex Wang <al...@nicira.com> --- lib/bfd.c | 110 +++++++++++++++++++++++++++++++++++++++++++++--- lib/bfd.h | 5 ++- ofproto/ofproto-dpif.c | 7 ++- vswitchd/vswitch.xml | 10 +++++ 4 files changed, 125 insertions(+), 7 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index d4ac489..6420bf9 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -26,6 +26,7 @@ #include "hash.h" #include "hmap.h" #include "list.h" +#include "netdev.h" #include "netlink.h" #include "odp-util.h" #include "ofpbuf.h" @@ -149,6 +150,9 @@ struct bfd { bool cpath_down; /* Concatenated Path Down. */ uint8_t mult; /* bfd.DetectMult. */ + struct netdev *netdev; + uint64_t rx_packets; /* Packets received by 'netdev'. */ + enum state state; /* bfd.SessionState. */ enum state rmt_state; /* bfd.RemoteSessionState. */ @@ -184,6 +188,10 @@ struct bfd { atomic_bool check_tnl_key; /* Verify tunnel key of inbound packets? */ atomic_int ref_cnt; + + /* BFD decay related variables. */ + int decay_min_rx; + long long int decay_detect_time; /* Decay detection time. */ }; static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; @@ -206,6 +214,8 @@ static void bfd_set_state(struct bfd *, enum state, enum diag) static uint32_t generate_discriminator(void) OVS_REQ_WRLOCK(&mutex); static void bfd_put_details(struct ds *, const struct bfd *) OVS_REQ_WRLOCK(&mutex); +static uint64_t bfd_rx_packets(const struct bfd *) OVS_REQ_WRLOCK(&mutex); +static void bfd_decay(struct bfd *) OVS_REQ_WRLOCK(&mutex); static void bfd_unixctl_show(struct unixctl_conn *, int argc, const char *argv[], void *aux OVS_UNUSED); static void bfd_unixctl_set_forwarding_override(struct unixctl_conn *, @@ -253,12 +263,13 @@ bfd_get_status(const struct bfd *bfd, struct smap *smap) * handle for the session, or NULL if BFD is not enabled according to 'cfg'. * Also returns NULL if cfg is NULL. 
*/ struct bfd * -bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg) - OVS_EXCLUDED(mutex) +bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, + struct netdev *netdev) OVS_EXCLUDED(mutex) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static atomic_uint16_t udp_src = ATOMIC_VAR_INIT(0); + int decay_min_rx; long long int min_tx, min_rx; bool cpath_down; const char *hwaddr; @@ -290,6 +301,9 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg) bfd->min_tx = 1000; bfd->mult = 3; atomic_init(&bfd->ref_cnt, 1); + bfd->netdev = netdev_ref(netdev); + bfd->decay_detect_time = 0; + bfd->rx_packets = bfd_rx_packets(bfd); /* RFC 5881 section 4 * The source port MUST be in the range 49152 through 65535. The same @@ -322,9 +336,26 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg) if (bfd->cfg_min_rx != min_rx) { bfd->cfg_min_rx = min_rx; if (bfd->state != STATE_UP - || (!bfd_in_poll(bfd) && bfd->cfg_min_rx > bfd->min_rx)) { + || (!bfd_in_poll(bfd) && bfd->cfg_min_rx > bfd->min_rx) + || bfd->min_rx == bfd->decay_min_rx) { bfd->min_rx = bfd->cfg_min_rx; } + bfd->decay_min_rx = 0; + bfd_poll(bfd); + } + + decay_min_rx = smap_get_int(cfg, "decay_min_rx", 0); + if (bfd->decay_min_rx != decay_min_rx ) { + if (decay_min_rx > 0 && decay_min_rx < bfd->cfg_min_rx) { + VLOG_WARN("%s: decay_min_rx cannot be less than %lld ms", + bfd->name, bfd->cfg_min_rx); + bfd->decay_min_rx = 0; + } else { + bfd->decay_min_rx = decay_min_rx; + } + bfd->min_rx = bfd->cfg_min_rx; + bfd->decay_detect_time = (bfd->decay_min_rx < 2000 ? 
+ 2000 : bfd->decay_min_rx) + time_msec(); bfd_poll(bfd); } @@ -373,6 +404,7 @@ bfd_unref(struct bfd *bfd) OVS_EXCLUDED(mutex) if (orig == 1) { ovs_mutex_lock(&mutex); hmap_remove(all_bfds, &bfd->node); + netdev_close(bfd->netdev); free(bfd->name); free(bfd); ovs_mutex_unlock(&mutex); @@ -398,14 +430,29 @@ bfd_wait(const struct bfd *bfd) OVS_EXCLUDED(mutex) void bfd_run(struct bfd *bfd) OVS_EXCLUDED(mutex) { + long long int now; + ovs_mutex_lock(&mutex); - if (bfd->state > STATE_DOWN && time_msec() >= bfd->detect_time) { + now = time_msec(); + + if (bfd->state > STATE_DOWN && now >= bfd->detect_time) { bfd_set_state(bfd, STATE_DOWN, DIAG_EXPIRED); } + if (bfd->state == STATE_UP && bfd->decay_min_rx > 0 + && now >= bfd->decay_detect_time) { + bfd_decay(bfd); + } + if (bfd->min_tx != bfd->cfg_min_tx || bfd->min_rx != bfd->cfg_min_rx) { + /* Do not poll if already decayed to decay_min_rx. */ + if (bfd->state == STATE_UP && bfd->poll_min_rx == bfd->decay_min_rx + && bfd->min_tx == bfd->cfg_min_tx) { + goto exit; + } bfd_poll(bfd); } +exit: ovs_mutex_unlock(&mutex); } @@ -680,6 +727,20 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, out: ovs_mutex_unlock(&mutex); } + +/* Must be called when the netdev owned by 'bfd' should change. */ +void +bfd_set_netdev(struct bfd *bfd, const struct netdev *netdev) + OVS_EXCLUDED(mutex) +{ + ovs_mutex_lock(&mutex); + if (bfd->netdev != netdev) { + netdev_close(bfd->netdev); + bfd->netdev = netdev_ref(netdev); + } + ovs_mutex_unlock(&mutex); +} + static bool bfd_forwarding__(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex) @@ -707,7 +768,8 @@ bfd_poll(struct bfd *bfd) OVS_REQ_WRLOCK(mutex) if (bfd->state > STATE_DOWN && !bfd_in_poll(bfd) && !(bfd->flags & FLAG_FINAL)) { bfd->poll_min_tx = bfd->cfg_min_tx; - bfd->poll_min_rx = bfd->cfg_min_rx; + bfd->poll_min_rx = bfd->min_rx == bfd->decay_min_rx + ? 
bfd->decay_min_rx : bfd->cfg_min_rx; bfd->flags |= FLAG_POLL; bfd->next_tx = 0; VLOG_INFO_RL(&rl, "%s: Initiating poll sequence", bfd->name); @@ -882,6 +944,44 @@ bfd_set_state(struct bfd *bfd, enum state state, enum diag diag) } } +static uint64_t +bfd_rx_packets(const struct bfd *bfd) OVS_REQ_WRLOCK(mutex) +{ + struct netdev_stats stats; + + if (!netdev_get_stats(bfd->netdev, &stats)) { + return stats.rx_packets; + } else { + return 0; + } +} + +static void +bfd_decay(struct bfd *bfd) OVS_REQ_WRLOCK(mutex) +{ + uint64_t rx_packets = bfd_rx_packets(bfd); + int64_t diff, measure; + + diff = rx_packets - bfd->rx_packets; + bfd->rx_packets = rx_packets; + bfd->decay_detect_time = (bfd->decay_min_rx < 2000 ? + 2000 : bfd->decay_min_rx) + time_msec(); + measure = (bfd->decay_min_rx < 2000 ? 2000 : bfd->decay_min_rx) + / bfd->min_rx + 5; + + if (diff <= measure) { + /* Decay when there is no obvious data traffic. */ + if (bfd->min_rx != bfd->decay_min_rx) { + bfd->min_rx = bfd->decay_min_rx; + } + } else { + /* Restore the min_rx. 
*/ + if (bfd->min_rx != bfd->cfg_min_rx) { + bfd->min_rx = bfd->cfg_min_rx; + } + } +} + static uint32_t generate_discriminator(void) { diff --git a/lib/bfd.h b/lib/bfd.h index 67d012e..0e1e33d 100644 --- a/lib/bfd.h +++ b/lib/bfd.h @@ -24,6 +24,7 @@ struct bfd; struct flow; struct flow_wildcards; +struct netdev; struct ofpbuf; struct smap; @@ -40,11 +41,13 @@ void bfd_process_packet(struct bfd *, const struct flow *, const struct ofpbuf *); struct bfd *bfd_configure(struct bfd *, const char *name, - const struct smap *smap); + const struct smap *smap, + struct netdev *netdev); struct bfd *bfd_ref(const struct bfd *); void bfd_unref(struct bfd *); bool bfd_forwarding(const struct bfd *); void bfd_get_status(const struct bfd *, struct smap *); +void bfd_set_netdev(struct bfd *, const struct netdev *); #endif /* bfd.h */ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index a8e5cd5..75fd96c 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1862,6 +1862,10 @@ port_modified(struct ofport *port_) cfm_set_netdev(port->cfm, port->up.netdev); } + if (port->bfd) { + bfd_set_netdev(port->bfd, port->up.netdev); + } + if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev, port->odp_port)) { ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate = @@ -1996,7 +2000,8 @@ set_bfd(struct ofport *ofport_, const struct smap *cfg) struct bfd *old; old = ofport->bfd; - ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev), cfg); + ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev), + cfg, ofport->up.netdev); if (ofport->bfd != old) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index b89d58c..b73a612 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1880,6 +1880,16 @@ specified. Defaults to <code>100</code>. 
</column> + <column name="bfd" key="decay_min_rx" type='{"type": "integer"}'> + <code>decay_min_rx</code> is used to set the <code>min_rx</code>, + when there is no obvious incoming data traffic at the interface. + It cannot be less than the <code>min_rx</code>. The decay feature + is disabled by setting the <code>decay_min_rx</code> to 0. The + feature is reset every time it or <code>min_rx</code> is + reconfigured. + </column> + + <column name="bfd" key="cpath_down" type='{"type": "boolean"}'> Concatenated path down may be used when the local system should not have traffic forwarded to it for some reason other than a connectivty -- 1.7.9.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev