Looks good thanks.

Ethan

On Mon, Aug 6, 2012 at 1:54 PM, Ben Pfaff <b...@nicira.com> wrote:
> Until now, the rconn module has used messages received from the
> controller as the sole means to determine that the connection is up.
> This can interact badly with the OVS connection manager in ofproto,
> which stops reading and processing messages from the receive queue
> when there is a backlog in the send queue for a given connection
> (because reading and processes messages is the main cause of messages
> getting pushed onto the send queue).  So, if a send queue backlog
> lasts more than twice the inactivity probe interval, then the
> connection drops, whether the controller is sending messages or not.
> Dumping a large flow table can trigger this behavior if the controller
> becomes temporarily busy or if the network between OVS and a
> controller is slow.  The problem can easily repeat itself, since upon
> reconnection the controller will generally dump the flow table.
>
> This commit fixes the problem by expanding the definition of
> "activity" to include successfully sending an OpenFlow message that
> was previously queued.
>
> Reported-by: Natasha Gude <nata...@nicira.com>
> Signed-off-by: Ben Pfaff <b...@nicira.com>
> ---
>  lib/rconn.c |   23 ++++++++++++++---------
>  1 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/lib/rconn.c b/lib/rconn.c
> index a32f042..eae6a7e 100644
> --- a/lib/rconn.c
> +++ b/lib/rconn.c
> @@ -80,7 +80,6 @@ struct rconn {
>      int backoff;
>      int max_backoff;
>      time_t backoff_deadline;
> -    time_t last_received;
>      time_t last_connected;
>      time_t last_disconnected;
>      unsigned int packets_sent;
> @@ -105,11 +104,15 @@ struct rconn {
>      time_t creation_time;
>      unsigned long int total_time_connected;
>
> -    /* Throughout this file, "probe" is shorthand for "inactivity probe".
> -     * When nothing has been received from the peer for a while, we send out
> -     * an echo request as an inactivity probe packet.  We should receive back
> -     * a response. */
> +    /* Throughout this file, "probe" is shorthand for "inactivity probe".  
> When
> +     * no activity has been observed from the peer for a while, we send out 
> an
> +     * echo request as an inactivity probe packet.  We should receive back a
> +     * response.
> +     *
> +     * "Activity" is defined as either receiving an OpenFlow message from the
> +     * peer or successfully sending a message that had been in 'txq'. */
>      int probe_interval;         /* Secs of inactivity before sending probe. 
> */
> +    time_t last_activity;       /* Last time we saw some activity. */
>
>      /* When we create a vconn we obtain these values, to save them past the 
> end
>       * of the vconn's lifetime.  Otherwise, in-band control will only allow
> @@ -179,7 +182,6 @@ rconn_create(int probe_interval, int max_backoff, uint8_t 
> dscp)
>      rc->backoff = 0;
>      rc->max_backoff = max_backoff ? max_backoff : 8;
>      rc->backoff_deadline = TIME_MIN;
> -    rc->last_received = time_now();
>      rc->last_connected = TIME_MIN;
>      rc->last_disconnected = TIME_MIN;
>      rc->seqno = 0;
> @@ -195,6 +197,8 @@ rconn_create(int probe_interval, int max_backoff, uint8_t 
> dscp)
>      rc->creation_time = time_now();
>      rc->total_time_connected = 0;
>
> +    rc->last_activity = time_now();
> +
>      rconn_set_probe_interval(rc, probe_interval);
>      rconn_set_dscp(rc, dscp);
>
> @@ -419,6 +423,7 @@ do_tx_work(struct rconn *rc)
>          if (error) {
>              break;
>          }
> +        rc->last_activity = time_now();
>      }
>      if (list_is_empty(&rc->txq)) {
>          poll_immediate_wake();
> @@ -429,7 +434,7 @@ static unsigned int
>  timeout_ACTIVE(const struct rconn *rc)
>  {
>      if (rc->probe_interval) {
> -        unsigned int base = MAX(rc->last_received, rc->state_entered);
> +        unsigned int base = MAX(rc->last_activity, rc->state_entered);
>          unsigned int arg = base + rc->probe_interval - rc->state_entered;
>          return arg;
>      }
> @@ -440,7 +445,7 @@ static void
>  run_ACTIVE(struct rconn *rc)
>  {
>      if (timed_out(rc)) {
> -        unsigned int base = MAX(rc->last_received, rc->state_entered);
> +        unsigned int base = MAX(rc->last_activity, rc->state_entered);
>          VLOG_DBG("%s: idle %u seconds, sending inactivity probe",
>                   rc->name, (unsigned int) (time_now() - base));
>
> @@ -543,7 +548,7 @@ rconn_recv(struct rconn *rc)
>                  rc->probably_admitted = true;
>                  rc->last_admitted = time_now();
>              }
> -            rc->last_received = time_now();
> +            rc->last_activity = time_now();
>              rc->packets_received++;
>              if (rc->state == S_IDLE) {
>                  state_transition(rc, S_ACTIVE);
> --
> 1.7.2.5
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to