My kernel knowledge is rusty.  I can research it, but maybe Flavio or
someone else "just knows".

On Wed, Sep 10, 2014 at 08:48:01AM -0700, Justin Pettit wrote:
> Ben, we were assuming this same issue would happen with the
> bridge. Do we know if that's true? It seems like it would be a
> pretty serious problem as the number of containers/VMs grows on a
> host, so I'm wondering what the kernel community may do about it.
> 
> --Justin
> 
> 
> > On Sep 9, 2014, at 3:07 PM, Ben Pfaff <b...@nicira.com> wrote:
> > 
> > Linux has an internal queue that temporarily holds packets transmitted to
> > certain network devices.  If too many packets are transmitted to such
> > network devices within a single list of actions, then packets tend to get
> > dropped.  Broadcast or flooded or multicast packets on bridges with
> > thousands of ports are examples of how this can occur.
> > 
> > This commit avoids the problem by implementing a flow in userspace when it
> > outputs its packet more times than the maximum length of the queue.
> > 
> > CC: Flavio Leitner <f...@redhat.com>
> > Signed-off-by: Ben Pfaff <b...@nicira.com>
> > ---
> > ofproto/ofproto-dpif-xlate.c |   75 ++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 75 insertions(+)
> > 
> > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> > index 425b171..2c8ef49 100644
> > --- a/ofproto/ofproto-dpif-xlate.c
> > +++ b/ofproto/ofproto-dpif-xlate.c
> > @@ -53,6 +53,7 @@
> > 
> > COVERAGE_DEFINE(xlate_actions);
> > COVERAGE_DEFINE(xlate_actions_oversize);
> > +COVERAGE_DEFINE(xlate_actions_too_many_output);
> > COVERAGE_DEFINE(xlate_actions_mpls_overflow);
> > 
> > VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
> > @@ -4031,6 +4032,77 @@ actions_output_to_local_port(const struct xlate_ctx *ctx)
> >     return false;
> > }
> > 
> > +/* Returns the maximum number of packets that the Linux kernel is willing to
> > + * queue up internally to certain kinds of software-implemented ports, or the
> > + * default (and rarely modified) value if it cannot be determined. */
> > +static int
> > +netdev_max_backlog(void)
> > +{
> > +    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> > +    static int max_backlog = 1000; /* The normal default value. */
> > +
> > +    if (ovsthread_once_start(&once)) {
> > +        static const char filename[] = "/proc/sys/net/core/netdev_max_backlog";
> > +        FILE *stream;
> > +        int n;
> > +
> > +        stream = fopen(filename, "r");
> > +        if (!stream) {
> > +            VLOG_WARN("%s: open failed (%s)", filename, ovs_strerror(errno));
> > +        } else {
> > +            if (fscanf(stream, "%d", &n) != 1) {
> > +                VLOG_WARN("%s: read error", filename);
> > +            } else if (n <= 100) {
> > +                VLOG_WARN("%s: unexpectedly small value %d", filename, n);
> > +            } else {
> > +                max_backlog = n;
> > +            }
> > +            fclose(stream);
> > +        }
> > +        ovsthread_once_done(&once);
> > +
> > +        VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
> > +    }
> > +
> > +    return max_backlog;
> > +}
> > +
> > +/* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
> > + * 'odp_actions'. */
> > +static int
> > +count_output_actions(const struct ofpbuf *odp_actions)
> > +{
> > +    const struct nlattr *a;
> > +    size_t left;
> > +    int n = 0;
> > +
> > +    NL_ATTR_FOR_EACH_UNSAFE (a, left, ofpbuf_data(odp_actions),
> > +                             ofpbuf_size(odp_actions)) {
> > +        if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
> > +            n++;
> > +        }
> > +    }
> > +    return n;
> > +}
> > +
> > +/* Returns true if 'odp_actions' contains more output actions than the datapath
> > + * can reliably handle in one go.  On Linux, this is the value of the
> > + * net.core.netdev_max_backlog sysctl, which limits the maximum number of
> > + * packets that the kernel is willing to queue up for processing while the
> > + * datapath is processing a set of actions. */
> > +static bool
> > +too_many_output_actions(const struct ofpbuf *odp_actions)
> > +{
> > +#ifdef __linux__
> > +    return (ofpbuf_size(odp_actions) / NL_A_U32_SIZE > netdev_max_backlog()
> > +            && count_output_actions(odp_actions) > netdev_max_backlog());
> > +#else
> > +    /* OSes other than Linux might have similar limits, but we don't know how
> > +     * to determine them.*/
> > +    return false;
> > +#endif
> > +}
> > +
> > /* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at 'ofpacts'
> >  * into datapath actions in 'odp_actions', using 'ctx'.
> >  *
> > @@ -4259,6 +4331,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
> >          * prevent the flow from being installed. */
> >         COVERAGE_INC(xlate_actions_oversize);
> >         ctx.xout->slow |= SLOW_ACTION;
> > +    } else if (too_many_output_actions(ctx.xout->odp_actions)) {
> > +        COVERAGE_INC(xlate_actions_too_many_output);
> > +        ctx.xout->slow |= SLOW_ACTION;
> >     }
> > 
> >     if (mbridge_has_mirrors(ctx.xbridge->mbridge)) {
> > -- 
> > 1.7.10.4
> > 
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to