Ben, we were assuming this same issue would happen with the bridge. Do we know 
if that's true? It seems like it would be a pretty serious problem as the 
number of containers/VMs grows on a host, so I'm wondering what the kernel 
community may do about it.

--Justin


> On Sep 9, 2014, at 3:07 PM, Ben Pfaff <b...@nicira.com> wrote:
> 
> Linux has an internal queue that temporarily holds packets transmitted to
> certain network devices.  If too many packets are transmitted to such
> network devices within a single list of actions, then packets tend to get
> dropped.  Broadcast or flooded or multicast packets on bridges with
> thousands of ports are examples of how this can occur.
> 
> This commit avoids the problem by implementing a flow in userspace when it
> outputs its packet more times than the maximum length of the queue.
> 
> CC: Flavio Leitner <f...@redhat.com>
> Signed-off-by: Ben Pfaff <b...@nicira.com>
> ---
> ofproto/ofproto-dpif-xlate.c |   75 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 75 insertions(+)
> 
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 425b171..2c8ef49 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -53,6 +53,7 @@
> 
> COVERAGE_DEFINE(xlate_actions);
> COVERAGE_DEFINE(xlate_actions_oversize);
> +COVERAGE_DEFINE(xlate_actions_too_many_output);
> COVERAGE_DEFINE(xlate_actions_mpls_overflow);
> 
> VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
> @@ -4031,6 +4032,77 @@ actions_output_to_local_port(const struct xlate_ctx 
> *ctx)
>     return false;
> }
> 
> +/* Returns the maximum number of packets that the Linux kernel is willing to
> + * queue up internally to certain kinds of software-implemented ports, or the
> + * default (and rarely modified) value if it cannot be determined. */
> +static int
> +netdev_max_backlog(void)
> +{
> +    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
> +    static int max_backlog = 1000; /* The normal default value. */
> +
> +    if (ovsthread_once_start(&once)) {
> +        static const char filename[] = 
> "/proc/sys/net/core/netdev_max_backlog";
> +        FILE *stream;
> +        int n;
> +
> +        stream = fopen(filename, "r");
> +        if (!stream) {
> +            VLOG_WARN("%s: open failed (%s)", filename, ovs_strerror(errno));
> +        } else {
> +            if (fscanf(stream, "%d", &n) != 1) {
> +                VLOG_WARN("%s: read error", filename);
> +            } else if (n <= 100) {
> +                VLOG_WARN("%s: unexpectedly small value %d", filename, n);
> +            } else {
> +                max_backlog = n;
> +            }
> +            fclose(stream);
> +        }
> +        ovsthread_once_done(&once);
> +
> +        VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
> +    }
> +
> +    return max_backlog;
> +}
> +
> +/* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
> + * 'odp_actions'. */
> +static int
> +count_output_actions(const struct ofpbuf *odp_actions)
> +{
> +    const struct nlattr *a;
> +    size_t left;
> +    int n = 0;
> +
> +    NL_ATTR_FOR_EACH_UNSAFE (a, left, ofpbuf_data(odp_actions),
> +                             ofpbuf_size(odp_actions)) {
> +        if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
> +            n++;
> +        }
> +    }
> +    return n;
> +}
> +
> +/* Returns true if 'odp_actions' contains more output actions than the 
> datapath
> + * can reliably handle in one go.  On Linux, this is the value of the
> + * net.core.netdev_max_backlog sysctl, which limits the maximum number of
> + * packets that the kernel is willing to queue up for processing while the
> + * datapath is processing a set of actions. */
> +static bool
> +too_many_output_actions(const struct ofpbuf *odp_actions)
> +{
> +#ifdef __linux__
> +    return (ofpbuf_size(odp_actions) / NL_A_U32_SIZE > netdev_max_backlog()
> +            && count_output_actions(odp_actions) > netdev_max_backlog());
> +#else
> +    /* OSes other than Linux might have similar limits, but we don't know how
> +     * to determine them.*/
> +    return false;
> +#endif
> +}
> +
> /* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at 
> 'ofpacts'
>  * into datapath actions in 'odp_actions', using 'ctx'.
>  *
> @@ -4259,6 +4331,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out 
> *xout)
>          * prevent the flow from being installed. */
>         COVERAGE_INC(xlate_actions_oversize);
>         ctx.xout->slow |= SLOW_ACTION;
> +    } else if (too_many_output_actions(ctx.xout->odp_actions)) {
> +        COVERAGE_INC(xlate_actions_too_many_output);
> +        ctx.xout->slow |= SLOW_ACTION;
>     }
> 
>     if (mbridge_has_mirrors(ctx.xbridge->mbridge)) {
> -- 
> 1.7.10.4
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to