My kernel knowledge is rusty, so I don't know offhand whether the Linux bridge has the same problem. I can research it, but maybe Flavio or someone else "just knows".
On Wed, Sep 10, 2014 at 08:48:01AM -0700, Justin Pettit wrote: > Ben, we were assuming this same issue would happen with the > bridge. Do we know if that's true? It seems like it would be a > pretty serious problem as the number of containers/VMs grows on a > host, so I'm wondering what the kernel community may do about it. > > --Justin > > > > On Sep 9, 2014, at 3:07 PM, Ben Pfaff <b...@nicira.com> wrote: > > > > Linux has an internal queue that temporarily holds packets transmitted to > > certain network devices. If too many packets are transmitted to such > > network devices within a single list of actions, then packets tend to get > > dropped. Broadcast or flooded or multicast packets on bridges with > > thousands of ports are examples of how this can occur. > > > > This commit avoids the problem by implementing a flow in userspace when it > > outputs its packet more times than the maximum length of the queue. > > > > CC: Flavio Leitner <f...@redhat.com> > > Signed-off-by: Ben Pfaff <b...@nicira.com> > > --- > > ofproto/ofproto-dpif-xlate.c | 75 > > ++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 75 insertions(+) > > > > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > > index 425b171..2c8ef49 100644 > > --- a/ofproto/ofproto-dpif-xlate.c > > +++ b/ofproto/ofproto-dpif-xlate.c > > @@ -53,6 +53,7 @@ > > > > COVERAGE_DEFINE(xlate_actions); > > COVERAGE_DEFINE(xlate_actions_oversize); > > +COVERAGE_DEFINE(xlate_actions_too_many_output); > > COVERAGE_DEFINE(xlate_actions_mpls_overflow); > > > > VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate); > > @@ -4031,6 +4032,77 @@ actions_output_to_local_port(const struct xlate_ctx > > *ctx) > > return false; > > } > > > > +/* Returns the maximum number of packets that the Linux kernel is willing > > to > > + * queue up internally to certain kinds of software-implemented ports, or > > the > > + * default (and rarely modified) value if it cannot be determined. 
*/ > > +static int > > +netdev_max_backlog(void) > > +{ > > + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > > + static int max_backlog = 1000; /* The normal default value. */ > > + > > + if (ovsthread_once_start(&once)) { > > + static const char filename[] = > > "/proc/sys/net/core/netdev_max_backlog"; > > + FILE *stream; > > + int n; > > + > > + stream = fopen(filename, "r"); > > + if (!stream) { > > + VLOG_WARN("%s: open failed (%s)", filename, > > ovs_strerror(errno)); > > + } else { > > + if (fscanf(stream, "%d", &n) != 1) { > > + VLOG_WARN("%s: read error", filename); > > + } else if (n <= 100) { > > + VLOG_WARN("%s: unexpectedly small value %d", filename, n); > > + } else { > > + max_backlog = n; > > + } > > + fclose(stream); > > + } > > + ovsthread_once_done(&once); > > + > > + VLOG_DBG("%s: using %d max_backlog", filename, max_backlog); > > + } > > + > > + return max_backlog; > > +} > > + > > +/* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in > > + * 'odp_actions'. */ > > +static int > > +count_output_actions(const struct ofpbuf *odp_actions) > > +{ > > + const struct nlattr *a; > > + size_t left; > > + int n = 0; > > + > > + NL_ATTR_FOR_EACH_UNSAFE (a, left, ofpbuf_data(odp_actions), > > + ofpbuf_size(odp_actions)) { > > + if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) { > > + n++; > > + } > > + } > > + return n; > > +} > > + > > +/* Returns true if 'odp_actions' contains more output actions than the > > datapath > > + * can reliably handle in one go. On Linux, this is the value of the > > + * net.core.netdev_max_backlog sysctl, which limits the maximum number of > > + * packets that the kernel is willing to queue up for processing while the > > + * datapath is processing a set of actions. 
*/ > > +static bool > > +too_many_output_actions(const struct ofpbuf *odp_actions) > > +{ > > +#ifdef __linux__ > > + return (ofpbuf_size(odp_actions) / NL_A_U32_SIZE > netdev_max_backlog() > > + && count_output_actions(odp_actions) > netdev_max_backlog()); > > +#else > > + /* OSes other than Linux might have similar limits, but we don't know > > how > > + * to determine them.*/ > > + return false; > > +#endif > > +} > > + > > /* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at > > 'ofpacts' > > * into datapath actions in 'odp_actions', using 'ctx'. > > * > > @@ -4259,6 +4331,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out > > *xout) > > * prevent the flow from being installed. */ > > COVERAGE_INC(xlate_actions_oversize); > > ctx.xout->slow |= SLOW_ACTION; > > + } else if (too_many_output_actions(ctx.xout->odp_actions)) { > > + COVERAGE_INC(xlate_actions_too_many_output); > > + ctx.xout->slow |= SLOW_ACTION; > > } > > > > if (mbridge_has_mirrors(ctx.xbridge->mbridge)) { > > -- > > 1.7.10.4 > > > > _______________________________________________ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev