On Wed, Aug 27, 2014 at 4:13 AM, Andy Zhou <az...@nicira.com> wrote: > Since kernel stack is limited in size, it is not wise to using > recursive function with large stack frames. > > This patch provides an alternative implementation of recirc action > without using recursion. > > A per CPU fixed sized, 'deferred action stack' is used to store both > recirc and sample actions when executing actions. These actions becomes > 'deferred actions". > > Deferred actions are only executed after all other actions has been > executed, including the ones triggered by loopback from the kernel > network stack. > > The depth of the private stack, currently set to 10, limits the number > of 'deferred actions' can be accumulated for each packet. > > Signed-off-by: Andy Zhou <az...@nicira.com>
Patch looks good, I have few comments. > --- > datapath/actions.c | 150 > +++++++++++++++++++++++++++++++++++++--------------- > datapath/actions.h | 4 +- > datapath/datapath.c | 4 +- > 3 files changed, 112 insertions(+), 46 deletions(-) > > diff --git a/datapath/actions.c b/datapath/actions.c > index 31fb57d..662de9c 100644 > --- a/datapath/actions.c > +++ b/datapath/actions.c > @@ -42,6 +42,7 @@ > #include "actions.h" > > static DEFINE_PER_CPU(struct ovs_action_stack, action_stacks); > +DEFINE_PER_CPU(int, ovs_exec_actions_count); > .... @@ -741,33 +758,39 @@ static int sample(struct datapath *dp, struct sk_buff *skb, > rem = nla_len(acts_list); > a = nla_data(acts_list); > > + if (unlikely(rem == 0)) > + return 0; > + > /* Actions list is either empty or only contains a single user-space > * action, the latter being a special case as it is the only known > * usage of the sample action. > * In these special cases don't clone the skb as there are no > * side-effects in the nested actions. > * Otherwise, clone in case the nested actions have side effects. */ > - if (likely(rem == 0 || > - (nla_type(a) == OVS_ACTION_ATTR_USERSPACE && > - last_action(a, rem)))) { > - sample_skb = skb; > - skb_get(skb); > + if (nla_type(a) == OVS_ACTION_ATTR_USERSPACE && > + last_action(a, rem)) { > + return output_userspace(dp, skb, a); This is optimization, should be kept in separate patch. > } else { > - sample_skb = skb_clone(skb, GFP_ATOMIC); > - if (!sample_skb) > - /* Skip the sample action when out of memory. */ > + struct ovs_deferred_action *da; > + > + skb = skb_clone(skb, GFP_ATOMIC); > + if (!skb) > + /* Out of memory, skip the sample action. */ > + return 0; > + > + da = push_actions(skb, a, rem); > + if (!da) { > + if (net_ratelimit()) > + pr_warn("%s: stack limit reached, drop sample > action\n", > + ovs_dp_name(dp)); > + kfree_skb(skb); > return 0; > + } > > - flow_key_clone(skb, &sample_key); > + flow_key_clone(skb, &da->pkt_key); > } > > - /* Note that do_execute_actions() never consumes skb. > - * In the case where skb has been cloned above it is the clone that > - * is consumed. Otherwise the skb_get(skb) call prevents > - * consumption by do_execute_actions(). Thus, it is safe to simply > - * return the error code and let the caller (also > - * do_execute_actions()) free skb on error. */ > - return do_execute_actions(dp, sample_skb, a, rem); > + return 0; > } > > static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) > @@ -837,11 +860,10 @@ static int execute_set_action(struct sk_buff *skb, > return err; > } > > - > static int execute_recirc(struct datapath *dp, struct sk_buff *skb, > const struct nlattr *a, int rem) > { > - struct sw_flow_key recirc_key; > + struct ovs_deferred_action *da; > > if (!is_skb_flow_key_valid(skb)) { > int err; > @@ -854,26 +876,40 @@ static int execute_recirc(struct datapath *dp, struct > sk_buff *skb, > BUG_ON(!is_skb_flow_key_valid(skb)); > > if (!last_action(a, rem)) { > - /* Recirc action is the not the last action > - * of the action list. */ > - skb = skb_clone(skb, GFP_ATOMIC); > + /* Recirc action is the not the last action, > + * We will need to push skb clone instead of skb. */ > > - /* Skip the recirc action when out of memory, but > - * continue on with the rest of the action list. */ > + skb = skb_clone(skb, GFP_ATOMIC); > if (!skb) > + /* Out of memory, skip recirc. */ > return 0; > > - flow_key_clone(skb, &recirc_key); > + /* Push the skb clone for recirc. */ > + da = push_actions(skb, NULL, 0); > + if (!da) > + goto stack_full; > + > + flow_key_clone(skb, &da->pkt_key); > + } else { > + skb_get(skb); why do we need skb_get()? > + if (!push_actions(skb, NULL, 0)) > + goto stack_full; > } > > flow_key_set_recirc_id(skb, nla_get_u32(a)); > - ovs_dp_process_packet(skb); > + return 0; > + > +stack_full: > + if (net_ratelimit()) > + pr_warn("%s: stack limit reached, drop recirc action\n", > + ovs_dp_name(dp)); > + > return 0; > } > > /* Execute a list of actions against 'skb'. */ > -static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > - const struct nlattr *attr, int len) > +static void do_execute_actions(struct datapath *dp, struct sk_buff *skb, > + const struct nlattr *attr, int len) > { what is reason for changing function prototype? > /* Every output action needs a separate clone of 'skb', but the common > * case is just a single output action, so that doing a clone and > @@ -920,7 +956,7 @@ static int do_execute_actions(struct datapath *dp, struct > sk_buff *skb, > case OVS_ACTION_ATTR_PUSH_VLAN: > err = push_vlan(skb, nla_data(a)); > if (unlikely(err)) /* skb already freed. */ > - return err; > + return; > break; > > case OVS_ACTION_ATTR_POP_VLAN: > @@ -929,12 +965,6 @@ static int do_execute_actions(struct datapath *dp, > struct sk_buff *skb, > > case OVS_ACTION_ATTR_RECIRC: > err = execute_recirc(dp, skb, a, rem); > - if (last_action(a, rem)) { > - /* If this is the last action, the skb has > - * been consumed or freed. > - * Return immediately. */ > - return err; > - } > break; > > case OVS_ACTION_ATTR_SET: > @@ -948,7 +978,7 @@ static int do_execute_actions(struct datapath *dp, struct > sk_buff *skb, > > if (unlikely(err)) { > kfree_skb(skb); > - return err; > + return; > } > } > > @@ -956,14 +986,50 @@ static int do_execute_actions(struct datapath *dp, > struct sk_buff *skb, > do_output(dp, skb, prev_port); > else > consume_skb(skb); > +} > > - return 0; > +static void ovs_process_deferred_packets(struct datapath *dp) > +{ > + struct ovs_action_stack *stack; > + > + stack = this_cpu_ptr(&(action_stacks)); > + > + /* Finishing executing all deferred actions. */ > + while (!action_stack_is_empty(stack)) { > + struct ovs_deferred_action *da = action_stack_pop(stack); > + struct sk_buff *skb = da->skb; > + const struct nlattr *actions = da->actions; > + int rem = da->rem; > + > + if (!actions) { > + struct sw_flow *flow = ovs_dp_packet_flow_lookup(dp, > skb); > + > + if (flow) { > + struct sw_flow_actions *sf_acts; > + > + sf_acts = rcu_dereference(flow->sf_acts); > + actions = sf_acts->actions; > + rem = sf_acts->actions_len; > + } > + } > + > + if (actions) > + do_execute_actions(dp, skb, actions, rem); > + } We also need limit on number of recirc actions executed for given packet. > } > > /* Execute a list of actions against 'skb'. */ > -int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, struct > sw_flow_actions *acts) > +void ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, > + struct sw_flow_actions *acts) > { > - return do_execute_actions(dp, skb, acts->actions, acts->actions_len); > + this_cpu_inc(ovs_exec_actions_count); > + > + do_execute_actions(dp, skb, acts->actions, acts->actions_len); > + > + if (this_cpu_read(ovs_exec_actions_count) == 1) > + ovs_process_deferred_packets(dp); > + > + this_cpu_dec(ovs_exec_actions_count); > } > > void ovs_action_stacks_init(void) > diff --git a/datapath/actions.h b/datapath/actions.h > index 5f98ad4..84801fa 100644 > --- a/datapath/actions.h > +++ b/datapath/actions.h > @@ -33,7 +33,7 @@ > > struct ovs_deferred_action { > struct sk_buff *skb; > - struct nlattr *actions; > + const struct nlattr *actions; > int rem; > This should be part of first patch. > /* Store pkt_key clone during recirc */ > @@ -48,7 +48,7 @@ struct ovs_action_stack { > > void ovs_action_stacks_init(void); > void ovs_action_stacks_exit(void);; > -int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, struct > sw_flow_actions *acts); > +void ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, struct > sw_flow_actions *acts); > void ovs_process_action_stack(void); > > #endif /* actions.h */ > diff --git a/datapath/datapath.c b/datapath/datapath.c > index 5bae23e..2424a62 100644 > --- a/datapath/datapath.c > +++ b/datapath/datapath.c > @@ -595,12 +595,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, > struct genl_info *info) > sf_acts = rcu_dereference(flow->sf_acts); > > local_bh_disable(); > - err = ovs_execute_actions(dp, packet, sf_acts); > + ovs_execute_actions(dp, packet, sf_acts); > local_bh_enable(); > rcu_read_unlock(); > > ovs_flow_free(flow, false); > - return err; > + return 0; > > err_unlock: > rcu_read_unlock(); > -- > 1.9.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev