On Mon, Aug 18, 2014 at 1:44 PM, Andy Zhou <az...@nicira.com> wrote: > On Sun, Aug 17, 2014 at 7:43 PM, Pravin Shelar <pshe...@nicira.com> wrote: >> On Fri, Aug 15, 2014 at 3:12 AM, Andy Zhou <az...@nicira.com> wrote: >>> Since kernel stack is limited in size, it is not wise to using >>> recursive function with large stack frames. >>> >>> This patch provides an alternative implementation of recirc action with >>> out using recursion. >>> >>> A recirc action can be embedded in the middle of an actions list. >>> Instead of immediately (and recursively) process the recirc action, >>> the remaining the actions list is pushed on a per-cpu fixed size stack, >>> the skb needs recirc is then returned. >>> >>> ovs_execute_actions now only finishes whenever both per-cpu stack is >>> empty, and the actions list does not have recirc action. Recirc skb >>> is always process before check the stack to preserve the correct >>> execution order.a >>> >>> The depth of the private stack limit the depth of recirc action. >>> >>> Signed-off-by: Andy Zhou <az...@nicira.com> >>> --- >>> datapath/actions.c | 175 >>> ++++++++++++++++++++++++++++++++++++---------------- >>> datapath/actions.h | 7 ++- >>> datapath/datapath.c | 4 +- >>> 3 files changed, 130 insertions(+), 56 deletions(-) >>> >>> diff --git a/datapath/actions.c b/datapath/actions.c >>> index 9dd9063..da3ac89 100644 >>> --- a/datapath/actions.c >>> +++ b/datapath/actions.c >>> @@ -143,8 +143,8 @@ static bool is_skb_flow_key_valid(struct sk_buff *skb) >>> return !!OVS_CB(skb)->pkt_key->eth.type; >>> } >>> >>> -static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, >>> - const struct nlattr *attr, int len); >>> +static struct sk_buff *do_execute_actions(struct datapath *dp, >>> + struct sk_buff *skb, const struct nlattr *attr, int len); >>> >>> static int make_writable(struct sk_buff *skb, int write_len) >>> { >>> @@ -154,6 +154,19 @@ static int make_writable(struct sk_buff *skb, int >>> write_len) >>> return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); >>> } >>> >>> +static inline struct ovs_deferred_action * >>> +push_next_action(struct sk_buff *skb, const struct nlattr *attr, int len) >>> +{ >>> + struct ovs_deferred_action *da; >>> + struct ovs_action_stack *stack; >>> + struct nlattr *next_action = nla_next(attr, &len); >>> + >>> + stack = &__get_cpu_var(ovs_action_stacks); >>> + da = ovs_action_stack_push(stack, skb, next_action, len); >>> + >>> + return da; >>> +} >>> + >>> /* The end of the mac header. >>> * >>> * For non-MPLS skbs this will correspond to the network header. >>> @@ -669,10 +682,8 @@ static bool last_action(const struct nlattr *a, int >>> rem) >>> static int sample(struct datapath *dp, struct sk_buff *skb, >>> const struct nlattr *attr) >>> { >>> - struct sw_flow_key sample_key; >>> const struct nlattr *acts_list = NULL; >>> const struct nlattr *a; >>> - struct sk_buff *sample_skb; >>> int rem; >>> >>> for (a = nla_data(attr), rem = nla_len(attr); rem > 0; >>> @@ -701,24 +712,28 @@ static int sample(struct datapath *dp, struct sk_buff >>> *skb, >>> if (likely(rem == 0 || >>> (nla_type(a) == OVS_ACTION_ATTR_USERSPACE && >>> last_action(a, rem)))) { >>> - sample_skb = skb; >>> - skb_get(skb); >>> + output_userspace(dp, skb, a); >>> } else { >>> - sample_skb = skb_clone(skb, GFP_ATOMIC); >>> - if (!sample_skb) >>> - /* Skip the sample action when out of memory. */ >>> + struct ovs_deferred_action *da; >>> + >>> + skb = skb_clone(skb, GFP_ATOMIC); >>> + if (!skb) >>> + /* Out of memory, skip the sample action. */ >>> + return 0; >>> + >>> + da = push_next_action(skb, a, rem); >>> + if (!da) { >>> + if (net_ratelimit()) >>> + pr_warn("%s: stack limit reached, drop >>> sample action\n", >>> + ovs_dp_name(dp)); >>> + kfree_skb(skb); >>> return 0; >>> + } >>> >>> - flow_key_clone(skb, &sample_key); >>> + flow_key_clone(skb, &da->pkt_key); >>> } >>> >>> - /* Note that do_execute_actions() never consumes skb. >>> - * In the case where skb has been cloned above it is the clone that >>> - * is consumed. Otherwise the skb_get(skb) call prevents >>> - * consumption by do_execute_actions(). Thus, it is safe to simply >>> - * return the error code and let the caller (also >>> - * do_execute_actions()) free skb on error. */ >>> - return do_execute_actions(dp, sample_skb, a, rem); >>> + return 0; >>> } >>> >>> static void execute_hash(struct sk_buff *skb, const struct nlattr *attr) >>> @@ -789,43 +804,57 @@ static int execute_set_action(struct sk_buff *skb, >>> } >>> >>> >>> -static int execute_recirc(struct datapath *dp, struct sk_buff *skb, >>> - const struct nlattr *a, int rem) >>> +static struct sk_buff *execute_recirc(struct datapath *dp, >>> + struct sk_buff *skb, >>> + const struct nlattr *a, int rem) >>> { >>> - struct sw_flow_key recirc_key; >>> int err; >>> >>> - if (!last_action(a, rem)) { >>> - /* Recirc action is the not the last action >>> - * of the action list. */ >>> - skb = skb_clone(skb, GFP_ATOMIC); >>> - >>> - /* Skip the recirc action when out of memory, but >>> - * continue on with the rest of the action list. */ >>> - if (!skb) >>> - return 0; >>> - } >>> - >>> if (!is_skb_flow_key_valid(skb)) { >>> err = ovs_flow_key_update(skb, OVS_CB(skb)->pkt_key); >>> if (err) { >>> kfree_skb(skb); >>> - return err; >>> + return NULL; >>> } >>> } >>> BUG_ON(!is_skb_flow_key_valid(skb)); >>> >>> - if (!last_action(a, rem)) >>> - flow_key_clone(skb, &recirc_key); >>> + if (!last_action(a, rem)) { >>> + struct ovs_deferred_action *da; >>> + /* Recirc action is the not the last action, >>> + * Push the remaining actions on to the stack, >>> + * clone skb and clone pkt key. >>> + * >>> + * Skip the recirc action if the stack is full, or >>> + * we are out of memory to clone the skb. >>> + */ >>> + >>> + da = push_next_action(skb, a, rem); >>> + if (!da) { >>> + /* Stack is full, drop the skb. */ >>> + if (net_ratelimit()) >>> + pr_warn("%s: stack limit reached, drop >>> recirc action\n", >>> + ovs_dp_name(dp)); >>> + consume_skb(skb); >>> + return NULL; >>> + } >>> + >>> + skb = skb_clone(skb, GFP_ATOMIC); >>> + if (skb) >>> + /* Out of memory, do not recurc*/ >>> + return NULL; >>> + >>> + flow_key_clone(skb, &da->pkt_key); >>> + } >>> >>> flow_key_set_recirc_id(skb, nla_get_u32(a)); >>> - ovs_dp_process_packet(skb); >>> - return 0; >>> + return skb; /* Recirc skb */ >>> } >>> >>> /* Execute a list of actions against 'skb'. */ >>> -static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, >>> - const struct nlattr *attr, int len) >>> +static struct sk_buff *do_execute_actions(struct datapath *dp, >>> + struct sk_buff *skb, >>> + const struct nlattr *attr, int >>> len) >>> { >>> /* Every output action needs a separate clone of 'skb', but the >>> common >>> * case is just a single output action, so that doing a clone and >>> @@ -872,7 +901,7 @@ static int do_execute_actions(struct datapath *dp, >>> struct sk_buff *skb, >>> case OVS_ACTION_ATTR_PUSH_VLAN: >>> err = push_vlan(skb, nla_data(a)); >>> if (unlikely(err)) /* skb already freed. */ >>> - return err; >>> + return NULL; >>> break; >>> >>> case OVS_ACTION_ATTR_POP_VLAN: >>> @@ -880,8 +909,7 @@ static int do_execute_actions(struct datapath *dp, >>> struct sk_buff *skb, >>> break; >>> >>> case OVS_ACTION_ATTR_RECIRC: >>> - err = execute_recirc(dp, skb, a, rem); >>> - break; >>> + return execute_recirc(dp, skb, a, rem); >>> >>> case OVS_ACTION_ATTR_SET: >>> err = execute_set_action(skb, nla_data(a)); >>> @@ -894,7 +922,7 @@ static int do_execute_actions(struct datapath *dp, >>> struct sk_buff *skb, >>> >>> if (unlikely(err)) { >>> kfree_skb(skb); >>> - return err; >>> + return NULL; >>> } >>> } >>> >>> @@ -903,7 +931,47 @@ static int do_execute_actions(struct datapath *dp, >>> struct sk_buff *skb, >>> else >>> consume_skb(skb); >>> >>> - return 0; >>> + return NULL; >>> +} >>> + >>> +static struct sk_buff *recirc_packet_process(struct datapath *dp, >>> + struct sk_buff *skb) >>> +{ >>> + if (ovs_dp_packet_flow_lookup(dp, skb)) { >>> + struct sw_flow_actions *acts; >>> + >>> + acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); >>> + >>> + return do_execute_actions(dp, skb, acts->actions, >>> + acts->actions_len); >>> + } >>> + >>> + return NULL; >>> +} >>> + >>> +static struct sk_buff *execute_deferred_actions(struct datapath *dp, >>> + struct ovs_deferred_action >>> *da) >>> +{ >>> + return do_execute_actions(dp, da->skb, da->actions, da->rem); >>> +} >>> + >>> +static void ovs_recirc_packet(struct datapath *dp, struct sk_buff *rskb) >>> +{ >>> + struct ovs_action_stack *stack; >>> + >>> + stack = &__get_cpu_var(ovs_action_stacks); >>> + >>> + /* Finishing executing all deferred actions. */ >>> + while (rskb || !ovs_action_stack_is_empty(stack)) { >>> + if (rskb) >>> + rskb = recirc_packet_process(dp, rskb); >>> + else { >>> + struct ovs_deferred_action *da; >>> + >>> + da = ovs_action_stack_pop(stack); >>> + rskb = execute_deferred_actions(dp, da); >>> + } >>> + } >>> } >>> >> I have couple comments. >> 1. We can handle sample and recirc in same way. So rather than calling >> do_execute_actions() recursively, it can just push cloned skb to >> stack. we can check this stack at end of do_execute_actions(). sample >> action can set action pointer, recirc does not. So on pop if there is >> no action pointer we can lookup flow. > O.K. I will make the change. It may be cleaner and more efficient for > recirc packet to lookup flow immediately, and push packet with looked > up actions on the stack. > I am fine with doing lookup immediately in recirc execute function. only part I was worried about was returning skb from recirc action.
>> 2. This can execute sample action, so we can not call this >> ovs_recirc_packet(). >> > how about ovs_process_deferred_packet() ? that sounds good. _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev