One comment below, otherwise looks good, Acked-by: Jarno Rajahalme <ja...@ovn.org>
> On May 24, 2016, at 12:29 AM, Simon Horman <simon.hor...@netronome.com> wrote: > > Until 8bf009bf8ab4 ("xlate: Always recirculate after an MPLS POP to a > non-MPLS ethertype.") the translation code took some care to only > recirculate as a result of a pop_mpls action if necessary. This was > implemented using per-action checks and resulted in some maintenance > burden. > > Unfortunately recirculation is a relatively expensive operation and a > performance degradation of up to 35% has been observed with the above > mentioned patch applied for the arguably common case of: > > pop_mpls,set(l2 field),output > > This patch attempts to strike a balance between performance and > maintainability by special casing set and output actions such > that recirculation may be avoided. > > This partially reverts the above mentioned commit. In particular most > of the C code outside of do_xlate_actions(). > > Signed-off-by: Simon Horman <simon.hor...@netronome.com> > --- > * Lightly tested using test-suite portion of this patch > > v2 > * Trigger recirculation for select groups as bucket selection > may access fields above L2.5. > * Add test to exercise that recirculation is triggered for patch ports > --- > ofproto/ofproto-dpif-xlate.c | 117 ++++++++++++++++++++++++++++++++++++++++++- > tests/mpls-xlate.at | 78 +++++++++++++++++++++++++++-- > tests/ofproto-dpif.at | 3 +- > 3 files changed, 191 insertions(+), 7 deletions(-) > > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > index af26c64fcc39..025333ba56eb 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -349,6 +349,12 @@ struct xlate_ctx { > struct ofpbuf frozen_actions; > const struct ofpact_controller *pause; > > + /* True if a packet was but is no longer MPLS (due to an MPLS pop > action). > + * This is a trigger for recirculation in cases where translating an > action > + * or looking up a flow requires access to the fields of the packet after > + * the MPLS label stack that was originally present. */ > + bool was_mpls; > + > /* True if conntrack has been performed on this packet during processing > * on the current bridge. This is used to determine whether conntrack > * state from the datapath should be honored after thawing. */ > @@ -3029,6 +3035,7 @@ compose_output_action__(struct xlate_ctx *ctx, > ofp_port_t ofp_port, > const struct xport *peer = xport->peer; > struct flow old_flow = ctx->xin->flow; > bool old_conntrack = ctx->conntracked; > + bool old_was_mpls = ctx->was_mpls; > cls_version_t old_version = ctx->tables_version; > struct ofpbuf old_stack = ctx->stack; > union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)]; > @@ -3086,6 +3093,10 @@ compose_output_action__(struct xlate_ctx *ctx, > ofp_port_t ofp_port, > /* Restore calling bridge's lookup version. */ > ctx->tables_version = old_version; > > + /* The peer bridge popping MPLS should have no effect on the original > + * bridge. */ > + ctx->was_mpls = old_was_mpls; > + > /* The peer bridge's conntrack execution should have no effect on the > * original bridge. */ > ctx->conntracked = old_conntrack; > @@ -3294,6 +3305,11 @@ static void > xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t > table_id, > bool may_packet_in, bool honor_table_miss) > { > + /* Check if we need to recirculate before matching in a table. */ > + if (ctx->was_mpls) { > + ctx_trigger_freeze(ctx); > + return; > + } > if (xlate_resubmit_resource_check(ctx)) { > uint8_t old_table_id = ctx->table_id; > struct rule_dpif *rule; > @@ -3355,6 +3371,7 @@ xlate_group_bucket(struct xlate_ctx *ctx, struct > ofputil_bucket *bucket) > struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts, > bucket->ofpacts_len); > struct flow old_flow = ctx->xin->flow; > + bool old_was_mpls = ctx->was_mpls; > > ofpacts_execute_action_set(&action_list, &action_set); > ctx->indentation++; > @@ -3386,6 +3403,10 @@ xlate_group_bucket(struct xlate_ctx *ctx, struct > ofputil_bucket *bucket) > * group buckets. */ > ctx->xin->flow = old_flow; > > + /* The group bucket popping MPLS should have no effect after bucket > + * execution. */ > + ctx->was_mpls = old_was_mpls; > + > /* The fact that the group bucket exits (for any reason) does not mean > that > * the translation after the group action should exit. Specifically, if > * the group bucket freezes translation, the actions after the group > action > @@ -3509,6 +3530,13 @@ xlate_select_group(struct xlate_ctx *ctx, struct > group_dpif *group) > { > const char *selection_method = group_dpif_get_selection_method(group); > > + /* Select groups may access flow keys beyond L2 in order to > + * select a bucket. Recirculate as appropriate to make this possible. > + */ > + if (ctx->was_mpls) { > + ctx_trigger_freeze(ctx); > + } > + > if (selection_method[0] == '\0') { > xlate_default_select_group(ctx, group); > } else if (!strcasecmp("hash", selection_method)) { > @@ -3802,7 +3830,7 @@ compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 > eth_type) > > if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) { > if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) { > - ctx_trigger_freeze(ctx); > + ctx->was_mpls = true; > } > } else if (n >= FLOW_MAX_MPLS_LABELS) { > if (ctx->xin->packet != NULL) { > @@ -4478,6 +4506,90 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct > ofpact_conntrack *ofc) > } > > static void > +recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx) > +{ > + /* Do not consider recirculating unless the packet was previously MPLS. > */ > + if (!ctx->was_mpls) { > + return; > + } > + > + /* Special case these actions, only recirculating if necessary. > + * This avoids the overhead of recirculation in common use-cases. > + */ > + switch (a->type) { > + > + /* Output actions do not require recirculation. */ > + case OFPACT_OUTPUT: > + case OFPACT_ENQUEUE: > + case OFPACT_OUTPUT_REG: > + /* Set actions that don't touch L3+ fields do not require recirculation. > */ > + case OFPACT_SET_VLAN_VID: > + case OFPACT_SET_VLAN_PCP: > + case OFPACT_SET_ETH_SRC: > + case OFPACT_SET_ETH_DST: > + case OFPACT_SET_TUNNEL: > + case OFPACT_SET_QUEUE: > + /* If actions of a group require recirculation that can be detected > + * when translating them. */ > + case OFPACT_GROUP: > + return; > + > + /* Set field that don't touch L3+ fields don't require recirculation. */ > + case OFPACT_SET_FIELD: > + if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) { > + break; > + } > + return; > + > + /* For simplicity, recirculate in all other cases. */ > + case OFPACT_CONTROLLER: > + case OFPACT_BUNDLE: > + case OFPACT_STRIP_VLAN: > + case OFPACT_PUSH_VLAN: > + case OFPACT_SET_IPV4_SRC: > + case OFPACT_SET_IPV4_DST: > + case OFPACT_SET_IP_DSCP: > + case OFPACT_SET_IP_ECN: > + case OFPACT_SET_IP_TTL: > + case OFPACT_SET_L4_SRC_PORT: > + case OFPACT_SET_L4_DST_PORT: > + case OFPACT_REG_MOVE: > + case OFPACT_STACK_PUSH: > + case OFPACT_STACK_POP: > + case OFPACT_DEC_TTL: > + case OFPACT_SET_MPLS_LABEL: > + case OFPACT_SET_MPLS_TC: > + case OFPACT_SET_MPLS_TTL: > + case OFPACT_DEC_MPLS_TTL: > + case OFPACT_PUSH_MPLS: > + case OFPACT_POP_MPLS: > + case OFPACT_POP_QUEUE: > + case OFPACT_FIN_TIMEOUT: > + case OFPACT_RESUBMIT: > + case OFPACT_LEARN: > + case OFPACT_CONJUNCTION: > + case OFPACT_MULTIPATH: > + case OFPACT_NOTE: > + case OFPACT_EXIT: > + case OFPACT_SAMPLE: > + case OFPACT_UNROLL_XLATE: > + case OFPACT_CT: > + case OFPACT_NAT: > + case OFPACT_DEBUG_RECIRC: > + case OFPACT_METER: > + case OFPACT_CLEAR_ACTIONS: > + case OFPACT_WRITE_ACTIONS: > + case OFPACT_WRITE_METADATA: > + case OFPACT_GOTO_TABLE: > + default: > + break; > + } > + > + /* Recirculate */ > + ctx_trigger_freeze(ctx); > +} > + > +static void > do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, > struct xlate_ctx *ctx) > { > @@ -4500,6 +4612,8 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t > ofpacts_len, > break; > } > > + recirc_for_mpls(a, ctx); > + IMO this should not be done when 'ctx->exit' has already been set, as it may trigger an unnecessary recirculation in that case. Jarno > if (ctx->exit) { > /* Check if need to store the remaining actions for later > * execution. */ > @@ -5151,6 +5265,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out > *xout) > .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub), > .pause = NULL, > > + .was_mpls = false, > .conntracked = false, > > .ct_nat_action = NULL, > diff --git a/tests/mpls-xlate.at b/tests/mpls-xlate.at > index 7df52f513e7e..a9a3cf53bed3 100644 > --- a/tests/mpls-xlate.at > +++ b/tests/mpls-xlate.at > @@ -2,18 +2,43 @@ AT_BANNER([mpls-xlate]) > > AT_SETUP([MPLS xlate action]) > > -OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy > ofport_request=1]) > +OVS_VSWITCHD_START( > + [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \ > + add-port br0 p1 -- set Interface p1 type=patch \ > + options:peer=p2 ofport_request=2 -- \ > + add-br br1 -- \ > + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ > + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ > + fail-mode=secure -- \ > + add-port br1 p2 -- set Interface p2 type=patch \ > + options:peer=p1]) > > AT_CHECK([ovs-appctl dpif/show], [0], [dnl > dummy@ovs-dummy: hit:0 missed:0 > br0: > br0 65534/100: (dummy) > p0 1/1: (dummy) > + p1 2/none: (patch: peer=p2) > + br1: > + br1 65534/101: (dummy) > + p2 1/none: (patch: peer=p1) > ]) > > dnl Setup single MPLS tags. > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 > group_id=1232,type=select,bucket=output:LOCAL]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 > group_id=1233,type=all,bucket=output:LOCAL]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-group br0 > group_id=1234,type=all,bucket=dec_ttl,output:LOCAL]) > AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > in_port=local,dl_type=0x0800,action=push_mpls:0x8847,set_field:10-\>mpls_label,output:1]) > AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > dl_type=0x8847,in_port=1,mpls_label=20,action=pop_mpls:0x0800,output:LOCAL]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > dl_type=0x8847,in_port=1,mpls_label=21,action=pop_mpls:0x0800,dec_ttl,output:LOCAL]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > dl_type=0x8847,in_port=1,mpls_label=22,action=pop_mpls:0x0800,group:1232]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > dl_type=0x8847,in_port=1,mpls_label=23,action=pop_mpls:0x0800,group:1233]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > dl_type=0x8847,in_port=1,mpls_label=24,action=pop_mpls:0x0800,group:1234]) > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 > dl_type=0x8847,in_port=1,mpls_label=25,action=pop_mpls:0x0800,output:2]) > + > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 > in_port=1,action=output:LOCAL]) > + > + > > dnl Test MPLS push > AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=7777,dst=80)'], > [0], [stdout]) > @@ -21,17 +46,62 @@ AT_CHECK([tail -1 stdout], [0], > [Datapath actions: push_mpls(label=10,tc=0,ttl=64,bos=1,eth_type=0x8847),1 > ]) > > -dnl Test MPLS pop > +dnl Test MPLS pop then output (actions do not trigger reciculation) > AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=20,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: pop_mpls(eth_type=0x800),100 > +]) > + > +dnl Test MPLS pop, dec_ttl, output (actions trigger recirculation) > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=21,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x1) > ]) > > AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'recirc_id(1),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], > [0], [stdout]) > AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: set(ipv4(ttl=63)),100 > +]) > + > +dnl Test MPLS pop then select group output (group type triggers > recirculation) > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=22,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x2) > +]) > + > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > [Datapath actions: 100 > ]) > > +dnl Test MPLS pop then all group output (bucket actions do not trigger > recirculation) > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=23,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: pop_mpls(eth_type=0x800),100 > +]) > + > +dnl Test MPLS pop then all group output (bucket actions trigger > recirculation) > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=24,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x3) > +]) > + > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'recirc_id(3),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: set(ipv4(ttl=63)),100 > +]) > + > +dnl Test MPLS pop then all output to patch port > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=25,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: pop_mpls(eth_type=0x800),recirc(0x4) > +]) > + > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'recirc_id(4),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], > [0], [stdout]) > +AT_CHECK([tail -1 stdout], [0], > + [Datapath actions: 101 > +]) > + > dnl Setup multiple MPLS tags. > AT_CHECK([ovs-ofctl del-flows br0]) > > @@ -52,10 +122,10 @@ AT_CHECK([tail -1 stdout], [0], > dnl Double MPLS pop > AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x8847),mpls(label=60,tc=0,ttl=64,bos=0,label=50,tc=0,ttl=64,bos=1)'], > [0], [stdout]) > AT_CHECK([tail -1 stdout], [0], > - [Datapath actions: > pop_mpls(eth_type=0x8847),pop_mpls(eth_type=0x800),recirc(0x2) > + [Datapath actions: > pop_mpls(eth_type=0x8847),pop_mpls(eth_type=0x800),recirc(0x5) > ]) > > -AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'recirc_id(2),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], > [0], [stdout]) > +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy > 'recirc_id(5),in_port(1),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=47,tos=0,ttl=64,frag=no)'], > [0], [stdout]) > AT_CHECK([tail -1 stdout], [0], > [Datapath actions: set(ipv4(ttl=10)),100 > ]) > diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at > index 18e5e22265a5..835028123e87 100644 > --- a/tests/ofproto-dpif.at > +++ b/tests/ofproto-dpif.at > @@ -6415,8 +6415,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 > 'in_port(1),eth(src=50:54:00:00:00: > sleep 1 > AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout_keep_actions], > [0], [dnl > recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,mpls_label=11,mpls_tc=3,mpls_ttl=64,mpls_bos=1, > actions:push_mpls(label=11,tc=3,ttl=64,bos=0,eth_type=0x8847),2 > -recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,mpls_bos=1, > actions:pop_mpls(eth_type=0x800),recirc(0x1) > -recirc_id=0x1,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, actions:2 > +recirc_id=0,mpls,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,mpls_bos=1, > actions:pop_mpls(eth_type=0x800),2 > ]) > OVS_VSWITCHD_STOP > AT_CLEANUP > -- > 2.1.4 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev