Currently, ovn-controller installs all lflows for a logical switch when it determines not to skip processing of that logical switch.
This installs too many OVS flows. We currently have 11 tables for the logical switch ingress pipeline and 8 tables for the logical switch egress pipeline, with more to come in the future. There are two kinds of lflows for a logical switch. One kind has no inport/outport matching, such as lflows in tables ls_in_arp_rsp and ls_in_l2_lkup. The other kind does, and for now, lflows in the following tables belong to this type: - ls_in_port_sec_l2 - ls_in_port_sec_ip - ls_in_port_sec_nd - ls_in_pre_acl - ls_in_acl - ls_out_pre_acl - ls_out_acl - ls_out_port_sec_ip - ls_out_port_sec_l2 Consider how packets travel through flows in the network topology (P: port, S: switch, R: router; the two VM (or VIF) ports are on different chassis): - P-S-P: only flows matching remote inport, the local VM port as "inport" and the local VM port as "outport" will be matched. There is no chance for flows matching a remote VM port as "inport" or "outport" to be matched. - P-S-R-S-P and P-S-R...R-S-P: all these cases seem different from the one above, but they have the same "last hop". No matter how many routers (with or without switches) are used, before the packet leaves the current chassis, the next hop will be: destination_switch_gateway -> destination_switch_port, so it becomes a P-S-P case again. Since this patch does not change the ingress pipeline for logical routers, traffic from router port to router port will not be impacted. So, as we can see, we do not need to install a flow for an lflow with inport or outport matching in the logical switch ingress pipeline when it tries to match a VM (or VIF) port that does not belong to the current chassis. This helps ovn-controller avoid installing many unnecessary flows.
The following data compare flow counts in a 2-chassis environment, with 7 VMs (4 VMs on chassis-1 (ch1), 3 VMs on chassis-2 (ch2)), 3 dnsmasq interfaces (on chassis-1), and some lsp-lrp pairs: +-------------------+---------------+-------------+-------------+ |number of flows in | upstream-code | this patch | this patch | |table | | works on ch1| works on ch2| +-------------------+---------------+-------------+-------------+ |ls_in_port_sec_l2 | 37 | 34 | 30 | |ls_in_port_sec_ip | 114 | 91 | 81 | |ls_in_port_sec_nd | 70 | 44 | 32 | |ls_in_pre_acl | 26 | 26 | 26 | |ls_in_acl | 71 | 53 | 47 | |ls_out_pre_acl | 30 | 30 | 30 | |ls_out_acl | 157 | 100 | 82 | |ls_out_port_sec_ip | 57 | 35 | 26 | |ls_out_port_sec_l2 | 18 | 15 | 11 | +-------------------+---------------+-------------+-------------+ |total | 580 | 428 | 365 | +-------------------+---------------+-------------+-------------+ |reduce ls flows | -- | 26% | 37% | +-------------------+---------------+-------------+-------------+ |all chassis flows | 767 | 627 | 552 | +-------------------+---------------+-------------+-------------+ |reduce totally | -- | 18% | 28% | +-------------------+---------------+-------------+-------------+ Imagine such a topology: 10 chassis, with 50 VMs per chassis. Let's take table ls_in_port_sec_l2 as an example. With some constant number of flows C, the current implementation will install 500 + C flows in ls_in_port_sec_l2 on each chassis. After we ignore lflows matching remote VM inports, only 50 + C flows will be installed in table ls_in_port_sec_l2 on each node.
Signed-off-by: Zong Kai LI <zealo...@gmail.com> --- ovn/controller/lflow.c | 42 +++++++++++++++++++++++++++++++++++------ ovn/controller/lflow.h | 3 ++- ovn/controller/ovn-controller.c | 2 +- ovn/northd/ovn-northd.8.xml | 31 +++++++++++++++++++++++++++++- ovn/northd/ovn-northd.c | 17 ++++++++++++++++- ovn/ovn-nb.xml | 3 +++ 6 files changed, 88 insertions(+), 10 deletions(-) diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c index 00d1d6e..eff4d61 100644 --- a/ovn/controller/lflow.c +++ b/ovn/controller/lflow.c @@ -324,7 +324,8 @@ static void consider_logical_flow(const struct lport_index *lports, const struct simap *ct_zones, struct hmap *dhcp_opts_p, uint32_t *conj_id_ofs_p, - struct hmap *flow_table); + struct hmap *flow_table, + const char* chassis_id); static bool lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp) @@ -362,7 +363,8 @@ add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports, const struct hmap *local_datapaths, const struct hmap *patched_datapaths, struct group_table *group_table, - const struct simap *ct_zones, struct hmap *flow_table) + const struct simap *ct_zones, struct hmap *flow_table, + const char* chassis_id) { uint32_t conj_id_ofs = 1; @@ -377,7 +379,8 @@ add_logical_flows(struct controller_ctx *ctx, const struct lport_index *lports, SBREC_LOGICAL_FLOW_FOR_EACH (lflow, ctx->ovnsb_idl) { consider_logical_flow(lports, mcgroups, lflow, local_datapaths, patched_datapaths, group_table, ct_zones, - &dhcp_opts, &conj_id_ofs, flow_table); + &dhcp_opts, &conj_id_ofs, flow_table, + chassis_id); } dhcp_opts_destroy(&dhcp_opts); @@ -393,7 +396,8 @@ consider_logical_flow(const struct lport_index *lports, const struct simap *ct_zones, struct hmap *dhcp_opts_p, uint32_t *conj_id_ofs_p, - struct hmap *flow_table) + struct hmap *flow_table, + const char* chassis_id) { /* Determine translation of logical table IDs to physical table IDs. 
*/ bool ingress = !strcmp(lflow->pipeline, "ingress"); @@ -437,6 +441,30 @@ consider_logical_flow(const struct lport_index *lports, return; } } + + /* Skip logical flow when it has an 'inport' or 'outport' to match, + * and the port is a VM or VIF interface, but not a local port to + * current chassis. */ + if (strstr(lflow->match, "inport") + || strstr(lflow->match, "outport")) { + struct lexer lexer; + lexer_init(&lexer, lflow->match); + do { + lexer_get(&lexer); + } while (lexer.token.type != LEX_T_ID + || (strcmp(lexer.token.s, "inport") + && strcmp(lexer.token.s, "outport"))); + /* Skip "==", then get logical port name. */ + lexer_get(&lexer); + lexer_get(&lexer); + const struct sbrec_port_binding *pb + = lport_lookup_by_name(lports, lexer.token.s); + lexer_destroy(&lexer); + if (pb && pb->chassis && !strcmp(pb->type, "") + && strcmp(chassis_id, pb->chassis->name)){ + return; + } + } } /* Determine translation of logical table IDs to physical table IDs. */ @@ -628,11 +656,13 @@ lflow_run(struct controller_ctx *ctx, const struct lport_index *lports, const struct hmap *local_datapaths, const struct hmap *patched_datapaths, struct group_table *group_table, - const struct simap *ct_zones, struct hmap *flow_table) + const struct simap *ct_zones, struct hmap *flow_table, + const char* chassis_id) { update_address_sets(ctx); add_logical_flows(ctx, lports, mcgroups, local_datapaths, - patched_datapaths, group_table, ct_zones, flow_table); + patched_datapaths, group_table, ct_zones, flow_table, + chassis_id); add_neighbor_flows(ctx, lports, flow_table); } diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h index e96a24b..859e614 100644 --- a/ovn/controller/lflow.h +++ b/ovn/controller/lflow.h @@ -66,7 +66,8 @@ void lflow_run(struct controller_ctx *, const struct lport_index *, const struct hmap *patched_datapaths, struct group_table *group_table, const struct simap *ct_zones, - struct hmap *flow_table); + struct hmap *flow_table, + const char* chassis_id); 
void lflow_destroy(void); #endif /* ovn/lflow.h */ diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index 28ee13e..c628943 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -444,7 +444,7 @@ main(int argc, char *argv[]) struct hmap flow_table = HMAP_INITIALIZER(&flow_table); lflow_run(&ctx, &lports, &mcgroups, &local_datapaths, &patched_datapaths, &group_table, &ct_zones, - &flow_table); + &flow_table, chassis_id); if (chassis_id) { physical_run(&ctx, mff_ovn_geneve, br_int, chassis_id, &ct_zones, &flow_table, diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml index 6bc83ea..c835d2e 100644 --- a/ovn/northd/ovn-northd.8.xml +++ b/ovn/northd/ovn-northd.8.xml @@ -135,6 +135,11 @@ </ul> <p> + Flows in this table with <code>inport</code> to match VM/VIF ports, will + be only installed on chassis which those ports are bound to. + </p> + + <p> There are no flows for disabled logical ports because the default-drop behavior of logical flow tables causes packets that ingress from them to be dropped. @@ -195,6 +200,11 @@ </li> </ul> + <p> + Flows in this table with <code>inport</code> to match VM/VIF ports, will + be only installed on chassis which those ports are bound to. + </p> + <h3>Ingress Table 2: Ingress Port Security - Neighbor discovery</h3> <p> @@ -240,6 +250,11 @@ </li> </ul> + <p> + Flows in this table with <code>inport</code> to match VM/VIF ports, will + be only installed on chassis which those ports are bound to. + </p> + <h3>Ingress Table 3: <code>from-lport</code> Pre-ACLs</h3> <p> @@ -250,6 +265,8 @@ (with <code>reg0[0] = 1; next;</code>) for table <code>Pre-stateful</code> to send IP packets to the connection tracker before eventually advancing to ingress table <code>ACLs</code>. + Flows in this table with <code>inport</code> to match VM/VIF ports, will + be only installed on chassis which those ports are bound to. 
</p> <h3>Ingress Table 4: Pre-LB</h3> @@ -293,6 +310,8 @@ values from the <code>ACL</code> table have a limited range and have 1000 added to them to leave room for OVN default flows at both higher and lower priorities. + Flows in this table with <code>inport</code> to match VM/VIF ports, will + be only installed on chassis which those ports are bound to. </p> <p> @@ -470,6 +489,9 @@ output; <p> This is similar to ingress table <code>Pre-ACLs</code> except for <code>to-lport</code> traffic. + Flows in this table with <code>inport</code> or <code>outport</code> to + match VM/VIF ports, will be only installed on chassis which those ports + are bound to. </p> <h3>Egress Table 2: Pre-stateful</h3> @@ -488,6 +510,9 @@ output; <p> This is similar to ingress table <code>ACLs</code> except for <code>to-lport</code> ACLs. + Flows in this table with <code>inport</code> or <code>outport</code> to + match VM/VIF ports, will be only installed on chassis which those ports + are bound to. </p> <h3>Egress Table 5: Stateful</h3> @@ -505,6 +530,8 @@ output; <code>eth.dst</code>, <code>ip4.dst</code> and <code>ip6.dst</code> are checked instead of <code>inport</code>, <code>eth.src</code>, <code>ip4.src</code> and <code>ip6.src</code> + Flows with <code>outport</code> assigned to match VM/VIF ports, will be + only installed on chassis which those ports are bound to. </p> <h3>Egress Table 7: Egress Port Security - L2</h3> @@ -521,7 +548,9 @@ output; Finally, to ensure that even broadcast and multicast packets are not delivered to disabled logical ports, a priority-150 flow for each disabled logical <code>outport</code> overrides the priority-100 flow - with a <code>drop;</code> action. + with a <code>drop;</code> action. Flows with <code>outport</code> + assigned to match VM/VIF ports, will only be installed on chassis which + those ports are bound to. 
</p> <h2>Logical Router Datapaths</h2> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index f4b4435..14b15bb 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -1113,6 +1113,10 @@ build_port_security_ipv6_flow( * for IPv6 Neighbor Advertisement packet. * * - Priority 80 flow to drop ARP and IPv6 ND packets. + * + * Logical flows in S_SWITCH_IN_PORT_SEC_ND stage will have "inport" to match, + * which indicates that these flows will only work for a certain port on + * a certain chassis. */ static void build_port_security_nd(struct ovn_port *op, struct hmap *lflows) @@ -1197,6 +1201,10 @@ build_port_security_nd(struct ovn_port *op, struct hmap *lflows) * * - If the port security has IPv4 addresses or IPv6 addresses or both * - Priority 80 flow to drop all IPv4 and IPv6 traffic + * + * Logical flows in S_SWITCH_(IN/OUT)_PORT_SEC_IP stage will have "inport" + * or "outport" to match, which indicates that these flows will only work + * for a certain port on a certain chassis. */ static void build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op, @@ -1789,6 +1797,9 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } struct ds match = DS_EMPTY_INITIALIZER; + /* Logical flows in S_SWITCH_IN_PORT_SEC_L2 stage will have "inport" + * to match, which indicates that these flows will only work for a + * certain port on a certain chassis. */ ds_put_format(&match, "inport == %s", op->json_key); build_port_security_l2( "eth.src", op->nbs->port_security, op->nbs->n_port_security, @@ -2016,7 +2027,11 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, * Priority 50 rules implement port security for enabled logical port. * * Priority 150 rules drop packets to disabled logical ports, so that they - * don't even receive multicast or broadcast packets. */ + * don't even receive multicast or broadcast packets. 
+ * + * Logical flows in S_SWITCH_OUT_PORT_SEC_L2 stage will have "outport" + * to match, which indicates that these flows will only work for a certain + * port on a certain chassis. */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbs) { continue; diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml index 5542ea4..4130f10 100644 --- a/ovn/ovn-nb.xml +++ b/ovn/ovn-nb.xml @@ -643,6 +643,9 @@ <code>outport</code> logical port is only available in the <code>to-lport</code> direction (the <code>inport</code> is available in both directions). + With <code>inport</code> or <code>outport</code> assigned to match + VM/VIF ports, ACL entries related logical flows will be only installed + on chassis which those ports are bound to. </p> <p> -- 1.9.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev