This implement basic logical L3 routing. It has a lot of caveats, including the following regarding testing:
* Only single-router hops have been tested. Chains or trees of logical routers may work but definitely need testing and may need a little extra code. * No testing of logical router ARP replies. * Not enough testing in general. ovn/TODO describes a lot of other caveats in terms of the work needed to fix them. Signed-off-by: Ben Pfaff <b...@nicira.com> --- ovn/TODO | 6 - ovn/northd/ovn-northd.c | 641 +++++++++++++++++++++++++++++++++++++++++------- ovn/ovn-sb.xml | 28 ++- tests/ovn.at | 168 ++++++++++++- 4 files changed, 738 insertions(+), 105 deletions(-) diff --git a/ovn/TODO b/ovn/TODO index 8ea0d17..1c3a0dc 100644 --- a/ovn/TODO +++ b/ovn/TODO @@ -47,12 +47,6 @@ various ways to ensure it could be implemented, e.g. the same as for OpenFlow by allowing the logical inport to be zeroed, or by introducing a new action that ignores the inport. -** ovn-northd - -*** What flows should it generate? - -See description in ovn-northd(8). - ** New OVN logical actions *** arp diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index ac3b39e..8825b52 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -222,13 +222,20 @@ allocate_tnlid(struct hmap *set, const char *name, uint32_t max, /* The 'key' comes from nb->header_.uuid or sb->external_ids:logical-switch. */ struct ovn_datapath { struct hmap_node key_node; /* Index on 'key'. */ - struct uuid key; /* nb->header_.uuid. */ + struct uuid key; /* (nbs/nbr)->header_.uuid. */ - const struct nbrec_logical_switch *nb; /* May be NULL. */ + const struct nbrec_logical_switch *nbs; /* May be NULL. */ + const struct nbrec_logical_router *nbr; /* May be NULL. */ const struct sbrec_datapath_binding *sb; /* May be NULL. */ struct ovs_list list; /* In list of similar records. */ + /* Logical router data (digested from nbr). */ + ovs_be32 gateway; + + /* Logical switch data. */ + struct ovn_port *router_port; + struct hmap port_tnlids; uint32_t port_key_hint; @@ -237,13 +244,15 @@ struct ovn_datapath { static struct ovn_datapath * ovn_datapath_create(struct hmap *datapaths, const struct uuid *key, - const struct nbrec_logical_switch *nb, + const struct nbrec_logical_switch *nbs, + const struct nbrec_logical_router *nbr, const struct sbrec_datapath_binding *sb) { struct ovn_datapath *od = xzalloc(sizeof *od); od->key = *key; od->sb = sb; - od->nb = nb; + od->nbs = nbs; + od->nbr = nbr; hmap_init(&od->port_tnlids); od->port_key_hint = 0; hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key)); @@ -301,7 +310,8 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths, const struct sbrec_datapath_binding *sb, *sb_next; SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) { struct uuid key; - if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key)) { + if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) && + !smap_get_uuid(&sb->external_ids, "logical-router", &key)) { ovsdb_idl_txn_add_comment(ctx->ovnsb_txn, "deleting Datapath_Binding "UUID_FMT" that " "lacks external-ids:logical-switch", @@ -320,23 +330,62 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths, } struct ovn_datapath *od = ovn_datapath_create(datapaths, &key, - NULL, sb); + NULL, NULL, sb); list_push_back(sb_only, &od->list); } - const struct nbrec_logical_switch *nb; - NBREC_LOGICAL_SWITCH_FOR_EACH (nb, ctx->ovnnb_idl) { + const struct nbrec_logical_switch *nbs; + NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) { struct ovn_datapath *od = ovn_datapath_find(datapaths, - &nb->header_.uuid); + &nbs->header_.uuid); if (od) { - od->nb = nb; + od->nbs = nbs; list_remove(&od->list); list_push_back(both, &od->list); } else { - od = ovn_datapath_create(datapaths, &nb->header_.uuid, nb, NULL); + od = ovn_datapath_create(datapaths, &nbs->header_.uuid, + nbs, NULL, NULL); list_push_back(nb_only, &od->list); } } + + const struct nbrec_logical_router *nbr; + NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) { + struct ovn_datapath *od = ovn_datapath_find(datapaths, + &nbr->header_.uuid); + if (od) { + if (!od->nbs) { + od->nbr = nbr; + list_remove(&od->list); + list_push_back(both, &od->list); + } else { + /* Can't happen! */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, + "duplicate UUID "UUID_FMT" in OVN_Northbound", + UUID_ARGS(&nbr->header_.uuid)); + continue; + } + } else { + od = ovn_datapath_create(datapaths, &nbr->header_.uuid, + NULL, nbr, NULL); + list_push_back(nb_only, &od->list); + } + + od->gateway = 0; + if (nbr->default_gw) { + ovs_be32 ip, mask; + char *error = ip_parse_masked(nbr->default_gw, &ip, &mask); + if (error || !ip || mask != OVS_BE32_MAX) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw); + free(error); + } else { + od->gateway = ip; + } + } + } } static uint32_t @@ -371,8 +420,9 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths) od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn); char uuid_s[UUID_LEN + 1]; - sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->nb->header_.uuid)); - const struct smap id = SMAP_CONST1(&id, "logical-switch", uuid_s); + sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key)); + const char *key = od->nbs ? "logical-switch" : "logical-router"; + const struct smap id = SMAP_CONST1(&id, key, uuid_s); sbrec_datapath_binding_set_external_ids(od->sb, &id); sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key); @@ -391,10 +441,19 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths) struct ovn_port { struct hmap_node key_node; /* Index on 'key'. */ - const char *key; /* nb->name and sb->logical_port */ + char *key; /* nb->name and sb->logical_port */ + char *json_key; /* 'key', quoted for use in JSON. */ + + const struct nbrec_logical_port *nbs; /* May be NULL. */ + const struct nbrec_logical_router_port *nbr; /* May be NULL. */ + const struct sbrec_port_binding *sb; /* May be NULL. */ - const struct nbrec_logical_port *nb; /* May be NULL. */ - const struct sbrec_port_binding *sb; /* May be NULL. */ + /* Logical router port data. */ + ovs_be32 ip, mask; /* 192.168.10.123/24. */ + ovs_be32 network; /* 192.168.10.0. */ + ovs_be32 bcast; /* 192.168.10.255. */ + struct eth_addr mac; + struct ovn_port *peer; struct ovn_datapath *od; @@ -403,13 +462,20 @@ struct ovn_port { static struct ovn_port * ovn_port_create(struct hmap *ports, const char *key, - const struct nbrec_logical_port *nb, + const struct nbrec_logical_port *nbs, + const struct nbrec_logical_router_port *nbr, const struct sbrec_port_binding *sb) { struct ovn_port *op = xzalloc(sizeof *op); - op->key = key; + + struct ds json_key = DS_EMPTY_INITIALIZER; + json_string_escape(key, &json_key); + op->json_key = ds_steal_cstr(&json_key); + + op->key = xstrdup(key); op->sb = sb; - op->nb = nb; + op->nbs = nbs; + op->nbr = nbr; hmap_insert(ports, &op->key_node, hash_string(op->key, 0)); return op; } @@ -422,6 +488,8 @@ ovn_port_destroy(struct hmap *ports, struct ovn_port *port) * private list and once we've exited that function it is not safe to * use it. */ hmap_remove(ports, &port->key_node); + free(port->json_key); + free(port->key); free(port); } } @@ -460,24 +528,111 @@ join_logical_ports(struct northd_context *ctx, const struct sbrec_port_binding *sb; SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) { struct ovn_port *op = ovn_port_create(ports, sb->logical_port, - NULL, sb); + NULL, NULL, sb); list_push_back(sb_only, &op->list); } struct ovn_datapath *od; HMAP_FOR_EACH (od, key_node, datapaths) { - for (size_t i = 0; i < od->nb->n_ports; i++) { - const struct nbrec_logical_port *nb = od->nb->ports[i]; - struct ovn_port *op = ovn_port_find(ports, nb->name); - if (op) { - op->nb = nb; - list_remove(&op->list); - list_push_back(both, &op->list); - } else { - op = ovn_port_create(ports, nb->name, nb, NULL); - list_push_back(nb_only, &op->list); + if (od->nbs) { + for (size_t i = 0; i < od->nbs->n_ports; i++) { + const struct nbrec_logical_port *nbs = od->nbs->ports[i]; + struct ovn_port *op = ovn_port_find(ports, nbs->name); + if (op) { + if (op->nbs || op->nbr) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "duplicate logical port %s", + nbs->name); + continue; + } + op->nbs = nbs; + list_remove(&op->list); + list_push_back(both, &op->list); + } else { + op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL); + list_push_back(nb_only, &op->list); + } + + op->od = od; + } + } else { + for (size_t i = 0; i < od->nbr->n_ports; i++) { + const struct nbrec_logical_router_port *nbr + = od->nbr->ports[i]; + + struct eth_addr mac; + if (!eth_addr_from_string(nbr->mac, &mac)) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac); + continue; + } + + ovs_be32 ip, mask; + char *error = ip_parse_masked(nbr->network, &ip, &mask); + if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network); + free(error); + continue; + } + + char name[UUID_LEN + 1]; + snprintf(name, sizeof name, UUID_FMT, + UUID_ARGS(&nbr->header_.uuid)); + struct ovn_port *op = ovn_port_find(ports, name); + if (op) { + if (op->nbs || op->nbr) { + static struct vlog_rate_limit rl + = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, "duplicate logical router port %s", + name); + continue; + } + op->nbr = nbr; + list_remove(&op->list); + list_push_back(both, &op->list); + } else { + op = ovn_port_create(ports, name, NULL, nbr, NULL); + list_push_back(nb_only, &op->list); + } + + op->ip = ip; + op->mask = mask; + op->network = ip & mask; + op->bcast = ip | ~mask; + op->mac = mac; + + op->od = od; } - op->od = od; + } + } + + /* Connect logical router ports, and logical switch ports of type "router", + * to their peers. */ + struct ovn_port *op; + HMAP_FOR_EACH (op, key_node, ports) { + if (op->nbs && !strcmp(op->nbs->type, "router")) { + const char *peer_name = smap_get(&op->nbs->options, "router-port"); + if (!peer_name) { + continue; + } + + struct ovn_port *peer = ovn_port_find(ports, peer_name); + if (!peer || !peer->nbr) { + continue; + } + + peer->peer = op; + op->peer = peer; + op->od->router_port = op; + } else if (op->nbr && op->nbr->peer) { + char peer_name[UUID_LEN + 1]; + snprintf(peer_name, sizeof peer_name, UUID_FMT, + UUID_ARGS(&op->nbr->peer->header_.uuid)); + op->peer = ovn_port_find(ports, peer_name); } } } @@ -485,13 +640,37 @@ join_logical_ports(struct northd_context *ctx, static void ovn_port_update_sbrec(const struct ovn_port *op) { - sbrec_port_binding_set_type(op->sb, op->nb->type); - sbrec_port_binding_set_options(op->sb, &op->nb->options); sbrec_port_binding_set_datapath(op->sb, op->od->sb); - sbrec_port_binding_set_parent_port(op->sb, op->nb->parent_name); - sbrec_port_binding_set_tag(op->sb, op->nb->tag, op->nb->n_tag); - sbrec_port_binding_set_mac(op->sb, (const char **) op->nb->addresses, - op->nb->n_addresses); + if (op->nbr) { + sbrec_port_binding_set_type(op->sb, "patch"); + + const char *peer = op->peer ? op->peer->key : "<error>"; + const struct smap ids = SMAP_CONST1(&ids, "peer", peer); + sbrec_port_binding_set_options(op->sb, &ids); + + sbrec_port_binding_set_parent_port(op->sb, NULL); + sbrec_port_binding_set_tag(op->sb, NULL, 0); + sbrec_port_binding_set_mac(op->sb, NULL, 0); + } else { + if (strcmp(op->nbs->type, "router")) { + sbrec_port_binding_set_type(op->sb, op->nbs->type); + sbrec_port_binding_set_options(op->sb, &op->nbs->options); + } else { + sbrec_port_binding_set_type(op->sb, "patch"); + + const char *router_port = smap_get(&op->nbs->options, + "router-port"); + if (!router_port) { + router_port = "<error>"; + } + const struct smap ids = SMAP_CONST1(&ids, "peer", router_port); + sbrec_port_binding_set_options(op->sb, &ids); + } + sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name); + sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag); + sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses, + op->nbs->n_addresses); + } } static void @@ -759,57 +938,63 @@ lport_is_enabled(const struct nbrec_logical_port *lport) return !lport->enabled || *lport->enabled; } -/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, - * constructing their contents based on the OVN_NB database. */ static void -build_lflows(struct northd_context *ctx, struct hmap *datapaths, - struct hmap *ports) +build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, + struct hmap *lflows, struct hmap *mcgroups) { /* This flow table structure is documented in ovn-northd(8), so please * update ovn-northd.8.xml if you change anything. */ - struct hmap lflows = HMAP_INITIALIZER(&lflows); - struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups); - - /* Ingress table 0: Admission control framework (priorities 0 and 100). */ + /* Logical switch ingress table 0: Admission control framework (priority + * 100). */ struct ovn_datapath *od; HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbs) { + continue; + } + /* Logical VLANs not supported. */ - ovn_lflow_add(&lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present", + ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present", "drop;"); /* Broadcast/multicast source address is invalid. */ - ovn_lflow_add(&lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]", + ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]", "drop;"); /* Port security flows have priority 50 (see below) and will continue * to the next table if packet source is acceptable. */ } - /* Ingress table 0: Ingress port security (priority 50). */ + /* Logical switch ingress table 0: Ingress port security (priority 50). */ struct ovn_port *op; HMAP_FOR_EACH (op, key_node, ports) { - if (!lport_is_enabled(op->nb)) { + if (!op->nbs) { + continue; + } + + if (!lport_is_enabled(op->nbs)) { /* Drop packets from disabled logical ports (since logical flow * tables are default-drop). */ continue; } struct ds match = DS_EMPTY_INITIALIZER; - ds_put_cstr(&match, "inport == "); - json_string_escape(op->key, &match); + ds_put_format(&match, "inport == %s", op->json_key); build_port_security("eth.src", - op->nb->port_security, op->nb->n_port_security, + op->nbs->port_security, op->nbs->n_port_security, &match); - ovn_lflow_add(&lflows, op->od, S_SWITCH_IN_PORT_SEC, 50, + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50, ds_cstr(&match), "next;"); ds_destroy(&match); } /* Ingress table 1: ACLs (any priority). */ HMAP_FOR_EACH (od, key_node, datapaths) { - for (size_t i = 0; i < od->nb->n_acls; i++) { - const struct nbrec_acl *acl = od->nb->acls[i]; + if (!od->nbs) { + continue; + } + for (size_t i = 0; i < od->nbs->n_acls; i++) { + const struct nbrec_acl *acl = od->nbs->acls[i]; const char *action; if (strcmp(acl->direction, "from-lport")) { @@ -819,48 +1004,55 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, action = (!strcmp(acl->action, "allow") || !strcmp(acl->action, "allow-related")) ? "next;" : "drop;"; - ovn_lflow_add(&lflows, od, S_SWITCH_IN_ACL, acl->priority, + ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, acl->priority, acl->match, action); } } HMAP_FOR_EACH (od, key_node, datapaths) { - ovn_lflow_add(&lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;"); } /* Ingress table 2: Destination lookup, broadcast and multicast handling * (priority 100). */ HMAP_FOR_EACH (op, key_node, ports) { - if (lport_is_enabled(op->nb)) { - ovn_multicast_add(&mcgroups, &mc_flood, op); + if (!op->nbs) { + continue; + } + + if (lport_is_enabled(op->nbs)) { + ovn_multicast_add(mcgroups, &mc_flood, op); } } HMAP_FOR_EACH (od, key_node, datapaths) { - ovn_lflow_add(&lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast", + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast", "outport = \""MC_FLOOD"\"; output;"); } /* Ingress table 2: Destination lookup, unicast handling (priority 50), */ HMAP_FOR_EACH (op, key_node, ports) { - for (size_t i = 0; i < op->nb->n_addresses; i++) { + if (!op->nbs) { + continue; + } + + for (size_t i = 0; i < op->nbs->n_addresses; i++) { struct eth_addr mac; - if (eth_addr_from_string(op->nb->addresses[i], &mac)) { + if (eth_addr_from_string(op->nbs->addresses[i], &mac)) { struct ds match, actions; ds_init(&match); - ds_put_format(&match, "eth.dst == %s", op->nb->addresses[i]); + ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT, + ETH_ADDR_ARGS(mac)); ds_init(&actions); - ds_put_cstr(&actions, "outport = "); - json_string_escape(op->nb->name, &actions); - ds_put_cstr(&actions, "; output;"); - ovn_lflow_add(&lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, + ds_put_format(&actions, "outport = %s; output;", op->json_key); + ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50, ds_cstr(&match), ds_cstr(&actions)); ds_destroy(&actions); ds_destroy(&match); - } else if (!strcmp(op->nb->addresses[i], "unknown")) { - if (lport_is_enabled(op->nb)) { - ovn_multicast_add(&mcgroups, &mc_unknown, op); + } else if (!strcmp(op->nbs->addresses[i], "unknown")) { + if (lport_is_enabled(op->nbs)) { + ovn_multicast_add(mcgroups, &mc_unknown, op); op->od->has_unknown = true; } } else { @@ -868,23 +1060,30 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, VLOG_INFO_RL(&rl, "%s: invalid syntax '%s' in addresses column", - op->nb->name, op->nb->addresses[i]); + op->nbs->name, op->nbs->addresses[i]); } } } /* Ingress table 2: Destination lookup for unknown MACs (priority 0). */ HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbs) { + continue; + } + if (od->has_unknown) { - ovn_lflow_add(&lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", + ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1", "outport = \""MC_UNKNOWN"\"; output;"); } } /* Egress table 0: ACLs (any priority). */ HMAP_FOR_EACH (od, key_node, datapaths) { - for (size_t i = 0; i < od->nb->n_acls; i++) { - const struct nbrec_acl *acl = od->nb->acls[i]; + if (!od->nbs) { + continue; + } + for (size_t i = 0; i < od->nbs->n_acls; i++) { + const struct nbrec_acl *acl = od->nbs->acls[i]; const char *action; if (strcmp(acl->direction, "to-lport")) { @@ -894,18 +1093,26 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, action = (!strcmp(acl->action, "allow") || !strcmp(acl->action, "allow-related")) ? "next;" : "drop;"; - ovn_lflow_add(&lflows, od, S_SWITCH_OUT_ACL, acl->priority, + ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, acl->priority, acl->match, action); } } HMAP_FOR_EACH (od, key_node, datapaths) { - ovn_lflow_add(&lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;"); + if (!od->nbs) { + continue; + } + + ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;"); } /* Egress table 1: Egress port security multicast/broadcast (priority * 100). */ HMAP_FOR_EACH (od, key_node, datapaths) { - ovn_lflow_add(&lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast", + if (!od->nbs) { + continue; + } + + ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast", "output;"); } @@ -916,24 +1123,276 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, * Priority 150 rules drop packets to disabled logical ports, so that they * don't even receive multicast or broadcast packets. */ HMAP_FOR_EACH (op, key_node, ports) { - struct ds match; - - ds_init(&match); - ds_put_cstr(&match, "outport == "); - json_string_escape(op->key, &match); - if (lport_is_enabled(op->nb)) { - build_port_security("eth.dst", - op->nb->port_security, op->nb->n_port_security, - &match); - ovn_lflow_add(&lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50, + if (!op->nbs) { + continue; + } + + struct ds match = DS_EMPTY_INITIALIZER; + ds_put_format(&match, "outport == %s", op->json_key); + if (lport_is_enabled(op->nbs)) { + build_port_security("eth.dst", op->nbs->port_security, + op->nbs->n_port_security, &match); + ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50, ds_cstr(&match), "output;"); } else { - ovn_lflow_add(&lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150, + ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150, ds_cstr(&match), "drop;"); } ds_destroy(&match); } +} + +static bool +lrport_is_enabled(const struct nbrec_logical_router_port *lrport) +{ + return !lrport->enabled || *lrport->enabled; +} + +static void +add_route(struct hmap *lflows, struct ovn_datapath *od, + ovs_be32 network, ovs_be32 mask, ovs_be32 gateway) +{ + char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT, + IP_ARGS(network), IP_ARGS(mask)); + + struct ds actions = DS_EMPTY_INITIALIZER; + ds_put_cstr(&actions, "ip4.ttl--; reg0 = "); + if (gateway) { + ds_put_format(&actions, IP_FMT, IP_ARGS(gateway)); + } else { + ds_put_cstr(&actions, "ip4.dst"); + } + ds_put_cstr(&actions, "; next;"); + + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, + count_1bits(ntohl(mask)), match, ds_cstr(&actions)); + ds_destroy(&actions); + free(match); +} + +static void +build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, + struct hmap *lflows) +{ + /* This flow table structure is documented in ovn-northd(8), so please + * update ovn-northd.8.xml if you change anything. */ + + /* XXX ICMP echo reply */ + + /* Logical router ingress table 0: Admission control framework. */ + struct ovn_datapath *od; + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + + /* Logical VLANs not supported. + * Broadcast/multicast source address is invalid. */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100, + "vlan.present || eth.src[40]", "drop;"); + } + + /* Logical router ingress table 0: match (priority 50). */ + struct ovn_port *op; + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbr) { + continue; + } + + if (!lrport_is_enabled(op->nbr)) { + /* Drop packets from disabled logical ports (since logical flow + * tables are default-drop). */ + continue; + } + + char *match = xasprintf( + "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s", + ETH_ADDR_ARGS(op->mac), op->json_key); + ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50, + match, "next;"); + } + + /* Logical router ingress table 1: IP Input. */ + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + + /* L3 admission control: drop multicast and broadcast source, localhost + * source or destination, and zero network source or destination + * (priority 220). */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 220, + "ip4.mcast || " + "ip4.src == 255.255.255.255 || " + "ip4.src == 127.0.0.0/8 || " + "ip4.dst == 127.0.0.0/8 || " + "ip4.src == 0.0.0.0/8 || " + "ip4.dst == 0.0.0.0/8", + "drop;"); + + /* Drop Ethernet local broadcast. By definition this traffic should + * not be forwarded.*/ + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 190, + "eth.bcast", "drop;"); + + /* Drop IP multicast. */ + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 190, + "ip4.mcast", "drop;"); + + /* TTL discard. + * + * XXX Need to send ICMP time exceeded if !ip.later_frag. */ + char *match = xasprintf("ip4 && ip.ttl == {0, 1}"); + ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 170, match, "drop;"); + free(match); + } + + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbr) { + continue; + } + + /* L3 admission control: drop packets that originate from an IP address + * owned by the router or a broadcast address known to the router + * (priority 220). */ + char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}", + IP_ARGS(op->ip), IP_ARGS(op->bcast)); + ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 220, + match, "drop;"); + free(match); + + /* ARP reply. These flows reply to ARP requests for the router's own + * IP address. */ + match = xasprintf( + "inport == %s && arp.tha == "ETH_ADDR_FMT" && arp.op == 1", + op->json_key, ETH_ADDR_ARGS(op->mac)); + char *actions = xasprintf( + "eth.dst = eth.src; " + "eth.src = "ETH_ADDR_FMT"; " + "arp.op = 2; /* ARP reply */ " + "arp.tha = arp.sha; " + "arp.sha = "ETH_ADDR_FMT"; " + "arp.tpa = arp.spa; " + "arp.spa = "IP_FMT"; " + "outport = %s; " + "inport = \"\"; /* Allow sending out inport. */ " + "output;", + ETH_ADDR_ARGS(op->mac), + ETH_ADDR_ARGS(op->mac), + IP_ARGS(op->ip), + op->json_key); + ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 210, + match, actions); + + /* Drop IP traffic to this router. */ + match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip)); + ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 200, + match, "drop;"); + free(match); + } + + /* Logical router ingress table 2: IP Routing. + * + * A packet that arrives at this table is an IP packet that should be + * routed to the address in ip4.dst. This table sets reg0 to the next-hop + * IP address (leaving ip4.dst, the packet’s final destination, unchanged) + * and advances to the next table for ARP resolution. */ + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbr) { + continue; + } + + add_route(lflows, op->od, op->network, op->mask, 0); + } + HMAP_FOR_EACH (od, key_node, datapaths) { + if (!od->nbr) { + continue; + } + + if (od->gateway) { + add_route(lflows, od, 0, 0, od->gateway); + } + } + /* XXX destination unreachable */ + + /* Local router ingress table 3: ARP Resolution. + * + * Any packet that reaches this table is an IP packet whose next-hop IP + * address is in reg0. (ip4.dst is the final destination.) This table + * resolves the IP address in reg0 into an output port in outport and an + * Ethernet address in eth.dst. */ + HMAP_FOR_EACH (op, key_node, ports) { + if (op->nbr) { + /* XXX ARP for neighboring router */ + } else if (op->od->router_port) { + const char *peer_name = smap_get( + &op->od->router_port->nbs->options, "router-port"); + if (!peer_name) { + continue; + } + + struct ovn_port *peer = ovn_port_find(ports, peer_name); + if (!peer || !peer->nbr) { + continue; + } + + for (size_t i = 0; i < op->nbs->n_addresses; i++) { + struct eth_addr ea; + ovs_be32 ip; + + if (ovs_scan(op->nbs->addresses[i], + ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT, + ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) { + char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip)); + char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; " + "eth.dst = "ETH_ADDR_FMT"; " + "outport = %s; " + "output;", + ETH_ADDR_ARGS(peer->mac), + ETH_ADDR_ARGS(ea), + peer->json_key); + ovn_lflow_add(lflows, peer->od, + S_ROUTER_IN_ARP, 200, match, actions); + free(actions); + free(match); + } + } + } + } + + /* Logical router egress table 0: Delivery (priority 100). + * + * Priority 100 rules deliver packets to enabled logical ports. */ + HMAP_FOR_EACH (op, key_node, ports) { + if (!op->nbr) { + continue; + } + + if (!lrport_is_enabled(op->nbr)) { + /* Drop packets to disabled logical ports (since logical flow + * tables are default-drop). */ + continue; + } + + char *match = xasprintf("outport == %s", op->json_key); + ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100, + match, "output;"); + free(match); + } +} + +/* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, + * constructing their contents based on the OVN_NB database. */ +static void +build_lflows(struct northd_context *ctx, struct hmap *datapaths, + struct hmap *ports) +{ + struct hmap lflows = HMAP_INITIALIZER(&lflows); + struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups); + + build_lswitch_flows(datapaths, ports, &lflows, &mcgroups); + build_lrouter_flows(datapaths, ports, &lflows); /* Push changes to the Logical_Flow table to database. */ const struct sbrec_logical_flow *sbflow, *next_sbflow; @@ -945,7 +1404,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, continue; } - enum ovn_datapath_type dp_type = DP_SWITCH; /* XXX no routers yet. */ + enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER; enum ovn_pipeline pipeline = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT; struct ovn_lflow *lflow = ovn_lflow_find( @@ -964,8 +1423,8 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn); sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb); - sbrec_logical_flow_set_pipeline(sbflow, - pipeline ? "ingress" : "egress"); + sbrec_logical_flow_set_pipeline( + sbflow, pipeline == P_IN ? "ingress" : "egress"); sbrec_logical_flow_set_table_id(sbflow, table); sbrec_logical_flow_set_priority(sbflow, lflow->priority); sbrec_logical_flow_set_match(sbflow, lflow->match); diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index f898f97..7d9d22f 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -1025,12 +1025,28 @@ tcp.flags = RST; constructed for each supported encapsulation. </column> - <column name="external_ids" key="logical-switch" type='{"type": "uuid"}'> - Each row in <ref table="Datapath_Binding"/> is associated with some - logical datapath. <code>ovn-northd</code> uses this key to store the - UUID of the logical datapath <ref table="Logical_Switch" - db="OVN_Northbound"/> row in the <ref db="OVN_Northbound"/> database. - </column> + <group title="OVN_Northbound Relationship"> + <p> + Each row in <ref table="Datapath_Binding"/> is associated with some + logical datapath. <code>ovn-northd</code> uses these key to track the + association of a logical datapath with concepts in the <ref + db="OVN_Northbound"/> database. + </p> + + <column name="external_ids" key="logical-switch" type='{"type": "uuid"}'> + For a logical datapath that represents a logical switch, + <code>ovn-northd</code> stores in this key the UUID of the + corresponding <ref table="Logical_Switch" db="OVN_Northbound"/> row in + the <ref db="OVN_Northbound"/> database. + </column> + + <column name="external_ids" key="logical-router" type='{"type": "uuid"}'> + For a logical datapath that represents a logical router, + <code>ovn-northd</code> stores in this key the UUID of the + corresponding <ref table="Logical_Router" db="OVN_Northbound"/> row in + the <ref db="OVN_Northbound"/> database. + </column> + </group> <group title="Common Columns"> The overall purpose of these columns is described under <code>Common diff --git a/tests/ovn.at b/tests/ovn.at index a42a319..319ab4b 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -510,12 +510,13 @@ AT_CLEANUP AT_BANNER([OVN end-to-end tests]) -AT_SETUP([ovn -- 3 HVs, 3 VIFs/HV, 1 logical switch]) +# 3 hypervisors, one logical switch, 3 logical ports per hypervisor +AT_SETUP([ovn -- 3 HVs, 1 LS, 3 lports/HV]) AT_SKIP_IF([test $HAVE_PYTHON = no]) ovn_start # Create hypervisors hv[123]. -# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123]. +# Add vif1[123] to hv1, vif2[123] to hv2, vif3[123] to hv3. # Add all of the vifs to a single logical switch lsw0. # Turn on port security on all the vifs except vif[123]1. # Make vif13, vif2[23], vif3[123] destinations for unknown MACs. @@ -676,3 +677,166 @@ for i in 1 2 3; do done done AT_CLEANUP + +# 3 hypervisors, 3 logical switches with 3 logical ports each, 1 logical router +AT_SETUP([ovn -- 3 HVs, 3 LS, 3 lports/LS, 1 LR]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) +ovn_start + +# Logical network: +# +# Three logical switches ls1, ls2, ls3. +# Three VIFs on each: lp1[123], lp2[123], lp3[123]. +# One logical router lr connected to ls[123]. +ovn-nbctl \ + -- create Logical_Router name=lr0 ports=@lrp1,@lrp2,@lrp3 \ + -- --id=@lrp1 create Logical_Router_Port name=lrp1 \ + network=192.168.1.254/24 mac='"00:00:00:00:ff:01"' \ + -- --id=@lrp2 create Logical_Router_Port name=lrp2 \ + network=192.168.2.254/24 mac='"00:00:00:00:ff:02"' \ + -- --id=@lrp3 create Logical_Router_Port name=lrp3 \ + network=192.168.3.254/24 mac='"00:00:00:00:ff:03"' +for i in 1 2 3; do + lrp_uuid=`ovn-nbctl get Logical_Router_Port lrp$i _uuid` + ovn-nbctl \ + -- lswitch-add ls$i \ + -- lport-add ls$i lrp$i-attachment \ + -- set Logical_Port lrp$i-attachment type=router \ + options:router-port=$lrp_uuid \ + addresses='"00:00:00:00:ff:0'$i'"' + for j in 1 2 3; do + ovn-nbctl \ + -- lport-add ls$i lp$i$j \ + -- lport-set-addresses lp$i$j "f0:00:00:00:00:$i$j 192.168.$i.$j" + done +done + +# Physical network: +# +# Three hypervisors hv[123]. +# lp1[123] spread across hv[123]: lp11 on hv1, lp12 on hv2, lp13 on hv3. +# lp2[123] spread across hv[23]: lp21 and lp22 on hv2, lp23 on hv3. +# lp3[123] all on hv3. + +# Given the name of a logical port, prints the name of the hypervisor +# on which it is located. +vif_to_hv() { + case $1 in dnl ( + 11) echo 1 ;; dnl ( + 12 | 21 | 22) echo 2 ;; dnl ( + 13 | 23 | 3?) echo 3 ;; + esac +} + +net_add n1 +for i in 1 2 3; do + sim_add hv$i + as hv$i + ovs-vsctl add-br br-phys + ovn_attach n1 br-phys 192.168.0.$i +done +for i in 1 2 3; do + for j in 1 2 3; do + hv=`vif_to_hv $i$j` + as hv$hv ovs-vsctl \ + -- add-port br-int vif$i$j \ + -- set Interface vif$i$j external-ids:iface-id=lp$i$j \ + options:tx_pcap=hv$hv/vif$i$j-tx.pcap \ + options:rxq_pcap=hv$hv/vif$i$j-rx.pcap \ + ofport-request=$i$j + done +done + +# Pre-populate the hypervisors' ARP tables so that we don't lose any +# packets for ARP resolution (native tunneling doesn't queue packets +# for ARP resolution). +ovn_populate_arp + +# Allow some time for ovn-northd and ovn-controller to catch up. +# XXX This should be more systematic. +sleep 1 + +# test_packet INPORT SRC_MAC DST_MAC SRC_IP DST_IP OUTPORT... +# +# This shell function causes a packet to be received on INPORT. The packet's +# content has Ethernet destination DST and source SRC (each exactly 12 hex +# digits) and Ethernet type ETHTYPE (4 hex digits). The OUTPORTs (zero or +# more) list the VIFs on which the packet should be received. INPORT and the +# OUTPORTs are specified as lport numbers, e.g. 11 for vif11. +trim_zeros() { + sed 's/\(00\)\{1,\}$//' +} +for i in 1 2 3; do + for j in 1 2 3; do + : > $i$j.expected + done +done +test_packet() { + # This packet has bad checksums but logical L3 routing doesn't check. + local inport=$1 src_mac=$2 dst_mac=$3 src_ip=$4 dst_ip=$5 + local packet=$3$208004500001c0000000040110000$4$50035111100080000 + shift; shift; shift; shift; shift + hv=hv`vif_to_hv $inport` + as $hv ovs-appctl netdev-dummy/receive vif$inport $packet + #as $hv ovs-appctl ofproto/trace br-int in_port=$inport $packet + for outport; do + ins=`echo $inport | sed 's/^\(.\).*/\1/'` + outs=`echo $outport | sed 's/^\(.\).*/\1/'` + if test $ins = $outs; then + # Ports on the same logical switch receive exactly the same packet. + echo $packet + else + # Routing decrements TTL and updates source and dest MAC + # (and checksum). + echo f000000000${outport}00000000ff0${outs}08004500001c00000000"3f1101"00${src_ip}${dst_ip}0035111100080000 + fi | trim_zeros >> $outport.expected + done +} + +as hv1 ovn-sbctl dump-flows +as hv1 ovs-ofctl dump-flows br-int + +# Send packets between all pairs of source and destination ports: +# +# 1. Unicast IP packets are delivered to exactly one lport (except +# that packets destined to their input ports are dropped). +# +# 2. Broadcast IP packets are delivered to all lports except the input port. +for is in 1 2 3; do + for js in 1 2 3; do + bcast= + s=$is$js + smac=f000000000$s + sip=c0a80${is}0${js} + for id in 1 2 3; do + for jd in 1 2 3; do + d=$id$jd + dip=c0a80${id}0${jd} + if test $is = $id; then dmac=f000000000$d; else dmac=00000000ff0$is; fi + if test $d != $s; then unicast=$d; else unicast=; fi + + test_packet $s $smac $dmac $sip $dip $unicast #1 + + if test $id = $is && test $jd != $js; then bcast="$bcast $d"; fi + done + done + test_packet $s $smac ffffffffffff $sip ffffffff $bcast #2 + done +done + +# Allow some time for packet forwarding. +# XXX This can be improved. +sleep 1 + +# Now check the packets actually received against the ones expected. +for i in 1 2 3; do + for j in 1 2 3; do + file=hv`vif_to_hv $i$j`/vif$i$j-tx.pcap + echo $file + $PYTHON "$top_srcdir/utilities/ovs-pcap.in" $file | trim_zeros > $i$j.packets + cp $i$j.expected expout + AT_CHECK([cat $i$j.packets], [0], [expout]) + echo + done +done +AT_CLEANUP -- 2.1.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev