On 3 July 2016 at 10:23, Ben Pfaff <b...@ovn.org> wrote:
> On Wed, Jun 29, 2016 at 01:17:10AM -0700, Gurucharan Shetty wrote:
> > ovn-controller now supports 2 new logical actions.
> >
> > 1. ct_lb;
> > Sends the packet through the conntrack zone to NAT
> > packets. Packets that are part of established connection
> > will automatically get NATed based on the NAT arguments
> > supplied to conntrack when the first packet was committed.
> >
> > 2. ct_lb(192.168.1.2, 192.168.1.3);
> > ct_lb(192.168.1.2:80, 192.168.1.3:80);
> > Creates an OpenFlow group with multiple buckets and equal weights
> > that changes the destination IP address (and port number) of the packet
> > statefully to one of the options provided inside the parenthesis.
> >
> > Signed-off-by: Gurucharan Shetty <g...@ovn.org>
>
> Thanks for implementing this!
>
> It looks to me like the VLOG_ERR calls in ofctrl_put() could get to be
> very repetitive if there is a bug that produces bad group strings. I'd
> rate-limit them.
>
I did that.
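For the record, the rate limiting is just the usual OVS vlog idiom applied to
the two VLOG_ERR calls in ofctrl_put(), roughly the sketch below (assuming the
standard VLOG_RATE_LIMIT_INIT/VLOG_ERR_RL macros); the exact wording in the
applied patch may differ:

    /* Sketch only: throttle the "bad group string" errors to at most
     * 5 messages per minute instead of logging every occurrence.  The
     * rate limiter would normally live at file or function scope. */
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);

    error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD, ds_cstr(&group_string),
                                    &usable_protocols);
    if (!error) {
        queue_group_mod(&gm);
    } else {
        /* Rate-limited replacement for the plain VLOG_ERR call. */
        VLOG_ERR_RL(&rl, "new group %s %s", error, ds_cstr(&group_string));
        free(error);
    }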
>
> I don't think that the 'group' member of struct group_info needs to be a
> pointer.
>
> I'm still bothered by putting the ct_lb() argument into a quoted
> string. It doesn't seem right to me.
>
> I'm appending an incremental, followed by a full patch, that implements
> the changes I suggest except for the rate-limiting. Either way, it
> requires the patch that I just sent out to be applied first, this one
> here:
> https://patchwork.ozlabs.org/patch/643797/

I looked at the above patch and sanity-tested it, together with the
incremental you provided for this patch, to confirm that the groups get
created properly in br-int. (There were a few merge issues with Ryan's
incremental processing patches, but those looked trivial.)

Thank you for providing the incremental below and making this better. I
applied it with a couple of changes:
1. Removed an unnecessary free from ovn_group_table_clear().
2. Removed lexer_get_string() from lex.c, since it is no longer needed.
With those two changes, I have applied this.

>
>
> If you're happy with my suggestions, then:
> Acked-by: Ben Pfaff <b...@ovn.org>
>
> --8<--------------------------cut here-------------------------->8--
>
> diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
> index b8175d3..6335f88 100644
> --- a/ovn/controller/ofctrl.c
> +++ b/ovn/controller/ofctrl.c
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2015 Nicira, Inc.
> +/* Copyright (c) 2015, 2016 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -648,8 +648,8 @@ ovn_group_table_clear(struct group_table *group_table, bool existing)
>      HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
>          hmap_remove(target_group, &g->hmap_node);
>          bitmap_set0(group_table->group_ids, g->group_id);
> -        ds_destroy(g->group);
> -        free(g->group);
> +        ds_destroy(&g->group);
> +        free(&g->group);
>          free(g);
>      }
>  }
> @@ -705,7 +705,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table *group_table)
>              char *error;
>              struct ds group_string = DS_EMPTY_INITIALIZER;
>              ds_put_format(&group_string, "group_id=%u,%s",
> -                          desired->group_id, ds_cstr(desired->group));
> +                          desired->group_id, ds_cstr(&desired->group));
>
>              error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
>                                              ds_cstr(&group_string),
> @@ -818,8 +818,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table *group_table)
>
>              /* Remove 'installed' from 'group_table->existing_groups' */
>              hmap_remove(&group_table->existing_groups, &installed->hmap_node);
> -            ds_destroy(installed->group);
> -            free(installed->group);
> +            ds_destroy(&installed->group);
>
>              /* Dealloc group_id.
*/ > bitmap_set0(group_table->group_ids, installed->group_id); > @@ -835,8 +834,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table > *group_table) > hmap_insert(&group_table->existing_groups, > &desired->hmap_node, > desired->hmap_node.hash); > } else { > - ds_destroy(desired->group); > - free(desired->group); > + ds_destroy(&desired->group); > free(desired); > } > } > diff --git a/ovn/controller/ovn-controller.c > b/ovn/controller/ovn-controller.c > index 2e397f3..fbaf630 100644 > --- a/ovn/controller/ovn-controller.c > +++ b/ovn/controller/ovn-controller.c > @@ -544,8 +544,7 @@ main(int argc, char *argv[]) > HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node, > &group_table.existing_groups) { > hmap_remove(&group_table.existing_groups, &installed->hmap_node); > - ds_destroy(installed->group); > - free(installed->group); > + ds_destroy(&installed->group); > free(installed); > } > hmap_destroy(&group_table.existing_groups); > diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c > index 81d809f..0d18adc 100644 > --- a/ovn/lib/actions.c > +++ b/ovn/lib/actions.c > @@ -626,6 +626,21 @@ parse_put_dhcp_opts_action(struct action_context *ctx, > finish_controller_op(ctx->ofpacts, oc_offset); > } > > +static bool > +action_parse_port(struct action_context *ctx, uint16_t *port) > +{ > + if (lexer_is_int(ctx->lexer)) { > + int value = ntohll(ctx->lexer->token.value.integer); > + if (value <= UINT16_MAX) { > + *port = value; > + lexer_get(ctx->lexer); > + return true; > + } > + } > + action_syntax_error(ctx, "expecting port number"); > + return false; > +} > + > static void > parse_ct_lb_action(struct action_context *ctx) > { > @@ -638,7 +653,7 @@ parse_ct_lb_action(struct action_context *ctx) > } > > if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) { > - /* ct_lb without parenthesis means that this is an established > + /* ct_lb without parentheses means that this is an established > * connection and we just need to do a NAT. 
*/ > const size_t ct_offset = ctx->ofpacts->size; > ofpbuf_pull(ctx->ofpacts, ct_offset); > @@ -669,47 +684,51 @@ parse_ct_lb_action(struct action_context *ctx) > return; > } > > - char *ips, *ip, *save = NULL; > uint32_t group_id = 0, bucket_id = 0, hash; > struct group_info *group_info; > struct ofpact_group *og; > - struct ds *ds; > > - if (!lexer_get_string(ctx->lexer, &ips) || !ips[0]) { > - action_error(ctx, "ct_lb has missing ip parameters."); > - return; > - } > + struct ds ds = DS_EMPTY_INITIALIZER; > + ds_put_format(&ds, "type=select"); > > - ds = xmalloc(sizeof *ds); > - ds_init(ds); > - ds_put_format(ds, "type=select"); > + BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0); > + BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS); > + do { > + if (ctx->lexer->token.type != LEX_T_INTEGER > + || mf_subvalue_width(&ctx->lexer->token.value) > 32) { > + action_syntax_error(ctx, "expecting IPv4 address"); > + ds_destroy(&ds); > + return; > + } > + ovs_be32 ip = ctx->lexer->token.value.ipv4; > + lexer_get(ctx->lexer); > + > + uint16_t port = 0; > + if (lexer_match(ctx->lexer, LEX_T_COLON) > + && !action_parse_port(ctx, &port)) { > + ds_destroy(&ds); > + return; > + } > > - ip = strtok_r(ips, ",", &save); > - ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions=" > - > "ct(nat(dst=%s),commit,table=%d,zone=NXM_NX_REG5[0..15])" > - ,bucket_id, ip, recirc_table); > - while ((ip = strtok_r(NULL, ",", &save)) != NULL) { > bucket_id++; > - ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions=" > - "ct(nat(dst=%s),commit,table=%d,zone=" > - "NXM_NX_REG5[0..15])", bucket_id, ip, recirc_table); > - } > + ds_put_format(&ds, ",bucket=bucket_id=%u,weight:100,actions=" > + "ct(nat(dst="IP_FMT, bucket_id, IP_ARGS(ip)); > + if (port) { > + ds_put_format(&ds, ":%"PRIu16, port); > + } > + ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])", > + recirc_table, MFF_LOG_CT_ZONE - MFF_REG0); > > - free(ips); > - if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { > - ds_destroy(ds); > - free(ds); > - action_syntax_error(ctx, "expecting `)'"); > - return; > - } > + lexer_match(ctx->lexer, LEX_T_COMMA); > + } while (!lexer_match(ctx->lexer, LEX_T_RPAREN)); > add_prerequisite(ctx, "ip"); > > - hash = hash_string(ds_cstr(ds), 0); > + hash = hash_string(ds_cstr(&ds), 0); > > /* Check whether we have non installed but allocated group_id. */ > HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash, > &ctx->ap->group_table->desired_groups) { > - if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) { > + if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) { > group_id = group_info->group_id; > break; > } > @@ -720,7 +739,7 @@ parse_ct_lb_action(struct action_context *ctx) > * combination. */ > HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash, > &ctx->ap->group_table->existing_groups) { > - if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) { > + if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) { > group_id = group_info->group_id; > } > } > @@ -732,8 +751,7 @@ parse_ct_lb_action(struct action_context *ctx) > } > > if (group_id == MAX_OVN_GROUPS + 1) { > - ds_destroy(ds); > - free(ds); > + ds_destroy(&ds); > action_error(ctx, "out of group ids."); > return; > } > @@ -747,8 +765,7 @@ parse_ct_lb_action(struct action_context *ctx) > hmap_insert(&ctx->ap->group_table->desired_groups, > &group_info->hmap_node, group_info->hmap_node.hash); > } else { > - ds_destroy(ds); > - free(ds); > + ds_destroy(&ds); > } > > /* Create an action to set the group. 
*/ > diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h > index 7f3f478..4918900 100644 > --- a/ovn/lib/actions.h > +++ b/ovn/lib/actions.h > @@ -21,6 +21,7 @@ > #include <stdint.h> > #include "compiler.h" > #include "hmap.h" > +#include "openvswitch/dynamic-string.h" > #include "util.h" > > struct expr; > @@ -41,7 +42,7 @@ struct group_table { > > struct group_info { > struct hmap_node hmap_node; > - struct ds *group; > + struct ds group; > uint32_t group_id; > }; > > diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml > index 61b6c25..c5a4834 100644 > --- a/ovn/ovn-sb.xml > +++ b/ovn/ovn-sb.xml > @@ -965,7 +965,7 @@ > <p> > <code>ct_dnat(<var>IP</var>)</code> sends the packet through > the > DNAT zone to change the destination IP address of the packet > to > - the one provided inside the parenthesis and commits the > connection. > + the one provided inside the parentheses and commits the > connection. > The packet is then automatically sent to the next tables as if > followed by <code>next;</code> action. The next tables will > see > the changes in the packet caused by the connection tracker. > @@ -1110,26 +1110,27 @@ > </dd> > > <dt><code>ct_lb;</code></dt> > - <dt><code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>");</code></dt> > + > <dt><code>ct_lb(</code><var>ip</var>[<code>:</code><var>port</var>]...<code>);</code></dt> > <dd> > <p> > - <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code> > - commits the packet to the connection tracking table and DNATs > the > - packet's destination IP address (and port) to any one of the > - provided IP address (and port) inside the parenthesis. Each > of > - the provided IP address is given equal weight while picking > the > - DNAT address. The processing automatically moves on to the > next > - table and the next tables will see the changes in the packet > caused > - by the connection tracker. The connection tracking state is > scoped > - by the logical port, so overlapping addresses may be used. > - </p> > - <p> > - <code>ct_lb</code> sends the packet to the connection tracking > - table to NAT the packets. If the packet is part of an > established > - connection that was previously committed to the connection > tracker > - via > <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code> > - action, it will automatically get DNATed to the same IP > address > - as the first packet in that connection. > + With one or more arguments, <code>ct_lb</code> commits the > packet > + to the connection tracking table and DNATs the packet's > destination > + IP address (and port) to the IP address or addresses (and > optional > + ports) specified in the string. If multiple comma-separated > IP > + addresses are specified, each is given equal weight for > picking the > + DNAT address. Processing automatically moves on to the next > table, > + as if <code>next;</code> were specified, and later tables act > on > + the packet as modified by the connection tracker. Connection > + tracking state is scoped by the logical port, so overlapping > + addresses may be used. > + </p> > + <p> > + Without arguments, <code>ct_lb</code> sends the packet to the > + connection tracking table to NAT the packets. If the packet > is > + part of an established connection that was previously > committed to > + the connection tracker via > <code>ct_lb(</code>...<code>)</code>, it > + will automatically get DNATed to the same IP address as the > first > + packet in that connection. 
> </p> > </dd> > </dl> > diff --git a/tests/ovn.at b/tests/ovn.at > index 8728b45..7a25402 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -506,11 +506,12 @@ ip.ttl => Syntax error at end of input expecting > `--'. > > # load balancing. > ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip > -ct_lb(); => ct_lb has missing ip parameters. > -ct_lb(""); => ct_lb has missing ip parameters. > -ct_lb("192.168.1.2:80, 192.168.1.3:80"); => actions=group:1, prereqs=ip > -ct_lb("192.168.1.2, 192.168.1.3"); => actions=group:2, prereqs=ip > -ct_lb("foo"); => actions=group:3, prereqs=ip > +ct_lb(); => Syntax error at `)' expecting IPv4 address. > +ct_lb(192.168.1.2:80, 192.168.1.3:80); => actions=group:1, prereqs=ip > +ct_lb(192.168.1.2, 192.168.1.3, ); => actions=group:2, prereqs=ip > +ct_lb(192.168.1.2:); => Syntax error at `)' expecting port number. > +ct_lb(192.168.1.2:123456); => Syntax error at `123456' expecting port > number. > +ct_lb(foo); => Syntax error at `foo' expecting IPv4 address. > > # conntrack > ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip > > --8<--------------------------cut here-------------------------->8-- > > Author: Gurucharan Shetty <g...@ovn.org> > Date: Wed Jun 29 01:17:10 2016 -0700 > > ovn-controller: Add support for load balancing. > > ovn-controller now supports 2 new logical actions. > > 1. ct_lb; > Sends the packet through the conntrack zone to NAT > packets. Packets that are part of established connection > will automatically get NATed based on the NAT arguments > supplied to conntrack when the first packet was committed. > > 2. ct_lb(192.168.1.2, 192.168.1.3); > ct_lb(192.168.1.2:80, 192.168.1.3:80); > Creates an OpenFlow group with multiple buckets and equal weights > that changes the destination IP address (and port number) of the packet > statefully to one of the options provided inside the parenthesis. 
> > Signed-off-by: Gurucharan Shetty <g...@ovn.org> > Signed-off-by: Ben Pfaff <b...@ovn.org> > > diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c > index 52e6131..837e5c5 100644 > --- a/ovn/controller/lflow.c > +++ b/ovn/controller/lflow.c > @@ -200,6 +200,7 @@ add_logical_flows(struct controller_ctx *ctx, const > struct lport_index *lports, > const struct mcgroup_index *mcgroups, > const struct hmap *local_datapaths, > const struct hmap *patched_datapaths, > + struct group_table *group_table, > const struct simap *ct_zones, struct hmap *flow_table) > { > uint32_t conj_id_ofs = 1; > @@ -286,6 +287,7 @@ add_logical_flows(struct controller_ctx *ctx, const > struct lport_index *lports, > .lookup_port = lookup_port_cb, > .aux = &aux, > .ct_zones = ct_zones, > + .group_table = group_table, > > .n_tables = LOG_PIPELINE_LEN, > .first_ptable = first_ptable, > @@ -437,10 +439,11 @@ lflow_run(struct controller_ctx *ctx, const struct > lport_index *lports, > const struct mcgroup_index *mcgroups, > const struct hmap *local_datapaths, > const struct hmap *patched_datapaths, > + struct group_table *group_table, > const struct simap *ct_zones, struct hmap *flow_table) > { > add_logical_flows(ctx, lports, mcgroups, local_datapaths, > - patched_datapaths, ct_zones, flow_table); > + patched_datapaths, group_table, ct_zones, > flow_table); > add_neighbor_flows(ctx, lports, flow_table); > } > > diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h > index a3fc50c..e96a24b 100644 > --- a/ovn/controller/lflow.h > +++ b/ovn/controller/lflow.h > @@ -36,6 +36,7 @@ > #include <stdint.h> > > struct controller_ctx; > +struct group_table; > struct hmap; > struct lport_index; > struct mcgroup_index; > @@ -63,6 +64,7 @@ void lflow_run(struct controller_ctx *, const struct > lport_index *, > const struct mcgroup_index *, > const struct hmap *local_datapaths, > const struct hmap *patched_datapaths, > + struct group_table *group_table, > const struct simap *ct_zones, > struct hmap *flow_table); > void lflow_destroy(void); > diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c > index f537bc0..6335f88 100644 > --- a/ovn/controller/ofctrl.c > +++ b/ovn/controller/ofctrl.c > @@ -1,4 +1,4 @@ > -/* Copyright (c) 2015 Nicira, Inc. > +/* Copyright (c) 2015, 2016 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -14,6 +14,7 @@ > */ > > #include <config.h> > +#include "bitmap.h" > #include "byte-order.h" > #include "dirs.h" > #include "hash.h" > @@ -24,11 +25,13 @@ > #include "openvswitch/match.h" > #include "openvswitch/ofp-actions.h" > #include "openvswitch/ofp-msgs.h" > +#include "openvswitch/ofp-parse.h" > #include "openvswitch/ofp-print.h" > #include "openvswitch/ofp-util.h" > #include "openvswitch/ofpbuf.h" > #include "openvswitch/vlog.h" > #include "ovn-controller.h" > +#include "ovn/lib/actions.h" > #include "physical.h" > #include "rconn.h" > #include "socket-util.h" > @@ -63,6 +66,8 @@ static void queue_flow_mod(struct ofputil_flow_mod *); > /* OpenFlow connection to the switch. */ > static struct rconn *swconn; > > +static void queue_group_mod(struct ofputil_group_mod *); > + > /* Last seen sequence number for 'swconn'. When this differs from > * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */ > static unsigned int seqno; > @@ -95,6 +100,9 @@ static struct rconn_packet_counter *tx_counter; > * installed in the switch. 
*/ > static struct hmap installed_flows; > > +/* A reference to the group_table. */ > +static struct group_table *groups; > + > /* MFF_* field ID for our Geneve option. In S_TLV_TABLE_MOD_SENT, this is > * the option we requested (we don't know whether we obtained it yet). In > * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */ > @@ -103,6 +111,9 @@ static enum mf_field_id mff_ovn_geneve; > static void ovn_flow_table_clear(struct hmap *flow_table); > static void ovn_flow_table_destroy(struct hmap *flow_table); > > +static void ovn_group_table_clear(struct group_table *group_table, > + bool existing); > + > static void ofctrl_recv(const struct ofp_header *, enum ofptype); > > void > @@ -312,9 +323,23 @@ run_S_CLEAR_FLOWS(void) > queue_flow_mod(&fm); > VLOG_DBG("clearing all flows"); > > + struct ofputil_group_mod gm; > + memset(&gm, 0, sizeof gm); > + gm.command = OFPGC11_DELETE; > + gm.group_id = OFPG_ALL; > + gm.command_bucket_id = OFPG15_BUCKET_ALL; > + ovs_list_init(&gm.buckets); > + queue_group_mod(&gm); > + ofputil_bucket_list_destroy(&gm.buckets); > + > /* Clear installed_flows, to match the state of the switch. */ > ovn_flow_table_clear(&installed_flows); > > + /* Clear existing groups, to match the state of the switch. */ > + if (groups) { > + ovn_group_table_clear(groups, true); > + } > + > state = S_UPDATE_FLOWS; > } > > @@ -591,16 +616,71 @@ queue_flow_mod(struct ofputil_flow_mod *fm) > queue_msg(ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM)); > } > > + > +/* group_table. */ > + > +/* Finds and returns a group_info in 'existing_groups' whose key is > identical > + * to 'target''s key, or NULL if there is none. */ > +static struct group_info * > +ovn_group_lookup(struct hmap *exisiting_groups, > + const struct group_info *target) > +{ > + struct group_info *e; > + > + HMAP_FOR_EACH_WITH_HASH(e, hmap_node, target->hmap_node.hash, > + exisiting_groups) { > + if (e->group_id == target->group_id) { > + return e; > + } > + } > + return NULL; > +} > + > +/* Clear either desired_groups or existing_groups in group_table. */ > +static void > +ovn_group_table_clear(struct group_table *group_table, bool existing) > +{ > + struct group_info *g, *next; > + struct hmap *target_group = existing > + ? &group_table->existing_groups > + : &group_table->desired_groups; > + > + HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) { > + hmap_remove(target_group, &g->hmap_node); > + bitmap_set0(group_table->group_ids, g->group_id); > + ds_destroy(&g->group); > + free(&g->group); > + free(g); > + } > +} > + > +static void > +queue_group_mod(struct ofputil_group_mod *gm) > +{ > + queue_msg(ofputil_encode_group_mod(OFP13_VERSION, gm)); > +} > + > + > /* Replaces the flow table on the switch, if possible, by the flows in > * 'flow_table', which should have been added with ofctrl_add_flow(). > * Regardless of whether the flow table is updated, this deletes all of > the > * flows from 'flow_table' and frees them. (The hmap itself isn't > * destroyed.) > * > + * Replaces the group table on the switch, if possible, by the groups in > + * 'group_table->desired_groups'. Regardless of whether the group table > + * is updated, this deletes all the groups from the > + * 'group_table->desired_groups' and frees them. (The hmap itself isn't > + * destroyed.) > + * > * This called be called be ofctrl_run() within the main loop. 
*/ > void > -ofctrl_put(struct hmap *flow_table) > +ofctrl_put(struct hmap *flow_table, struct group_table *group_table) > { > + if (!groups) { > + groups = group_table; > + } > + > /* The flow table can be updated if the connection to the switch is > up and > * in the correct state and not backlogged with existing flow_mods. > (Our > * criteria for being backlogged appear very conservative, but the > socket > @@ -610,9 +690,37 @@ ofctrl_put(struct hmap *flow_table) > if (state != S_UPDATE_FLOWS > || rconn_packet_counter_n_packets(tx_counter)) { > ovn_flow_table_clear(flow_table); > + ovn_group_table_clear(group_table, false); > return; > } > > + /* Iterate through all the desired groups. If there are new ones, > + * add them to the switch. */ > + struct group_info *desired; > + HMAP_FOR_EACH(desired, hmap_node, &group_table->desired_groups) { > + if (!ovn_group_lookup(&group_table->existing_groups, desired)) { > + /* Create and install new group. */ > + struct ofputil_group_mod gm; > + enum ofputil_protocol usable_protocols; > + char *error; > + struct ds group_string = DS_EMPTY_INITIALIZER; > + ds_put_format(&group_string, "group_id=%u,%s", > + desired->group_id, ds_cstr(&desired->group)); > + > + error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD, > + ds_cstr(&group_string), > + &usable_protocols); > + if (!error) { > + queue_group_mod(&gm); > + } else { > + VLOG_ERR("new group %s %s", error, > ds_cstr(&group_string)); > + free(error); > + } > + ds_destroy(&group_string); > + ofputil_bucket_list_destroy(&gm.buckets); > + } > + } > + > /* Iterate through all of the installed flows. If any of them are no > * longer desired, delete them; if any of them should have different > * actions, update them. */ > @@ -682,4 +790,52 @@ ofctrl_put(struct hmap *flow_table) > hmap_remove(flow_table, &d->hmap_node); > hmap_insert(&installed_flows, &d->hmap_node, d->hmap_node.hash); > } > + > + /* Iterate through the installed groups from previous runs. If they > + * are not needed delete them. */ > + struct group_info *installed, *next_group; > + HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node, > + &group_table->existing_groups) { > + if (!ovn_group_lookup(&group_table->desired_groups, installed)) { > + /* Delete the group. */ > + struct ofputil_group_mod gm; > + enum ofputil_protocol usable_protocols; > + char *error; > + struct ds group_string = DS_EMPTY_INITIALIZER; > + ds_put_format(&group_string, "group_id=%u", > installed->group_id); > + > + error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE, > + ds_cstr(&group_string), > + &usable_protocols); > + if (!error) { > + queue_group_mod(&gm); > + } else { > + VLOG_ERR("%s", error); > + free(error); > + } > + ds_destroy(&group_string); > + ofputil_bucket_list_destroy(&gm.buckets); > + > + /* Remove 'installed' from 'group_table->existing_groups' */ > + hmap_remove(&group_table->existing_groups, > &installed->hmap_node); > + ds_destroy(&installed->group); > + > + /* Dealloc group_id. */ > + bitmap_set0(group_table->group_ids, installed->group_id); > + free(installed); > + } > + } > + > + /* Move the contents of desired_groups to existing_groups. 
*/ > + HMAP_FOR_EACH_SAFE(desired, next_group, hmap_node, > + &group_table->desired_groups) { > + hmap_remove(&group_table->desired_groups, &desired->hmap_node); > + if (!ovn_group_lookup(&group_table->existing_groups, desired)) { > + hmap_insert(&group_table->existing_groups, > &desired->hmap_node, > + desired->hmap_node.hash); > + } else { > + ds_destroy(&desired->group); > + free(desired); > + } > + } > } > diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h > index bc9cfba..bf5dfd5 100644 > --- a/ovn/controller/ofctrl.h > +++ b/ovn/controller/ofctrl.h > @@ -26,11 +26,12 @@ struct hmap; > struct match; > struct ofpbuf; > struct ovsrec_bridge; > +struct group_table; > > /* Interface for OVN main loop. */ > void ofctrl_init(void); > enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int); > -void ofctrl_put(struct hmap *flows); > +void ofctrl_put(struct hmap *flows, struct group_table *group_table); > void ofctrl_wait(void); > void ofctrl_destroy(void); > > diff --git a/ovn/controller/ovn-controller.c > b/ovn/controller/ovn-controller.c > index 47e6824..fbaf630 100644 > --- a/ovn/controller/ovn-controller.c > +++ b/ovn/controller/ovn-controller.c > @@ -39,6 +39,7 @@ > #include "ofctrl.h" > #include "openvswitch/vconn.h" > #include "openvswitch/vlog.h" > +#include "ovn/lib/actions.h" > #include "ovn/lib/ovn-sb-idl.h" > #include "ovn/lib/ovn-util.h" > #include "patch.h" > @@ -351,6 +352,13 @@ main(int argc, char *argv[]) > } > unixctl_command_register("exit", "", 0, 0, ovn_controller_exit, > &exiting); > > + /* Initialize group ids for loadbalancing. */ > + struct group_table group_table; > + group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS); > + bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */ > + hmap_init(&group_table.desired_groups); > + hmap_init(&group_table.existing_groups); > + > daemonize_complete(); > > ovsrec_init(); > @@ -462,13 +470,14 @@ main(int argc, char *argv[]) > > struct hmap flow_table = HMAP_INITIALIZER(&flow_table); > lflow_run(&ctx, &lports, &mcgroups, &local_datapaths, > - &patched_datapaths, &ct_zones, &flow_table); > + &patched_datapaths, &group_table, &ct_zones, > + &flow_table); > if (chassis_id) { > physical_run(&ctx, mff_ovn_geneve, > br_int, chassis_id, &ct_zones, &flow_table, > &local_datapaths, &patched_datapaths); > } > - ofctrl_put(&flow_table); > + ofctrl_put(&flow_table, &group_table); > hmap_destroy(&flow_table); > } > > @@ -528,6 +537,18 @@ main(int argc, char *argv[]) > > simap_destroy(&ct_zones); > > + bitmap_free(group_table.group_ids); > + hmap_destroy(&group_table.desired_groups); > + > + struct group_info *installed, *next_group; > + HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node, > + &group_table.existing_groups) { > + hmap_remove(&group_table.existing_groups, &installed->hmap_node); > + ds_destroy(&installed->group); > + free(installed); > + } > + hmap_destroy(&group_table.existing_groups); > + > ovsdb_idl_loop_destroy(&ovs_idl_loop); > ovsdb_idl_loop_destroy(&ovnsb_idl_loop); > > diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c > index 569970e..0d18adc 100644 > --- a/ovn/lib/actions.c > +++ b/ovn/lib/actions.c > @@ -18,10 +18,13 @@ > #include <stdarg.h> > #include <stdbool.h> > #include "actions.h" > +#include "bitmap.h" > #include "byte-order.h" > #include "compiler.h" > #include "ovn-dhcp.h" > #include "expr.h" > +#include "hash.h" > +#include "hmap.h" > #include "lex.h" > #include "logical-fields.h" > #include "nx-match.h" > @@ -623,6 +626,153 @@ parse_put_dhcp_opts_action(struct 
action_context > *ctx, > finish_controller_op(ctx->ofpacts, oc_offset); > } > > +static bool > +action_parse_port(struct action_context *ctx, uint16_t *port) > +{ > + if (lexer_is_int(ctx->lexer)) { > + int value = ntohll(ctx->lexer->token.value.integer); > + if (value <= UINT16_MAX) { > + *port = value; > + lexer_get(ctx->lexer); > + return true; > + } > + } > + action_syntax_error(ctx, "expecting port number"); > + return false; > +} > + > +static void > +parse_ct_lb_action(struct action_context *ctx) > +{ > + uint8_t recirc_table; > + if (ctx->ap->cur_ltable < ctx->ap->n_tables) { > + recirc_table = ctx->ap->first_ptable + ctx->ap->cur_ltable + 1; > + } else { > + action_error(ctx, "\"ct_lb\" action not allowed in last table."); > + return; > + } > + > + if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) { > + /* ct_lb without parentheses means that this is an established > + * connection and we just need to do a NAT. */ > + const size_t ct_offset = ctx->ofpacts->size; > + ofpbuf_pull(ctx->ofpacts, ct_offset); > + > + struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts); > + struct ofpact_nat *nat; > + size_t nat_offset; > + ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE); > + ct->zone_src.ofs = 0; > + ct->zone_src.n_bits = 16; > + ct->flags = 0; > + ct->recirc_table = recirc_table; > + ct->alg = 0; > + > + add_prerequisite(ctx, "ip"); > + > + nat_offset = ctx->ofpacts->size; > + ofpbuf_pull(ctx->ofpacts, nat_offset); > + > + nat = ofpact_put_NAT(ctx->ofpacts); > + nat->flags = 0; > + nat->range_af = AF_UNSPEC; > + > + ctx->ofpacts->header = ofpbuf_push_uninit(ctx->ofpacts, > nat_offset); > + ct = ctx->ofpacts->header; > + ofpact_finish(ctx->ofpacts, &ct->ofpact); > + ofpbuf_push_uninit(ctx->ofpacts, ct_offset); > + return; > + } > + > + uint32_t group_id = 0, bucket_id = 0, hash; > + struct group_info *group_info; > + struct ofpact_group *og; > + > + struct ds ds = DS_EMPTY_INITIALIZER; > + ds_put_format(&ds, "type=select"); > + > + BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0); > + BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS); > + do { > + if (ctx->lexer->token.type != LEX_T_INTEGER > + || mf_subvalue_width(&ctx->lexer->token.value) > 32) { > + action_syntax_error(ctx, "expecting IPv4 address"); > + ds_destroy(&ds); > + return; > + } > + ovs_be32 ip = ctx->lexer->token.value.ipv4; > + lexer_get(ctx->lexer); > + > + uint16_t port = 0; > + if (lexer_match(ctx->lexer, LEX_T_COLON) > + && !action_parse_port(ctx, &port)) { > + ds_destroy(&ds); > + return; > + } > + > + bucket_id++; > + ds_put_format(&ds, ",bucket=bucket_id=%u,weight:100,actions=" > + "ct(nat(dst="IP_FMT, bucket_id, IP_ARGS(ip)); > + if (port) { > + ds_put_format(&ds, ":%"PRIu16, port); > + } > + ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])", > + recirc_table, MFF_LOG_CT_ZONE - MFF_REG0); > + > + lexer_match(ctx->lexer, LEX_T_COMMA); > + } while (!lexer_match(ctx->lexer, LEX_T_RPAREN)); > + add_prerequisite(ctx, "ip"); > + > + hash = hash_string(ds_cstr(&ds), 0); > + > + /* Check whether we have non installed but allocated group_id. */ > + HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash, > + &ctx->ap->group_table->desired_groups) { > + if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) { > + group_id = group_info->group_id; > + break; > + } > + } > + > + if (!group_id) { > + /* Check whether we already have an installed entry for this > + * combination. 
*/ > + HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash, > + &ctx->ap->group_table->existing_groups) { > + if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) { > + group_id = group_info->group_id; > + } > + } > + > + if (!group_id) { > + /* Reserve a new group_id. */ > + group_id = bitmap_scan(ctx->ap->group_table->group_ids, 0, 1, > + MAX_OVN_GROUPS + 1); > + } > + > + if (group_id == MAX_OVN_GROUPS + 1) { > + ds_destroy(&ds); > + action_error(ctx, "out of group ids."); > + return; > + } > + bitmap_set1(ctx->ap->group_table->group_ids, group_id); > + > + group_info = xmalloc(sizeof *group_info); > + group_info->group = ds; > + group_info->group_id = group_id; > + group_info->hmap_node.hash = hash; > + > + hmap_insert(&ctx->ap->group_table->desired_groups, > + &group_info->hmap_node, group_info->hmap_node.hash); > + } else { > + ds_destroy(&ds); > + } > + > + /* Create an action to set the group. */ > + og = ofpact_put_GROUP(ctx->ofpacts); > + og->group_id = group_id; > +} > + > static void > emit_ct(struct action_context *ctx, bool recirc_next, bool commit) > { > @@ -762,6 +912,8 @@ parse_action(struct action_context *ctx) > parse_ct_nat(ctx, false); > } else if (lexer_match_id(ctx->lexer, "ct_snat")) { > parse_ct_nat(ctx, true); > + } else if (lexer_match_id(ctx->lexer, "ct_lb")) { > + parse_ct_lb_action(ctx); > } else if (lexer_match_id(ctx->lexer, "arp")) { > parse_arp_action(ctx); > } else if (lexer_match_id(ctx->lexer, "get_arp")) { > diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h > index f49e15e..4918900 100644 > --- a/ovn/lib/actions.h > +++ b/ovn/lib/actions.h > @@ -20,6 +20,8 @@ > #include <stdbool.h> > #include <stdint.h> > #include "compiler.h" > +#include "hmap.h" > +#include "openvswitch/dynamic-string.h" > #include "util.h" > > struct expr; > @@ -28,6 +30,22 @@ struct ofpbuf; > struct shash; > struct simap; > > +#define MAX_OVN_GROUPS 65535 > + > +struct group_table { > + unsigned long *group_ids; /* Used as a bitmap with value set > + * for allocated group ids in either > + * desired_groups or existing_groups. */ > + struct hmap desired_groups; > + struct hmap existing_groups; > +}; > + > +struct group_info { > + struct hmap_node hmap_node; > + struct ds group; > + uint32_t group_id; > +}; > + > enum action_opcode { > /* "arp { ...actions... }". > * > @@ -80,6 +98,9 @@ struct action_params { > /* A map from a port name to its connection tracking zone. */ > const struct simap *ct_zones; > > + /* A struct to figure out the group_id for group actions. */ > + struct group_table *group_table; > + > /* OVN maps each logical flow table (ltable), one-to-one, onto a > physical > * OpenFlow flow table (ptable). 
A number of parameters describe this > * mapping and data related to flow tables: > diff --git a/ovn/lib/lex.c b/ovn/lib/lex.c > index 89da666..bd748d0 100644 > --- a/ovn/lib/lex.c > +++ b/ovn/lib/lex.c > @@ -864,3 +864,16 @@ lexer_get_int(struct lexer *lexer, int *value) > return false; > } > } > + > +bool > +lexer_get_string(struct lexer *lexer, char **str) > +{ > + if (lexer->token.type == LEX_T_STRING) { > + *str = xstrdup(lexer->token.s); > + lexer_get(lexer); > + return true; > + } else { > + *str = NULL; > + return false; > + } > +} > diff --git a/ovn/lib/lex.h b/ovn/lib/lex.h > index 22f2807..7701c93 100644 > --- a/ovn/lib/lex.h > +++ b/ovn/lib/lex.h > @@ -122,5 +122,6 @@ bool lexer_match(struct lexer *, enum lex_type); > bool lexer_match_id(struct lexer *, const char *id); > bool lexer_is_int(const struct lexer *); > bool lexer_get_int(struct lexer *, int *value); > +bool lexer_get_string(struct lexer *, char **str); > > #endif /* ovn/lex.h */ > diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml > index f330374..c5a4834 100644 > --- a/ovn/ovn-sb.xml > +++ b/ovn/ovn-sb.xml > @@ -965,7 +965,7 @@ > <p> > <code>ct_dnat(<var>IP</var>)</code> sends the packet through > the > DNAT zone to change the destination IP address of the packet > to > - the one provided inside the parenthesis and commits the > connection. > + the one provided inside the parentheses and commits the > connection. > The packet is then automatically sent to the next tables as if > followed by <code>next;</code> action. The next tables will > see > the changes in the packet caused by the connection tracker. > @@ -1108,6 +1108,31 @@ > </code> > </p> > </dd> > + > + <dt><code>ct_lb;</code></dt> > + > <dt><code>ct_lb(</code><var>ip</var>[<code>:</code><var>port</var>]...<code>);</code></dt> > + <dd> > + <p> > + With one or more arguments, <code>ct_lb</code> commits the > packet > + to the connection tracking table and DNATs the packet's > destination > + IP address (and port) to the IP address or addresses (and > optional > + ports) specified in the string. If multiple comma-separated > IP > + addresses are specified, each is given equal weight for > picking the > + DNAT address. Processing automatically moves on to the next > table, > + as if <code>next;</code> were specified, and later tables act > on > + the packet as modified by the connection tracker. Connection > + tracking state is scoped by the logical port, so overlapping > + addresses may be used. > + </p> > + <p> > + Without arguments, <code>ct_lb</code> sends the packet to the > + connection tracking table to NAT the packets. If the packet > is > + part of an established connection that was previously > committed to > + the connection tracker via > <code>ct_lb(</code>...<code>)</code>, it > + will automatically get DNATed to the same IP address as the > first > + packet in that connection. > + </p> > + </dd> > </dl> > > <p> > diff --git a/tests/ovn.at b/tests/ovn.at > index 89879ce..7a25402 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -504,6 +504,15 @@ reg0[0..7] <-> ip.proto; => Field ip.proto is not > modifiable. > ip.ttl--; => actions=dec_ttl, prereqs=ip > ip.ttl => Syntax error at end of input expecting `--'. > > +# load balancing. > +ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip > +ct_lb(); => Syntax error at `)' expecting IPv4 address. 
> +ct_lb(192.168.1.2:80, 192.168.1.3:80); => actions=group:1, prereqs=ip > +ct_lb(192.168.1.2, 192.168.1.3, ); => actions=group:2, prereqs=ip > +ct_lb(192.168.1.2:); => Syntax error at `)' expecting port number. > +ct_lb(192.168.1.2:123456); => Syntax error at `123456' expecting port > number. > +ct_lb(foo); => Syntax error at `foo' expecting IPv4 address. > + > # conntrack > ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip > ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip > diff --git a/tests/test-ovn.c b/tests/test-ovn.c > index 18e5aca..1c468d6 100644 > --- a/tests/test-ovn.c > +++ b/tests/test-ovn.c > @@ -1257,6 +1257,13 @@ test_parse_actions(struct ovs_cmdl_context *ctx > OVS_UNUSED) > create_symtab(&symtab); > create_dhcp_opts(&dhcp_opts); > > + /* Initialize group ids. */ > + struct group_table group_table; > + group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS); > + bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */ > + hmap_init(&group_table.desired_groups); > + hmap_init(&group_table.existing_groups); > + > simap_init(&ports); > simap_put(&ports, "eth0", 5); > simap_put(&ports, "eth1", 6); > @@ -1277,6 +1284,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx > OVS_UNUSED) > .lookup_port = lookup_port_cb, > .aux = &ports, > .ct_zones = &ct_zones, > + .group_table = &group_table, > > .n_tables = 16, > .first_ptable = 16, > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev