On 3 July 2016 at 10:23, Ben Pfaff <b...@ovn.org> wrote:

> On Wed, Jun 29, 2016 at 01:17:10AM -0700, Gurucharan Shetty wrote:
> > ovn-controller now supports 2 new logical actions.
> >
> > 1. ct_lb;
> > Sends the packet through the conntrack zone to NAT
> > packets. Packets that are part of established connection
> > will automatically get NATed based on the NAT arguments
> > supplied to conntrack when the first packet was committed.
> >
> > 2. ct_lb(192.168.1.2, 192.168.1.3);
> >    ct_lb(192.168.1.2:80, 192.168.1.3:80);
> > Creates an OpenFlow group with multiple buckets and equal weights
> > that changes the destination IP address (and port number) of the packet
> > statefully to one of the options provided inside the parenthesis.
> >
> > Signed-off-by: Gurucharan Shetty <g...@ovn.org>
>
> Thanks for implementing this!
>
> It looks to me like the VLOG_ERR calls in ofctrl_put() could get to be
> very repetitive if there is a bug that produces bad group strings.  I'd
> rate-limit them.
>
I did that.


>
> I don't think that the 'group' member of struct group_info needs to be a
> pointer.
>
> I'm still bothered by putting the ct_lb() argument into a quoted
> string.  It doesn't seem right to me.
>
> I'm appending an incremental, followed by a full patch, that implements
> the changes I suggest except for the rate-limiting.  Either way, it
> requires the patch that I just sent out to be applied first, this one
> here:
>         https://patchwork.ozlabs.org/patch/643797/


I looked at the above patch, sanity tested it with your provided
incremental for this patch to see that groups are getting created properly
in br-int. (There were a few merge issues with Ryan's incremental
processing patches, but that looked trivial).

Thank you for providing the below incremental and making this better. I
applied it with couple of changes
1. Removed a unnecessary free from ovn_group_table_clear()
2. Removed lexer_get_string() from lex.c as it was not needed any more.

And applied this.


>
>
> If you're happy with my suggestions, then:
> Acked-by: Ben Pfaff <b...@ovn.org>
>
> --8<--------------------------cut here-------------------------->8--
>
> diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
> index b8175d3..6335f88 100644
> --- a/ovn/controller/ofctrl.c
> +++ b/ovn/controller/ofctrl.c
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2015 Nicira, Inc.
> +/* Copyright (c) 2015, 2016 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -648,8 +648,8 @@ ovn_group_table_clear(struct group_table *group_table,
> bool existing)
>      HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
>          hmap_remove(target_group, &g->hmap_node);
>          bitmap_set0(group_table->group_ids, g->group_id);
> -        ds_destroy(g->group);
> -        free(g->group);
> +        ds_destroy(&g->group);
> +        free(&g->group);
>          free(g);
>      }
>  }
> @@ -705,7 +705,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table
> *group_table)
>              char *error;
>              struct ds group_string = DS_EMPTY_INITIALIZER;
>              ds_put_format(&group_string, "group_id=%u,%s",
> -                          desired->group_id, ds_cstr(desired->group));
> +                          desired->group_id, ds_cstr(&desired->group));
>
>              error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
>                                              ds_cstr(&group_string),
> @@ -818,8 +818,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table
> *group_table)
>
>              /* Remove 'installed' from 'group_table->existing_groups' */
>              hmap_remove(&group_table->existing_groups,
> &installed->hmap_node);
> -            ds_destroy(installed->group);
> -            free(installed->group);
> +            ds_destroy(&installed->group);
>
>              /* Dealloc group_id. */
>              bitmap_set0(group_table->group_ids, installed->group_id);
> @@ -835,8 +834,7 @@ ofctrl_put(struct hmap *flow_table, struct group_table
> *group_table)
>              hmap_insert(&group_table->existing_groups,
> &desired->hmap_node,
>                          desired->hmap_node.hash);
>          } else {
> -            ds_destroy(desired->group);
> -            free(desired->group);
> +            ds_destroy(&desired->group);
>              free(desired);
>          }
>      }
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 2e397f3..fbaf630 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -544,8 +544,7 @@ main(int argc, char *argv[])
>      HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
>                         &group_table.existing_groups) {
>          hmap_remove(&group_table.existing_groups, &installed->hmap_node);
> -        ds_destroy(installed->group);
> -        free(installed->group);
> +        ds_destroy(&installed->group);
>          free(installed);
>      }
>      hmap_destroy(&group_table.existing_groups);
> diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
> index 81d809f..0d18adc 100644
> --- a/ovn/lib/actions.c
> +++ b/ovn/lib/actions.c
> @@ -626,6 +626,21 @@ parse_put_dhcp_opts_action(struct action_context *ctx,
>      finish_controller_op(ctx->ofpacts, oc_offset);
>  }
>
> +static bool
> +action_parse_port(struct action_context *ctx, uint16_t *port)
> +{
> +    if (lexer_is_int(ctx->lexer)) {
> +        int value = ntohll(ctx->lexer->token.value.integer);
> +        if (value <= UINT16_MAX) {
> +            *port = value;
> +            lexer_get(ctx->lexer);
> +            return true;
> +        }
> +    }
> +    action_syntax_error(ctx, "expecting port number");
> +    return false;
> +}
> +
>  static void
>  parse_ct_lb_action(struct action_context *ctx)
>  {
> @@ -638,7 +653,7 @@ parse_ct_lb_action(struct action_context *ctx)
>      }
>
>      if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> -        /* ct_lb without parenthesis means that this is an established
> +        /* ct_lb without parentheses means that this is an established
>           * connection and we just need to do a NAT. */
>          const size_t ct_offset = ctx->ofpacts->size;
>          ofpbuf_pull(ctx->ofpacts, ct_offset);
> @@ -669,47 +684,51 @@ parse_ct_lb_action(struct action_context *ctx)
>          return;
>      }
>
> -    char *ips, *ip, *save = NULL;
>      uint32_t group_id = 0, bucket_id = 0, hash;
>      struct group_info *group_info;
>      struct ofpact_group *og;
> -    struct ds *ds;
>
> -    if (!lexer_get_string(ctx->lexer, &ips) || !ips[0]) {
> -        action_error(ctx, "ct_lb has missing ip parameters.");
> -        return;
> -    }
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +    ds_put_format(&ds, "type=select");
>
> -    ds = xmalloc(sizeof *ds);
> -    ds_init(ds);
> -    ds_put_format(ds, "type=select");
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> +    do {
> +        if (ctx->lexer->token.type != LEX_T_INTEGER
> +            || mf_subvalue_width(&ctx->lexer->token.value) > 32) {
> +            action_syntax_error(ctx, "expecting IPv4 address");
> +            ds_destroy(&ds);
> +            return;
> +        }
> +        ovs_be32 ip = ctx->lexer->token.value.ipv4;
> +        lexer_get(ctx->lexer);
> +
> +        uint16_t port = 0;
> +        if (lexer_match(ctx->lexer, LEX_T_COLON)
> +            && !action_parse_port(ctx, &port)) {
> +            ds_destroy(&ds);
> +            return;
> +        }
>
> -    ip = strtok_r(ips, ",", &save);
> -    ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions="
> -
> "ct(nat(dst=%s),commit,table=%d,zone=NXM_NX_REG5[0..15])"
> -                  ,bucket_id, ip, recirc_table);
> -    while ((ip = strtok_r(NULL, ",", &save)) != NULL) {
>          bucket_id++;
> -        ds_put_format(ds, ",bucket=bucket_id=%u,weight:100,actions="
> -                      "ct(nat(dst=%s),commit,table=%d,zone="
> -                      "NXM_NX_REG5[0..15])", bucket_id, ip, recirc_table);
> -    }
> +        ds_put_format(&ds, ",bucket=bucket_id=%u,weight:100,actions="
> +                      "ct(nat(dst="IP_FMT, bucket_id, IP_ARGS(ip));
> +        if (port) {
> +            ds_put_format(&ds, ":%"PRIu16, port);
> +        }
> +        ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])",
> +                      recirc_table, MFF_LOG_CT_ZONE - MFF_REG0);
>
> -    free(ips);
> -    if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
> -        ds_destroy(ds);
> -        free(ds);
> -        action_syntax_error(ctx, "expecting `)'");
> -        return;
> -    }
> +        lexer_match(ctx->lexer, LEX_T_COMMA);
> +    } while (!lexer_match(ctx->lexer, LEX_T_RPAREN));
>      add_prerequisite(ctx, "ip");
>
> -    hash = hash_string(ds_cstr(ds), 0);
> +    hash = hash_string(ds_cstr(&ds), 0);
>
>      /* Check whether we have non installed but allocated group_id. */
>      HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
>                               &ctx->ap->group_table->desired_groups) {
> -        if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) {
> +        if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
>              group_id = group_info->group_id;
>              break;
>          }
> @@ -720,7 +739,7 @@ parse_ct_lb_action(struct action_context *ctx)
>           * combination. */
>          HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
>                                   &ctx->ap->group_table->existing_groups) {
> -            if (!strcmp(ds_cstr(group_info->group), ds_cstr(ds))) {
> +            if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
>                  group_id = group_info->group_id;
>              }
>          }
> @@ -732,8 +751,7 @@ parse_ct_lb_action(struct action_context *ctx)
>          }
>
>          if (group_id == MAX_OVN_GROUPS + 1) {
> -            ds_destroy(ds);
> -            free(ds);
> +            ds_destroy(&ds);
>              action_error(ctx, "out of group ids.");
>              return;
>          }
> @@ -747,8 +765,7 @@ parse_ct_lb_action(struct action_context *ctx)
>          hmap_insert(&ctx->ap->group_table->desired_groups,
>                      &group_info->hmap_node, group_info->hmap_node.hash);
>      } else {
> -        ds_destroy(ds);
> -        free(ds);
> +        ds_destroy(&ds);
>      }
>
>      /* Create an action to set the group. */
> diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h
> index 7f3f478..4918900 100644
> --- a/ovn/lib/actions.h
> +++ b/ovn/lib/actions.h
> @@ -21,6 +21,7 @@
>  #include <stdint.h>
>  #include "compiler.h"
>  #include "hmap.h"
> +#include "openvswitch/dynamic-string.h"
>  #include "util.h"
>
>  struct expr;
> @@ -41,7 +42,7 @@ struct group_table {
>
>  struct group_info {
>      struct hmap_node hmap_node;
> -    struct ds *group;
> +    struct ds group;
>      uint32_t group_id;
>  };
>
> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
> index 61b6c25..c5a4834 100644
> --- a/ovn/ovn-sb.xml
> +++ b/ovn/ovn-sb.xml
> @@ -965,7 +965,7 @@
>            <p>
>              <code>ct_dnat(<var>IP</var>)</code> sends the packet through
> the
>              DNAT zone to change the destination IP address of the packet
> to
> -            the one provided inside the parenthesis and commits the
> connection.
> +            the one provided inside the parentheses and commits the
> connection.
>              The packet is then automatically sent to the next tables as if
>              followed by <code>next;</code> action.  The next tables will
> see
>              the changes in the packet caused by the connection tracker.
> @@ -1110,26 +1110,27 @@
>          </dd>
>
>          <dt><code>ct_lb;</code></dt>
> -        <dt><code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>");</code></dt>
> +
> <dt><code>ct_lb(</code><var>ip</var>[<code>:</code><var>port</var>]...<code>);</code></dt>
>          <dd>
>            <p>
> -            <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code>
> -            commits the packet to the connection tracking table and DNATs
> the
> -            packet's destination IP address (and port) to any one of the
> -            provided IP address (and port) inside the parenthesis.  Each
> of
> -            the provided IP address is given equal weight while picking
> the
> -            DNAT address.  The processing automatically moves on to the
> next
> -            table and the next tables will see the changes in the packet
> caused
> -            by the connection tracker.  The connection tracking state is
> scoped
> -            by the logical port, so overlapping addresses may be used.
> -          </p>
> -          <p>
> -            <code>ct_lb</code> sends the packet to the connection tracking
> -            table to NAT the packets.  If the packet is part of an
> established
> -            connection that was previously committed to the connection
> tracker
> -            via
> <code>ct_lb("<var>ip1[:port1],ip2[:port2]</var>,..")</code>
> -            action, it will automatically get DNATed to the same IP
> address
> -            as the first packet in that connection.
> +            With one or more arguments, <code>ct_lb</code> commits the
> packet
> +            to the connection tracking table and DNATs the packet's
> destination
> +            IP address (and port) to the IP address or addresses (and
> optional
> +            ports) specified in the string.  If multiple comma-separated
> IP
> +            addresses are specified, each is given equal weight for
> picking the
> +            DNAT address.  Processing automatically moves on to the next
> table,
> +            as if <code>next;</code> were specified, and later tables act
> on
> +            the packet as modified by the connection tracker.  Connection
> +            tracking state is scoped by the logical port, so overlapping
> +            addresses may be used.
> +          </p>
> +          <p>
> +            Without arguments, <code>ct_lb</code> sends the packet to the
> +            connection tracking table to NAT the packets.  If the packet
> is
> +            part of an established connection that was previously
> committed to
> +            the connection tracker via
> <code>ct_lb(</code>...<code>)</code>, it
> +            will automatically get DNATed to the same IP address as the
> first
> +            packet in that connection.
>            </p>
>          </dd>
>        </dl>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 8728b45..7a25402 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -506,11 +506,12 @@ ip.ttl => Syntax error at end of input expecting
> `--'.
>
>  # load balancing.
>  ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip
> -ct_lb(); => ct_lb has missing ip parameters.
> -ct_lb(""); => ct_lb has missing ip parameters.
> -ct_lb("192.168.1.2:80, 192.168.1.3:80"); => actions=group:1, prereqs=ip
> -ct_lb("192.168.1.2, 192.168.1.3"); => actions=group:2, prereqs=ip
> -ct_lb("foo"); => actions=group:3, prereqs=ip
> +ct_lb(); => Syntax error at `)' expecting IPv4 address.
> +ct_lb(192.168.1.2:80, 192.168.1.3:80); => actions=group:1, prereqs=ip
> +ct_lb(192.168.1.2, 192.168.1.3, ); => actions=group:2, prereqs=ip
> +ct_lb(192.168.1.2:); => Syntax error at `)' expecting port number.
> +ct_lb(192.168.1.2:123456); => Syntax error at `123456' expecting port
> number.
> +ct_lb(foo); => Syntax error at `foo' expecting IPv4 address.
>
>  # conntrack
>  ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip
>
> --8<--------------------------cut here-------------------------->8--
>
> Author: Gurucharan Shetty <g...@ovn.org>
> Date:   Wed Jun 29 01:17:10 2016 -0700
>
>     ovn-controller: Add support for load balancing.
>
>     ovn-controller now supports 2 new logical actions.
>
>     1. ct_lb;
>     Sends the packet through the conntrack zone to NAT
>     packets. Packets that are part of established connection
>     will automatically get NATed based on the NAT arguments
>     supplied to conntrack when the first packet was committed.
>
>     2. ct_lb(192.168.1.2, 192.168.1.3);
>        ct_lb(192.168.1.2:80, 192.168.1.3:80);
>     Creates an OpenFlow group with multiple buckets and equal weights
>     that changes the destination IP address (and port number) of the packet
>     statefully to one of the options provided inside the parenthesis.
>
>     Signed-off-by: Gurucharan Shetty <g...@ovn.org>
>     Signed-off-by: Ben Pfaff <b...@ovn.org>
>
> diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
> index 52e6131..837e5c5 100644
> --- a/ovn/controller/lflow.c
> +++ b/ovn/controller/lflow.c
> @@ -200,6 +200,7 @@ add_logical_flows(struct controller_ctx *ctx, const
> struct lport_index *lports,
>                    const struct mcgroup_index *mcgroups,
>                    const struct hmap *local_datapaths,
>                    const struct hmap *patched_datapaths,
> +                  struct group_table *group_table,
>                    const struct simap *ct_zones, struct hmap *flow_table)
>  {
>      uint32_t conj_id_ofs = 1;
> @@ -286,6 +287,7 @@ add_logical_flows(struct controller_ctx *ctx, const
> struct lport_index *lports,
>              .lookup_port = lookup_port_cb,
>              .aux = &aux,
>              .ct_zones = ct_zones,
> +            .group_table = group_table,
>
>              .n_tables = LOG_PIPELINE_LEN,
>              .first_ptable = first_ptable,
> @@ -437,10 +439,11 @@ lflow_run(struct controller_ctx *ctx, const struct
> lport_index *lports,
>            const struct mcgroup_index *mcgroups,
>            const struct hmap *local_datapaths,
>            const struct hmap *patched_datapaths,
> +          struct group_table *group_table,
>            const struct simap *ct_zones, struct hmap *flow_table)
>  {
>      add_logical_flows(ctx, lports, mcgroups, local_datapaths,
> -                      patched_datapaths, ct_zones, flow_table);
> +                      patched_datapaths, group_table, ct_zones,
> flow_table);
>      add_neighbor_flows(ctx, lports, flow_table);
>  }
>
> diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
> index a3fc50c..e96a24b 100644
> --- a/ovn/controller/lflow.h
> +++ b/ovn/controller/lflow.h
> @@ -36,6 +36,7 @@
>  #include <stdint.h>
>
>  struct controller_ctx;
> +struct group_table;
>  struct hmap;
>  struct lport_index;
>  struct mcgroup_index;
> @@ -63,6 +64,7 @@ void lflow_run(struct controller_ctx *, const struct
> lport_index *,
>                 const struct mcgroup_index *,
>                 const struct hmap *local_datapaths,
>                 const struct hmap *patched_datapaths,
> +               struct group_table *group_table,
>                 const struct simap *ct_zones,
>                 struct hmap *flow_table);
>  void lflow_destroy(void);
> diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
> index f537bc0..6335f88 100644
> --- a/ovn/controller/ofctrl.c
> +++ b/ovn/controller/ofctrl.c
> @@ -1,4 +1,4 @@
> -/* Copyright (c) 2015 Nicira, Inc.
> +/* Copyright (c) 2015, 2016 Nicira, Inc.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -14,6 +14,7 @@
>   */
>
>  #include <config.h>
> +#include "bitmap.h"
>  #include "byte-order.h"
>  #include "dirs.h"
>  #include "hash.h"
> @@ -24,11 +25,13 @@
>  #include "openvswitch/match.h"
>  #include "openvswitch/ofp-actions.h"
>  #include "openvswitch/ofp-msgs.h"
> +#include "openvswitch/ofp-parse.h"
>  #include "openvswitch/ofp-print.h"
>  #include "openvswitch/ofp-util.h"
>  #include "openvswitch/ofpbuf.h"
>  #include "openvswitch/vlog.h"
>  #include "ovn-controller.h"
> +#include "ovn/lib/actions.h"
>  #include "physical.h"
>  #include "rconn.h"
>  #include "socket-util.h"
> @@ -63,6 +66,8 @@ static void queue_flow_mod(struct ofputil_flow_mod *);
>  /* OpenFlow connection to the switch. */
>  static struct rconn *swconn;
>
> +static void queue_group_mod(struct ofputil_group_mod *);
> +
>  /* Last seen sequence number for 'swconn'.  When this differs from
>   * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
>  static unsigned int seqno;
> @@ -95,6 +100,9 @@ static struct rconn_packet_counter *tx_counter;
>   * installed in the switch. */
>  static struct hmap installed_flows;
>
> +/* A reference to the group_table. */
> +static struct group_table *groups;
> +
>  /* MFF_* field ID for our Geneve option.  In S_TLV_TABLE_MOD_SENT, this is
>   * the option we requested (we don't know whether we obtained it yet).  In
>   * S_CLEAR_FLOWS or S_UPDATE_FLOWS, this is really the option we have. */
> @@ -103,6 +111,9 @@ static enum mf_field_id mff_ovn_geneve;
>  static void ovn_flow_table_clear(struct hmap *flow_table);
>  static void ovn_flow_table_destroy(struct hmap *flow_table);
>
> +static void ovn_group_table_clear(struct group_table *group_table,
> +                                  bool existing);
> +
>  static void ofctrl_recv(const struct ofp_header *, enum ofptype);
>
>  void
> @@ -312,9 +323,23 @@ run_S_CLEAR_FLOWS(void)
>      queue_flow_mod(&fm);
>      VLOG_DBG("clearing all flows");
>
> +    struct ofputil_group_mod gm;
> +    memset(&gm, 0, sizeof gm);
> +    gm.command = OFPGC11_DELETE;
> +    gm.group_id = OFPG_ALL;
> +    gm.command_bucket_id = OFPG15_BUCKET_ALL;
> +    ovs_list_init(&gm.buckets);
> +    queue_group_mod(&gm);
> +    ofputil_bucket_list_destroy(&gm.buckets);
> +
>      /* Clear installed_flows, to match the state of the switch. */
>      ovn_flow_table_clear(&installed_flows);
>
> +    /* Clear existing groups, to match the state of the switch. */
> +    if (groups) {
> +        ovn_group_table_clear(groups, true);
> +    }
> +
>      state = S_UPDATE_FLOWS;
>  }
>
> @@ -591,16 +616,71 @@ queue_flow_mod(struct ofputil_flow_mod *fm)
>      queue_msg(ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM));
>  }
>
> +
> +/* group_table. */
> +
> +/* Finds and returns a group_info in 'existing_groups' whose key is
> identical
> + * to 'target''s key, or NULL if there is none. */
> +static struct group_info *
> +ovn_group_lookup(struct hmap *exisiting_groups,
> +                 const struct group_info *target)
> +{
> +    struct group_info *e;
> +
> +    HMAP_FOR_EACH_WITH_HASH(e, hmap_node, target->hmap_node.hash,
> +                            exisiting_groups) {
> +        if (e->group_id == target->group_id) {
> +            return e;
> +        }
> +   }
> +    return NULL;
> +}
> +
> +/* Clear either desired_groups or existing_groups in group_table. */
> +static void
> +ovn_group_table_clear(struct group_table *group_table, bool existing)
> +{
> +    struct group_info *g, *next;
> +    struct hmap *target_group = existing
> +                                ? &group_table->existing_groups
> +                                : &group_table->desired_groups;
> +
> +    HMAP_FOR_EACH_SAFE (g, next, hmap_node, target_group) {
> +        hmap_remove(target_group, &g->hmap_node);
> +        bitmap_set0(group_table->group_ids, g->group_id);
> +        ds_destroy(&g->group);
> +        free(&g->group);
> +        free(g);
> +    }
> +}
> +
> +static void
> +queue_group_mod(struct ofputil_group_mod *gm)
> +{
> +    queue_msg(ofputil_encode_group_mod(OFP13_VERSION, gm));
> +}
> +
> +
>  /* Replaces the flow table on the switch, if possible, by the flows in
>   * 'flow_table', which should have been added with ofctrl_add_flow().
>   * Regardless of whether the flow table is updated, this deletes all of
> the
>   * flows from 'flow_table' and frees them.  (The hmap itself isn't
>   * destroyed.)
>   *
> + * Replaces the group table on the switch, if possible, by the groups in
> + * 'group_table->desired_groups'. Regardless of whether the group table
> + * is updated, this deletes all the groups from the
> + * 'group_table->desired_groups' and frees them. (The hmap itself isn't
> + * destroyed.)
> + *
>   * This called be called be ofctrl_run() within the main loop. */
>  void
> -ofctrl_put(struct hmap *flow_table)
> +ofctrl_put(struct hmap *flow_table, struct group_table *group_table)
>  {
> +    if (!groups) {
> +        groups = group_table;
> +    }
> +
>      /* The flow table can be updated if the connection to the switch is
> up and
>       * in the correct state and not backlogged with existing flow_mods.
> (Our
>       * criteria for being backlogged appear very conservative, but the
> socket
> @@ -610,9 +690,37 @@ ofctrl_put(struct hmap *flow_table)
>      if (state != S_UPDATE_FLOWS
>          || rconn_packet_counter_n_packets(tx_counter)) {
>          ovn_flow_table_clear(flow_table);
> +        ovn_group_table_clear(group_table, false);
>          return;
>      }
>
> +    /* Iterate through all the desired groups. If there are new ones,
> +     * add them to the switch. */
> +    struct group_info *desired;
> +    HMAP_FOR_EACH(desired, hmap_node, &group_table->desired_groups) {
> +        if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
> +            /* Create and install new group. */
> +            struct ofputil_group_mod gm;
> +            enum ofputil_protocol usable_protocols;
> +            char *error;
> +            struct ds group_string = DS_EMPTY_INITIALIZER;
> +            ds_put_format(&group_string, "group_id=%u,%s",
> +                          desired->group_id, ds_cstr(&desired->group));
> +
> +            error = parse_ofp_group_mod_str(&gm, OFPGC11_ADD,
> +                                            ds_cstr(&group_string),
> +                                            &usable_protocols);
> +            if (!error) {
> +                queue_group_mod(&gm);
> +            } else {
> +                VLOG_ERR("new group %s %s", error,
> ds_cstr(&group_string));
> +                free(error);
> +            }
> +            ds_destroy(&group_string);
> +            ofputil_bucket_list_destroy(&gm.buckets);
> +        }
> +    }
> +
>      /* Iterate through all of the installed flows.  If any of them are no
>       * longer desired, delete them; if any of them should have different
>       * actions, update them. */
> @@ -682,4 +790,52 @@ ofctrl_put(struct hmap *flow_table)
>          hmap_remove(flow_table, &d->hmap_node);
>          hmap_insert(&installed_flows, &d->hmap_node, d->hmap_node.hash);
>      }
> +
> +    /* Iterate through the installed groups from previous runs. If they
> +     * are not needed delete them. */
> +    struct group_info *installed, *next_group;
> +    HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
> +                       &group_table->existing_groups) {
> +        if (!ovn_group_lookup(&group_table->desired_groups, installed)) {
> +            /* Delete the group. */
> +            struct ofputil_group_mod gm;
> +            enum ofputil_protocol usable_protocols;
> +            char *error;
> +            struct ds group_string = DS_EMPTY_INITIALIZER;
> +            ds_put_format(&group_string, "group_id=%u",
> installed->group_id);
> +
> +            error = parse_ofp_group_mod_str(&gm, OFPGC11_DELETE,
> +                                            ds_cstr(&group_string),
> +                                            &usable_protocols);
> +            if (!error) {
> +                queue_group_mod(&gm);
> +            } else {
> +                VLOG_ERR("%s", error);
> +                free(error);
> +            }
> +            ds_destroy(&group_string);
> +            ofputil_bucket_list_destroy(&gm.buckets);
> +
> +            /* Remove 'installed' from 'group_table->existing_groups' */
> +            hmap_remove(&group_table->existing_groups,
> &installed->hmap_node);
> +            ds_destroy(&installed->group);
> +
> +            /* Dealloc group_id. */
> +            bitmap_set0(group_table->group_ids, installed->group_id);
> +            free(installed);
> +        }
> +    }
> +
> +    /* Move the contents of desired_groups to existing_groups. */
> +    HMAP_FOR_EACH_SAFE(desired, next_group, hmap_node,
> +                       &group_table->desired_groups) {
> +        hmap_remove(&group_table->desired_groups, &desired->hmap_node);
> +        if (!ovn_group_lookup(&group_table->existing_groups, desired)) {
> +            hmap_insert(&group_table->existing_groups,
> &desired->hmap_node,
> +                        desired->hmap_node.hash);
> +        } else {
> +            ds_destroy(&desired->group);
> +            free(desired);
> +        }
> +    }
>  }
> diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
> index bc9cfba..bf5dfd5 100644
> --- a/ovn/controller/ofctrl.h
> +++ b/ovn/controller/ofctrl.h
> @@ -26,11 +26,12 @@ struct hmap;
>  struct match;
>  struct ofpbuf;
>  struct ovsrec_bridge;
> +struct group_table;
>
>  /* Interface for OVN main loop. */
>  void ofctrl_init(void);
>  enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int);
> -void ofctrl_put(struct hmap *flows);
> +void ofctrl_put(struct hmap *flows, struct group_table *group_table);
>  void ofctrl_wait(void);
>  void ofctrl_destroy(void);
>
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 47e6824..fbaf630 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -39,6 +39,7 @@
>  #include "ofctrl.h"
>  #include "openvswitch/vconn.h"
>  #include "openvswitch/vlog.h"
> +#include "ovn/lib/actions.h"
>  #include "ovn/lib/ovn-sb-idl.h"
>  #include "ovn/lib/ovn-util.h"
>  #include "patch.h"
> @@ -351,6 +352,13 @@ main(int argc, char *argv[])
>      }
>      unixctl_command_register("exit", "", 0, 0, ovn_controller_exit,
> &exiting);
>
> +    /* Initialize group ids for loadbalancing. */
> +    struct group_table group_table;
> +    group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS);
> +    bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */
> +    hmap_init(&group_table.desired_groups);
> +    hmap_init(&group_table.existing_groups);
> +
>      daemonize_complete();
>
>      ovsrec_init();
> @@ -462,13 +470,14 @@ main(int argc, char *argv[])
>
>              struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
>              lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
> -                      &patched_datapaths, &ct_zones, &flow_table);
> +                      &patched_datapaths, &group_table, &ct_zones,
> +                      &flow_table);
>              if (chassis_id) {
>                  physical_run(&ctx, mff_ovn_geneve,
>                               br_int, chassis_id, &ct_zones, &flow_table,
>                               &local_datapaths, &patched_datapaths);
>              }
> -            ofctrl_put(&flow_table);
> +            ofctrl_put(&flow_table, &group_table);
>              hmap_destroy(&flow_table);
>          }
>
> @@ -528,6 +537,18 @@ main(int argc, char *argv[])
>
>      simap_destroy(&ct_zones);
>
> +    bitmap_free(group_table.group_ids);
> +    hmap_destroy(&group_table.desired_groups);
> +
> +    struct group_info *installed, *next_group;
> +    HMAP_FOR_EACH_SAFE(installed, next_group, hmap_node,
> +                       &group_table.existing_groups) {
> +        hmap_remove(&group_table.existing_groups, &installed->hmap_node);
> +        ds_destroy(&installed->group);
> +        free(installed);
> +    }
> +    hmap_destroy(&group_table.existing_groups);
> +
>      ovsdb_idl_loop_destroy(&ovs_idl_loop);
>      ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
>
> diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
> index 569970e..0d18adc 100644
> --- a/ovn/lib/actions.c
> +++ b/ovn/lib/actions.c
> @@ -18,10 +18,13 @@
>  #include <stdarg.h>
>  #include <stdbool.h>
>  #include "actions.h"
> +#include "bitmap.h"
>  #include "byte-order.h"
>  #include "compiler.h"
>  #include "ovn-dhcp.h"
>  #include "expr.h"
> +#include "hash.h"
> +#include "hmap.h"
>  #include "lex.h"
>  #include "logical-fields.h"
>  #include "nx-match.h"
> @@ -623,6 +626,153 @@ parse_put_dhcp_opts_action(struct action_context
> *ctx,
>      finish_controller_op(ctx->ofpacts, oc_offset);
>  }
>
> +static bool
> +action_parse_port(struct action_context *ctx, uint16_t *port)
> +{
> +    if (lexer_is_int(ctx->lexer)) {
> +        int value = ntohll(ctx->lexer->token.value.integer);
> +        if (value <= UINT16_MAX) {
> +            *port = value;
> +            lexer_get(ctx->lexer);
> +            return true;
> +        }
> +    }
> +    action_syntax_error(ctx, "expecting port number");
> +    return false;
> +}
> +
> +static void
> +parse_ct_lb_action(struct action_context *ctx)
> +{
> +    uint8_t recirc_table;
> +    if (ctx->ap->cur_ltable < ctx->ap->n_tables) {
> +        recirc_table = ctx->ap->first_ptable + ctx->ap->cur_ltable + 1;
> +    } else {
> +        action_error(ctx, "\"ct_lb\" action not allowed in last table.");
> +        return;
> +    }
> +
> +    if (!lexer_match(ctx->lexer, LEX_T_LPAREN)) {
> +        /* ct_lb without parentheses means that this is an established
> +         * connection and we just need to do a NAT. */
> +        const size_t ct_offset = ctx->ofpacts->size;
> +        ofpbuf_pull(ctx->ofpacts, ct_offset);
> +
> +        struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts);
> +        struct ofpact_nat *nat;
> +        size_t nat_offset;
> +        ct->zone_src.field = mf_from_id(MFF_LOG_CT_ZONE);
> +        ct->zone_src.ofs = 0;
> +        ct->zone_src.n_bits = 16;
> +        ct->flags = 0;
> +        ct->recirc_table = recirc_table;
> +        ct->alg = 0;
> +
> +        add_prerequisite(ctx, "ip");
> +
> +        nat_offset = ctx->ofpacts->size;
> +        ofpbuf_pull(ctx->ofpacts, nat_offset);
> +
> +        nat = ofpact_put_NAT(ctx->ofpacts);
> +        nat->flags = 0;
> +        nat->range_af = AF_UNSPEC;
> +
> +        ctx->ofpacts->header = ofpbuf_push_uninit(ctx->ofpacts,
> nat_offset);
> +        ct = ctx->ofpacts->header;
> +        ofpact_finish(ctx->ofpacts, &ct->ofpact);
> +        ofpbuf_push_uninit(ctx->ofpacts, ct_offset);
> +        return;
> +    }
> +
> +    uint32_t group_id = 0, bucket_id = 0, hash;
> +    struct group_info *group_info;
> +    struct ofpact_group *og;
> +
> +    struct ds ds = DS_EMPTY_INITIALIZER;
> +    ds_put_format(&ds, "type=select");
> +
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE >= MFF_REG0);
> +    BUILD_ASSERT(MFF_LOG_CT_ZONE < MFF_REG0 + FLOW_N_REGS);
> +    do {
> +        if (ctx->lexer->token.type != LEX_T_INTEGER
> +            || mf_subvalue_width(&ctx->lexer->token.value) > 32) {
> +            action_syntax_error(ctx, "expecting IPv4 address");
> +            ds_destroy(&ds);
> +            return;
> +        }
> +        ovs_be32 ip = ctx->lexer->token.value.ipv4;
> +        lexer_get(ctx->lexer);
> +
> +        uint16_t port = 0;
> +        if (lexer_match(ctx->lexer, LEX_T_COLON)
> +            && !action_parse_port(ctx, &port)) {
> +            ds_destroy(&ds);
> +            return;
> +        }
> +
> +        bucket_id++;
> +        ds_put_format(&ds, ",bucket=bucket_id=%u,weight:100,actions="
> +                      "ct(nat(dst="IP_FMT, bucket_id, IP_ARGS(ip));
> +        if (port) {
> +            ds_put_format(&ds, ":%"PRIu16, port);
> +        }
> +        ds_put_format(&ds, "),commit,table=%d,zone=NXM_NX_REG%d[0..15])",
> +                      recirc_table, MFF_LOG_CT_ZONE - MFF_REG0);
> +
> +        lexer_match(ctx->lexer, LEX_T_COMMA);
> +    } while (!lexer_match(ctx->lexer, LEX_T_RPAREN));
> +    add_prerequisite(ctx, "ip");
> +
> +    hash = hash_string(ds_cstr(&ds), 0);
> +
> +    /* Check whether we have non installed but allocated group_id. */
> +    HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
> +                             &ctx->ap->group_table->desired_groups) {
> +        if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
> +            group_id = group_info->group_id;
> +            break;
> +        }
> +    }
> +
> +    if (!group_id) {
> +        /* Check whether we already have an installed entry for this
> +         * combination. */
> +        HMAP_FOR_EACH_WITH_HASH (group_info, hmap_node, hash,
> +                                 &ctx->ap->group_table->existing_groups) {
> +            if (!strcmp(ds_cstr(&group_info->group), ds_cstr(&ds))) {
> +                group_id = group_info->group_id;
> +            }
> +        }
> +
> +        if (!group_id) {
> +            /* Reserve a new group_id. */
> +            group_id = bitmap_scan(ctx->ap->group_table->group_ids, 0, 1,
> +                                   MAX_OVN_GROUPS + 1);
> +        }
> +
> +        if (group_id == MAX_OVN_GROUPS + 1) {
> +            ds_destroy(&ds);
> +            action_error(ctx, "out of group ids.");
> +            return;
> +        }
> +        bitmap_set1(ctx->ap->group_table->group_ids, group_id);
> +
> +        group_info = xmalloc(sizeof *group_info);
> +        group_info->group = ds;
> +        group_info->group_id = group_id;
> +        group_info->hmap_node.hash = hash;
> +
> +        hmap_insert(&ctx->ap->group_table->desired_groups,
> +                    &group_info->hmap_node, group_info->hmap_node.hash);
> +    } else {
> +        ds_destroy(&ds);
> +    }
> +
> +    /* Create an action to set the group. */
> +    og = ofpact_put_GROUP(ctx->ofpacts);
> +    og->group_id = group_id;
> +}
> +
>  static void
>  emit_ct(struct action_context *ctx, bool recirc_next, bool commit)
>  {
> @@ -762,6 +912,8 @@ parse_action(struct action_context *ctx)
>          parse_ct_nat(ctx, false);
>      } else if (lexer_match_id(ctx->lexer, "ct_snat")) {
>          parse_ct_nat(ctx, true);
> +    } else if (lexer_match_id(ctx->lexer, "ct_lb")) {
> +        parse_ct_lb_action(ctx);
>      } else if (lexer_match_id(ctx->lexer, "arp")) {
>          parse_arp_action(ctx);
>      } else if (lexer_match_id(ctx->lexer, "get_arp")) {
> diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h
> index f49e15e..4918900 100644
> --- a/ovn/lib/actions.h
> +++ b/ovn/lib/actions.h
> @@ -20,6 +20,8 @@
>  #include <stdbool.h>
>  #include <stdint.h>
>  #include "compiler.h"
> +#include "hmap.h"
> +#include "openvswitch/dynamic-string.h"
>  #include "util.h"
>
>  struct expr;
> @@ -28,6 +30,22 @@ struct ofpbuf;
>  struct shash;
>  struct simap;
>
> +#define MAX_OVN_GROUPS 65535
> +
> +struct group_table {
> +    unsigned long *group_ids;  /* Used as a bitmap with value set
> +                                * for allocated group ids in either
> +                                * desired_groups or existing_groups. */
> +    struct hmap desired_groups;
> +    struct hmap existing_groups;
> +};
> +
> +struct group_info {
> +    struct hmap_node hmap_node;
> +    struct ds group;
> +    uint32_t group_id;
> +};
> +
>  enum action_opcode {
>      /* "arp { ...actions... }".
>       *
> @@ -80,6 +98,9 @@ struct action_params {
>      /* A map from a port name to its connection tracking zone. */
>      const struct simap *ct_zones;
>
> +    /* A struct to figure out the group_id for group actions. */
> +    struct group_table *group_table;
> +
>      /* OVN maps each logical flow table (ltable), one-to-one, onto a
> physical
>       * OpenFlow flow table (ptable).  A number of parameters describe this
>       * mapping and data related to flow tables:
> diff --git a/ovn/lib/lex.c b/ovn/lib/lex.c
> index 89da666..bd748d0 100644
> --- a/ovn/lib/lex.c
> +++ b/ovn/lib/lex.c
> @@ -864,3 +864,16 @@ lexer_get_int(struct lexer *lexer, int *value)
>          return false;
>      }
>  }
> +
> +bool
> +lexer_get_string(struct lexer *lexer, char **str)
> +{
> +    if (lexer->token.type == LEX_T_STRING) {
> +        *str = xstrdup(lexer->token.s);
> +        lexer_get(lexer);
> +        return true;
> +    } else {
> +        *str = NULL;
> +        return false;
> +    }
> +}
> diff --git a/ovn/lib/lex.h b/ovn/lib/lex.h
> index 22f2807..7701c93 100644
> --- a/ovn/lib/lex.h
> +++ b/ovn/lib/lex.h
> @@ -122,5 +122,6 @@ bool lexer_match(struct lexer *, enum lex_type);
>  bool lexer_match_id(struct lexer *, const char *id);
>  bool lexer_is_int(const struct lexer *);
>  bool lexer_get_int(struct lexer *, int *value);
> +bool lexer_get_string(struct lexer *, char  **str);
>
>  #endif /* ovn/lex.h */
> diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
> index f330374..c5a4834 100644
> --- a/ovn/ovn-sb.xml
> +++ b/ovn/ovn-sb.xml
> @@ -965,7 +965,7 @@
>            <p>
>              <code>ct_dnat(<var>IP</var>)</code> sends the packet through
> the
>              DNAT zone to change the destination IP address of the packet
> to
> -            the one provided inside the parenthesis and commits the
> connection.
> +            the one provided inside the parentheses and commits the
> connection.
>              The packet is then automatically sent to the next tables as if
>              followed by <code>next;</code> action.  The next tables will
> see
>              the changes in the packet caused by the connection tracker.
> @@ -1108,6 +1108,31 @@
>              </code>
>            </p>
>          </dd>
> +
> +        <dt><code>ct_lb;</code></dt>
> +
> <dt><code>ct_lb(</code><var>ip</var>[<code>:</code><var>port</var>]...<code>);</code></dt>
> +        <dd>
> +          <p>
> +            With one or more arguments, <code>ct_lb</code> commits the
> packet
> +            to the connection tracking table and DNATs the packet's
> destination
> +            IP address (and port) to the IP address or addresses (and
> optional
> +            ports) specified in the string.  If multiple comma-separated
> IP
> +            addresses are specified, each is given equal weight for
> picking the
> +            DNAT address.  Processing automatically moves on to the next
> table,
> +            as if <code>next;</code> were specified, and later tables act
> on
> +            the packet as modified by the connection tracker.  Connection
> +            tracking state is scoped by the logical port, so overlapping
> +            addresses may be used.
> +          </p>
> +          <p>
> +            Without arguments, <code>ct_lb</code> sends the packet to the
> +            connection tracking table to NAT the packets.  If the packet
> is
> +            part of an established connection that was previously
> committed to
> +            the connection tracker via
> <code>ct_lb(</code>...<code>)</code>, it
> +            will automatically get DNATed to the same IP address as the
> first
> +            packet in that connection.
> +          </p>
> +        </dd>
>        </dl>
>
>        <p>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 89879ce..7a25402 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -504,6 +504,15 @@ reg0[0..7] <-> ip.proto; => Field ip.proto is not
> modifiable.
>  ip.ttl--; => actions=dec_ttl, prereqs=ip
>  ip.ttl => Syntax error at end of input expecting `--'.
>
> +# load balancing.
> +ct_lb; => actions=ct(table=27,zone=NXM_NX_REG5[0..15],nat), prereqs=ip
> +ct_lb(); => Syntax error at `)' expecting IPv4 address.
> +ct_lb(192.168.1.2:80, 192.168.1.3:80); => actions=group:1, prereqs=ip
> +ct_lb(192.168.1.2, 192.168.1.3, ); => actions=group:2, prereqs=ip
> +ct_lb(192.168.1.2:); => Syntax error at `)' expecting port number.
> +ct_lb(192.168.1.2:123456); => Syntax error at `123456' expecting port
> number.
> +ct_lb(foo); => Syntax error at `foo' expecting IPv4 address.
> +
>  # conntrack
>  ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip
>  ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip
> diff --git a/tests/test-ovn.c b/tests/test-ovn.c
> index 18e5aca..1c468d6 100644
> --- a/tests/test-ovn.c
> +++ b/tests/test-ovn.c
> @@ -1257,6 +1257,13 @@ test_parse_actions(struct ovs_cmdl_context *ctx
> OVS_UNUSED)
>      create_symtab(&symtab);
>      create_dhcp_opts(&dhcp_opts);
>
> +    /* Initialize group ids. */
> +    struct group_table group_table;
> +    group_table.group_ids = bitmap_allocate(MAX_OVN_GROUPS);
> +    bitmap_set1(group_table.group_ids, 0); /* Group id 0 is invalid. */
> +    hmap_init(&group_table.desired_groups);
> +    hmap_init(&group_table.existing_groups);
> +
>      simap_init(&ports);
>      simap_put(&ports, "eth0", 5);
>      simap_put(&ports, "eth1", 6);
> @@ -1277,6 +1284,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx
> OVS_UNUSED)
>              .lookup_port = lookup_port_cb,
>              .aux = &ports,
>              .ct_zones = &ct_zones,
> +            .group_table = &group_table,
>
>              .n_tables = 16,
>              .first_ptable = 16,
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to