This patch series updates the vtep schema, vtep-ctl commands and vtep
simulator to support source node replication in addition to service node
replication per logical switch and per physical switch.  The default
replication mode is service_node as that was the only mode previously
supported.

Signed-off-by: Darrell Ball <dlu...@gmail.com>
---
 tests/vtep-ctl.at       | 16 +++++++++++++
 vtep/README.ovs-vtep.md | 18 +++++++++++++--
 vtep/ovs-vtep           | 55 +++++++++++++++++++++++++++++++++++++++++----
 vtep/vtep-ctl.8.in      | 20 +++++++++++++++++
 vtep/vtep-ctl.c         | 60 +++++++++++++++++++++++++++++++++++++++++++++++++
 vtep/vtep.ovsschema     | 13 ++++++++++-
 vtep/vtep.xml           | 53 +++++++++++++++++++++++++++++++++++++++++--
 7 files changed, 226 insertions(+), 9 deletions(-)

diff --git a/tests/vtep-ctl.at b/tests/vtep-ctl.at
index 99e97e8..8abfa8a 100644
--- a/tests/vtep-ctl.at
+++ b/tests/vtep-ctl.at
@@ -264,6 +264,14 @@ CHECK_PORTS([b], [p1])
 VTEP_CTL_CLEANUP
 AT_CLEANUP
 
+AT_SETUP([add-ps a, set-ps-replication-mode a source_node])
+AT_KEYWORDS([vtep-ctl])
+VTEP_CTL_SETUP
+AT_CHECK([RUN_VTEP_CTL(
+  [add-ps a],[set-ps-replication-mode a source_node])],
+  [0], [], [], [VTEP_CTL_CLEANUP])
+VTEP_CTL_CLEANUP
+AT_CLEANUP
 
 dnl ----------------------------------------------------------------------
 AT_BANNER([vtep-ctl unit tests -- logical switch tests])
@@ -318,6 +326,14 @@ CHECK_LSWITCHES([a])
 VTEP_CTL_CLEANUP
 AT_CLEANUP
 
+AT_SETUP([add-ls a, set-ls-replication-mode a source_node])
+AT_KEYWORDS([vtep-ctl])
+VTEP_CTL_SETUP
+AT_CHECK([RUN_VTEP_CTL(
+  [add-ls a],[set-ls-replication-mode a source_node])],
+  [0], [], [], [VTEP_CTL_CLEANUP])
+VTEP_CTL_CLEANUP
+AT_CLEANUP
 
 dnl ----------------------------------------------------------------------
 AT_BANNER([vtep-ctl unit tests -- logical binding tests])
diff --git a/vtep/README.ovs-vtep.md b/vtep/README.ovs-vtep.md
index 6734dab..61c937a 100644
--- a/vtep/README.ovs-vtep.md
+++ b/vtep/README.ovs-vtep.md
@@ -166,13 +166,27 @@ vtep-ctl bind-ls br0 p0 0 ls0
 vtep-ctl set Logical_Switch ls0 tunnel_key=33
       ```
 
-3. Direct unknown destinations out a tunnel:
+3. Optionally, change the replication mode from a default of service_node to
+   source_node, which can either be done at the physical switch or logical
+   switch levels.  A physical switch mode level change affects all the
+   contained logical switches in the VTEP emulator.  When a logical switch is
+   first bound to a physical switch the logical switch will inherit the
+   replication mode of the physical switch.  The replication mode of the
+   logical switch can be subsequently changed:
+
+      ```
+vtep-ctl set-ps-replication-mode br0 source_node
+                    or
+vtep-ctl set-ls-replication-mode ls0 source_node
+      ```
+
+4. Direct unknown destinations out a tunnel:
 
       ```
 vtep-ctl add-mcast-remote ls0 unknown-dst 10.2.2.2
       ```
 
-4. Direct unicast destinations out a different tunnel:
+5. Direct unicast destinations out a different tunnel:
 
       ```
 vtep-ctl add-ucast-remote ls0 00:11:22:33:44:55 10.2.2.3
diff --git a/vtep/ovs-vtep b/vtep/ovs-vtep
index 31ff159..da3a65d 100755
--- a/vtep/ovs-vtep
+++ b/vtep/ovs-vtep
@@ -43,6 +43,8 @@ exiting = False
 
 ps_name = ""
 ps_type = ""
+ps_replication_mode = "service_node"
+ps_replication_mode_chg = False
 Tunnel_Ip = ""
 Lswitches = {}
 Bindings = {}
@@ -94,6 +96,7 @@ class Logical_Switch(object):
         self.unknown_dsts = set()
         self.tunnel_key = 0
         self.setup_ls()
+        self.replication_mode = "service_node"
 
     def __del__(self):
         vlog.info("destroying lswitch %s" % self.name)
@@ -141,13 +144,17 @@ class Logical_Switch(object):
             ovs_ofctl("add-flow %s table=1,priority=1,in_port=%s,action=%s"
                         % (self.short_name, port_no, ",".join(flood_ports)))
 
-        # Traffic coming from a VTEP physical port should only be flooded to
-        # one 'unknown-dst' and to all other physical ports that belong to that
-        # VTEP device and this logical switch.
+        # Traffic coming from a VTEP physical port should always be flooded to
+        # to all the other physical ports that belong to that VTEP device and
+        # this logical switch.  If the replication mode is service node then
+        # send to one unknown_dst HV (the first one here); else we assume the
+        # replication mode is source node and we send the packet to all
+        # unknown_dst HVs.
         for tunnel in self.unknown_dsts:
             port_no = self.tunnels[tunnel][0]
             flood_ports.append(port_no)
-            break
+            if self.replication_mode == "service_node":
+                break
 
         ovs_ofctl("add-flow %s table=1,priority=0,action=%s"
                   % (self.short_name, ",".join(flood_ports)))
@@ -293,8 +300,32 @@ class Logical_Switch(object):
 
         self.remote_macs = remote_macs
 
+        replication_mode = vtep_ctl("get logical_switch %s replication_mode"
+                                    % self.name)
+
+        # If the logical switch level replication mode has changed then
+        # update to that value. Otherwise if the physical switch parent
+        # replication node has changed, then update the logical switch
+        # mode to the physical switch mode value.
+        replic_mode_change = False
+        if replication_mode != self.replication_mode:
+            self.replication_mode = replication_mode
+            vlog.info("%s replication mode changed to %s" %
+                       (self.name, self.replication_mode))
+            replic_mode_change = True
+        elif ps_replication_mode_chg is True:
+            self.replication_mode = ps_replication_mode
+            vlog.info("%s replication mode changed to %s; inherit physical "
+                      "switch replication mode"
+                      % (self.name, self.replication_mode))
+            replic_mode_change = True
+
+        unk_dsts_mode_change = False
         if (self.unknown_dsts != unknown_dsts):
             self.unknown_dsts = unknown_dsts
+            unk_dsts_mode_change = True
+
+        if replic_mode_change is True or unk_dsts_mode_change is True:
             self.update_flood()
 
     def update_stats(self):
@@ -575,6 +606,22 @@ def handle_physical():
         vlog.info("deleting %s from %s" % (pp_name, ps_name))
         vtep_ctl("del-port %s %s" % (ps_name, pp_name))
 
+    # Get the physical switch replication mode from the database and update
+    # the VTEP simulator physical switch replication mode and set the
+    # physical switch replication mode changed flag so the logical switches
+    # can know to update to the new physical switch mode value.
+    replication_mode = vtep_ctl("get physical_switch %s replication_mode"
+                                   % ps_name)
+
+    global ps_replication_mode_chg
+    global ps_replication_mode
+    ps_replication_mode_chg = False
+    if replication_mode != ps_replication_mode:
+        ps_replication_mode = replication_mode
+        ps_replication_mode_chg = True
+        vlog.info("%s replication mode changed to %s" %
+                   (ps_name, ps_replication_mode))
+
     new_bindings = set()
     for pp_name in vtep_pp_set:
         binding_set = set(vtep_ctl("list-bindings %s %s"
diff --git a/vtep/vtep-ctl.8.in b/vtep/vtep-ctl.8.in
index 129c7ed..6eaf721 100644
--- a/vtep/vtep-ctl.8.in
+++ b/vtep/vtep-ctl.8.in
@@ -124,6 +124,16 @@ Without \fB\-\-if\-exists\fR, attempting to delete a 
switch that does
 not exist is an error.  With \fB\-\-if\-exists\fR, attempting to
 delete a switch that does not exist has no effect.
 .
+.IP "\fBset\-ps\-replication\-mode \fIpswitch replication\-mode\fR"
+Set physical switch \fIpswitch\fR replication mode to \fIreplication\-mode\fR;
+valid replication mode values are "service_node" and "source_node".
+The physical switch \fIpswitch\fR replication mode is inherited by all the
+contained logical switches unless the logical switch later overrides that
+value.  When a logical switch is first bound to a physical switch
+\fIpswitch\fR via bind\-ls, the logical switch inherits the physical switch
+\fIpswitch\fR replication mode.  This can later be overridden at the
+logical switch level.
+.
 .IP "\fBlist\-ps\fR"
 Lists all existing physical switches on standard output, one per line.
 .
@@ -195,6 +205,16 @@ combination on the physical switch \fIpswitch\fR.
 List the logical switch bindings for \fIport\fR on the physical switch
 \fIpswitch\fR.
 .
+.IP "\fBset\-ls\-replication\-mode \fIlswitch replication\-mode\fR"
+Set logical switch \fIlswitch\fR replication mode to \fIreplication\-mode\fR;
+valid values for replication mode are "service_node" and "source_node".
+The physical switch replication mode is inherited by all the contained
+logical switch \fIlswitch\fR unless the logical switch \fIlswitch\fR later
+overrides that value.  When a logical switch \fIlswitch\fR is first bound to
+a physical switch via bind\-ls, the logical switch \fIlswitch\fR inherits the
+physical switch replication mode.  This can later be overridden at the
+logical switch \fIlswitch\fR level.
+.
 .SS "Logical Router Commands"
 These commands examine and manipulate logical routers.
 .
diff --git a/vtep/vtep-ctl.c b/vtep/vtep-ctl.c
index 29d9a17..17d57f7 100644
--- a/vtep/vtep-ctl.c
+++ b/vtep/vtep-ctl.c
@@ -319,6 +319,7 @@ Manager commands:\n\
 Physical Switch commands:\n\
   add-ps PS                   create a new physical switch named PS\n\
   del-ps PS                   delete PS and all of its ports\n\
+  set-ps-replication-mode PS MODE  set replication mode on PS\n\
   list-ps                     print the names of all the physical switches\n\
   ps-exists PS                exit 2 if PS does not exist\n\
 \n\
@@ -335,6 +336,7 @@ Logical Switch commands:\n\
   bind-ls PS PORT VLAN LS     bind LS to VLAN on PORT\n\
   unbind-ls PS PORT VLAN      unbind logical switch on VLAN from PORT\n\
   list-bindings PS PORT       list bindings for PORT on PS\n\
+  set-ls-replication-mode LS MODE  set replication mode on LS\n\
 \n\
 Logical Router commands:\n\
   add-lr LR                   create a new logical router named LR\n\
@@ -846,11 +848,13 @@ pre_get_info(struct ctl_context *ctx)
     ovsdb_idl_add_column(ctx->idl, &vteprec_physical_switch_col_name);
     ovsdb_idl_add_column(ctx->idl, &vteprec_physical_switch_col_ports);
     ovsdb_idl_add_column(ctx->idl, &vteprec_physical_switch_col_tunnels);
+    ovsdb_idl_add_column(ctx->idl, 
&vteprec_physical_switch_col_replication_mode);
 
     ovsdb_idl_add_column(ctx->idl, &vteprec_physical_port_col_name);
     ovsdb_idl_add_column(ctx->idl, &vteprec_physical_port_col_vlan_bindings);
 
     ovsdb_idl_add_column(ctx->idl, &vteprec_logical_switch_col_name);
+    ovsdb_idl_add_column(ctx->idl, 
&vteprec_logical_switch_col_replication_mode);
 
     ovsdb_idl_add_column(ctx->idl, &vteprec_logical_router_col_name);
 
@@ -1206,6 +1210,7 @@ cmd_add_ps(struct ctl_context *ctx)
 
     ps = vteprec_physical_switch_insert(ctx->txn);
     vteprec_physical_switch_set_name(ps, ps_name);
+    vteprec_physical_switch_set_replication_mode(ps, "service_node");
 
     vtep_insert_pswitch(vtepctl_ctx->vtep_global, ps);
 
@@ -1246,6 +1251,25 @@ cmd_del_ps(struct ctl_context *ctx)
 }
 
 static void
+cmd_set_ps_replication_mode(struct ctl_context *ctx)
+{
+    struct vtep_ctl_context *vtepctl_ctx = vtep_ctl_context_cast(ctx);
+    struct vtep_ctl_pswitch *ps;
+
+    vtep_ctl_context_populate_cache(ctx);
+
+    if (strcmp(ctx->argv[2], "service_node") &&
+        strcmp(ctx->argv[2], "source_node")) {
+        ctl_fatal("Replication mode must be 'service node' or 'source_node'");
+    }
+
+    ps = find_pswitch(vtepctl_ctx, ctx->argv[1], true);
+    vteprec_physical_switch_set_replication_mode(ps->ps_cfg, ctx->argv[2]);
+
+    vtep_ctl_context_invalidate_cache(ctx);
+}
+
+static void
 output_sorted(struct svec *svec, struct ds *output)
 {
     const char *name;
@@ -1407,6 +1431,7 @@ cmd_add_ls(struct ctl_context *ctx)
 
     ls = vteprec_logical_switch_insert(ctx->txn);
     vteprec_logical_switch_set_name(ls, ls_name);
+    vteprec_logical_switch_set_replication_mode(ls, "service_node");
 
     vtep_ctl_context_invalidate_cache(ctx);
 }
@@ -1492,6 +1517,8 @@ cmd_bind_ls(struct ctl_context *ctx)
     struct vtep_ctl_lswitch *ls;
     struct vtep_ctl_port *port;
     const char *vlan;
+    struct vtep_ctl_pswitch *ps;
+    const char *ps_name = ctx->argv[1];
 
     vtep_ctl_context_populate_cache(ctx);
 
@@ -1502,6 +1529,15 @@ cmd_bind_ls(struct ctl_context *ctx)
     add_ls_binding_to_cache(port, vlan, ls);
     commit_ls_bindings(port);
 
+    ps = find_pswitch(vtepctl_ctx, ps_name, true);
+
+    if (!ps->ps_cfg->replication_mode) {
+        ctl_fatal("Replication mode not found for physical switch %s",
+                  ps_name);
+    }
+    vteprec_logical_switch_set_replication_mode(ls->ls_cfg,
+        ps->ps_cfg->replication_mode);
+
     vtep_ctl_context_invalidate_cache(ctx);
 }
 
@@ -1523,6 +1559,26 @@ cmd_unbind_ls(struct ctl_context *ctx)
     vtep_ctl_context_invalidate_cache(ctx);
 }
 
+static void
+cmd_set_ls_replication_mode(struct ctl_context *ctx)
+{
+    struct vtep_ctl_context *vtepctl_ctx = vtep_ctl_context_cast(ctx);
+    struct vtep_ctl_lswitch *ls;
+    const char *ls_name = ctx->argv[1];
+
+    vtep_ctl_context_populate_cache(ctx);
+
+    if (strcmp(ctx->argv[2], "service_node") &&
+        strcmp(ctx->argv[2], "source_node")) {
+        ctl_fatal("Replication mode must be 'service node' or 'source_node'");
+    }
+
+    ls = find_lswitch(vtepctl_ctx, ls_name, true);
+    vteprec_logical_switch_set_replication_mode(ls->ls_cfg, ctx->argv[2]);
+
+    vtep_ctl_context_invalidate_cache(ctx);
+}
+
 static struct vtep_ctl_lrouter *
 find_lrouter(struct vtep_ctl_context *vtepctl_ctx,
              const char *name, bool must_exist)
@@ -2443,6 +2499,8 @@ static const struct ctl_command_syntax vtep_commands[] = {
     {"del-ps", 1, 1, NULL, pre_get_info, cmd_del_ps, NULL, "--if-exists", RW},
     {"list-ps", 0, 0, NULL, pre_get_info, cmd_list_ps, NULL, "", RO},
     {"ps-exists", 1, 1, NULL, pre_get_info, cmd_ps_exists, NULL, "", RO},
+    {"set-ps-replication-mode", 2, 2, "PS MODE", pre_get_info,
+        cmd_set_ps_replication_mode, NULL, "", RW},
 
     /* Port commands. */
     {"list-ports", 1, 1, NULL, pre_get_info, cmd_list_ports, NULL, "", RO},
@@ -2459,6 +2517,8 @@ static const struct ctl_command_syntax vtep_commands[] = {
     {"list-bindings", 2, 2, NULL, pre_get_info, cmd_list_bindings, NULL, "", 
RO},
     {"bind-ls", 4, 4, NULL, pre_get_info, cmd_bind_ls, NULL, "", RO},
     {"unbind-ls", 3, 3, NULL, pre_get_info, cmd_unbind_ls, NULL, "", RO},
+    {"set-ls-replication-mode", 2, 2, "LS MODE", pre_get_info,
+        cmd_set_ls_replication_mode, NULL, "", RW},
 
     /* Logical Router commands. */
     {"add-lr", 1, 1, NULL, pre_get_info, cmd_add_lr, NULL, "--may-exist", RW},
diff --git a/vtep/vtep.ovsschema b/vtep/vtep.ovsschema
index 533fd2e..abb83b8 100644
--- a/vtep/vtep.ovsschema
+++ b/vtep/vtep.ovsschema
@@ -1,6 +1,7 @@
 {
   "name": "hardware_vtep",
-  "cksum": "770244945 11113",
+  "version": "1.1.0",
+  "cksum": "213455912 11473",
   "tables": {
     "Global": {
       "columns": {
@@ -28,6 +29,11 @@
         "tunnels": {
           "type": {"key": {"type": "uuid", "refTable": "Tunnel"},
                    "min": 0, "max": "unlimited"}},
+        "replication_mode": {
+          "type": {
+            "key": {
+              "enum": ["set", ["source_node","service_node"]],
+              "type": "string"}}},
         "other_config": {
           "type": {"key": "string", "value": "string",
                    "min": 0, "max": "unlimited"}},
@@ -96,6 +102,11 @@
         "name": {"type": "string"},
         "description": {"type": "string"},
         "tunnel_key": {"type": {"key": "integer", "min": 0, "max": 1}},
+        "replication_mode": {
+          "type": {
+            "key": {
+              "enum": ["set", ["source_node","service_node"]],
+              "type": "string"}}},
         "other_config": {
           "type": {"key": "string", "value": "string",
                    "min": 0, "max": "unlimited"}}},
diff --git a/vtep/vtep.xml b/vtep/vtep.xml
index a3a6988..a70c188 100644
--- a/vtep/vtep.xml
+++ b/vtep/vtep.xml
@@ -304,6 +304,31 @@
       </column>
     </group>
 
+    <group title="Per Physical-Switch Replication Mode">
+      <p>
+        For handling broadcast, multicast (in default manner) and unknown
+        unicast traffic, packets can be sent to all members of a logical
+        switch contained by a physical switch.  There are different modes
+        to replicate the packets.  One mode of replication is to send the
+        traffic to a hypervisor designated to handle such replication and
+        allow that hypervisor to replicate the traffic to other hypervisors.
+        This mode is called service node replication.  A second mode of
+        replication, called source node replication involves the source node
+        sending to all hypervisors. Hypervisors are always responsible for
+        doing their own replication for locally attached VMs in both modes.
+      </p>
+
+      <column name="replication_mode">
+        <p>
+          This column defines the replication mode per
+          <ref table="Physical_Switch"/>.  There are two valid values
+          presently, <code>service_node</code> and <code>source_node</code>.
+          A contained logical switch may reference the value set by the
+          parent physical switch.
+        </p>
+      </column>
+    </group>
+
     <group title="Identification">
       <column name="name">
         Symbolic name for the switch, such as its hostname.
@@ -754,6 +779,30 @@
       </column>
     </group>
 
+    <group title="Per Logical-Switch Replication Mode">
+      <p>
+        For handling broadcast, unknown unicast and multicast traffic
+        (in default manner), packets can be sent to all members of a logical
+        switch.  There are different modes to replicate the packets.  One
+        mode of replication is to send the traffic to a hypervisor designated
+        to handle such replication and allow that hypervisor to replicate the
+        traffic to other hypervisors.  This mode is called service node
+        replication.  A second mode of replication, called source node
+        replication involves the source node sending to all hypervisors.
+        Hypervisors are always responsible for doing their own replication
+        for locally attached VMs in both modes.
+      </p>
+
+      <column name="replication_mode">
+        <p>
+          This column defines the replication mode per
+          <ref table="Logical_Switch"/>.  There are two valid values presently,
+          <code>service_node</code> and <code>source_node</code>.
+        </p>
+
+      </column>
+    </group>
+
     <group title="Identification">
       <column name="name">
         Symbolic name for the logical switch.
@@ -887,8 +936,8 @@
       Multicast packet replication may be handled by a service node,
       in which case the physical locators will be IP addresses of
       service nodes. If the VTEP supports replication onto multiple
-      tunnels, then this may be used to replicate directly onto
-      VTEP-hypervisor tunnels.
+      tunnels, using source node replication, then this may be used to
+      replicate directly onto VTEP-hypervisor tunnels.
     </p>
 
     <column name="MAC">
-- 
1.9.1

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to