On Tue, Jul 28, 2015 at 8:44 AM, Ben Pfaff <b...@nicira.com> wrote:

> Until now, ovn-controller has been full of loops that commit a transaction
> to the OVS or OVN Southbound database.  These blocking loops delay other
> work within ovn-controller.  They also make it unsafe to keep pointers to
> database records within a single ovn-controller main loop, since calls
> to ovsdb_idl_run() can cause IDL records to be destroyed.  This commit
> drops all of the blocking calls, instead doing a single commit to the
> databases at the end of each main loop.
>
> Signed-off-by: Ben Pfaff <b...@nicira.com>
> Acked-by: Russell Bryant <rbry...@redhat.com>
> ---
> v1->v2: Rename idl_loop_wait() to idl_loop_commit_and_wait().
> ---
>  ovn/controller/binding.c        |  61 ++++++++------------
>  ovn/controller/binding.h        |   4 +-
>  ovn/controller/chassis.c        | 117
> ++++++++++++++------------------------
>  ovn/controller/chassis.h        |   4 +-
>  ovn/controller/ovn-controller.c | 123
> +++++++++++++++++++++++++++++++++++++---
>  ovn/controller/ovn-controller.h |   4 ++
>  6 files changed, 188 insertions(+), 125 deletions(-)
>
> diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
> index 0a4a39e..6af216c 100644
> --- a/ovn/controller/binding.c
> +++ b/ovn/controller/binding.c
> @@ -76,10 +76,12 @@ binding_run(struct controller_ctx *ctx)
>  {
>      const struct sbrec_chassis *chassis_rec;
>      const struct sbrec_binding *binding_rec;
> -    struct ovsdb_idl_txn *txn;
>      struct sset lports, all_lports;
>      const char *name;
> -    int retval;
> +
> +    if (!ctx->ovnsb_idl_txn) {
> +        return;
> +    }
>
>      chassis_rec = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
>      if (!chassis_rec) {
> @@ -91,8 +93,7 @@ binding_run(struct controller_ctx *ctx)
>      get_local_iface_ids(ctx, &lports);
>      sset_clone(&all_lports, &lports);
>
> -    txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
> -    ovsdb_idl_txn_add_comment(txn,
> +    ovsdb_idl_txn_add_comment(ctx->ovnsb_idl_txn,
>                                "ovn-controller: updating bindings for
> '%s'",
>                                ctx->chassis_id);
>
> @@ -115,14 +116,6 @@ binding_run(struct controller_ctx *ctx)
>          }
>      }
>
> -    retval = ovsdb_idl_txn_commit_block(txn);
> -    if (retval == TXN_ERROR) {
> -        VLOG_INFO("Problem committing binding information: %s",
> -                  ovsdb_idl_txn_status_to_string(retval));
> -    }
> -
> -    ovsdb_idl_txn_destroy(txn);
> -
>      SSET_FOR_EACH (name, &lports) {
>          VLOG_DBG("No binding record for lport %s", name);
>      }
> @@ -130,40 +123,32 @@ binding_run(struct controller_ctx *ctx)
>      sset_destroy(&all_lports);
>  }
>
> -void
> -binding_destroy(struct controller_ctx *ctx)
> +/* Returns true if the database is all cleaned up, false if more work is
> + * required. */
> +bool
> +binding_cleanup(struct controller_ctx *ctx)
>  {
> -    const struct sbrec_chassis *chassis_rec;
> -    int retval = TXN_TRY_AGAIN;
> -
> -    ovs_assert(ctx->ovnsb_idl);
> +    if (!ctx->ovnsb_idl_txn) {
> +        return false;
> +    }
>
> -    chassis_rec = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
> +    const struct sbrec_chassis *chassis_rec
> +        = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
>      if (!chassis_rec) {
> -        return;
> +        return true;
>      }
>
> -    while (retval != TXN_SUCCESS && retval != TXN_UNCHANGED) {
> -        const struct sbrec_binding *binding_rec;
> -        struct ovsdb_idl_txn *txn;
> -
> -        txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
> -        ovsdb_idl_txn_add_comment(txn,
> +    ovsdb_idl_txn_add_comment(ctx->ovnsb_idl_txn,
>                                "ovn-controller: removing all bindings for
> '%s'",
>                                ctx->chassis_id);
>
> -        SBREC_BINDING_FOR_EACH(binding_rec, ctx->ovnsb_idl) {
> -            if (binding_rec->chassis == chassis_rec) {
> -                sbrec_binding_set_chassis(binding_rec, NULL);
> -            }
> -        }
> -
> -        retval = ovsdb_idl_txn_commit_block(txn);
> -        if (retval == TXN_ERROR) {
> -            VLOG_INFO("Problem removing bindings: %s",
> -                      ovsdb_idl_txn_status_to_string(retval));
> +    const struct sbrec_binding *binding_rec;
> +    bool any_changes = false;
> +    SBREC_BINDING_FOR_EACH(binding_rec, ctx->ovnsb_idl) {
> +        if (binding_rec->chassis == chassis_rec) {
> +            sbrec_binding_set_chassis(binding_rec, NULL);
> +            any_changes = true;
>          }
> -
> -        ovsdb_idl_txn_destroy(txn);
>      }
> +    return !any_changes;
>  }
> diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
> index 2611173..e73c1d1 100644
> --- a/ovn/controller/binding.h
> +++ b/ovn/controller/binding.h
> @@ -17,10 +17,12 @@
>  #ifndef OVN_BINDING_H
>  #define OVN_BINDING_H 1
>
> +#include <stdbool.h>
> +
>  struct controller_ctx;
>
>  void binding_init(struct controller_ctx *);
>  void binding_run(struct controller_ctx *);
> -void binding_destroy(struct controller_ctx *);
> +bool binding_cleanup(struct controller_ctx *);
>
>  #endif /* ovn/binding.h */
> diff --git a/ovn/controller/chassis.c b/ovn/controller/chassis.c
> index cf18dd0..9d2959a 100644
> --- a/ovn/controller/chassis.c
> +++ b/ovn/controller/chassis.c
> @@ -52,8 +52,6 @@ register_chassis(struct controller_ctx *ctx)
>      const char *encap_type, *encap_ip;
>      struct sbrec_encap *encap_rec;
>      static bool inited = false;
> -    int retval = TXN_TRY_AGAIN;
> -    struct ovsdb_idl_txn *txn;
>
>      chassis_rec = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
>
> @@ -92,31 +90,22 @@ register_chassis(struct controller_ctx *ctx)
>          }
>      }
>
> -    txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
> -    ovsdb_idl_txn_add_comment(txn,
> +    ovsdb_idl_txn_add_comment(ctx->ovnsb_idl_txn,
>                                "ovn-controller: registering chassis '%s'",
>                                ctx->chassis_id);
>
>      if (!chassis_rec) {
> -        chassis_rec = sbrec_chassis_insert(txn);
> +        chassis_rec = sbrec_chassis_insert(ctx->ovnsb_idl_txn);
>          sbrec_chassis_set_name(chassis_rec, ctx->chassis_id);
>      }
>
> -    encap_rec = sbrec_encap_insert(txn);
> +    encap_rec = sbrec_encap_insert(ctx->ovnsb_idl_txn);
>
>      sbrec_encap_set_type(encap_rec, encap_type);
>      sbrec_encap_set_ip(encap_rec, encap_ip);
>
>      sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1);
>
> -    retval = ovsdb_idl_txn_commit_block(txn);
> -    if (retval != TXN_SUCCESS && retval != TXN_UNCHANGED) {
> -        VLOG_INFO("Problem registering chassis: %s",
> -                  ovsdb_idl_txn_status_to_string(retval));
> -        poll_immediate_wake();
> -    }
> -    ovsdb_idl_txn_destroy(txn);
> -
>      inited = true;
>  }
>
> @@ -311,7 +300,6 @@ update_encaps(struct controller_ctx *ctx)
>  {
>      const struct sbrec_chassis *chassis_rec;
>      const struct ovsrec_bridge *br;
> -    int retval;
>
>      struct tunnel_ctx tc = {
>          .tunnel_hmap = HMAP_INITIALIZER(&tc.tunnel_hmap),
> @@ -319,7 +307,7 @@ update_encaps(struct controller_ctx *ctx)
>          .br_int = ctx->br_int
>      };
>
> -    tc.ovs_txn = ovsdb_idl_txn_create(ctx->ovs_idl);
> +    tc.ovs_txn = ctx->ovs_idl_txn;
>      ovsdb_idl_txn_add_comment(tc.ovs_txn,
>                                "ovn-controller: modifying OVS tunnels
> '%s'",
>                                ctx->chassis_id);
> @@ -366,81 +354,58 @@ update_encaps(struct controller_ctx *ctx)
>      }
>      hmap_destroy(&tc.tunnel_hmap);
>      sset_destroy(&tc.port_names);
> -
> -    retval = ovsdb_idl_txn_commit_block(tc.ovs_txn);
> -    if (retval != TXN_SUCCESS && retval != TXN_UNCHANGED) {
> -        VLOG_INFO("Problem modifying OVS tunnels: %s",
> -                  ovsdb_idl_txn_status_to_string(retval));
> -        poll_immediate_wake();
> -    }
> -    ovsdb_idl_txn_destroy(tc.ovs_txn);
>  }
>
>  void
>  chassis_run(struct controller_ctx *ctx)
>  {
> -    register_chassis(ctx);
> -    update_encaps(ctx);
> +    if (ctx->ovnsb_idl_txn) {
> +        register_chassis(ctx);
> +    }
> +
> +    if (ctx->ovs_idl_txn) {
> +        update_encaps(ctx);
> +    }
>  }
>
> -void
> -chassis_destroy(struct controller_ctx *ctx)
> +/* Returns true if the database is all cleaned up, false if more work is
> + * required. */
> +bool
> +chassis_cleanup(struct controller_ctx *ctx)
>  {
> -    int retval = TXN_TRY_AGAIN;
> -
> -    ovs_assert(ctx->ovnsb_idl);
> -
> -    while (retval != TXN_SUCCESS && retval != TXN_UNCHANGED) {
> -        const struct sbrec_chassis *chassis_rec;
> -        struct ovsdb_idl_txn *txn;
> +    if (!ctx->ovnsb_idl_txn || !ctx->ovs_idl_txn) {
> +        return false;
> +    }
>
> -        chassis_rec = get_chassis_by_name(ctx->ovnsb_idl,
> ctx->chassis_id);
> -        if (!chassis_rec) {
> -            break;
> -        }
> +    bool any_changes = false;
>
> -        txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
> -        ovsdb_idl_txn_add_comment(txn,
> +    /* Delete Chassis row. */
> +    const struct sbrec_chassis *chassis_rec
> +        = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
> +    if (chassis_rec) {
> +        ovsdb_idl_txn_add_comment(ctx->ovnsb_idl_txn,
>                                    "ovn-controller: unregistering chassis
> '%s'",
>                                    ctx->chassis_id);
>          sbrec_chassis_delete(chassis_rec);
> -
> -        retval = ovsdb_idl_txn_commit_block(txn);
> -        if (retval == TXN_ERROR) {
> -            VLOG_INFO("Problem unregistering chassis: %s",
> -                      ovsdb_idl_txn_status_to_string(retval));
> -        }
> -        ovsdb_idl_txn_destroy(txn);
> +        any_changes = true;
>      }
>
> -    retval = TXN_TRY_AGAIN;
> -    while (retval != TXN_SUCCESS && retval != TXN_UNCHANGED) {
> -        struct ovsrec_port **ports;
> -        struct ovsdb_idl_txn *txn;
> -        size_t i, n;
> -
> -        txn = ovsdb_idl_txn_create(ctx->ovs_idl);
> -        ovsdb_idl_txn_add_comment(txn,
> -                                  "ovn-controller: destroying tunnels");
> -
> -        /* Delete all the OVS-created tunnels from the integration
> -         * bridge. */
> -        ports = xmalloc(sizeof *ctx->br_int->ports *
> ctx->br_int->n_ports);
> -        for (i = n = 0; i < ctx->br_int->n_ports; i++) {
> -            if (!smap_get(&ctx->br_int->ports[i]->external_ids,
> -                          "ovn-chassis-id")) {
> -                ports[n++] = ctx->br_int->ports[i];
> -            }
> -        }
> -        ovsrec_bridge_verify_ports(ctx->br_int);
> -        ovsrec_bridge_set_ports(ctx->br_int, ports, n);
> -        free(ports);
> -
> -        retval = ovsdb_idl_txn_commit_block(txn);
> -        if (retval == TXN_ERROR) {
> -            VLOG_INFO("Problem destroying tunnels: %s",
> -                      ovsdb_idl_txn_status_to_string(retval));
> +    /* Delete all the OVS-created tunnels from the integration bridge. */
> +    ovsdb_idl_txn_add_comment(ctx->ovs_idl_txn,
> +                              "ovn-controller: destroying tunnels");
> +    struct ovsrec_port **ports
> +        = xmalloc(sizeof *ctx->br_int->ports * ctx->br_int->n_ports);
> +    size_t n = 0;
> +    for (size_t i = 0; i < ctx->br_int->n_ports; i++) {
> +        if (!smap_get(&ctx->br_int->ports[i]->external_ids,
> +                      "ovn-chassis-id")) {
> +            ports[n++] = ctx->br_int->ports[i];
> +            any_changes = true;
>          }
> -        ovsdb_idl_txn_destroy(txn);
>      }
> +    ovsrec_bridge_verify_ports(ctx->br_int);
> +    ovsrec_bridge_set_ports(ctx->br_int, ports, n);
> +    free(ports);
> +
> +    return !any_changes;
>  }
> diff --git a/ovn/controller/chassis.h b/ovn/controller/chassis.h
> index aee701b..851a300 100644
> --- a/ovn/controller/chassis.h
> +++ b/ovn/controller/chassis.h
> @@ -16,10 +16,12 @@
>  #ifndef OVN_CHASSIS_H
>  #define OVN_CHASSIS_H 1
>
> +#include <stdbool.h>
> +
>  struct controller_ctx;
>
>  void chassis_init(struct controller_ctx *);
>  void chassis_run(struct controller_ctx *);
> -void chassis_destroy(struct controller_ctx *);
> +bool chassis_cleanup(struct controller_ctx *);
>
>  #endif /* ovn/chassis.h */
> diff --git a/ovn/controller/ovn-controller.c
> b/ovn/controller/ovn-controller.c
> index 6f94187..962d6de 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -145,6 +145,78 @@ try_again:
>
>  }
>
> +struct idl_loop {
> +    struct ovsdb_idl *idl;
> +    unsigned int skip_seqno;
> +
> +    struct ovsdb_idl_txn *committing_txn;
> +    unsigned int precommit_seqno;
> +
> +    struct ovsdb_idl_txn *open_txn;
> +};
> +
> +#define IDL_LOOP_INITIALIZER(IDL) { .idl = (IDL) }
> +
> +static void
> +idl_loop_destroy(struct idl_loop *loop)
> +{
> +    if (loop) {
> +        ovsdb_idl_destroy(loop->idl);
> +    }
> +}
> +
> +static struct ovsdb_idl_txn *
> +idl_loop_run(struct idl_loop *loop)
> +{
> +    ovsdb_idl_run(loop->idl);
> +    loop->open_txn = (loop->committing_txn
> +                      || ovsdb_idl_get_seqno(loop->idl) ==
> loop->skip_seqno
> +                      ? NULL
> +                      : ovsdb_idl_txn_create(loop->idl));
> +    return loop->open_txn;
> +}
> +
> +static void
> +idl_loop_commit_and_wait(struct idl_loop *loop)
> +{
> +    if (loop->open_txn) {
> +        loop->committing_txn = loop->open_txn;
> +        loop->open_txn = NULL;
> +
> +        loop->precommit_seqno = ovsdb_idl_get_seqno(loop->idl);
> +    }
> +
> +    struct ovsdb_idl_txn *txn = loop->committing_txn;
> +    if (txn) {
> +        enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(txn);
> +        switch (status) {
> +        case TXN_INCOMPLETE:
> +            break;
> +
> +        case TXN_TRY_AGAIN:
> +            loop->skip_seqno = loop->precommit_seqno;
> +            if (ovsdb_idl_get_seqno(loop->idl) != loop->skip_seqno) {
> +                poll_immediate_wake();
> +            }
> +            /* Fall through. */
> +        case TXN_UNCHANGED:
> +        case TXN_ABORTED:
> +        case TXN_SUCCESS:
> +        case TXN_NOT_LOCKED:
> +        case TXN_ERROR:
> +            ovsdb_idl_txn_destroy(txn);
> +            loop->committing_txn = NULL;
> +            break;
> +
> +        case TXN_UNCOMMITTED:
> +            OVS_NOT_REACHED();
> +
> +        }
> +    }
> +
> +    ovsdb_idl_wait(loop->idl);
> +}
>


Curious here, if we could run into the following race (in main function):

1. first run of while loop: we have some change from ovnsb~
2. ovn-controller execute all *_run() functions to update~
3. the txn commit to ovsdb is INCOMPLETE~ and at the same time, there is
another
    change from ovnsb~
4. second run of while loop: the next call to
idl_loop_run()->ovsdb_idl_run() will update the
    local idl.  however, since the 'idl_loop' has 'committing_txn', the
*_run() will be no-op.
5. assume this time, the commit to ovsdb finishes.  since the local idl has
already been
    updated, ovn-controller will call poll_block() with no immediate
wakeup...
6. so, the second change to ovnsb will only be checked the next time there
is a new update
    from ovnsb.

Hope this make sense,

Thanks,
Alex Wang,



> +
>  int
>  main(int argc, char *argv[])
>  {
> @@ -195,10 +267,14 @@ main(int argc, char *argv[])
>                                       true, true);
>      get_initial_snapshot(ctx.ovnsb_idl);
>
> +    struct idl_loop ovnsb_idl_loop = IDL_LOOP_INITIALIZER(ctx.ovnsb_idl);
> +    struct idl_loop ovs_idl_loop = IDL_LOOP_INITIALIZER(ctx.ovs_idl);
> +
> +    /* Main loop. */
>      exiting = false;
>      while (!exiting) {
> -        ovsdb_idl_run(ctx.ovs_idl);
> -        ovsdb_idl_run(ctx.ovnsb_idl);
> +        ctx.ovnsb_idl_txn = idl_loop_run(&ovnsb_idl_loop);
> +        ctx.ovs_idl_txn = idl_loop_run(&ovs_idl_loop);
>
>          /* xxx If run into any surprising changes, we exit.  We should
>           * xxx handle this more gracefully. */
> @@ -207,7 +283,7 @@ main(int argc, char *argv[])
>              VLOG_ERR("Integration bridge '%s' disappeared",
>                       ctx.br_int_name);
>              retval = EXIT_FAILURE;
> -            break;
> +            goto exit;
>          }
>
>          ofctrl_clear_flows();
> @@ -224,20 +300,49 @@ main(int argc, char *argv[])
>              poll_immediate_wake();
>          }
>
> -        ovsdb_idl_wait(ctx.ovs_idl);
> -        ovsdb_idl_wait(ctx.ovnsb_idl);
> +        idl_loop_commit_and_wait(&ovnsb_idl_loop);
> +        idl_loop_commit_and_wait(&ovs_idl_loop);
> +
>          ofctrl_wait();
>          poll_block();
>      }
>
> +    /* It's time to exit.  Clean up the databases. */
> +    bool done = false;
> +    while (!done) {
> +        ctx.ovnsb_idl_txn = idl_loop_run(&ovnsb_idl_loop);
> +        ctx.ovs_idl_txn = idl_loop_run(&ovs_idl_loop);
> +
> +        /* xxx If run into any surprising changes, we exit.  We should
> +         * xxx handle this more gracefully. */
> +        ctx.br_int = get_bridge(&ctx, ctx.br_int_name);
> +        if (!ctx.br_int) {
> +            VLOG_ERR("Integration bridge '%s' disappeared",
> +                     ctx.br_int_name);
> +            retval = EXIT_FAILURE;
> +            goto exit;
> +        }
> +
> +        /* Run both the binding and chassis cleanup, even if one of them
> +         * returns false.  We're done if both return true. */
> +        done = binding_cleanup(&ctx);
> +        done = chassis_cleanup(&ctx) && done;
> +        if (done) {
> +            poll_immediate_wake();
> +        }
> +
> +        idl_loop_commit_and_wait(&ovnsb_idl_loop);
> +        idl_loop_commit_and_wait(&ovs_idl_loop);
> +        poll_block();
> +    }
> +
> +exit:
>      unixctl_server_destroy(unixctl);
>      pipeline_destroy(&ctx);
>      ofctrl_destroy();
> -    binding_destroy(&ctx);
> -    chassis_destroy(&ctx);
>
> -    ovsdb_idl_destroy(ctx.ovs_idl);
> -    ovsdb_idl_destroy(ctx.ovnsb_idl);
> +    idl_loop_destroy(&ovs_idl_loop);
> +    idl_loop_destroy(&ovnsb_idl_loop);
>
>      free(ctx.br_int_name);
>      free(ctx.chassis_id);
> diff --git a/ovn/controller/ovn-controller.h
> b/ovn/controller/ovn-controller.h
> index 6f98658..b72d891 100644
> --- a/ovn/controller/ovn-controller.h
> +++ b/ovn/controller/ovn-controller.h
> @@ -22,8 +22,12 @@
>  struct controller_ctx {
>      char *chassis_id;               /* ID for this chassis. */
>      char *br_int_name;              /* Name of local integration bridge.
> */
> +
>      struct ovsdb_idl *ovnsb_idl;
> +    struct ovsdb_idl_txn *ovnsb_idl_txn;
> +
>      struct ovsdb_idl *ovs_idl;
> +    struct ovsdb_idl_txn *ovs_idl_txn;
>
>      const struct ovsrec_bridge *br_int;
>  };
> --
> 2.1.3
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to