"Plato, Michael" <michael.pl...@tu-berlin.de> writes:

> Hi Paolo,
> I installed the patch for 2.17 on april 6th in our test environment and can 
> confirm that it works. We haven't had any crashes since then. Many thanks for 
> the quick solution!
>

Hi Micheal,

Nice! That's helpful. Thanks for testing it.

Paolo

> Best regards
>
> Michael
>
> -----Ursprüngliche Nachricht-----
> Von: Paolo Valerio <pvale...@redhat.com> 
> Gesendet: Montag, 17. April 2023 10:36
> An: Lazuardi Nasution <mrxlazuar...@gmail.com>
> Cc: ovs-discuss@openvswitch.org; Plato, Michael <michael.pl...@tu-berlin.de>
> Betreff: Re: Re: [ovs-discuss] ovs-vswitchd crashes serveral times a day
>
> Lazuardi Nasution <mrxlazuar...@gmail.com> writes:
>
>> Hi Paolo,
>>
>> I'm interested in your statement of "expired connections (but not yet 
>> reclaimed)". Do you think that shortening conntrack timeout policy will help?
>> Or, should we make it larger so there will be fewer conntrack table 
>> update and flush attempts?
>>
>
> it's hard to say as it depends on the specific use case.
> Probably making it larger for the specific case could help, but in general, I 
> would not rely on that.
> Of course, an actual fix is needed. It would be great if the patch sent could 
> tested, but in any case, I'll work on a formal patch.
>
>> Best regards.
>>
>> On Wed, Apr 5, 2023 at 2:51 AM Paolo Valerio <pvale...@redhat.com> wrote:
>>
>>     Hello,
>>
>>     thanks for reporting this.
>>     I had a look at it, and, although this needs to be confirmed, I suspect
>>     it's related to nat (CT_CONN_TYPE_UN_NAT) and expired connections (but
>>     not yet reclaimed).
>>
>>     The nat part does not necessarily perform any actual translation, but
>>     could still be triggered by ct(nat(src)...) which is the all-zero binding
>>     to avoid collisions, if any.
>>
>>     Is there any chance to test the following patch (targeted for ovs 2.17)?
>>     This should help to confirm.
>>
>>     -- >8 --
>>     diff --git a/lib/conntrack.c b/lib/conntrack.c
>>     index 08da4ddf7..ba334afb0 100644
>>     --- a/lib/conntrack.c
>>     +++ b/lib/conntrack.c
>>     @@ -94,9 +94,8 @@ static bool valid_new(struct dp_packet *pkt, struct
>>     conn_key *);
>>      static struct conn *new_conn(struct conntrack *ct, struct dp_packet 
>> *pkt,
>>                                   struct conn_key *, long long now,
>>                                   uint32_t tp_id);
>>     -static void delete_conn_cmn(struct conn *);
>>     +static void delete_conn__(struct conn *);
>>      static void delete_conn(struct conn *);
>>     -static void delete_conn_one(struct conn *conn);
>>      static enum ct_update_res conn_update(struct conntrack *ct, struct conn
>>     *conn,
>>                                            struct dp_packet *pkt,
>>                                            struct conn_lookup_ctx *ctx,
>>     @@ -444,14 +443,13 @@ zone_limit_delete(struct conntrack *ct, uint16_t
>>     zone)
>>      }
>>
>>      static void
>>     -conn_clean_cmn(struct conntrack *ct, struct conn *conn)
>>     +conn_clean_cmn(struct conntrack *ct, struct conn *conn, uint32_t hash)
>>          OVS_REQUIRES(ct->ct_lock)
>>      {
>>          if (conn->alg) {
>>              expectation_clean(ct, &conn->key);
>>          }
>>
>>     -    uint32_t hash = conn_key_hash(&conn->key, ct->hash_basis);
>>          cmap_remove(&ct->conns, &conn->cm_node, hash);
>>
>>          struct zone_limit *zl = zone_limit_lookup(ct, conn->admit_zone);
>>     @@ -467,11 +465,14 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>>          OVS_REQUIRES(ct->ct_lock)
>>      {
>>          ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>     +    uint32_t conn_hash = conn_key_hash(&conn->key, 
>> ct->hash_basis);
>>
>>     -    conn_clean_cmn(ct, conn);
>>     +    conn_clean_cmn(ct, conn, conn_hash);
>>          if (conn->nat_conn) {
>>              uint32_t hash = conn_key_hash(&conn->nat_conn->key, ct->
>>     hash_basis);
>>     -        cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
>>     +        if (conn_hash != hash) {
>>     +            cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash);
>>     +        }
>>          }
>>          ovs_list_remove(&conn->exp_node);
>>          conn->cleaned = true;
>>     @@ -479,19 +480,6 @@ conn_clean(struct conntrack *ct, struct conn *conn)
>>          atomic_count_dec(&ct->n_conn);
>>      }
>>
>>     -static void
>>     -conn_clean_one(struct conntrack *ct, struct conn *conn)
>>     -    OVS_REQUIRES(ct->ct_lock)
>>     -{
>>     -    conn_clean_cmn(ct, conn);
>>     -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>>     -        ovs_list_remove(&conn->exp_node);
>>     -        conn->cleaned = true;
>>     -        atomic_count_dec(&ct->n_conn);
>>     -    }
>>     -    ovsrcu_postpone(delete_conn_one, conn);
>>     -}
>>     -
>>      /* Destroys the connection tracker 'ct' and frees all the allocated
>>     memory.
>>       * The caller of this function must already have shut down packet input
>>       * and PMD threads (which would have been quiesced).  */
>>     @@ -505,7 +493,10 @@ conntrack_destroy(struct conntrack *ct)
>>
>>          ovs_mutex_lock(&ct->ct_lock);
>>          CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
>>     -        conn_clean_one(ct, conn);
>>     +        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
>>     +            continue;
>>     +        }
>>     +        conn_clean(ct, conn);
>>          }
>>          cmap_destroy(&ct->conns);
>>
>>     @@ -1052,7 +1043,10 @@ conn_not_found(struct conntrack *ct, struct
>>     dp_packet *pkt,
>>                  nat_conn->alg = NULL;
>>                  nat_conn->nat_conn = NULL;
>>                  uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->
>>     hash_basis);
>>     -            cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
>>     +
>>     +            if (nat_hash != ctx->hash) {
>>     +                cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash);
>>     +            }
>>              }
>>
>>              nc->nat_conn = nat_conn;
>>     @@ -1080,7 +1074,7 @@ conn_not_found(struct conntrack *ct, struct 
>> dp_packet
>>     *pkt,
>>      nat_res_exhaustion:
>>          free(nat_conn);
>>          ovs_list_remove(&nc->exp_node);
>>     -    delete_conn_cmn(nc);
>>     +    delete_conn__(nc);
>>          static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
>>          VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - "
>>                       "if DoS attack, use firewalling and/or zone
>>     partitioning.");
>>     @@ -2549,7 +2543,7 @@ new_conn(struct conntrack *ct, struct dp_packet 
>> *pkt,
>>     struct conn_key *key,
>>      }
>>
>>      static void
>>     -delete_conn_cmn(struct conn *conn)
>>     +delete_conn__(struct conn *conn)
>>      {
>>          free(conn->alg);
>>          free(conn);
>>     @@ -2561,17 +2555,7 @@ delete_conn(struct conn *conn)
>>          ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT);
>>          ovs_mutex_destroy(&conn->lock);
>>          free(conn->nat_conn);
>>     -    delete_conn_cmn(conn);
>>     -}
>>     -
>>     -/* Only used by conn_clean_one(). */
>>     -static void
>>     -delete_conn_one(struct conn *conn)
>>     -{
>>     -    if (conn->conn_type == CT_CONN_TYPE_DEFAULT) {
>>     -        ovs_mutex_destroy(&conn->lock);
>>     -    }
>>     -    delete_conn_cmn(conn);
>>     +    delete_conn__(conn);
>>      }
>>
>>      /* Convert a conntrack address 'a' into an IP address 'b' based on
>>     'dl_type'.
>>     @@ -2747,8 +2731,12 @@ conntrack_flush(struct conntrack *ct, const 
>> uint16_t
>>     *zone)
>>
>>          ovs_mutex_lock(&ct->ct_lock);
>>          CMAP_FOR_EACH (conn, cm_node, &ct->conns) {
>>     +        if (conn->conn_type == CT_CONN_TYPE_UN_NAT) {
>>     +            continue;
>>     +        }
>>     +
>>              if (!zone || *zone == conn->key.zone) {
>>     -            conn_clean_one(ct, conn);
>>     +            conn_clean(ct, conn);
>>              }
>>          }
>>          ovs_mutex_unlock(&ct->ct_lock);

_______________________________________________
discuss mailing list
disc...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-discuss

Reply via email to