Numan, Hopefully I don't embarrass myself too much here, but here goes :) . If we implemented a separate thread for performing DNS lookups would that work ?
diff --git a/controller/pinctrl.c b/controller/pinctrl.c index c86b4f940..aaa316542 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -189,6 +189,27 @@ static struct pinctrl pinctrl; static void init_buffered_packets_ctx(void); static void destroy_buffered_packets_ctx(void); + +/* DNS thread */ +static struct latch dns_thread_exit; +static pthread_t dns_thread; + +/* DNS query queue */ +struct dns_query { + struct dp_packet packet; + struct ofputil_packet_in pin; + struct ofpbuf userdata; + struct ofpbuf *continuation; + struct ovs_list node; +}; + +static struct ovs_list dns_query_queue = OVS_LIST_INITIALIZER(&dns_query_queue); +static struct ovs_mutex dns_queue_mutex = OVS_MUTEX_INITIALIZER; +static struct seq *dns_seq; + +static void dns_queue_push(struct dns_query *query); +static struct dns_query *dns_queue_pop(void); +static void *dns_lookup_thread(void *arg OVS_UNUSED); static void run_buffered_binding(const struct sbrec_mac_binding_table *mac_binding_table, struct ovsdb_idl_index *sbrec_port_binding_by_key, @@ -572,6 +593,10 @@ pinctrl_init(void) latch_init(&pinctrl.pinctrl_thread_exit); pinctrl.pinctrl_thread = ovs_thread_create("ovn_pinctrl", pinctrl_handler, &pinctrl); + latch_init(&dns_thread_exit); + dns_seq = seq_create(); + pinctrl.dns_thread = ovs_thread_create("dns_lookup", dns_lookup_thread, NULL); + } static ovs_be32 @@ -3435,6 +3460,51 @@ dns_build_ptr_answer( #define DNS_RCODE_SERVER_REFUSE 0x5 #define DNS_QUERY_TYPE_CLASS_LEN (2 * sizeof(ovs_be16)) +static void * +dns_lookup_thread(void *arg OVS_UNUSED) +{ + while (!latch_is_set(&dns_thread_exit)) { + struct dns_query *query = dns_queue_pop(); + if (query) { + bool processed = pinctrl_handle_dns_lookup(swconn, &query->packet, + &query->pin, &query->userdata, + query->continuation); + if (!processed) { + queue_msg(swconn, ofputil_encode_resume(&query->pin, + query->continuation, + ofputil_protocol_from_ofp_version( + rconn_get_version(swconn)))); + } + ofpbuf_delete(query->continuation); + free(query); + } + poll_block(); + } + return NULL; +} + +static void +dns_queue_push(struct dns_query *query) +{ + ovs_mutex_lock(&dns_queue_mutex); + ovs_list_push_back(&dns_query_queue, &query->node); + seq_change(dns_seq); + ovs_mutex_unlock(&dns_queue_mutex); +} + +static struct dns_query * +dns_queue_pop(void) +{ + struct dns_query *query = NULL; + ovs_mutex_lock(&dns_queue_mutex); + if (!ovs_list_is_empty(&dns_query_queue)) { + query = CONTAINER_OF(ovs_list_pop_front(&dns_query_queue), struct dns_query, node); + } + ovs_mutex_unlock(&dns_queue_mutex); + return query; +} + + /* Called with in the pinctrl_handler thread context. */ static void pinctrl_handle_dns_lookup( @@ -3804,11 +3874,16 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) break; case ACTION_OPCODE_DNS_LOOKUP: - ovs_mutex_lock(&pinctrl_mutex); - pinctrl_handle_dns_lookup(swconn, &packet, &pin, &userdata, - &continuation); - ovs_mutex_unlock(&pinctrl_mutex); - break; + { + struct dns_query *query = xmalloc(sizeof *query); + dp_packet_clone(&query->packet, &packet); + query->pin = pin; + ofpbuf_clone(&query->userdata, &userdata); + query->continuation = continuation ? ofpbuf_clone(continuation) : NULL; + dns_queue_push(query); + poll_immediate_wake(); + } + break; case ACTION_OPCODE_LOG: handle_acl_log(&headers, &userdata, On Fri, 4 Oct 2024 at 15:59, Gavin McKee <gavmcke...@googlemail.com> wrote: > > Hi Numan, > > Yes , the DNS packets are sent by VMs . We have DNS records for these VMs so > the controller is catching all DNS requests and failing to respond in a > timely manner. > > I think this relates to the fact that there is no incremental handler for SB > DNS. > > In short the blocking thread causes us all sorts of problems . > > Hope that makes sense > > Gav > > On Fri, Oct 4, 2024 at 15:34 Numan Siddique <num...@ovn.org> wrote: >> >> On Fri, Oct 4, 2024 at 5:50 PM Gavin McKee via discuss >> <ovs-discuss@openvswitch.org> wrote: >> > >> > Hi, >> > >> > We currently experience DNS related timeouts when the ovn controller >> > is under high load. >> > >> > Will this commit ensure that DNS packets needing to be forwarded to an >> > external resolver will be processed in a timely manner and not lead to >> > timeouts? >> > https://github.com/ovn-org/ovn/commit/762ae66cd70efa149d91d35305fcef0040e9addd >> > >> >> This particular patch is useful when the ovn-controller calls >> dns_resolve() of ovs/lib/dns_resolve.c [1] >> to resolve a dns entry. >> >> In your case, what are these DNS packets? Are these sent by VMs and >> ovn-controller pinctrl thread handles >> these requests and its blocking due to pinctrl_mutex ? >> >> If so, I think we need to find a solution for this problem, but the >> above commit will definitely not help you. >> Maybe we can maintain a separate mutex to protect the dns_cache [2] so >> that even if ovn-controller main thread >> is busy (and not updating the internal dns cache), the DNS packets >> from the VMs can be handled without any delay >> in the pinctrl thread. >> >> [1] - https://github.com/openvswitch/ovs/blob/main/lib/dns-resolve.c#L148 >> [2] - https://github.com/ovn-org/ovn/blob/main/controller/pinctrl.c#L3288 >> >> >> Thanks >> Numan >> >> > We currently run OVN 23.09.04. >> > >> > Thanks >> > >> > Gav >> > _______________________________________________ >> > discuss mailing list >> > disc...@openvswitch.org >> > https://mail.openvswitch.org/mailman/listinfo/ovs-discuss >> > _______________________________________________ discuss mailing list disc...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-discuss