Turns out I found a way to trigger "bad packet: too short" messages by
suspending / resuming my laptop in just the right spot. That allowed me
to figure out what's going on:
When asr fails (i.e. the stub strategy) it obviously does not synthesize
a SERVFAIL DNS packet for us like libunbound does. resolve_done() and
check_resolver_done() however depended on this. Since we are not looking
at the packet in case of SERVFAIL there is no need to check the packet
size and then complain about this, we can just bail out early.

So we first need to fix that asr errors are propagated up as rcode
SERVFAIL and then use the rcode as the first check in resolve_done() /
check_resolver_done().

I'm not sure where the "too long" errors are coming from, I suspect
there is an error path in libunbound that reports a wrong length because
it doesn't reset the internal sldns_buffer. I'm cautiously optimistic
that those errors are also caught by this diff because rcode might just
be set correctly.

OK?

commit c25ea4620c5ed21a5d12556fafba1f1ac22842e3
Author: Florian Obser <flor...@narrans.de>
Date:   Sat Apr 15 11:41:42 2023 +0200

    Improve asr error handling.
    
    When an upstream nameserver is not available asr is not synthesizing a
    SERVFAIL rcode (duh), but sets ar_errno. When we need SERVFAIL we need
    to set it ourselves.
    
    While here, don't complain about a too short packet when asr already
    told us that resolving did not work out in check_dns64_done.

diff --git resolver.c resolver.c
index 71614237506..9b60445f80d 100644
--- resolver.c
+++ resolver.c
@@ -1623,8 +1623,9 @@ void
 asr_resolve_done(struct asr_result *ar, void *arg)
 {
        struct resolver_cb_data *cb_data = arg;
-       cb_data->cb(cb_data->res, cb_data->data, ar->ar_rcode, ar->ar_data,
-           ar->ar_datalen, 0, NULL);
+       cb_data->cb(cb_data->res, cb_data->data, ar->ar_errno == 0 ?
+           ar->ar_rcode : LDNS_RCODE_SERVFAIL, ar->ar_data, ar->ar_datalen, 0,
+           NULL);
        free(ar->ar_data);
        resolver_unref(cb_data->res);
        free(cb_data);
@@ -2289,6 +2290,9 @@ check_dns64_done(struct asr_result *ar, void *arg)
        int                              preflen, count = 0;
        void                            *asr_ctx = arg;
 
+       if (ar->ar_errno != 0)
+               goto fail;
+
        memset(&qinfo, 0, sizeof(qinfo));
        alloc_init(&alloc, NULL, 0);
 
@@ -2390,6 +2394,7 @@ check_dns64_done(struct asr_result *ar, void *arg)
        alloc_clear(&alloc);
        regional_destroy(region);
        sldns_buffer_free(buf);
+ fail:
        free(ar->ar_data);
        asr_resolver_free(asr_ctx);
 }

commit 9ede59d6a547b0e5f574819bb7fcaf68eda24630
Author: Florian Obser <flor...@narrans.de>
Date:   Sat Apr 15 11:46:40 2023 +0200

    If rcode is SERVFAIL, there is no need to look at the packet.
    
    This pulls the check for rcode up, before we check if the answer
    packet has sensible length. Since we are not touching the packet at
    all, we don't care about the size and don't need to log if the size is
    wrong from a DNS perspective.
    
    With asr error reporting improved in the previous commit, this
    probably gets rid of all "bad packet: too short" messages.

diff --git resolver.c resolver.c
index 9b60445f80d..9625dcb471a 100644
--- resolver.c
+++ resolver.c
@@ -953,6 +953,12 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode,
 
        running_res = --rq->running;
 
+       if (rcode == LDNS_RCODE_SERVFAIL) {
+               if (res->stop != 1)
+                       check_resolver(res);
+               goto servfail;
+       }
+
        if (answer_len < LDNS_HEADER_SIZE) {
                log_warnx("bad packet: too short");
                goto servfail;
@@ -965,12 +971,6 @@ resolve_done(struct uw_resolver *res, void *arg, int rcode,
        }
        answer_header->answer_len = answer_len;
 
-       if (rcode == LDNS_RCODE_SERVFAIL) {
-               if (res->stop != 1)
-                       check_resolver(res);
-               goto servfail;
-       }
-
        if ((result = calloc(1, sizeof(*result))) == NULL)
                goto servfail;
        if ((buf = sldns_buffer_new(answer_len)) == NULL)
@@ -1545,12 +1545,6 @@ check_resolver_done(struct uw_resolver *res, void *arg, 
int rcode,
 
        prev_state = checked_resolver->state;
 
-       if (answer_len < LDNS_HEADER_SIZE) {
-               checked_resolver->state = DEAD;
-               log_warnx("%s: bad packet: too short", __func__);
-               goto out;
-       }
-
        if (rcode == LDNS_RCODE_SERVFAIL) {
                log_debug("%s: %s rcode: SERVFAIL", __func__,
                    uw_resolver_type_str[checked_resolver->type]);
@@ -1559,6 +1553,12 @@ check_resolver_done(struct uw_resolver *res, void *arg, 
int rcode,
                goto out;
        }
 
+       if (answer_len < LDNS_HEADER_SIZE) {
+               checked_resolver->state = DEAD;
+               log_warnx("%s: bad packet: too short", __func__);
+               goto out;
+       }
+
        if (sec == SECURE) {
                if (dns64_present && (res->type == UW_RES_AUTOCONF ||
                    res->type == UW_RES_ODOT_AUTOCONF)) {
@@ -1902,6 +1902,11 @@ trust_anchor_resolve_done(struct uw_resolver *res, void 
*arg, int rcode,
        uint16_t                 dnskey_flags;
        char                     rdata_buf[1024], *ta;
 
+       if (rcode == LDNS_RCODE_SERVFAIL) {
+               log_debug("%s: rcode: SERVFAIL", __func__);
+               goto out;
+       }
+
        if (answer_len < LDNS_HEADER_SIZE) {
                log_warnx("bad packet: too short");
                goto out;


-- 
In my defence, I have been left unsupervised.

Reply via email to