On 7/21/23 18:08, Eric Blake wrote: > Support receiving headers for 64-bit replies if extended headers were > negotiated. We already insist that the server not send us too much > payload in one reply, so we can exploit that and continue to use > h->payload_left as a 32-bit length for the rest of the payload > parsing. The NBD protocol specifically documents that extended mode > takes precedence over structured replies, and that there are no simple > replies in extended mode. We can also take advantage that the cookie > field is in the same offset in all the various reply types. Checking > that the server sent back the correct offset adds a bit more verbosity > for the sake of nicer debug messages. > > Note that if we negotiate extended headers, but a non-compliant server > replies with a non-extended header, this patch will stall waiting for > the server to send more bytes rather than noticing that the magic > number is wrong (for aio operations, you'll get a magic number > mismatch once you send a second command that elicits a reply; but for > blocking operations, we basically deadlock). The easy alternative > would be to read just the first 4 bytes of magic, then determine how > many more bytes to expect; but that would require more states and > syscalls, and not worth it since the typical server will be compliant. > The other alternative is what the next patch implements: teaching > REPLY.RECV_REPLY to handle short reads that were at least long enough > to transmit magic to specifically look for magic number mismatch. > > At this point, h->extended_headers is permanently false (we can't > enable it until all other aspects of the protocol have likewise been > converted). > > Signed-off-by: Eric Blake <ebl...@redhat.com> > --- > > v4: don't normalize length in-place [Laszlo], rebase on earlier > changes, better debug reporting if offsets mismatch > --- > lib/internal.h | 1 + > generator/states-reply.c | 89 ++++++++++++++++++++++++---------- > generator/states-reply-chunk.c | 40 +++++++++++---- > 3 files changed, 95 insertions(+), 35 deletions(-) > > diff --git a/lib/internal.h b/lib/internal.h > index 2e4f987c..9df6a685 100644 > --- a/lib/internal.h > +++ b/lib/internal.h > @@ -244,6 +244,7 @@ struct nbd_handle { > union reply_header { > struct nbd_simple_reply simple; > struct nbd_structured_reply structured; > + struct nbd_extended_reply extended; > /* The wire formats share magic and cookie at the same offsets; > * provide aliases for one less level of typing. > */ > diff --git a/generator/states-reply.c b/generator/states-reply.c > index 0d200513..05301ae8 100644 > --- a/generator/states-reply.c > +++ b/generator/states-reply.c > @@ -68,22 +68,30 @@ REPLY.START: > */ > ssize_t r; > > - /* We read all replies initially as if they are simple replies, but > - * check the magic in CHECK_REPLY_MAGIC below. This works because > - * the structured_reply header is larger, and because the last > - * member of a simple reply, cookie, is coincident between the two > - * structs (an intentional design decision in the NBD spec when > - * structured replies were added). > + /* With extended headers, there is only one size to read, so we can > + * do it all in one syscall. But for older structured replies, we > + * don't know if we have a simple or structured reply until we read > + * the magic number, requiring a two-part read with > + * CHECK_REPLY_MAGIC below. This works because the structured_reply > + * header is larger, and because the last member of a simple reply, > + * cookie, is coincident between all three structs (intentional > + * design decisions in the NBD spec when structured and extended > + * replies were added). > */ > ASSERT_MEMBER_ALIAS (union reply_header, simple.magic, magic); > ASSERT_MEMBER_ALIAS (union reply_header, simple.cookie, cookie); > ASSERT_MEMBER_ALIAS (union reply_header, structured.magic, magic); > ASSERT_MEMBER_ALIAS (union reply_header, structured.cookie, cookie); > + ASSERT_MEMBER_ALIAS (union reply_header, extended.magic, magic); > + ASSERT_MEMBER_ALIAS (union reply_header, extended.cookie, cookie); > assert (h->reply_cmd == NULL); > assert (h->rlen == 0); > > h->rbuf = &h->sbuf.reply.hdr; > - h->rlen = sizeof h->sbuf.reply.hdr.simple; > + if (h->extended_headers) > + h->rlen = sizeof h->sbuf.reply.hdr.extended; > + else > + h->rlen = sizeof h->sbuf.reply.hdr.simple; > > r = h->sock->ops->recv (h, h->sock, h->rbuf, h->rlen); > if (r == -1) { > @@ -129,10 +137,25 @@ REPLY.CHECK_REPLY_MAGIC: > uint64_t cookie; > > magic = be32toh (h->sbuf.reply.hdr.magic); > - if (magic == NBD_SIMPLE_REPLY_MAGIC) { > + switch (magic) { > + case NBD_SIMPLE_REPLY_MAGIC: > + if (h->extended_headers) > + /* Server is non-compliant, and we've already read more bytes > + * than a simple header contains; no recovery possible > + */ > + goto invalid; > + > + /* All other payload checks handled in the simple payload engine */ > SET_NEXT_STATE (%SIMPLE_REPLY.START); > - } > - else if (magic == NBD_STRUCTURED_REPLY_MAGIC) { > + break; > + > + case NBD_STRUCTURED_REPLY_MAGIC: > + if (h->extended_headers) > + /* Server is non-compliant, and we've already read more bytes > + * than a structured header contains; no recovery possible > + */ > + goto invalid; > + > /* We've only read the bytes that fill hdr.simple. But > * hdr.structured is longer, so prepare to read the remaining > * bytes. We depend on the memory aliasing in union sbuf to > @@ -145,23 +168,29 @@ REPLY.CHECK_REPLY_MAGIC: > h->rlen = sizeof h->sbuf.reply.hdr.structured; > h->rlen -= sizeof h->sbuf.reply.hdr.simple; > SET_NEXT_STATE (%RECV_STRUCTURED_REMAINING); > - } > - else { > - /* We've probably lost synchronization. */ > - SET_NEXT_STATE (%.DEAD); > - set_error (0, "invalid reply magic 0x%" PRIx32, magic); > -#if 0 /* uncomment to see desynchronized data */ > - nbd_internal_hexdump (&h->sbuf.reply.hdr.simple, > - sizeof (h->sbuf.reply.hdr.simple), > - stderr); > -#endif > - return 0; > + break; > + > + case NBD_EXTENDED_REPLY_MAGIC: > + if (!h->extended_headers) > + /* Server is non-compliant. We could continue reading bytes up > + * to the length of an extended reply to regain sync, but old > + * servers are unlikely to send this magic, so it's just as easy > + * to punt. > + */ > + goto invalid; > + > + /* All other payload checks handled in the chunk payload engine */ > + SET_NEXT_STATE (%CHUNK_REPLY.START); > + break; > + > + default: > + goto invalid; > } > > - /* NB: This works for both simple and structured replies, even > - * though we haven't finished reading the structured header yet, > - * because the cookie is stored at the same offset. See the > - * STATIC_ASSERT above in state REPLY.START that confirmed this. > + /* NB: This works for all three reply types, even though we haven't > + * finished reading a structured header yet, because the cookie is > + * stored at the same offset. See the ASSERT_MEMBER_ALIAS above in > + * state REPLY.START that confirmed this. > */ > h->chunks_received++; > cookie = be64toh (h->sbuf.reply.hdr.cookie); > @@ -175,6 +204,16 @@ REPLY.CHECK_REPLY_MAGIC: > h->reply_cmd = cmd; > return 0; > > + invalid: > + SET_NEXT_STATE (%.DEAD); /* We've probably lost synchronization. */ > + set_error (0, "invalid or unexpected reply magic 0x%" PRIx32, magic); > +#if 0 /* uncomment to see desynchronized data */ > + nbd_internal_hexdump (&h->sbuf.reply.hdr.simple, > + sizeof (h->sbuf.reply.hdr.simple), > + stderr); > +#endif > + return 0; > + > REPLY.RECV_STRUCTURED_REMAINING: > switch (recv_into_rbuf (h)) { > case -1: SET_NEXT_STATE (%.DEAD); return 0; > diff --git a/generator/states-reply-chunk.c b/generator/states-reply-chunk.c > index 1758ac34..17bb5149 100644 > --- a/generator/states-reply-chunk.c > +++ b/generator/states-reply-chunk.c > @@ -69,11 +69,17 @@ STATE_MACHINE { > REPLY.CHUNK_REPLY.START: > struct command *cmd = h->reply_cmd; > uint16_t flags, type; > - uint32_t length; > + uint64_t length; > + uint64_t offset = -1; > > flags = be16toh (h->sbuf.reply.hdr.structured.flags); > type = be16toh (h->sbuf.reply.hdr.structured.type); > - length = be32toh (h->sbuf.reply.hdr.structured.length); > + if (h->extended_headers) { > + length = be64toh (h->sbuf.reply.hdr.extended.length); > + offset = be64toh (h->sbuf.reply.hdr.extended.offset); > + } > + else > + length = be32toh (h->sbuf.reply.hdr.structured.length); > > /* Reject a server that replies with too much information, but don't > * reject a single structured reply to NBD_CMD_READ on the largest > @@ -83,13 +89,14 @@ REPLY.CHUNK_REPLY.START: > * not worth keeping the connection alive. > */ > if (length > MAX_REQUEST_SIZE + sizeof h->sbuf.reply.payload.offset_data) { > - set_error (0, "invalid server reply length %" PRIu32, length); > + set_error (0, "invalid server reply length %" PRIu64, length); > SET_NEXT_STATE (%.DEAD); > return 0; > } > > /* Skip an unexpected structured reply, including to an unknown cookie. */ > - if (cmd == NULL || !h->structured_replies) > + if (cmd == NULL || !h->structured_replies || > + (h->extended_headers && offset != cmd->offset)) > goto resync; > h->payload_left = length; > > @@ -504,7 +511,8 @@ REPLY.CHUNK_REPLY.RECV_BS_ENTRIES: > REPLY.CHUNK_REPLY.RESYNC: > struct command *cmd = h->reply_cmd; > uint16_t type; > - uint32_t length; > + uint64_t length; > + uint64_t offset = -1; > > assert (h->rbuf == NULL); > switch (recv_into_rbuf (h)) { > @@ -524,11 +532,23 @@ REPLY.CHUNK_REPLY.RESYNC: > return 0; > } > type = be16toh (h->sbuf.reply.hdr.structured.type); > - length = be32toh (h->sbuf.reply.hdr.structured.length); > - debug (h, "unexpected reply type %u or payload length %" PRIu32 > - " for cookie %" PRIu64 " and command %" PRIu32 > - ", this is probably a server bug", > - type, length, cmd->cookie, cmd->type); > + if (h->extended_headers) { > + length = be64toh (h->sbuf.reply.hdr.extended.length); > + offset = be64toh (h->sbuf.reply.hdr.extended.offset); > + if (offset != cmd->offset) > + debug (h, "unexpected reply offset %" PRIu64 " for cookie %" PRIu64 > + " and command %" PRIu32 ", this is probably a server bug", > + length, cmd->cookie, cmd->type); > + else > + offset = -1; > + } > + else > + length = be32toh (h->sbuf.reply.hdr.structured.length); > + if (offset == -1) > + debug (h, "unexpected reply type %u or payload length %" PRIu64 > + " for cookie %" PRIu64 " and command %" PRIu32 > + ", this is probably a server bug", > + type, length, cmd->cookie, cmd->type); > if (cmd->error == 0) > cmd->error = EPROTO; > SET_NEXT_STATE (%FINISH);
Acked-by: Laszlo Ersek <ler...@redhat.com> _______________________________________________ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs