This patch is a new version of the qemu-img change that uses an NBD device to mount a QEMU disk image.
To not hang on UP system, it needs following patch: http://article.gmane.org/gmane.linux.drivers.nbd.general/42 If you want to use loop to see partitions, you need this patch: http://article.gmane.org/gmane.linux.kernel/651269 otherwise use kpartx (see kpartx package of your distro). This patch implements in qemu-img the client and the server of the nbd protocol. Moreover, to avoid to specify a port to use, it creates a UNIX socket instead of a INET socket. It adds two actions to qemu-img: - bind, to bind a disk image to a NBD device, qemu-img bind [-d] [-f fmt] device filename ('-d' to daemonize) - unbind, to unbind it. qemu-img unbind device Example: # qemu-img bind -d /dev/nbd0 fc6.qcow2 [here you can use any tools you want to see partitions: kpartx, patched loop device (see LKML), patched NBD driver (I can provide the patch, I use this in the following example)] # mount /dev/nbd0p1 /mnt # ls /mnt config-2.6.22.4-45.fc6 lost+found vmlinuz config-2.6.22.5-49.fc6 System.map vmlinuz-2.6.22.4-45.fc6 grub System.map-2.6.22.4-45.fc6 vmlinuz-2.6.22.5 initrd-2.6.22.4-45.fc6.img System.map-2.6.22.5 vmlinuz-2.6.22.5-49.fc6 initrd-2.6.22.5-49.fc6.img System.map-2.6.22.5-49.fc6 vmlinuz-2.6.22.5.old initrd-2.6.22.5.img System.map-2.6.22.5.old # umount /mnt # qemu-img unbind /dev/nbd0 /dev/nbd0 disconnected Laurent --- qemu-img.c | 426 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 426 insertions(+) Index: qemu/qemu-img.c =================================================================== --- qemu.orig/qemu-img.c 2008-03-10 09:58:24.000000000 +0100 +++ qemu/qemu-img.c 2008-03-10 11:39:35.000000000 +0100 @@ -25,6 +25,22 @@ #include "block_int.h" #include <assert.h> +#ifdef __linux__ +#define NBD_SERVER +#endif + +#ifdef NBD_SERVER +#include <sys/wait.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <arpa/inet.h> +#include <linux/types.h> +#include <linux/nbd.h> +#include <sys/ioctl.h> +#include <sys/mount.h> 
+#endif /* NBD_SERVER */ + #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include <windows.h> @@ -92,6 +108,10 @@ static void help(void) " commit [-f fmt] filename\n" " convert [-c] [-e] [-6] [-f fmt] [-O output_fmt] filename [filename2 [...]] output_filename\n" " info [-f fmt] filename\n" +#ifdef NBD_SERVER + " bind [-d] [-f fmt] device filename\n" + " unbind device\n" +#endif "\n" "Command parameters:\n" " 'filename' is a disk image filename\n" @@ -105,6 +125,9 @@ static void help(void) " '-c' indicates that target image must be compressed (qcow format only)\n" " '-e' indicates that the target image must be encrypted (qcow format only)\n" " '-6' indicates that the target image must use compatibility level 6 (vmdk format only)\n" +#ifdef NBD_SERVER + " '-d' daemonize (bind only)\n" +#endif ); printf("\nSupported format:"); bdrv_iterate_format(format_print, NULL); @@ -602,6 +625,403 @@ static int img_convert(int argc, char ** return 0; } +#ifdef NBD_SERVER + +//#define DEBUG_SERVER + +#ifdef DEBUG_SERVER +#define DPRINTF(fmt, args...) \ +do { printf("img-bind: " fmt , ##args); } while (0) +#else +#define DPRINTF(fmt, args...) 
do {} while(0) +#endif + +#define BUFSIZE (1024*1024) + +static int nbd_receive(int fd, char *buf, size_t len) +{ + ssize_t rd; + + while (len > 0) { + rd = read(fd, buf, len); + if (rd == -1) + return -errno; + len -= rd; + buf += rd; + } + return 0; +} + +static int nbd_send(int fd, char *buf, size_t len) +{ + ssize_t written; + + while (len > 0) { + written = write(fd, buf, len); + if (written == -1) + return -errno; + len -= written; + buf += written; + } + return 0; +} + +# if __BYTE_ORDER == __BIG_ENDIAN +# define htonll(x) (x) +# define ntohll(x) (x) +#else +# define htonll(x) __bswap_64(x) +# define ntohll(x) __bswap_64(x) +#endif + +static void bdrv_loop(BlockDriverState *drv, int net) +{ + struct nbd_request request; + struct nbd_reply reply; + char *buf; + int ret; + + buf = qemu_malloc(BUFSIZE); + if (buf == NULL) + return; + + while(1) { + uint32_t len; + uint64_t from; + + ret = nbd_receive(net, (char*)&request, sizeof(request)); + if (ret < 0) { + DPRINTF("read failed %d (%d)\n", ret, errno); + break; + } + + DPRINTF("request magic %x type %d from %lx len %x\n", + ntohl(request.magic), ntohl(request.type), + ntohll(request.from), ntohl(request.len)); + + if (request.magic != htonl(NBD_REQUEST_MAGIC)) { + DPRINTF("Bad Magic\n"); + break; + } + + if (request.type == htonl(NBD_CMD_DISC)) { + /* disconnect */ + DPRINTF("Command Disconnect\n"); + break; + } + + len = ntohl(request.len); + if (len > BUFSIZE - sizeof(struct nbd_reply)) { + DPRINTF("len too big %d\n", len); + break; + } + + /* prepare reply */ + + reply.magic = htonl(NBD_REPLY_MAGIC); + reply.error = 0; + memcpy(reply.handle, request.handle, sizeof(reply.handle)); + + /* do I/O */ + + from = ntohll(request.from); + + switch(ntohl(request.type)) { + case NBD_CMD_READ: + reply.error = -bdrv_read(drv, from >> 9, + buf + sizeof(struct nbd_reply), len >> 9); + if (reply.error != 0) { + DPRINTF("bdrv_read error %d\n", reply.error); + } + memcpy(buf, &reply, sizeof(struct nbd_reply)); + reply.error 
= htonl(reply.error); + ret = nbd_send(net, buf, len + sizeof(struct nbd_reply)); + if (ret < 0) { + DPRINTF("NBD_CMD_READ: cannot sent result\n"); + return; + } + break; + + case NBD_CMD_WRITE: + ret = nbd_receive(net, buf, len); + if (ret < 0) { + DPRINTF("NBD_CMD_WRITE: cannot receive block %d != %d\n", ret, len); + goto out; + } + reply.error = -bdrv_write(drv, from >> 9, buf, len >> 9); + + if (reply.error != 0) { + DPRINTF("bdrv_write error %d\n", reply.error); + } + reply.error = htonl(reply.error); + ret = nbd_send(net, (char*)&reply, sizeof(reply)); + if (ret < 0) { + DPRINTF("NBD_CMD_WRITE: cannot sent result %d != %d\n", ret, len); + goto out; + } + break; + } + } +out: + qemu_free(buf); + DPRINTF("bdrv_loop: exit\n"); +} + +static void show_part(char *device) +{ + int nbd; + sleep(1); + nbd = open(device, O_RDWR); + if (nbd == -1) + return; + ioctl(nbd, BLKRRPART, NULL); + close(nbd); +} + +void server_loop(BlockDriverState *drv, char *device) +{ + struct sockaddr_un addrin; + uint64_t total_sectors; + pid_t pid; + int sock; + int ret; + int net; + int nbd; + + memset(&addrin, 0, sizeof(addrin)); + addrin.sun_family = AF_UNIX; + sprintf(addrin.sun_path, "/var/lock/qemu-img-%s", basename(device)); + DPRINTF("socket: %s\n", addrin.sun_path); + + pid = fork(); + if (pid < 0) { + printf("Cannot fork\n"); + bdrv_delete(drv); + return; + } + if (pid == 0) { + socklen_t addrinlen; + int yes = 1; + + bdrv_get_geometry(drv, &total_sectors); + + /* child */ + + sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (sock == -1) { + fprintf(stderr, "Cannot create socket\n"); + goto child_cleanup2; + } + + ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, + &yes, sizeof(int)); + if (ret == -1) { + fprintf(stderr, "Cannot setsockopt\n"); + goto child_cleanup1; + } + + DPRINTF("bind\n"); + ret = bind(sock, (struct sockaddr *) &addrin, sizeof(addrin)); + if (ret == -1) { + fprintf(stderr, "Cannot bind socket\n"); + goto child_cleanup1; + } + + ret = listen(sock, 1); + if (ret == 
-1) { + fprintf(stderr, "Cannot listen socket\n"); + goto child_cleanup1; + } + + DPRINTF("accept\n"); + + printf("Starting qemu image server\n"); + + net = accept(sock, &addrin, &addrinlen); + if (ret == -1) { + fprintf(stderr, "accept failed\n"); + goto child_cleanup1; + } + + ret = write(net, &total_sectors, sizeof(total_sectors)); + if (ret == sizeof(total_sectors)) { + if (fork() == 0) { + show_part(device); + exit(0); + } + DPRINTF("bdrv_loop\n"); + bdrv_loop(drv, net); + } + + close(net); +child_cleanup1: + close(sock); +child_cleanup2: + bdrv_delete(drv); + unlink(addrin.sun_path); + + return; + } + + bdrv_delete(drv); + + /* parent */ + + sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (sock == -1) { + fprintf(stderr, "Cannot create socket\n"); + goto exit; + } + + do { + ret = connect(sock, (struct sockaddr *) &addrin, sizeof(addrin)); + if (ret == -1) { + if (errno != ENOENT && errno != ECONNREFUSED) { + fprintf(stderr, "Cannot create connect (%d: %s)\n", + errno, strerror(errno)); + goto exit; + } + sleep(1); + } + /* wait children */ + } while (ret == -1); + + nbd = open(device, O_RDWR); + if (nbd == -1) { + fprintf(stderr, "Cannot open %s\n", device); + goto exit; + } + + ret = read(sock, &total_sectors, sizeof(total_sectors)); + if (ret != sizeof(total_sectors)) { + fprintf(stderr, "Cannot read image disk size\n"); + goto closeall; + } + + ret = ioctl(nbd, NBD_SET_BLKSIZE, 512); + if (ret == -1) { + fprintf(stderr, "Cannot set block size\n"); + goto closeall; + } + + ret = ioctl(nbd, NBD_SET_SIZE_BLOCKS, total_sectors); + if (ret == -1) { + fprintf(stderr, "Cannot set device size\n"); + goto closeall; + } + + ret = ioctl(nbd, NBD_CLEAR_SOCK); + if (ret == -1) { + fprintf(stderr, "Cannot clear sock\n"); + goto closeall; + } + + ret = ioctl(nbd, NBD_SET_SOCK, sock); + if (ret == -1) { + fprintf(stderr, "Cannot set sock\n"); + goto closeall; + } + + printf("Starting NBD interface\n"); + + ret = ioctl(nbd, NBD_DO_IT); + if (ret == -1) + fprintf(stderr, 
"NBD_DO_IT failed %d\n", errno); + + ioctl(nbd, NBD_CLEAR_QUE); + + ioctl(nbd, NBD_CLEAR_SOCK); + +closeall: + close(nbd); +exit: + kill(pid, SIGTERM); + unlink(addrin.sun_path); +} + +static int img_bind(int argc, char **argv) +{ + int c; + char *fmt; + char *device; + char *filename; + int daemonize = 0; + BlockDriverState *drv; + + fmt = NULL; + for(;;) { + c = getopt(argc, argv, "f:hd"); + if (c == -1) + break; + switch(c) { + case 'h': + help(); + break; + case 'f': + fmt = optarg; + break; + case 'd': + daemonize = 1; + break; + } + } + + if (argc - optind < 2) + help(); + + device = argv[optind]; + filename = argv[optind + 1]; + + drv = bdrv_new_open(filename, fmt); + if (!drv) { + fprintf(stderr, "Cannot open %s\n", filename); + return 1; + } + +#ifndef DEBUG_SERVER + if (daemonize) + daemon(0, 0); +#endif + + server_loop(drv, device); + + return 0; +} + +static int img_unbind(int argc, char **argv) +{ + char *device; + int nbd; + int ret; + + if (argc - optind < 1) + help(); + + device = argv[optind]; + + nbd = open(device, O_RDWR); + if (nbd == -1) + error("Cannot open %s", device); + + ret = ioctl(nbd, NBD_CLEAR_QUE); + if (ret) + error("ioctl(NBD_CLEAR_QUE) failed"); + ret = ioctl(nbd, NBD_DISCONNECT); + if (ret) + error("ioctl(NBD_DISCONNECT) failed"); + ret = ioctl(nbd, NBD_CLEAR_SOCK); + if (ret) + error("ioctl(NBD_CLEAR_SOCK) failed"); + + close(nbd); + + printf("%s disconnected\n", device); + + return 0; +} + +#endif /* NBD_SERVER */ + #ifdef _WIN32 static int64_t get_allocated_file_size(const char *filename) { @@ -746,6 +1166,12 @@ int main(int argc, char **argv) img_convert(argc, argv); } else if (!strcmp(cmd, "info")) { img_info(argc, argv); +#ifdef NBD_SERVER + } else if (!strcmp(cmd, "bind")) { + img_bind(argc, argv); + } else if (!strcmp(cmd, "unbind")) { + img_unbind(argc, argv); +#endif /* NBD_SERVER */ } else { help(); }