[Qemu-devel] [PATCH v6 0/6] char: non-blocking writes, virtio-console flow control

2010-05-05 Thread Amit Shah
Hello,

This series lets interested callers ask for an -EAGAIN return from the
chardev backends (only unix and tcp sockets as of now) to implement
their own flow control.

A new call, qemu_chr_write_nb() is added, that will fallback to
qemu_chr_write() if the backend file isn't non-blocking or if no
callback was supplied.

Support for other backend types is easy to add and will be done in
later patches.

v6:
- Continue write operation on EINTR instead of returning partial
  writes (which was a change from prev. behaviour) (Gerd)

v5:
- Fix bug pointed out by Gerd
- Convert to using a struct for passing on handlers to
  qemu_chr_add_handlers() instead of passing each one
  individually. Simplifies patches. (Inspired by Juan's comment)
- Re-arranged patches


Amit Shah (6):
  virtio-console: Factor out common init between console and generic
ports
  char: Add a QemuChrHandlers struct to initialise chardev handlers
  char: Let writers know how much data was written in case of errors
  char: Add qemu_chr_write_nb() for nonblocking writes
  char: unix/tcp: Add a non-blocking write handler
  virtio-console: Throttle virtio-serial-bus if we can't consume any
more guest data

 gdbstub.c|9 ++-
 hw/debugcon.c|2 +-
 hw/escc.c|9 ++-
 hw/etraxfs_ser.c |   13 +++-
 hw/mcf_uart.c|9 ++-
 hw/pl011.c   |9 ++-
 hw/pxa2xx.c  |   13 +++-
 hw/serial.c  |9 ++-
 hw/sh_serial.c   |   12 +++-
 hw/syborg_serial.c   |9 ++-
 hw/usb-serial.c  |9 ++-
 hw/virtio-console.c  |  162 +++--
 hw/xen_console.c |   16 --
 hw/xilinx_uartlite.c |   11 +++-
 monitor.c|   19 +-
 net/slirp.c  |8 ++-
 net/socket.c |4 +-
 qemu-char.c  |  122 --
 qemu-char.h  |   20 +-
 qemu_socket.h|3 +-
 20 files changed, 384 insertions(+), 84 deletions(-)





[Qemu-devel] [PATCH v6 1/6] virtio-console: Factor out common init between console and generic ports

2010-05-05 Thread Amit Shah
The initialisation for generic ports and console ports is similar.
Factor out the parts that are the same in a different function that can
be called from each of the initfns.

Signed-off-by: Amit Shah 
---
 hw/virtio-console.c |   31 ++-
 1 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/hw/virtio-console.c b/hw/virtio-console.c
index caea11f..d7fe68b 100644
--- a/hw/virtio-console.c
+++ b/hw/virtio-console.c
@@ -58,24 +58,28 @@ static void chr_event(void *opaque, int event)
 }
 }
 
-/* Virtio Console Ports */
-static int virtconsole_initfn(VirtIOSerialDevice *dev)
+static int generic_port_init(VirtConsole *vcon, VirtIOSerialDevice *dev)
 {
-VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
-VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
-
-port->info = dev->info;
-
-port->is_console = true;
+vcon->port.info = dev->info;
 
 if (vcon->chr) {
 qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, chr_event,
   vcon);
-port->info->have_data = flush_buf;
+vcon->port.info->have_data = flush_buf;
 }
 return 0;
 }
 
+/* Virtio Console Ports */
+static int virtconsole_initfn(VirtIOSerialDevice *dev)
+{
+VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
+VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
+
+port->is_console = true;
+return generic_port_init(vcon, dev);
+}
+
 static int virtconsole_exitfn(VirtIOSerialDevice *dev)
 {
 VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
@@ -115,14 +119,7 @@ static int virtserialport_initfn(VirtIOSerialDevice *dev)
 VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
 VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
 
-port->info = dev->info;
-
-if (vcon->chr) {
-qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, chr_event,
-  vcon);
-port->info->have_data = flush_buf;
-}
-return 0;
+return generic_port_init(vcon, dev);
 }
 
 static VirtIOSerialPortInfo virtserialport_info = {
-- 
1.6.2.5





[Qemu-devel] [PATCH v6 2/6] char: Add a QemuChrHandlers struct to initialise chardev handlers

2010-05-05 Thread Amit Shah
Instead of passing each handler in the qemu_add_handlers() function,
create a struct of handlers that can be passed to the function instead.

Signed-off-by: Amit Shah 
---
 gdbstub.c|9 +++--
 hw/debugcon.c|2 +-
 hw/escc.c|9 +++--
 hw/etraxfs_ser.c |   13 +
 hw/mcf_uart.c|9 +++--
 hw/pl011.c   |9 +++--
 hw/pxa2xx.c  |   13 +
 hw/serial.c  |9 +++--
 hw/sh_serial.c   |   12 +---
 hw/syborg_serial.c   |9 +++--
 hw/usb-serial.c  |9 +++--
 hw/virtio-console.c  |9 +++--
 hw/xen_console.c |   16 +++-
 hw/xilinx_uartlite.c |   11 +--
 monitor.c|   19 +++
 net/slirp.c  |8 ++--
 qemu-char.c  |   27 ++-
 qemu-char.h  |   12 
 18 files changed, 151 insertions(+), 54 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 93c4850..7b981ce 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2621,6 +2621,12 @@ static void gdb_sigterm_handler(int signal)
 }
 #endif
 
+static QemuChrHandlers gdb_handlers = {
+.fd_can_read = gdb_chr_can_receive,
+.fd_read = gdb_chr_receive,
+.fd_event = gdb_chr_event,
+};
+
 int gdbserver_start(const char *device)
 {
 GDBState *s;
@@ -2650,8 +2656,7 @@ int gdbserver_start(const char *device)
 if (!chr)
 return -1;
 
-qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive,
-  gdb_chr_event, NULL);
+qemu_chr_add_handlers(chr, &gdb_handlers, NULL);
 }
 
 s = gdbserver_state;
diff --git a/hw/debugcon.c b/hw/debugcon.c
index 5ee6821..e79a595 100644
--- a/hw/debugcon.c
+++ b/hw/debugcon.c
@@ -73,7 +73,7 @@ static void debugcon_init_core(DebugconState *s)
 exit(1);
 }
 
-qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, s);
+qemu_chr_add_handlers(s->chr, NULL, s);
 }
 
 static int debugcon_isa_initfn(ISADevice *dev)
diff --git a/hw/escc.c b/hw/escc.c
index 6d2fd36..1978bf7 100644
--- a/hw/escc.c
+++ b/hw/escc.c
@@ -890,6 +890,12 @@ void slavio_serial_ms_kbd_init(target_phys_addr_t base, 
qemu_irq irq,
 sysbus_mmio_map(s, 0, base);
 }
 
+static QemuChrHandlers serial_handlers = {
+.fd_can_read = serial_can_receive,
+.fd_read = serial_receive1,
+.fd_event = serial_event,
+};
+
 static int escc_init1(SysBusDevice *dev)
 {
 SerialState *s = FROM_SYSBUS(SerialState, dev);
@@ -903,8 +909,7 @@ static int escc_init1(SysBusDevice *dev)
 s->chn[i].chn = 1 - i;
 s->chn[i].clock = s->frequency / 2;
 if (s->chn[i].chr) {
-qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
-  serial_receive1, serial_event, &s->chn[i]);
+qemu_chr_add_handlers(s->chn[i].chr, &serial_handlers, &s->chn[i]);
 }
 }
 s->chn[0].otherchn = &s->chn[1];
diff --git a/hw/etraxfs_ser.c b/hw/etraxfs_ser.c
index e1f9615..e22f770 100644
--- a/hw/etraxfs_ser.c
+++ b/hw/etraxfs_ser.c
@@ -161,6 +161,12 @@ static void serial_event(void *opaque, int event)
 
 }
 
+static QemuChrHandlers serial_handlers = {
+.fd_can_read = serial_can_receive,
+.fd_read = serial_receive,
+.fd_event = serial_event,
+};
+
 static int etraxfs_ser_init(SysBusDevice *dev)
 {
 struct etrax_serial *s = FROM_SYSBUS(typeof (*s), dev);
@@ -174,10 +180,9 @@ static int etraxfs_ser_init(SysBusDevice *dev)
 ser_regs = cpu_register_io_memory(ser_read, ser_write, s);
 sysbus_init_mmio(dev, R_MAX * 4, ser_regs);
 s->chr = qdev_init_chardev(&dev->qdev);
-if (s->chr)
-qemu_chr_add_handlers(s->chr,
-  serial_can_receive, serial_receive,
-  serial_event, s);
+if (s->chr) {
+qemu_chr_add_handlers(s->chr, &serial_handlers, s);
+}
 return 0;
 }
 
diff --git a/hw/mcf_uart.c b/hw/mcf_uart.c
index d16bac7..301b901 100644
--- a/hw/mcf_uart.c
+++ b/hw/mcf_uart.c
@@ -268,6 +268,12 @@ static void mcf_uart_receive(void *opaque, const uint8_t 
*buf, int size)
 mcf_uart_push_byte(s, buf[0]);
 }
 
+static QemuChrHandlers mcf_uart_handlers = {
+.fd_can_read = mcf_uart_can_receive,
+.fd_read = mcf_uart_receive,
+.fd_event = mcf_uart_event,
+};
+
 void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
 {
 mcf_uart_state *s;
@@ -276,8 +282,7 @@ void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
 s->chr = chr;
 s->irq = irq;
 if (chr) {
-qemu_chr_add_handlers(chr, mcf_uart_can_receive, mcf_uart_receive,
-  mcf_uart_event, s);
+qemu_chr_add_handlers(chr, &mcf_uart_handlers, s);
 }
 mcf_uart_reset(s);
 return s;
diff --git a/hw/pl011.c b/hw/pl011.c
index 81de91e..8e5356e 100644
--- a/hw/pl011.c
+++ b/hw/pl011.c
@@ -286,6 +286,12 @@ static int pl011_load(QEMUFile *f, void *opaque, int 
version_id)
 retur

[Qemu-devel] [PATCH v6 3/6] char: Let writers know how much data was written in case of errors

2010-05-05 Thread Amit Shah
On writing errors, we just returned -1 even if some bytes were already
written out. Ensure we return the number of bytes written before we
return the error (on a subsequent call to qemu_chr_write()).

Signed-off-by: Amit Shah 
---
 qemu-char.c |   12 +++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 65cb3f5..1f5d886 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -507,6 +507,9 @@ int send_all(int fd, const void *buf, int len1)
 while (len > 0) {
 ret = send(fd, buf, len, 0);
 if (ret < 0) {
+if (len1 - len) {
+return len1 - len;
+}
 errno = WSAGetLastError();
 if (errno != WSAEWOULDBLOCK) {
 return -1;
@@ -531,8 +534,15 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
 while (len > 0) {
 ret = write(fd, buf, len);
 if (ret < 0) {
-if (errno != EINTR && errno != EAGAIN)
+if (errno == EINTR) {
+continue;
+}
+if (len1 - len) {
+return len1 - len;
+}
+if (errno != EAGAIN) {
 return -1;
+}
 } else if (ret == 0) {
 break;
 } else {
-- 
1.6.2.5





[Qemu-devel] [PATCH v6 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Amit Shah
For char devices whose backing files are open in non-blocking mode,
non-blocking writes can now be made using qemu_chr_write_nb().

For non-blocking chardevs, we can return -EAGAIN to callers of
qemu_chr_write_nb(). When the backend is ready to accept more data,
we can let the caller know via a callback.

-EAGAIN is returned only when the backend's file is non-blocking
and a callback is registered by the caller when invoking
qemu_chr_add_handlers().

In case a backend doesn't support non-blocking writes,
qemu_chr_write_nb() invokes qemu_chr_write().

Individual callers can update their code to add a callback handler,
call qemu_chr_write_nb() instead of qemu_chr_write() if they wish to
receive -EAGAIN notifications.

No backend currently supports non-blocking writes.

Signed-off-by: Amit Shah 
---
 net/socket.c  |4 ++--
 qemu-char.c   |   31 ---
 qemu-char.h   |8 
 qemu_socket.h |3 ++-
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 1c4e153..8a401e6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -56,8 +56,8 @@ static ssize_t net_socket_receive(VLANClientState *nc, const 
uint8_t *buf, size_
 uint32_t len;
 len = htonl(size);
 
-send_all(s->fd, (const uint8_t *)&len, sizeof(len));
-return send_all(s->fd, buf, size);
+send_all(s->fd, (const uint8_t *)&len, sizeof(len), false);
+return send_all(s->fd, buf, size, false);
 }
 
 static ssize_t net_socket_receive_dgram(VLANClientState *nc, const uint8_t 
*buf, size_t size)
diff --git a/qemu-char.c b/qemu-char.c
index 1f5d886..e6934f6 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -145,6 +145,16 @@ int qemu_chr_write(CharDriverState *s, const uint8_t *buf, 
int len)
 return s->chr_write(s, buf, len);
 }
 
+ssize_t qemu_chr_write_nb(CharDriverState *s, const uint8_t *buf, size_t len)
+{
+if (!s->nonblock) {
+/* Fallback to blocking write if no callback registered */
+return qemu_chr_write(s, buf, len);
+}
+
+return s->chr_write_nb(s, buf, len);
+}
+
 int qemu_chr_ioctl(CharDriverState *s, int cmd, void *arg)
 {
 if (!s->chr_ioctl)
@@ -203,11 +213,15 @@ void qemu_chr_add_handlers(CharDriverState *s,
 }
 s->chr_can_read = handlers->fd_can_read;
 s->chr_read = handlers->fd_read;
+s->chr_write_unblocked = handlers->fd_write_unblocked;
 s->chr_event = handlers->fd_event;
 s->handler_opaque = opaque;
 if (s->chr_update_read_handler)
 s->chr_update_read_handler(s);
 
+/* We'll set this at connect-time */
+s->nonblock = false;
+
 /* We're connecting to an already opened device, so let's make sure we
also get the open event */
 if (s->opened) {
@@ -499,7 +513,7 @@ static CharDriverState *qemu_chr_open_mux(CharDriverState 
*drv)
 
 
 #ifdef _WIN32
-int send_all(int fd, const void *buf, int len1)
+int send_all(int fd, const void *buf, int len1, bool nonblock)
 {
 int ret, len;
 
@@ -526,7 +540,7 @@ int send_all(int fd, const void *buf, int len1)
 
 #else
 
-static int unix_write(int fd, const uint8_t *buf, int len1)
+static int unix_write(int fd, const uint8_t *buf, int len1, bool nonblock)
 {
 int ret, len;
 
@@ -540,6 +554,9 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
 if (len1 - len) {
 return len1 - len;
 }
+if (errno == EAGAIN && nonblock) {
+return -EAGAIN;
+}
 if (errno != EAGAIN) {
 return -1;
 }
@@ -553,9 +570,9 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
 return len1 - len;
 }
 
-int send_all(int fd, const void *buf, int len1)
+int send_all(int fd, const void *buf, int len1, bool nonblock)
 {
-return unix_write(fd, buf, len1);
+return unix_write(fd, buf, len1, nonblock);
 }
 #endif /* !_WIN32 */
 
@@ -572,7 +589,7 @@ static int stdio_nb_clients = 0;
 static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
 FDCharDriver *s = chr->opaque;
-return send_all(s->fd_out, buf, len);
+return send_all(s->fd_out, buf, len, false);
 }
 
 static int fd_chr_read_poll(void *opaque)
@@ -884,7 +901,7 @@ static int pty_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 pty_chr_update_read_handler(chr);
 return 0;
 }
-return send_all(s->fd, buf, len);
+return send_all(s->fd, buf, len, false);
 }
 
 static int pty_chr_read_poll(void *opaque)
@@ -1949,7 +1966,7 @@ static int tcp_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 {
 TCPCharDriver *s = chr->opaque;
 if (s->connected) {
-return send_all(s->fd, buf, len);
+return send_all(s->fd, buf, len, false);
 } else {
 /* XXX: indicate an error ? */
 return len;
diff --git a/qemu-char.h b/qemu-char.h
index 6ff490b..7b10b2b 100644
--- a/qemu-char.h
+++ b/qemu-char.h
@@ -1,6 +1,8 @@
 #ifndef QEMU_CHAR_H
 #define QEMU_CHA

[Qemu-devel] [PATCH v6 5/6] char: unix/tcp: Add a non-blocking write handler

2010-05-05 Thread Amit Shah
Add a non-blocking write handler that can return with -EAGAIN to the
caller and also callback when the socket becomes writable.

Non-blocking writes are only enabled for sockets that are opened in
non-blocking mode and only for callers that have registered a callback
handler for resuming writes.

Signed-off-by: Amit Shah 
---
 qemu-char.c |   50 ++
 1 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index e6934f6..da70b9b 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2129,11 +2129,60 @@ static void tcp_chr_read(void *opaque)
 }
 }
 
+static void tcp_chr_write_unblocked(void *opaque)
+{
+CharDriverState *chr = opaque;
+TCPCharDriver *s = chr->opaque;
+
+assert(chr->write_blocked && chr->chr_write_unblocked);
+
+chr->write_blocked = false;
+qemu_set_fd_handler2(s->fd, tcp_chr_read_poll, tcp_chr_read, NULL, chr);
+chr->chr_write_unblocked(chr->handler_opaque);
+}
+
+static ssize_t tcp_chr_write_nb(CharDriverState *chr, const uint8_t *buf,
+size_t len)
+{
+TCPCharDriver *s = chr->opaque;
+ssize_t ret;
+
+if (!s->connected) {
+/* XXX: indicate an error? */
+return len;
+}
+
+ret = send_all(s->fd, buf, len, true);
+if (ret == -EAGAIN) {
+chr->write_blocked = true;
+qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
+ tcp_chr_read, tcp_chr_write_unblocked, chr);
+}
+return ret;
+}
+
 static void tcp_chr_connect(void *opaque)
 {
 CharDriverState *chr = opaque;
 TCPCharDriver *s = chr->opaque;
+IOHandler *write_cb;
+int flags;
+bool nonblock;
+
+flags = fcntl(s->fd, F_GETFL);
+if (flags == -1) {
+flags = 0;
+}
+nonblock = flags & O_NONBLOCK;
+
+write_cb = NULL;
+chr->nonblock = false;
+if (nonblock && chr->chr_write_unblocked) {
+write_cb = chr->chr_write_unblocked;
+chr->nonblock = true;
+}
 
+chr->write_blocked = false;
 s->connected = 1;
 qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
  tcp_chr_read, NULL, chr);
@@ -2266,6 +2315,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts 
*opts)
 
 chr->opaque = s;
 chr->chr_write = tcp_chr_write;
+chr->chr_write_nb = tcp_chr_write_nb;
 chr->chr_close = tcp_chr_close;
 chr->get_msgfd = tcp_get_msgfd;
 
-- 
1.6.2.5





[Qemu-devel] [PATCH v6 6/6] virtio-console: Throttle virtio-serial-bus if we can't consume any more guest data

2010-05-05 Thread Amit Shah
If the char device we're connected to is overwhelmed with data and it
can't accept any more, signal to the virtio-serial-bus to stop sending
us more data till we tell otherwise.

If the current buffer being processed hasn't been completely written out
to the char device, we have to keep it around and re-try sending it
since the virtio-serial-bus code assumes we consume the entire buffer.

Allow the chardev backends to return -EAGAIN; we're ready with a
callback handler that will flush the remainder of the buffer.

Also register with savevm so that we save/restore such a buffer across
migration.

Signed-off-by: Amit Shah 
---
 hw/virtio-console.c |  126 +-
 1 files changed, 123 insertions(+), 3 deletions(-)

diff --git a/hw/virtio-console.c b/hw/virtio-console.c
index 749ed59..7eb6aa1 100644
--- a/hw/virtio-console.c
+++ b/hw/virtio-console.c
@@ -13,18 +13,92 @@
 #include "qemu-char.h"
 #include "virtio-serial.h"
 
+typedef struct Buffer {
+uint8_t *buf;
+size_t rem_len;
+size_t offset;
+} Buffer;
+
 typedef struct VirtConsole {
 VirtIOSerialPort port;
 CharDriverState *chr;
+Buffer *unflushed_buf;
 } VirtConsole;
 
+static void add_unflushed_buf(VirtConsole *vcon, const uint8_t *buf, size_t 
len)
+{
+vcon->unflushed_buf = qemu_malloc(sizeof(Buffer));
+vcon->unflushed_buf->buf = qemu_malloc(len);
+
+memcpy(vcon->unflushed_buf->buf, buf, len);
+vcon->unflushed_buf->rem_len = len;
+vcon->unflushed_buf->offset = 0;
+}
+
+static void free_unflushed_buf(VirtConsole *vcon)
+{
+if (vcon->unflushed_buf) {
+qemu_free(vcon->unflushed_buf->buf);
+qemu_free(vcon->unflushed_buf);
+vcon->unflushed_buf = NULL;
+}
+}
+
+static int buffered_write_to_chardev(VirtConsole *vcon, const uint8_t *buf,
+ size_t len)
+{
+size_t written;
+ssize_t ret;
+
+written = 0;
+do {
+ret = qemu_chr_write_nb(vcon->chr, buf + written, len - written);
+if (ret < 0) {
+if (vcon->unflushed_buf) {
+vcon->unflushed_buf->offset += written;
+vcon->unflushed_buf->rem_len -= written;
+} else {
+virtio_serial_throttle_port(&vcon->port, true);
+add_unflushed_buf(vcon, buf + written, len - written);
+}
+
+return -EAGAIN;
+}
+
+written += ret;
+} while (written != len);
+
+return 0;
+}
+
+/* Callback function called when the chardev can accept more data */
+static void chr_write_unblocked(void *opaque)
+{
+VirtConsole *vcon = opaque;
+
+if (vcon->unflushed_buf) {
+int ret;
+
+ret = buffered_write_to_chardev(vcon, vcon->unflushed_buf->buf
+  + vcon->unflushed_buf->offset,
+vcon->unflushed_buf->rem_len);
+if (ret < 0) {
+return;
+}
+free_unflushed_buf(vcon);
+}
+virtio_serial_throttle_port(&vcon->port, false);
+}
 
 /* Callback function that's called when the guest sends us data */
 static void flush_buf(VirtIOSerialPort *port, const uint8_t *buf, size_t len)
 {
 VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
 
-qemu_chr_write(vcon->chr, buf, len);
+/* If a previous write was incomplete, we should've been throttled. */
+assert(!vcon->unflushed_buf);
+
+buffered_write_to_chardev(vcon, buf, len);
 }
 
 /* Readiness of the guest to accept data on a port */
@@ -48,19 +122,62 @@ static void chr_event(void *opaque, int event)
 VirtConsole *vcon = opaque;
 
 switch (event) {
-case CHR_EVENT_OPENED: {
+case CHR_EVENT_OPENED:
 virtio_serial_open(&vcon->port);
 break;
-}
+
 case CHR_EVENT_CLOSED:
+if (vcon->unflushed_buf) {
+free_unflushed_buf(vcon);
+}
 virtio_serial_close(&vcon->port);
 break;
 }
 }
 
+static void virtio_console_port_save(QEMUFile *f, void *opaque)
+{
+VirtConsole *vcon = opaque;
+uint32_t have_buffer;
+
+have_buffer = vcon->unflushed_buf ? true : false;
+
+qemu_put_be32s(f, &have_buffer);
+if (have_buffer) {
+qemu_put_be64s(f, &vcon->unflushed_buf->rem_len);
+qemu_put_buffer(f, vcon->unflushed_buf->buf
+   + vcon->unflushed_buf->offset,
+vcon->unflushed_buf->rem_len);
+}
+}
+
+static int virtio_console_port_load(QEMUFile *f, void *opaque, int version_id)
+{
+VirtConsole *vcon = opaque;
+uint32_t have_buffer;
+
+if (version_id > 1) {
+return -EINVAL;
+}
+
+qemu_get_be32s(f, &have_buffer);
+if (have_buffer) {
+vcon->unflushed_buf = qemu_mallocz(sizeof(Buffer));
+
+qemu_get_be64s(f, &vcon->unflushed_buf->rem_len);
+vcon->unflushed_buf->buf = qemu_malloc(vcon->unflushed_buf->rem_len);
+vcon->unflushed_buf->offset = 0;

[Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Christoph Hellwig
On Fri, Feb 19, 2010 at 12:22:20AM +0200, Michael S. Tsirkin wrote:
> I took a stub at documenting CMD and FLUSH request types in virtio
> block.  Christoph, could you look over this please?
> 
> I note that the interface seems full of warts to me,
> this might be a first step to cleaning them.

The whole virtio-blk interface is full of warts.  It has been
extended rather ad-hoc, so that is rather expected.

> One issue I struggled with especially is how type
> field mixes bits and non-bit values. I ended up
> simply defining all legal values, so that we have
> CMD = 2, CMD_OUT = 3 and so on.

It's basically a complete mess without much logic behind it.

> +\change_unchanged
> +the high bit
> +\change_inserted 0 1266497301
> + (VIRTIO_BLK_T_BARRIER)
> +\change_unchanged
> + indicates that this request acts as a barrier and that all preceeding 
> requests
> + must be complete before this one, and all following requests must not be
> + started until this is complete.
> +
> +\change_inserted 0 1266504385
> + Note that a barrier does not flush caches in the underlying backend device
> + in host, and thus does not serve as data consistency guarantee.
> + Driver must use FLUSH request to flush the host cache.
> +\change_unchanged

I'm not sure it's even worth documenting it.  I can't see any way to
actually implement safe behaviour with the VIRTIO_BLK_T_BARRIER-style
barriers.


Btw, did I mention that .lyx is a a really horrible format to review
diffs for?  Plain latex would be a lot better..




Re: [Qemu-devel] [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Christoph Hellwig
On Tue, Apr 20, 2010 at 02:46:35AM +0100, Jamie Lokier wrote:
> Does this mean that virtio-blk supports all three combinations?
> 
>1. FLUSH that isn't a barrier
>2. FLUSH that is also a barrier
>3. Barrier that is not a flush
> 
> 1 is good for fsync-like operations;
> 2 is good for journalling-like ordered operations.
> 3 sounds like it doesn't mean a lot as the host cache provides no
> guarantees and has no ordering facility that can be used.

No.  The Linux virtio_blk guest driver either supports data integrity
by using FLUSH or can send down BARRIER requests which aren't much
help at all.  Qemu only implements FLUSH anyway.





[Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Michael S. Tsirkin
On Tue, May 04, 2010 at 08:54:59PM +0200, Christoph Hellwig wrote:
> On Fri, Feb 19, 2010 at 12:22:20AM +0200, Michael S. Tsirkin wrote:
> > I took a stub at documenting CMD and FLUSH request types in virtio
> > block.  Christoph, could you look over this please?
> > 
> > I note that the interface seems full of warts to me,
> > this might be a first step to cleaning them.
> 
> The whole virtio-blk interface is full of warts.  It has been
> extended rather ad-hoc, so that is rather expected.
> 
> > One issue I struggled with especially is how type
> > field mixes bits and non-bit values. I ended up
> > simply defining all legal values, so that we have
> > CMD = 2, CMD_OUT = 3 and so on.
> 
> It's basically a complete mess without much logic behind it.
> 
> > +\change_unchanged
> > +the high bit
> > +\change_inserted 0 1266497301
> > + (VIRTIO_BLK_T_BARRIER)
> > +\change_unchanged
> > + indicates that this request acts as a barrier and that all preceeding 
> > requests
> > + must be complete before this one, and all following requests must not be
> > + started until this is complete.
> > +
> > +\change_inserted 0 1266504385
> > + Note that a barrier does not flush caches in the underlying backend device
> > + in host, and thus does not serve as data consistency guarantee.
> > + Driver must use FLUSH request to flush the host cache.
> > +\change_unchanged
> 
> I'm not sure it's even worth documenting it.  I can't see any way to
> actually implement safe behaviour with the VIRTIO_BLK_T_BARRIER-style
> barriers.

lguest seems to still use this.
I guess if you have a reliable host, VIRTIO_BLK_T_BARRIER is enough?

> Btw, did I mention that .lyx is a a really horrible format to review
> diffs for?  Plain latex would be a lot better..




[Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Michael S. Tsirkin
On Tue, May 04, 2010 at 09:56:18PM +0300, Michael S. Tsirkin wrote:
> On Tue, May 04, 2010 at 08:54:59PM +0200, Christoph Hellwig wrote:
> > On Fri, Feb 19, 2010 at 12:22:20AM +0200, Michael S. Tsirkin wrote:
> > > I took a stub at documenting CMD and FLUSH request types in virtio
> > > block.  Christoph, could you look over this please?
> > > 
> > > I note that the interface seems full of warts to me,
> > > this might be a first step to cleaning them.
> > 
> > The whole virtio-blk interface is full of warts.  It has been
> > extended rather ad-hoc, so that is rather expected.
> > 
> > > One issue I struggled with especially is how type
> > > field mixes bits and non-bit values. I ended up
> > > simply defining all legal values, so that we have
> > > CMD = 2, CMD_OUT = 3 and so on.
> > 
> > It's basically a complete mess without much logic behind it.
> > 
> > > +\change_unchanged
> > > +the high bit
> > > +\change_inserted 0 1266497301
> > > + (VIRTIO_BLK_T_BARRIER)
> > > +\change_unchanged
> > > + indicates that this request acts as a barrier and that all preceeding 
> > > requests
> > > + must be complete before this one, and all following requests must not be
> > > + started until this is complete.
> > > +
> > > +\change_inserted 0 1266504385
> > > + Note that a barrier does not flush caches in the underlying backend 
> > > device
> > > + in host, and thus does not serve as data consistency guarantee.
> > > + Driver must use FLUSH request to flush the host cache.
> > > +\change_unchanged
> > 
> > I'm not sure it's even worth documenting it.  I can't see any way to
> > actually implement safe behaviour with the VIRTIO_BLK_T_BARRIER-style
> > barriers.
> 
> lguest seems to still use this.

Sorry, it doesn't. No idea why I thought it does.

> I guess if you have a reliable host, VIRTIO_BLK_T_BARRIER is enough?
> 
> > Btw, did I mention that .lyx is a a really horrible format to review
> > diffs for?  Plain latex would be a lot better..




Re: [Qemu-devel] [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Michael S. Tsirkin
On Tue, May 04, 2010 at 08:56:14PM +0200, Christoph Hellwig wrote:
> On Tue, Apr 20, 2010 at 02:46:35AM +0100, Jamie Lokier wrote:
> > Does this mean that virtio-blk supports all three combinations?
> > 
> >1. FLUSH that isn't a barrier
> >2. FLUSH that is also a barrier
> >3. Barrier that is not a flush
> > 
> > 1 is good for fsync-like operations;
> > 2 is good for journalling-like ordered operations.
> > 3 sounds like it doesn't mean a lot as the host cache provides no
> > guarantees and has no ordering facility that can be used.
> 
> No.  The Linux virtio_blk guest driver either supports data integrity
> by using FLUSH or can send down BARRIER requests which aren't much
> help at all.

It seems we use BARRIER when we get REQ_HARDBARRIER, right?
What does the REQ_HARDBARRIER flag in request mean and when is it set?

>  Qemu only implements FLUSH anyway.




[Qemu-devel] [PATCH] sparc64: implement global translation table entries

2010-05-05 Thread Igor V. Kovalenko
From: Igor V. Kovalenko 

- match global tte against any context
- show global tte in MMU dump

v0->v1: added default case to switch statement in demap_tlb
- should fix gcc warning about uninitialized context variable

Signed-off-by: Igor V. Kovalenko 
---
 target-sparc/cpu.h   |   18 
 target-sparc/helper.c|   33 -
 target-sparc/op_helper.c |   52 ++
 3 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 0e7f390..b705728 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -513,6 +513,24 @@ static inline void cpu_set_cwp(CPUSPARCState *env1, int 
new_cwp)
 /* sun4m.c, sun4u.c */
 void cpu_check_irqs(CPUSPARCState *env);
 
+#if defined (TARGET_SPARC64)
+
+static inline int compare_masked(uint64_t x, uint64_t y, uint64_t mask)
+{
+return (x & mask) == (y & mask);
+}
+
+#define MMU_CONTEXT_BITS 13
+#define MMU_CONTEXT_MASK ((1 << MMU_CONTEXT_BITS) - 1)
+
+static inline int tlb_compare_context(const SparcTLBEntry *tlb,
+  uint64_t context)
+{
+return compare_masked(context, tlb->tag, MMU_CONTEXT_MASK);
+}
+
+#endif
+
 static inline void PUT_PSR(CPUSPARCState *env1, target_ulong val)
 {
 env1->psr = val & PSR_ICC;
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 1f0f7d4..4ece01b 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -381,17 +381,11 @@ static inline target_phys_addr_t 
ultrasparc_truncate_physical(uint64_t x)
  * UltraSparc IIi I/DMMUs
  */
 
-static inline int compare_masked(uint64_t x, uint64_t y, uint64_t mask)
-{
-return (x & mask) == (y & mask);
-}
-
 // Returns true if TTE tag is valid and matches virtual address value in 
context
 // requires virtual address mask value calculated from TTE entry size
 static inline int ultrasparc_tag_match(SparcTLBEntry *tlb,
uint64_t address, uint64_t context,
-   target_phys_addr_t *physical,
-   int is_nucleus)
+   target_phys_addr_t *physical)
 {
 uint64_t mask;
 
@@ -413,8 +407,7 @@ static inline int ultrasparc_tag_match(SparcTLBEntry *tlb,
 
 // valid, context match, virtual address match?
 if (TTE_IS_VALID(tlb->tte) &&
-((is_nucleus && compare_masked(0, tlb->tag, 0x1fff))
- || TTE_IS_GLOBAL(tlb->tte) || compare_masked(context, tlb->tag, 
0x1fff))
+(TTE_IS_GLOBAL(tlb->tte) || tlb_compare_context(tlb, context))
 && compare_masked(address, tlb->tag, mask))
 {
 // decode physical address
@@ -431,7 +424,6 @@ static int get_physical_address_data(CPUState *env,
 {
 unsigned int i;
 uint64_t context;
-int is_nucleus;
 
 if ((env->lsu & DMMU_E) == 0) { /* DMMU disabled */
 *physical = ultrasparc_truncate_physical(address);
@@ -439,14 +431,16 @@ static int get_physical_address_data(CPUState *env,
 return 0;
 }
 
-context = env->dmmu.mmu_primary_context & 0x1fff;
-is_nucleus = env->tl > 0;
+if (env->tl == 0) {
+context = env->dmmu.mmu_primary_context & 0x1fff;
+} else {
+context = 0;
+}
 
 for (i = 0; i < 64; i++) {
 // ctx match, vaddr match, valid?
 if (ultrasparc_tag_match(&env->dtlb[i],
- address, context, physical,
- is_nucleus)) {
+ address, context, physical)) {
 // access ok?
 if (((env->dtlb[i].tte & 0x4) && is_user) ||
 (!(env->dtlb[i].tte & 0x2) && (rw == 1))) {
@@ -492,7 +486,6 @@ static int get_physical_address_code(CPUState *env,
 {
 unsigned int i;
 uint64_t context;
-int is_nucleus;
 
 if ((env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0) {
 /* IMMU disabled */
@@ -501,14 +494,16 @@ static int get_physical_address_code(CPUState *env,
 return 0;
 }
 
-context = env->dmmu.mmu_primary_context & 0x1fff;
-is_nucleus = env->tl > 0;
+if (env->tl == 0) {
+context = env->dmmu.mmu_primary_context & 0x1fff;
+} else {
+context = 0;
+}
 
 for (i = 0; i < 64; i++) {
 // ctx match, vaddr match, valid?
 if (ultrasparc_tag_match(&env->itlb[i],
- address, context, physical,
- is_nucleus)) {
+ address, context, physical)) {
 // access ok?
 if ((env->itlb[i].tte & 0x4) && is_user) {
 if (env->immu.sfsr) /* Fault status register */
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index b27778b..e048845 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -129,24 +129,58 @@ static void demap_tlb(SparcTLBEntry *tlb, target_ulong 
demap_addr,
 

Re: [Qemu-devel] [PATCH 2/3] sparc64: implement global translation table entries

2010-05-05 Thread Igor Kovalenko
On Tue, May 4, 2010 at 12:29 AM, Blue Swirl  wrote:
> On 5/3/10, Igor Kovalenko  wrote:
>> On Tue, May 4, 2010 at 12:06 AM, Blue Swirl  wrote:
>>  > On 5/3/10, Igor V. Kovalenko  wrote:
>>  >> From: Igor V. Kovalenko 
>>  >>
>>  >>  - match global tte against any context
>>  >>  - show global tte in MMU dump
>>  >>
>>  >>  Signed-off-by: Igor V. Kovalenko 
>>  >
>>  > I get this error:
>>  >  CC    sparc64-softmmu/op_helper.o
>>  > cc1: warnings being treated as errors
>>  > /src/qemu/target-sparc/op_helper.c: In function 'demap_tlb':
>>  > /src/qemu/target-sparc/op_helper.c:129: error: 'context' may be used
>>  > uninitialized in this function
>>  >
>>
>>
>> My gcc (Gentoo 4.4.3-r2 p1.2) is silent, and looking at the change all
>>  4 possible cases are handled in switch statement.
>
> I think gcc is not intelligent enough to know that x & 3 has only 4
> possible cases. :-)
>
>>  It should initializes context in 3 usable cases and returns from the
>>  4th which is reserved.
>>  How do we fix this issue?
>
> I'd add a default case to one of the cases. Another possibility is to
> initialize the context with 0 and then make one of the cases empty.
>

Added default case, resent this patch only.

-- 
Kind regards,
Igor V. Kovalenko




[Qemu-devel] [PATCH] sparc64: implement global translation table entries v1

2010-05-05 Thread Igor V. Kovalenko
From: Igor V. Kovalenko 

- match global tte against any context
- show global tte in MMU dump

v0->v1: added default case to switch statement in demap_tlb
- should fix gcc warning about uninitialized context variable

Signed-off-by: Igor V. Kovalenko 
---
 target-sparc/cpu.h   |   18 
 target-sparc/helper.c|   33 -
 target-sparc/op_helper.c |   53 ++
 3 files changed, 76 insertions(+), 28 deletions(-)

diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 0e7f390..b705728 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -513,6 +513,24 @@ static inline void cpu_set_cwp(CPUSPARCState *env1, int 
new_cwp)
 /* sun4m.c, sun4u.c */
 void cpu_check_irqs(CPUSPARCState *env);
 
+#if defined (TARGET_SPARC64)
+
+static inline int compare_masked(uint64_t x, uint64_t y, uint64_t mask)
+{
+return (x & mask) == (y & mask);
+}
+
+#define MMU_CONTEXT_BITS 13
+#define MMU_CONTEXT_MASK ((1 << MMU_CONTEXT_BITS) - 1)
+
+static inline int tlb_compare_context(const SparcTLBEntry *tlb,
+  uint64_t context)
+{
+return compare_masked(context, tlb->tag, MMU_CONTEXT_MASK);
+}
+
+#endif
+
 static inline void PUT_PSR(CPUSPARCState *env1, target_ulong val)
 {
 env1->psr = val & PSR_ICC;
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 1f0f7d4..4ece01b 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -381,17 +381,11 @@ static inline target_phys_addr_t 
ultrasparc_truncate_physical(uint64_t x)
  * UltraSparc IIi I/DMMUs
  */
 
-static inline int compare_masked(uint64_t x, uint64_t y, uint64_t mask)
-{
-return (x & mask) == (y & mask);
-}
-
 // Returns true if TTE tag is valid and matches virtual address value in 
context
 // requires virtual address mask value calculated from TTE entry size
 static inline int ultrasparc_tag_match(SparcTLBEntry *tlb,
uint64_t address, uint64_t context,
-   target_phys_addr_t *physical,
-   int is_nucleus)
+   target_phys_addr_t *physical)
 {
 uint64_t mask;
 
@@ -413,8 +407,7 @@ static inline int ultrasparc_tag_match(SparcTLBEntry *tlb,
 
 // valid, context match, virtual address match?
 if (TTE_IS_VALID(tlb->tte) &&
-((is_nucleus && compare_masked(0, tlb->tag, 0x1fff))
- || TTE_IS_GLOBAL(tlb->tte) || compare_masked(context, tlb->tag, 
0x1fff))
+(TTE_IS_GLOBAL(tlb->tte) || tlb_compare_context(tlb, context))
 && compare_masked(address, tlb->tag, mask))
 {
 // decode physical address
@@ -431,7 +424,6 @@ static int get_physical_address_data(CPUState *env,
 {
 unsigned int i;
 uint64_t context;
-int is_nucleus;
 
 if ((env->lsu & DMMU_E) == 0) { /* DMMU disabled */
 *physical = ultrasparc_truncate_physical(address);
@@ -439,14 +431,16 @@ static int get_physical_address_data(CPUState *env,
 return 0;
 }
 
-context = env->dmmu.mmu_primary_context & 0x1fff;
-is_nucleus = env->tl > 0;
+if (env->tl == 0) {
+context = env->dmmu.mmu_primary_context & 0x1fff;
+} else {
+context = 0;
+}
 
 for (i = 0; i < 64; i++) {
 // ctx match, vaddr match, valid?
 if (ultrasparc_tag_match(&env->dtlb[i],
- address, context, physical,
- is_nucleus)) {
+ address, context, physical)) {
 // access ok?
 if (((env->dtlb[i].tte & 0x4) && is_user) ||
 (!(env->dtlb[i].tte & 0x2) && (rw == 1))) {
@@ -492,7 +486,6 @@ static int get_physical_address_code(CPUState *env,
 {
 unsigned int i;
 uint64_t context;
-int is_nucleus;
 
 if ((env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0) {
 /* IMMU disabled */
@@ -501,14 +494,16 @@ static int get_physical_address_code(CPUState *env,
 return 0;
 }
 
-context = env->dmmu.mmu_primary_context & 0x1fff;
-is_nucleus = env->tl > 0;
+if (env->tl == 0) {
+context = env->dmmu.mmu_primary_context & 0x1fff;
+} else {
+context = 0;
+}
 
 for (i = 0; i < 64; i++) {
 // ctx match, vaddr match, valid?
 if (ultrasparc_tag_match(&env->itlb[i],
- address, context, physical,
- is_nucleus)) {
+ address, context, physical)) {
 // access ok?
 if ((env->itlb[i].tte & 0x4) && is_user) {
 if (env->immu.sfsr) /* Fault status register */
diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
index b27778b..245eba7 100644
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@@ -129,24 +129,59 @@ static void demap_tlb(SparcTLBEntry *tlb, target_ulong 
demap_addr,
 

[Qemu-devel] Re: [PATCH v6 0/6] char: non-blocking writes, virtio-console flow control

2010-05-05 Thread Gerd Hoffmann

On 05/04/10 20:23, Amit Shah wrote:

Hello,

This series lets interested callers ask for an -EAGAIN return from the
chardev backends (only unix and tcp sockets as of now) to implement
their own flow control.

A new call, qemu_chr_write_nb() is added, that will fallback to
qemu_chr_write() if the backend file isn't non-blocking or if no
callback was supplied.

Support for other backend types is easy to add and will be done in
later patches.


I think we've finally sorted out all issues.

Acked-by: Gerd Hoffmann 

cheers,
  Gerd




[Qemu-devel] Re: [PATCH v6 5/6] char: unix/tcp: Add a non-blocking write handler

2010-05-05 Thread Juan Quintela
Amit Shah  wrote:
> Add a non-blocking write handler that can return with -EAGAIN to the
> caller and also callback when the socket becomes writable.
>
> Non-blocking writes are only enabled for sockets that are opened in
> non-blocking mode and only for callers that have registered a callback
> handler for resuming writes.
>
> Signed-off-by: Amit Shah 

I 

>  static void tcp_chr_connect(void *opaque)
>  {
>  CharDriverState *chr = opaque;
>  TCPCharDriver *s = chr->opaque;
> +IOHandler *write_cb;
> +int flags;
> +bool nonblock;
> +
> +flags = fcntl(s->fd, F_GETFL);
> +if (flags == -1) {
> +flags = 0;
> +}
> +nonblock = flags & O_NONBLOCK;
> +
> +write_cb = NULL;
> +chr->nonblock = false;
> +if (nonblock && chr->chr_write_unblocked) {
> +write_cb = chr->chr_write_unblocked;
> +chr->nonblock = true;
> +}
>  
> +chr->write_blocked = false;
>  s->connected = 1;
>  qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,

write_cb is a write-only variable, no?




Re: [Qemu-devel] [PATCH 2/3] Compile vl.c once

2010-05-05 Thread Blue Swirl
On 5/4/10, TeLeMan  wrote:
> This patch breaks cpu list("-cpu ?").

Thanks for reporting. Fixed in HEAD.




Re: [Qemu-devel] [PATCH v5 2/6] char: Add a QemuChrHandlers struct to initialise chardev handlers

2010-05-05 Thread Blue Swirl
On 5/4/10, Amit Shah  wrote:
> Instead of passing each handler in the qemu_add_handlers() function,
>  create a struct of handlers that can be passed to the function instead.
>
>  Signed-off-by: Amit Shah 
>  ---
>   gdbstub.c|9 +++--
>   hw/debugcon.c|2 +-
>   hw/escc.c|9 +++--
>   hw/etraxfs_ser.c |   13 +
>   hw/mcf_uart.c|9 +++--
>   hw/pl011.c   |9 +++--
>   hw/pxa2xx.c  |   13 +
>   hw/serial.c  |9 +++--
>   hw/sh_serial.c   |   12 +---
>   hw/syborg_serial.c   |9 +++--
>   hw/usb-serial.c  |9 +++--
>   hw/virtio-console.c  |9 +++--
>   hw/xen_console.c |   16 +++-
>   hw/xilinx_uartlite.c |   11 +--
>   monitor.c|   19 +++
>   net/slirp.c  |8 ++--
>   qemu-char.c  |   27 ++-
>   qemu-char.h  |   12 
>   18 files changed, 151 insertions(+), 54 deletions(-)
>
>  diff --git a/gdbstub.c b/gdbstub.c
>  index 93c4850..7b981ce 100644
>  --- a/gdbstub.c
>  +++ b/gdbstub.c
>  @@ -2621,6 +2621,12 @@ static void gdb_sigterm_handler(int signal)
>   }
>   #endif
>
>  +static QemuChrHandlers gdb_handlers = {

Please add 'const' in places like this ...

>  +.fd_can_read = gdb_chr_can_receive,
>  +.fd_read = gdb_chr_receive,
>  +.fd_event = gdb_chr_event,
>  +};
>  +
>   int gdbserver_start(const char *device)
>   {
>  GDBState *s;
>  @@ -2650,8 +2656,7 @@ int gdbserver_start(const char *device)
>  if (!chr)
>  return -1;
>
>  -qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive,
>  -  gdb_chr_event, NULL);
>  +qemu_chr_add_handlers(chr, &gdb_handlers, NULL);
>  }
>
>  s = gdbserver_state;
>  diff --git a/hw/debugcon.c b/hw/debugcon.c
>  index 5ee6821..e79a595 100644
>  --- a/hw/debugcon.c
>  +++ b/hw/debugcon.c
>  @@ -73,7 +73,7 @@ static void debugcon_init_core(DebugconState *s)
>  exit(1);
>  }
>
>  -qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, s);
>  +qemu_chr_add_handlers(s->chr, NULL, s);
>   }
>
>   static int debugcon_isa_initfn(ISADevice *dev)
>  diff --git a/hw/escc.c b/hw/escc.c
>  index 6d2fd36..1978bf7 100644
>  --- a/hw/escc.c
>  +++ b/hw/escc.c
>  @@ -890,6 +890,12 @@ void slavio_serial_ms_kbd_init(target_phys_addr_t base, 
> qemu_irq irq,
>  sysbus_mmio_map(s, 0, base);
>   }
>
>  +static QemuChrHandlers serial_handlers = {
>  +.fd_can_read = serial_can_receive,
>  +.fd_read = serial_receive1,
>  +.fd_event = serial_event,
>  +};
>  +
>   static int escc_init1(SysBusDevice *dev)
>   {
>  SerialState *s = FROM_SYSBUS(SerialState, dev);
>  @@ -903,8 +909,7 @@ static int escc_init1(SysBusDevice *dev)
>  s->chn[i].chn = 1 - i;
>  s->chn[i].clock = s->frequency / 2;
>  if (s->chn[i].chr) {
>  -qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
>  -  serial_receive1, serial_event, 
> &s->chn[i]);
>  +qemu_chr_add_handlers(s->chn[i].chr, &serial_handlers, 
> &s->chn[i]);
>  }
>  }
>  s->chn[0].otherchn = &s->chn[1];
>  diff --git a/hw/etraxfs_ser.c b/hw/etraxfs_ser.c
>  index e1f9615..e22f770 100644
>  --- a/hw/etraxfs_ser.c
>  +++ b/hw/etraxfs_ser.c
>  @@ -161,6 +161,12 @@ static void serial_event(void *opaque, int event)
>
>   }
>
>  +static QemuChrHandlers serial_handlers = {
>  +.fd_can_read = serial_can_receive,
>  +.fd_read = serial_receive,
>  +.fd_event = serial_event,
>  +};
>  +
>   static int etraxfs_ser_init(SysBusDevice *dev)
>   {
>  struct etrax_serial *s = FROM_SYSBUS(typeof (*s), dev);
>  @@ -174,10 +180,9 @@ static int etraxfs_ser_init(SysBusDevice *dev)
>  ser_regs = cpu_register_io_memory(ser_read, ser_write, s);
>  sysbus_init_mmio(dev, R_MAX * 4, ser_regs);
>  s->chr = qdev_init_chardev(&dev->qdev);
>  -if (s->chr)
>  -qemu_chr_add_handlers(s->chr,
>  -  serial_can_receive, serial_receive,
>  -  serial_event, s);
>  +if (s->chr) {
>  +qemu_chr_add_handlers(s->chr, &serial_handlers, s);
>  +}
>  return 0;
>   }
>
>  diff --git a/hw/mcf_uart.c b/hw/mcf_uart.c
>  index d16bac7..301b901 100644
>  --- a/hw/mcf_uart.c
>  +++ b/hw/mcf_uart.c
>  @@ -268,6 +268,12 @@ static void mcf_uart_receive(void *opaque, const 
> uint8_t *buf, int size)
>  mcf_uart_push_byte(s, buf[0]);
>   }
>
>  +static QemuChrHandlers mcf_uart_handlers = {
>  +.fd_can_read = mcf_uart_can_receive,
>  +.fd_read = mcf_uart_receive,
>  +.fd_event = mcf_uart_event,
>  +};
>  +
>   void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
>   {
>  mcf_uart_state *s;
>  @@ -276,8 +282,7 @@ void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
>  s->chr = chr;
>  s->irq 

[Qemu-devel] Re: [PATCH v6 5/6] char: unix/tcp: Add a non-blocking write handler

2010-05-05 Thread Amit Shah
On (Tue) May 04 2010 [21:54:09], Juan Quintela wrote:
> >  static void tcp_chr_connect(void *opaque)
> >  {
> >  CharDriverState *chr = opaque;
> >  TCPCharDriver *s = chr->opaque;
> > +IOHandler *write_cb;
> > +int flags;
> > +bool nonblock;
> > +
> > +flags = fcntl(s->fd, F_GETFL);
> > +if (flags == -1) {
> > +flags = 0;
> > +}
> > +nonblock = flags & O_NONBLOCK;
> > +
> > +write_cb = NULL;
> > +chr->nonblock = false;
> > +if (nonblock && chr->chr_write_unblocked) {
> > +write_cb = chr->chr_write_unblocked;
> > +chr->nonblock = true;
> > +}
> >  
> > +chr->write_blocked = false;
> >  s->connected = 1;
> >  qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
> 
> write_cb is a write-only variable, no?

Leftover from previous design.

I'll clean this up in the next series (already applied to my local
branch).

Thanks,
Amit




[Qemu-devel] Re: sparc64 lazy conditional codes evaluation

2010-05-05 Thread Blue Swirl
On 5/3/10, Igor Kovalenko  wrote:
> On Mon, May 3, 2010 at 11:54 PM, Blue Swirl  wrote:
>  > On 5/3/10, Igor Kovalenko  wrote:
>  >> On Mon, May 3, 2010 at 11:24 PM, Blue Swirl  wrote:
>  >>  > On 5/3/10, Igor Kovalenko  wrote:
>  >>  >> Hi!
>  >>  >>
>  >>  >>  There is an issue with lazy conditional codes evaluation where
>  >>  >>  we return from trap handler with mismatching conditionals.
>  >>  >>
>  >>  >>  I seldom reproduce it here when dragging qemu window while
>  >>  >>  machine is working through silo initialization. I use gentoo minimal 
> cd
>  >>  >>  install-sparc64-minimal-20100322.iso but I think anything with silo 
> boot
>  >>  >>  would experience the same. Once in a while it would report crc error,
>  >>  >>  unable to open cd partition or it would fail to decompress image.
>  >>  >
>  >>  > I think I've also seen this.
>  >>  >
>  >>  >>  Pattern that fails appears to require a sequence of compare insn
>  >>  >>  possibly followed by a few instructions which do not touch 
> conditionals,
>  >>  >>  then conditional branch insn. If it happens that we trap while 
> processing
>  >>  >>  conditional branch insn so it is restarted after return from trap 
> then
>  >>  >>  seldom conditional codes are calculated incorrectly.
>  >>  >>
>  >>  >>  I cannot point to exact cause but it appears that after trap return
>  >>  >>  we may have CC_OP and CC_SRC* mismatch somewhere,
>  >>  >>  since adding more cond evaluation flushes over the code helps.
>  >>  >>
>  >>  >>  We already tried doing flush more frequently and it is still not
>  >>  >>  complete, so the question is how to finally do this once and right :)
>  >>  >>
>  >>  >>  Obviously I do not get the design of lazy evaluation right, but
>  >>  >>  the following list appears to be good start. Plan is to prepare
>  >>  >>  a change to qemu and find a way to test it.
>  >>  >>
>  >>  >>  1. Since SPARC* is a RISC CPU it seems to be not profitable to
>  >>  >>use DisasContext->cc_op to predict if flags should be not evaluated
>  >>  >>due to overriding insn. Instead we can drop cc_op from disassembler
>  >>  >>context and simplify code to only use cc_op from env.
>  >>  >
>  >>  > Not currently, but in the future we may use that to do even lazier
>  >>  > flags computation. For example the sequence 'cmp x, y; bne target'
>  >>  > could be much more optimal by changing the branch to do the
>  >>  > comparison. Here's an old unfinished patch to do some of this.
>  >>  >
>  >>  >>Another point is that we always write to env->cc_op when
>  >>  >>  translating *cc insns
>  >>  >>This should solve any issue with dc->cc_op prediction going
>  >>  >>out of sync with env->cc_op and cpu_cc_src*
>  >>  >
>  >>  > I think this is what is happening now.
>  >>  >
>  >>  >>  2. We must flush lazy evaluation back to CC_OP_FLAGS in a few cases 
> when
>  >>  >>a. conditional code is required by insn (like addc, cond branch 
> etc.)
>  >>  >>   - here we can optimize by evaluating specific bits (carry?)
>  >>  >>   - not sure if it works in case we have two cond consuming insns,
>  >>  >> where first needs carry another needs the rest of flags
>  >>  >
>  >>  > Here's another patch to optimize C flag handling. It doesn't pass my
>  >>  > tests though.
>  >>  >
>  >>  >>b. CCR is read by rdccr (helper_rdccr)
>  >>  >>   - have to compute all flags
>  >>  >>c. trap occurs and we prepare trap level context (saving pstate)
>  >>  >>   - have to compute all flags
>  >>  >>d. control goes out of tcg runtime (so gdbstub reads correct value 
> from env)
>  >>  >>   - have to compute all flags
>  >>  >
>  >>  > Fully agree.
>  >>
>  >>
>  >> Cool
>  >>
>  >>  Still I'd propose to kill dc->cc_op, find a reliable way to test it
>  >>  and then add it back possibly with more optimizations.
>  >>  I'm lost in the code up to the point where I believe we need to
>  >>  save/restore cc_op and cpu_cc* while switching trap levels.
>  >
>  > I'd think this should do the trick:
>  >
>  > diff --git a/target-sparc/op_helper.c b/target-sparc/op_helper.c
>  > index b27778b..94921cd 100644
>  > --- a/target-sparc/op_helper.c
>  > +++ b/target-sparc/op_helper.c
>  > @@ -3506,6 +3506,8 @@ void do_interrupt(CPUState *env)
>  > }
>  > tsptr = cpu_tsptr(env);
>  >
>  > +helper_compute_psr();
>  > +
>  > tsptr->tstate = ((uint64_t)GET_CCR(env) << 32) |
>  > ((env->asi & 0xff) << 24) | ((env->pstate & 0xf3f) << 8) |
>  > GET_CWP64(env);
>  >
>
>
> Thanks, this change seems to work here for silo issue.
>
>  Another change would be to flush for gdbstub use of GET_CCR and for
>  helper_rdccr.
>  I tried to embed flush into GET_CCR but the code looks ugly since we
>  need to proxy a call to helper_compute_psr from gdbstub passing
>  available env pointer.
>
>  Not really tested with your changes, but still what is the breakage you see?

Aurora 2.0 
(http://distro.ibiblio.org/pub/linux/distri

Re: [Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Jamie Lokier
Jens Axboe wrote:
> On Tue, May 04 2010, Rusty Russell wrote:
> > ISTR someone mentioning a desire for such an API years ago, so CC'ing the
> > usual I/O suspects...
> 
> It would be nice to have a more fuller API for this, but the reality is
> that only the flush approach is really workable. Even just strict
> ordering of requests could only be supported on SCSI, and even there the
> kernel still lacks proper guarantees on error handling to prevent
> reordering there.

There's a few I/O scheduling differences that might be useful:

1. The I/O scheduler could freely move WRITEs before a FLUSH but not
   before a BARRIER.  That might be useful for time-critical WRITEs,
   and those issued by high I/O priority.

2. The I/O scheduler could move WRITEs after a FLUSH if the FLUSH is
   only for data belonging to a particular file (e.g. fdatasync with
   no file size change, even on btrfs if O_DIRECT was used for the
   writes being committed).  That would entail tagging FLUSHes and
   WRITEs with a fs-specific identifier (such as inode number), opaque
   to the scheduler which only checks equality.

3. By delaying FLUSHes through reordering as above, the I/O scheduler
   could merge multiple FLUSHes into a single command.

4. On MD/RAID, BARRIER requires every backing device to quiesce before
   sending the low-level cache-flush, and all of those to finish
   before resuming each backing device.  FLUSH doesn't require as much
   synchronising.  (With per-file FLUSH; see 2; it could even avoid
   FLUSH altogether to some backing devices for small files).

In other words, FLUSH can be more relaxed than BARRIER inside the
kernel.  It's ironic that we think of fsync as stronger than
fbarrier outside the kernel :-)

-- Jamie




[Qemu-devel] Re: [PATCH v6 6/6] virtio-console: Throttle virtio-serial-bus if we can't consume any more guest data

2010-05-05 Thread Juan Quintela
Amit Shah  wrote:
> If the char device we're connected to is overwhelmed with data and it
> can't accept any more, signal to the virtio-serial-bus to stop sending
> us more data till we tell otherwise.
>
> If the current buffer being processed hasn't been completely written out
> to the char device, we have to keep it around and re-try sending it
> since the virtio-serial-bus code assumes we consume the entire buffer.
>
> Allow the chardev backends to return -EAGAIN; we're ready with a
> callback handler that will flush the remainder of the buffer.
>
> Also register with savevm so that we save/restore such a buffer across
> migration.
>
> Signed-off-by: Amit Shah 
> ---
>  hw/virtio-console.c |  126 +-
>  1 files changed, 123 insertions(+), 3 deletions(-)
>
> diff --git a/hw/virtio-console.c b/hw/virtio-console.c
> index 749ed59..7eb6aa1 100644
> --- a/hw/virtio-console.c
> +++ b/hw/virtio-console.c
> @@ -13,18 +13,92 @@
>  #include "qemu-char.h"
>  #include "virtio-serial.h"
>  
> +typedef struct Buffer {
> +uint8_t *buf;
> +size_t rem_len;
> +size_t offset;
> +} Buffer;
> +
>  typedef struct VirtConsole {
>  VirtIOSerialPort port;
>  CharDriverState *chr;
> +Buffer *unflushed_buf;

This part of teh struct can be static instead of a pointer, and adjust
rest of code accordingly.  for the rest, it looks ok.

For this to work, I think that you will have to change rem_len for
total_len (or whatever) and adjust code around.  This makes trivial to
check for buffer used/no, just see if len = 0.

> +static int buffered_write_to_chardev(VirtConsole *vcon, const uint8_t *buf,
> + size_t len)
> +{
> +size_t written;
> +ssize_t ret;
> +
> +written = 0;
> +do {
> +ret = qemu_chr_write_nb(vcon->chr, buf + written, len - written);
> +if (ret < 0) {
> +if (vcon->unflushed_buf) {
> +vcon->unflushed_buf->offset += written;
> +vcon->unflushed_buf->rem_len -= written;
> +} else {
> +virtio_serial_throttle_port(&vcon->port, true);
> +add_unflushed_buf(vcon, buf + written, len - written);
> +}
> +
> +return -EAGAIN;

You can "return ret" instead of "inventing" a new error :)

> +static void virtio_console_port_save(QEMUFile *f, void *opaque)
> +{
> +VirtConsole *vcon = opaque;
> +uint32_t have_buffer;
> +
> +have_buffer = vcon->unflushed_buf ? true : false;

So, here you can just write vcon->unflushed_buf.len if you go with my
proposal, what will make VMState transition easier.


Rest of patch is good.  I also like the series, specially the change of
qemu_chr_add_handlers() to use one struct to pass its arguments.

Later, Juan.




Re: [Qemu-devel] [PATCH 1/2] qemu-error: Introduce get_errno_name()

2010-05-05 Thread Luiz Capitulino
On Tue, 04 May 2010 09:03:47 -0500
Anthony Liguori  wrote:

> On 05/04/2010 08:56 AM, Luiz Capitulino wrote:
> > On Mon, 03 May 2010 08:16:35 -0500
> > Anthony Liguori  wrote:
> >
> >
> >> On 05/03/2010 08:06 AM, Markus Armbruster wrote:
> >>  
> >>> Luiz Capitulino   writes:
> >>>
> >>>
> >>>
>  We need to expose errno in QMP, for three reasons:
> 
>  1. Some error handling functions print errno codes to the user,
> while it's debatable whether this is good or not from a user
> perspective, sometimes it's the best we can do because it's
> what system calls and libraries return
> 
>  2. Some events (eg. BLOCK_IO_ERROR) will be made even more
> complete with errno information
> 
>  3. It's very good for debugging
> 
>  So, we need a way to expose those codes in QMP. We can't just use
>  the codes themselfs because they may vary between systems.
> 
>  The best solution I can think of is to return the string
>  representation of the name. For example, EIO becomes "EIO".
> 
>  This is what get_errno_name() does.
> 
>  Signed-off-by: Luiz Capitulino
>  ---
> qemu-error.c |   85 
>  ++
> qemu-error.h |1 +
> 2 files changed, 86 insertions(+), 0 deletions(-)
> 
>  diff --git a/qemu-error.c b/qemu-error.c
>  index 5a35e7c..7035417 100644
>  --- a/qemu-error.c
>  +++ b/qemu-error.c
>  @@ -207,3 +207,88 @@ void error_report(const char *fmt, ...)
> va_end(ap);
> error_printf("\n");
> }
>  +
>  +/*
>  + * Probably only useful for QMP
>  + */
>  +const char *get_errno_name(int err)
>  +{
>  +switch (abs(err)) {
>  +case EPERM:
>  +return "EPERM";
>  +case ENOENT:
>  +return "ENOENT";
> 
>   
> >>> [...]
> >>>
> >>>
>  +case EDOM:
>  +return "EDOM";
>  +case ERANGE:
>  +return "ERANGE";
>  +case ENOMEDIUM:
>  +return "ENOMEDIUM";
>  +case ENOTSUP:
>  +return "ENOTSUP";
>  +default:
>  +return "unknown";
> 
>   
> >>> How did you choose the codes to implement?  POSIX has many more...
> >>>
> >   I just ran an awk script on the linux's base errno header file, my
> > idea is just to have the common names, anything 'new' will hit the
> > default clause and we can add it later.
> >
> >
> >> Let me say another way why I think this is a bad path to go down.
> >>
> >> In generally, we could never just pass errno through down.  Different
> >> host platforms are going to generate different errno values so we really
> >> need to filter and send reliable errno values so that clients don't have
> >> to have special code for when they're on Linux vs. AIX vs. Solaris.
> >>  
> >   Sorry for the potential stupid question, but what would a 'reliable'
> > errno be? Or, what's an unreliable errno?
> >
> >   We're not sending plain integers to the clients, so the only problem
> > I can see is if different unices return different errnos for the
> > same error. Is that the problem you're seeing?
> >
> 
> Different types of platforms return different errno values for the same 
> error type.
> 
> As an example, on Linux, connect() returns EINPROGRESS when you set the 
> socket non-blocking.  On Windows, it returns EWOULDBLOCK.

 Wonderful.

> If you just pass through errno, then all QMP clients are going to have 
> to know the different between QEMU on Windows and Linux and handle the 
> errnos appropriately.

 Right.

> >> If we're white listing errno values, we should be able to trivially
> >> convert errnos to QError types via a table just like you have above.
> >>  
> >   Having a direct errno ->  QError mapping doesn't seem good for the
> > current use cases.
> >
> >   For example, do_savevm() (not merged yet) has a StateVmSaveFailed
> > error which also has a 'reason' member, which would look like this
> > on the wire:
> >
> > "{ 'class': 'StateVmSaveFailed', 'data': { 'reason': "EIO" } }"
> >
> >   So, the QError class says what has failed and the errno part says
> > why it has failed.
> >
> >   I'd be happy to implement a different solution that satisfies this
> > basic requirement.
> >
> 
> We need to map errnos to some QMP defined error type.  However, as I've 
> said before, "reason" is usually an indicator that an error is bad.
> 
> A better error would be:
> 
> { 'class': 'SocketIOError': 'data' : { 'source': 'migration' }}
> 
> Or something like that.  You don't want to have command and then 
> CommandFailed as the error.  You want the errors to be the 'reason's for 
> the commands failure.

 StateVmSaveFailed is not like CommandFailed, there are five errors
in do_savevm() and StateVmSaveFailed happens to be 

Re: [Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Jamie Lokier
Rusty Russell wrote:
> On Fri, 19 Feb 2010 08:52:20 am Michael S. Tsirkin wrote:
> > I took a stub at documenting CMD and FLUSH request types in virtio
> > block.  Christoph, could you look over this please?
> > 
> > I note that the interface seems full of warts to me,
> > this might be a first step to cleaning them.
> 
> ISTR Christoph had withdrawn some patches in this area, and was waiting
> for him to resubmit?
> 
> I've given up on figuring out the block device.  What seem to me to be sane
> semantics along the lines of memory barriers are foreign to disk people: they
> want (and depend on) flushing everywhere.
> 
> For example, tdb transactions do not require a flush, they only require what
> I would call a barrier: that prior data be written out before any future data.
> Surely that would be more efficient in general than a flush!  In fact, TDB
> wants only writes to *that file* (and metadata) written out first; it has no
> ordering issues with other I/O on the same device.

I've just posted elsewhere on this thread, that an I/O level flush can
be more efficient than an I/O level barrier (implemented using a
cache-flush really), because the barrier has stricter ordering
requirements at the I/O scheduling level.

By the time you work up to tdb, another way to think of it is
distinguishing "eager fsync" from "fsync but I'm not in a hurry -
delay as long as is convenient".  The latter makes much more sense
with AIO.

> A generic I/O interface would allow you to specify "this request
> depends on these outstanding requests" and leave it at that.  It
> might have some sync flush command for dumb applications and OSes.

For filesystems, it would probably be easy to label in-place
overwrites and fdatasync data flushes when there's no file extension
with an opqaue per-file identifier for certain operations.  Typically
over-writing in place and fdatasync would match up and wouldn't need
ordering against anything else.  Other operations would tend to get
labelled as ordered against everything including these.

-- Jamie




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Reinhard Max

Hi,

On Tue, 4 May 2010 at 11:23, Anthony Liguori wrote:


0.11.0 is pretty old.  Please update your patch against the latest git.


Ok, will do.


I'm not sure we're doing the wrong thing right now.


Well, I think it just can't be correct, that an IPv6-enabled process 
running on a dual-stack machine when it gets told to listen on 
"localhost", ends up only listening on ::1 and doesn't accept 
connections to 127.0.0.1.



cu
Reinhard




Re: [Qemu-devel] Re: [RFC] [PATCH] add ahci support into qemu

2010-05-05 Thread Sebastian Herbszt

乔崇 wrote:

I add Sebastian Herbsz's patch,add WIN_WIN_STANDBYNOW1 support to fix
hw_error on linux shut down etc.

I am  not familiar with  git send-email,I am studying it now :).


[snip]


>From e94912b03ed33080d550eb5764fc99911498101b Mon Sep 17 00:00:00 2001
From: QiaoChong 
Date: Tue, 4 May 2010 07:31:30 +0800
Subject: [PATCH] ahci pci ids  into pci_ids.h,add warning messages.

move ahci pci device id define into pci_ids.h,add warning messages for
unsupported features.

Signed-off-by: QiaoChong 
---
hw/ahci.c|   10 +-
hw/pci_ids.h |1 +
2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/ahci.c b/hw/ahci.c
index cb4a851..e1aed4a 100644
--- a/hw/ahci.c
+++ b/hw/ahci.c
@@ -662,7 +662,9 @@ static void handle_cmd(AHCIState *s,int port,int slot)

 if(fis[1]==0)
 {
-
+#ifdef DEBUG_AHCI
+ printf("now,just ignore command fis[0]=%02x fis[1]=%02x 
fis[2]=%02x\n",fis[0],fis[1],fis[2]);
+#endif
 }

 if(fis[1]==(1<<7))
@@ -755,15 +757,13 @@ static void ahci_pci_map(PCIDevice *pci_dev, int 
region_num,
 cpu_register_physical_memory(addr, size, s->mem);
}

-#define PCI_VENDOR_MYDEVICE  0x8086
-#define PCI_PRODUCT_MYDEVICE 0x2652

static int pci_ahci_init(PCIDevice *dev)
{
 struct ahci_pci_state *d;
 d = DO_UPCAST(struct ahci_pci_state, card, dev);
- pci_config_set_vendor_id(d->card.config,PCI_VENDOR_MYDEVICE);
- pci_config_set_device_id(d->card.config,PCI_PRODUCT_MYDEVICE);
+ pci_config_set_vendor_id(d->card.config,PCI_VENDOR_ID_INTEL);
+ pci_config_set_device_id(d->card.config,PCI_DEVICE_ID_INTEL_ICH6R_AHCI);
 d->card.config[PCI_COMMAND] = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | 
PCI_COMMAND_MASTER;
 d->card.config[PCI_CLASS_DEVICE] = 0;
 d->card.config[0x0b] = 1;//storage
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index fe7a121..4d4de93 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -97,3 +97,4 @@
#define PCI_DEVICE_ID_INTEL_82371AB  0x7111
#define PCI_DEVICE_ID_INTEL_82371AB_20x7112
#define PCI_DEVICE_ID_INTEL_82371AB_30x7113
+#define PCI_DEVICE_ID_INTEL_ICH6R_AHCI0x2652


The list is sorted by vendor and device id. This entry should go
after "PCI_DEVICE_ID_INTEL_ESB_9". The naming scheme seems
to be VENDOR_DEVICE_FUNCTION, so i suggest something like
PCI_DEVICE_ID_INTEL_ICH6R_2 or PCI_DEVICE_ID_INTEL_82801FR_2.

Sebastian





Re: [Qemu-devel] [PATCH, RFC] block: separate raw images from the file protocol

2010-05-05 Thread Stefan Weil

Am 08.04.2010 11:50, schrieb Kevin Wolf:

Am 07.04.2010 22:30, schrieb Christoph Hellwig:
   

We're running into various problems because the "raw" file access, which
is used internally by the various image formats is entangled with the
"raw" image format, which maps the VM view 1:1 to a file system.

This patch renames the raw file backends to the file protocol which
is treated like other protocols (e.g. nbd and http) and adds a new
"raw" image format which is just a wrapper around calls to the underlying
protocol.
 

As you know and as I mentioned in previous discussions this approach is
exactly what I think we need in the block layer.

You provided a nice long patch description that covers almost
everything, so I think I can put the greatest part of my comments there.

   

The patch is surprisingly simple, besides changing the probing logical
in block.c to only look for image formats when using bdrv_open and
renaming of the old raw protocols to file there's almost nothing in there.

One thing that looks suspicious in the patch is moving the actual
posix file creation from raw-posix into the new raw image.  This is
a layering violation, but exactly the same as done by all other image
formats implementing the create operations, and not easily fixable
without a major API change in this area.
 

This is not only a layering violation, but also buggy in this case.
raw-win32.c has a different implementation of raw_create which wouldn't
be called any more.

The two solutions that I see are making raw_create a wrapper that calls
the create function of the protocol, or do make the step and use bdrv_*
in the create functions of the drivers. I think the former is what could
be done to keep this patch simple, but the latter is what we should aim
for longer term.

   

The only issues still open are in the handling of the host devices.
Firstly in current qemu we can specifiy the host* format names
on various command line acceping images, but the new code can't
do that without adding some translation.  Second the layering breaks
the no_zero_init flag in the BlockDriver used by qemu-img.  I'm not
happy how this is done per-driver instead of per-state so I'll
prepare a separate patch to clean this up.
 

Hm, I don't like that very much, but there's probably no sane way around
it. It's clearly a property of the protocol and not of a single device,
but protocols might be stacked and just checking the first one doesn't
give the right result.

Anyway, before merging this patch we obviously need to fix this kind of
things (is it caught by qemu-iotests, by the way?). I'm not sure if we
should add a compatibility translation of host_device =>  raw or if we
should just remove support for that completely. It would be helpful to
know if this is actually used.

   

There's some more cleanup opportunity after this patch, e.g. using
separate lists and registration functions for image formats vs
protocols and maybe even host drivers, but this can be done at a
later stage.

Also there's a check for protocol in bdrv_open for the BDRV_O_SNAPSHOT
case that I don't quite understand, but which I fear won't work as
expected - possibly even before this patch.
 

You mean that is_protocol thing? It comes into play when you do strange
things like qemu -hda fat:/tmp/testdir -snapshot and I think it actually
does work.

Hm, apropos vvfat... Should vvfat actually be implemented as raw backed
by vvfat now instead of using vvfat directly? We could then forbid
protocols to be used directly.

   

Note that this patch requires various recent block patches from Kevin
and me, which should all be in his block queue.

Signed-off-by: Christoph Hellwig

Index: qemu/Makefile.objs
===
--- qemu.orig/Makefile.objs 2010-04-07 13:56:27.429254145 +0200
+++ qemu/Makefile.objs  2010-04-07 22:01:24.974284455 +0200
@@ -12,7 +12,7 @@ block-obj-y += nbd.o block.o aio.o aes.o
  block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o

-block-nested-y += cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
+block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o 
vvfat.o
  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
  block-nested-y += parallels.o nbd.o
  block-nested-$(CONFIG_WIN32) += raw-win32.o
 

This hunk only applies with fuzz on my queue (caused by blkdebug.o). Can
you make sure to rebase the final version on the queue?

   

@@ -1026,12 +985,12 @@ static int hdev_create(const char *filen

  static BlockDriver bdrv_host_device = {
  .format_name= "host_device",
+.protocol_name= "host_device",
  .instance_size  = sizeof(BDRVRawState),
  .bdrv_probe_device  = hdev_probe_device,
  .bdrv_open  = hdev_open,
  .bdrv_close = raw_close,
  .bdrv_create= hdev_create,
-.create_options = raw_create_options,

[Qemu-devel] Re: [RFC] [PATCH] add ahci support into qemu

2010-05-05 Thread Sebastian Herbszt

Elek Roland wrote:

On v, 2010-05-02 at 17:56 +0200, Elek Roland wrote:

I'm going to try the patch in practice as
soon as I finish my university project. (It's due today.)


I've tried the patch, and I can confirm that it does provide an AHCI
device and a SATA disk. It shows up in qemu as an Intel Corporation
82801FR/FRW (ICH6R/ICH6RW) SATA Controller, which is consistent with the
device and vendor IDs in the code. I could successfully create an ext2
filesystem on the virtual disk, mount it, write a text file on it and
read it back.
I've noticed the following errors:
- The source code claims to implement 2 ports, Linux reports 3.
- When I halt the guest OS, the AHCI device throws a hardware error
about an unimplemented command.


Those issues should be fixed now by the following two patches:

0001-fix-port-count-cap-and-version-etc-to-ahci.patch
0002-add-WIN_STANDBYNOW1-process-into-ahci.patch

Sebastian


I think we could clean up the code and use it as a basis for full
support.

Regards,
Roland









[Qemu-devel] Problem with QEMU / KVM

2010-05-05 Thread K D
Hi

I built 2.6.27.10 kernel with KVM configured as built-in.

CONFIG_HAVE_KVM=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=y
CONFIG_KVM_INTEL=y

I built qemu-kvm 0.12.2 from sources pointing to above kernel. When I spawn a 
VM, it hangs at boot. Pl see below. I copied qemu-kvm binary and bios, vga etc 
binaries. I installed grub on a manually created img file. This image has 
nothing but grub, its stage1,1_5,stage2, my kernel and initrd files.

Am spawning VM as below:

qemu-system-x86_64 -hda vmhd.img -L . -curses -show-cursor

grub> root (hd0,0)
 Filesystem type is ext2fs, partition type 0x83

grub> kernel /boot/vmos root=/dev/ram0 ramdisk_size=32768 rw single
   [Linux-bzImage, setup=0x1800, size=0x1cb9b0]

grub> initrd /boot/initrdfs.gz
   [Linux-initrd @ 0x6751000, 0x189b587 bytes]

grub> boot



I copied all these binaries from my dev host to this linux box.
 
-rw-r--r-- 1 root root 131072 May  2 21:31 bios.bin
-rw-r--r-- 1 root root   1025 May  2 21:31 linuxboot.bin
-rw-r--r-- 1 root root   1024 May  2 21:31 multiboot.bin
-rw-r--r-- 1 root root 524288 May  2 21:31 ppc_rom.bin
-rw-r--r-- 1 root root  72192 May  2 21:31 pxe-e1000.bin
-rw-r--r-- 1 root root  56832 May  2 21:31 pxe-i82559er.bin
-rw-r--r-- 1 root root  56320 May  2 21:31 pxe-ne2k_pci.bin
-rw-r--r-- 1 root root  56832 May  2 21:31 pxe-pcnet.bin
-rw-r--r-- 1 root root  56320 May  2 21:31 pxe-rtl8139.bin
-rw-r--r-- 1 root root  56320 May  2 21:31 pxe-virtio.bin
-rwxr-xr-x 1 root root   8960 May  2 21:31 vapic.bin
-rw-r--r-- 1 root root  35840 May  2 21:31 vgabios-cirrus.bin
-rw-r--r-- 1 root root  39936 May  2 21:31 vgabios.bin



Appreciate your help.




From: Anthony Liguori 
To: Gerhard Wiesinger 
Cc: qemu-devel@nongnu.org; Gerd Hoffmann 
Sent: Tue, May 4, 2010 6:32:50 AM
Subject: Re: [Qemu-devel] Problem with QEMU on KVM

On 05/04/2010 12:09 AM, Gerhard Wiesinger wrote:
> On Sat, 24 Apr 2010, Gerhard Wiesinger wrote:
>>> Guess problems comes from the following commit (not yet verified):
>>> commit 37c34d9d5d87ea9d51760310c8863b82cb8c055a
>>> Author: Anthony Liguori 
>>> Date:   Wed Mar 10 09:38:29 2010 -0600
>>> 
>>>input: make vnc use mouse mode notifiers
>>> 
>>>When we switch to absolute mode, we send out a notification (if the 
>>> client
>>>supports it).  Today, we only send this notification when the client 
>>> sends us
>>>a mouse event and we're in the wrong mode.
>>> 
>>>Signed-off-by: Anthony Liguori 
>> 
>> 
>> Ok, verified:
>> git revert -n 37c34d9d5d87ea9d51760310c8863b82cb8c055a
>> => works well.
> 
> Still got no feedback and saw no further changes to fix this problem.

Fix is on the list.

Regards,

Anthony Liguori

> Thnx.
> 
> Ciao,
> Gerhard
> 
> -- http://www.wiesinger.com/
> 


  

[Qemu-devel] [PATCH v7 0/6]

2010-05-05 Thread Amit Shah
Hello,

This series lets interested callers ask for an -EAGAIN return from the
chardev backends (only unix and tcp sockets as of now) to implement
their own flow control.

A new call, qemu_chr_write_nb() is added, that will fallback to
qemu_chr_write() if the backend file isn't non-blocking or if no
callback was supplied.

Support for other backend types is easy to add and will be done in
later patches.

Please apply.

v7:
- constify handlers (Blue Swirl)
- remove 'write_cb', a leftover from previous design (Juan Quintela)
- return ret instead of -EAGAIN in virtio-console (Juan)
- use pre-allocated meta-buffer instead of allocating one each time
  (Juan)

v6:
- Continue write operation on EINTR instead of returning partial
  writes (which was a change from prev. behaviour) (Gerd)

v5:
- Fix bug pointed out by Gerd
- Convert to using a struct for passing on handlers to
  qemu_chr_add_handlers() instead of passing each one
  individually. Simplifies patches. (Inspired by Juan's comment)
- Re-arranged patches

Amit Shah (6):
  virtio-console: Factor out common init between console and generic
ports
  char: Add a QemuChrHandlers struct to initialise chardev handlers
  char: Let writers know how much data was written in case of errors
  char: Add qemu_chr_write_nb() for nonblocking writes
  char: unix/tcp: Add a non-blocking write handler
  virtio-console: Throttle virtio-serial-bus if we can't consume any
more guest data

 gdbstub.c|9 ++-
 hw/debugcon.c|2 +-
 hw/escc.c|9 ++-
 hw/etraxfs_ser.c |   13 +++-
 hw/mcf_uart.c|9 ++-
 hw/pl011.c   |9 ++-
 hw/pxa2xx.c  |   13 +++-
 hw/serial.c  |9 ++-
 hw/sh_serial.c   |   12 +++-
 hw/syborg_serial.c   |9 ++-
 hw/usb-serial.c  |9 ++-
 hw/virtio-console.c  |  156 +++---
 hw/xen_console.c |   16 --
 hw/xilinx_uartlite.c |   11 +++-
 monitor.c|   19 +-
 net/slirp.c  |8 ++-
 net/socket.c |4 +-
 qemu-char.c  |  119 --
 qemu-char.h  |   20 +-
 qemu_socket.h|3 +-
 20 files changed, 375 insertions(+), 84 deletions(-)





[Qemu-devel] [PATCH v7 1/6] virtio-console: Factor out common init between console and generic ports

2010-05-05 Thread Amit Shah
The initialisation for generic ports and console ports is similar.
Factor out the parts that are the same in a different function that can
be called from each of the initfns.

Signed-off-by: Amit Shah 
---
 hw/virtio-console.c |   31 ++-
 1 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/hw/virtio-console.c b/hw/virtio-console.c
index caea11f..d7fe68b 100644
--- a/hw/virtio-console.c
+++ b/hw/virtio-console.c
@@ -58,24 +58,28 @@ static void chr_event(void *opaque, int event)
 }
 }
 
-/* Virtio Console Ports */
-static int virtconsole_initfn(VirtIOSerialDevice *dev)
+static int generic_port_init(VirtConsole *vcon, VirtIOSerialDevice *dev)
 {
-VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
-VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
-
-port->info = dev->info;
-
-port->is_console = true;
+vcon->port.info = dev->info;
 
 if (vcon->chr) {
 qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, chr_event,
   vcon);
-port->info->have_data = flush_buf;
+vcon->port.info->have_data = flush_buf;
 }
 return 0;
 }
 
+/* Virtio Console Ports */
+static int virtconsole_initfn(VirtIOSerialDevice *dev)
+{
+VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
+VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
+
+port->is_console = true;
+return generic_port_init(vcon, dev);
+}
+
 static int virtconsole_exitfn(VirtIOSerialDevice *dev)
 {
 VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
@@ -115,14 +119,7 @@ static int virtserialport_initfn(VirtIOSerialDevice *dev)
 VirtIOSerialPort *port = DO_UPCAST(VirtIOSerialPort, dev, &dev->qdev);
 VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
 
-port->info = dev->info;
-
-if (vcon->chr) {
-qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, chr_event,
-  vcon);
-port->info->have_data = flush_buf;
-}
-return 0;
+return generic_port_init(vcon, dev);
 }
 
 static VirtIOSerialPortInfo virtserialport_info = {
-- 
1.6.2.5





[Qemu-devel] [PATCH v7 2/6] char: Add a QemuChrHandlers struct to initialise chardev handlers

2010-05-05 Thread Amit Shah
Instead of passing each handler in the qemu_add_handlers() function,
create a struct of handlers that can be passed to the function instead.

Signed-off-by: Amit Shah 
---
 gdbstub.c|9 +++--
 hw/debugcon.c|2 +-
 hw/escc.c|9 +++--
 hw/etraxfs_ser.c |   13 +
 hw/mcf_uart.c|9 +++--
 hw/pl011.c   |9 +++--
 hw/pxa2xx.c  |   13 +
 hw/serial.c  |9 +++--
 hw/sh_serial.c   |   12 +---
 hw/syborg_serial.c   |9 +++--
 hw/usb-serial.c  |9 +++--
 hw/virtio-console.c  |9 +++--
 hw/xen_console.c |   16 +++-
 hw/xilinx_uartlite.c |   11 +--
 monitor.c|   19 +++
 net/slirp.c  |8 ++--
 qemu-char.c  |   27 ++-
 qemu-char.h  |   12 
 18 files changed, 151 insertions(+), 54 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 93c4850..ae29db1 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2621,6 +2621,12 @@ static void gdb_sigterm_handler(int signal)
 }
 #endif
 
+static const QemuChrHandlers gdb_handlers = {
+.fd_can_read = gdb_chr_can_receive,
+.fd_read = gdb_chr_receive,
+.fd_event = gdb_chr_event,
+};
+
 int gdbserver_start(const char *device)
 {
 GDBState *s;
@@ -2650,8 +2656,7 @@ int gdbserver_start(const char *device)
 if (!chr)
 return -1;
 
-qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive,
-  gdb_chr_event, NULL);
+qemu_chr_add_handlers(chr, &gdb_handlers, NULL);
 }
 
 s = gdbserver_state;
diff --git a/hw/debugcon.c b/hw/debugcon.c
index 5ee6821..e79a595 100644
--- a/hw/debugcon.c
+++ b/hw/debugcon.c
@@ -73,7 +73,7 @@ static void debugcon_init_core(DebugconState *s)
 exit(1);
 }
 
-qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, s);
+qemu_chr_add_handlers(s->chr, NULL, s);
 }
 
 static int debugcon_isa_initfn(ISADevice *dev)
diff --git a/hw/escc.c b/hw/escc.c
index 6d2fd36..4057bb4 100644
--- a/hw/escc.c
+++ b/hw/escc.c
@@ -890,6 +890,12 @@ void slavio_serial_ms_kbd_init(target_phys_addr_t base, 
qemu_irq irq,
 sysbus_mmio_map(s, 0, base);
 }
 
+static const QemuChrHandlers serial_handlers = {
+.fd_can_read = serial_can_receive,
+.fd_read = serial_receive1,
+.fd_event = serial_event,
+};
+
 static int escc_init1(SysBusDevice *dev)
 {
 SerialState *s = FROM_SYSBUS(SerialState, dev);
@@ -903,8 +909,7 @@ static int escc_init1(SysBusDevice *dev)
 s->chn[i].chn = 1 - i;
 s->chn[i].clock = s->frequency / 2;
 if (s->chn[i].chr) {
-qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
-  serial_receive1, serial_event, &s->chn[i]);
+qemu_chr_add_handlers(s->chn[i].chr, &serial_handlers, &s->chn[i]);
 }
 }
 s->chn[0].otherchn = &s->chn[1];
diff --git a/hw/etraxfs_ser.c b/hw/etraxfs_ser.c
index e1f9615..0c0c485 100644
--- a/hw/etraxfs_ser.c
+++ b/hw/etraxfs_ser.c
@@ -161,6 +161,12 @@ static void serial_event(void *opaque, int event)
 
 }
 
+static const QemuChrHandlers serial_handlers = {
+.fd_can_read = serial_can_receive,
+.fd_read = serial_receive,
+.fd_event = serial_event,
+};
+
 static int etraxfs_ser_init(SysBusDevice *dev)
 {
 struct etrax_serial *s = FROM_SYSBUS(typeof (*s), dev);
@@ -174,10 +180,9 @@ static int etraxfs_ser_init(SysBusDevice *dev)
 ser_regs = cpu_register_io_memory(ser_read, ser_write, s);
 sysbus_init_mmio(dev, R_MAX * 4, ser_regs);
 s->chr = qdev_init_chardev(&dev->qdev);
-if (s->chr)
-qemu_chr_add_handlers(s->chr,
-  serial_can_receive, serial_receive,
-  serial_event, s);
+if (s->chr) {
+qemu_chr_add_handlers(s->chr, &serial_handlers, s);
+}
 return 0;
 }
 
diff --git a/hw/mcf_uart.c b/hw/mcf_uart.c
index d16bac7..d2ce5f6 100644
--- a/hw/mcf_uart.c
+++ b/hw/mcf_uart.c
@@ -268,6 +268,12 @@ static void mcf_uart_receive(void *opaque, const uint8_t 
*buf, int size)
 mcf_uart_push_byte(s, buf[0]);
 }
 
+static const QemuChrHandlers mcf_uart_handlers = {
+.fd_can_read = mcf_uart_can_receive,
+.fd_read = mcf_uart_receive,
+.fd_event = mcf_uart_event,
+};
+
 void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
 {
 mcf_uart_state *s;
@@ -276,8 +282,7 @@ void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
 s->chr = chr;
 s->irq = irq;
 if (chr) {
-qemu_chr_add_handlers(chr, mcf_uart_can_receive, mcf_uart_receive,
-  mcf_uart_event, s);
+qemu_chr_add_handlers(chr, &mcf_uart_handlers, s);
 }
 mcf_uart_reset(s);
 return s;
diff --git a/hw/pl011.c b/hw/pl011.c
index 81de91e..24711c2 100644
--- a/hw/pl011.c
+++ b/hw/pl011.c
@@ -286,6 +286,12 @@ static int pl011_load(QEMUFile *f, void *opaque, int

[Qemu-devel] [PATCH v7 3/6] char: Let writers know how much data was written in case of errors

2010-05-05 Thread Amit Shah
On writing errors, we just returned -1 even if some bytes were already
written out. Ensure we return the number of bytes written before we
return the error (on a subsequent call to qemu_chr_write()).

Signed-off-by: Amit Shah 
---
 qemu-char.c |   12 +++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 76ad12c..decf687 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -507,6 +507,9 @@ int send_all(int fd, const void *buf, int len1)
 while (len > 0) {
 ret = send(fd, buf, len, 0);
 if (ret < 0) {
+if (len1 - len) {
+return len1 - len;
+}
 errno = WSAGetLastError();
 if (errno != WSAEWOULDBLOCK) {
 return -1;
@@ -531,8 +534,15 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
 while (len > 0) {
 ret = write(fd, buf, len);
 if (ret < 0) {
-if (errno != EINTR && errno != EAGAIN)
+if (errno == EINTR) {
+continue;
+}
+if (len1 - len) {
+return len1 - len;
+}
+if (errno != EAGAIN) {
 return -1;
+}
 } else if (ret == 0) {
 break;
 } else {
-- 
1.6.2.5





[Qemu-devel] [PATCH v7 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Amit Shah
For char devices whose backing files are open in non-blocking mode,
non-blocking writes can now be made using qemu_chr_write_nb().

For non-blocking chardevs, we can return -EAGAIN to callers of
qemu_chr_write_nb(). When the backend is ready to accept more data,
we can let the caller know via a callback.

-EAGAIN is returned only when the backend's file is non-blocking
and a callback is registered by the caller when invoking
qemu_chr_add_handlers().

In case a backend doesn't support non-blocking writes,
qemu_chr_write_nb() invokes qemu_chr_write().

Individual callers can update their code to add a callback handler,
call qemu_chr_write_nb() instead of qemu_chr_write() if they wish to
receive -EAGAIN notifications.

No backend currently supports non-blocking writes.

Signed-off-by: Amit Shah 
---
 net/socket.c  |4 ++--
 qemu-char.c   |   31 ---
 qemu-char.h   |8 
 qemu_socket.h |3 ++-
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 1c4e153..8a401e6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -56,8 +56,8 @@ static ssize_t net_socket_receive(VLANClientState *nc, const 
uint8_t *buf, size_
 uint32_t len;
 len = htonl(size);
 
-send_all(s->fd, (const uint8_t *)&len, sizeof(len));
-return send_all(s->fd, buf, size);
+send_all(s->fd, (const uint8_t *)&len, sizeof(len), false);
+return send_all(s->fd, buf, size, false);
 }
 
 static ssize_t net_socket_receive_dgram(VLANClientState *nc, const uint8_t 
*buf, size_t size)
diff --git a/qemu-char.c b/qemu-char.c
index decf687..5e4dec3 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -145,6 +145,16 @@ int qemu_chr_write(CharDriverState *s, const uint8_t *buf, 
int len)
 return s->chr_write(s, buf, len);
 }
 
+ssize_t qemu_chr_write_nb(CharDriverState *s, const uint8_t *buf, size_t len)
+{
+if (!s->nonblock) {
+/* Fallback to blocking write if no callback registered */
+return qemu_chr_write(s, buf, len);
+}
+
+return s->chr_write_nb(s, buf, len);
+}
+
 int qemu_chr_ioctl(CharDriverState *s, int cmd, void *arg)
 {
 if (!s->chr_ioctl)
@@ -203,11 +213,15 @@ void qemu_chr_add_handlers(CharDriverState *s,
 }
 s->chr_can_read = handlers->fd_can_read;
 s->chr_read = handlers->fd_read;
+s->chr_write_unblocked = handlers->fd_write_unblocked;
 s->chr_event = handlers->fd_event;
 s->handler_opaque = opaque;
 if (s->chr_update_read_handler)
 s->chr_update_read_handler(s);
 
+/* We'll set this at connect-time */
+s->nonblock = false;
+
 /* We're connecting to an already opened device, so let's make sure we
also get the open event */
 if (s->opened) {
@@ -499,7 +513,7 @@ static CharDriverState *qemu_chr_open_mux(CharDriverState 
*drv)
 
 
 #ifdef _WIN32
-int send_all(int fd, const void *buf, int len1)
+int send_all(int fd, const void *buf, int len1, bool nonblock)
 {
 int ret, len;
 
@@ -526,7 +540,7 @@ int send_all(int fd, const void *buf, int len1)
 
 #else
 
-static int unix_write(int fd, const uint8_t *buf, int len1)
+static int unix_write(int fd, const uint8_t *buf, int len1, bool nonblock)
 {
 int ret, len;
 
@@ -540,6 +554,9 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
 if (len1 - len) {
 return len1 - len;
 }
+if (errno == EAGAIN && nonblock) {
+return -EAGAIN;
+}
 if (errno != EAGAIN) {
 return -1;
 }
@@ -553,9 +570,9 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
 return len1 - len;
 }
 
-int send_all(int fd, const void *buf, int len1)
+int send_all(int fd, const void *buf, int len1, bool nonblock)
 {
-return unix_write(fd, buf, len1);
+return unix_write(fd, buf, len1, nonblock);
 }
 #endif /* !_WIN32 */
 
@@ -572,7 +589,7 @@ static int stdio_nb_clients = 0;
 static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
 FDCharDriver *s = chr->opaque;
-return send_all(s->fd_out, buf, len);
+return send_all(s->fd_out, buf, len, false);
 }
 
 static int fd_chr_read_poll(void *opaque)
@@ -884,7 +901,7 @@ static int pty_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 pty_chr_update_read_handler(chr);
 return 0;
 }
-return send_all(s->fd, buf, len);
+return send_all(s->fd, buf, len, false);
 }
 
 static int pty_chr_read_poll(void *opaque)
@@ -1949,7 +1966,7 @@ static int tcp_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
 {
 TCPCharDriver *s = chr->opaque;
 if (s->connected) {
-return send_all(s->fd, buf, len);
+return send_all(s->fd, buf, len, false);
 } else {
 /* XXX: indicate an error ? */
 return len;
diff --git a/qemu-char.h b/qemu-char.h
index eacb3b9..52f9ef1 100644
--- a/qemu-char.h
+++ b/qemu-char.h
@@ -1,6 +1,8 @@
 #ifndef QEMU_CHAR_H
 #define QEMU_CHA

[Qemu-devel] [PATCH v7 5/6] char: unix/tcp: Add a non-blocking write handler

2010-05-05 Thread Amit Shah
Add a non-blocking write handler that can return with -EAGAIN to the
caller and also callback when the socket becomes writable.

Non-blocking writes are only enabled for sockets that are opened in
non-blocking mode and only for callers that have registered a callback
handler for resuming writes.

Signed-off-by: Amit Shah 
---
 qemu-char.c |   47 +++
 1 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 5e4dec3..9aa3401 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2129,11 +2129,57 @@ static void tcp_chr_read(void *opaque)
 }
 }
 
+static void tcp_chr_write_unblocked(void *opaque)
+{
+CharDriverState *chr = opaque;
+TCPCharDriver *s = chr->opaque;
+
+assert(chr->write_blocked && chr->chr_write_unblocked);
+
+chr->write_blocked = false;
+qemu_set_fd_handler2(s->fd, tcp_chr_read_poll, tcp_chr_read, NULL, chr);
+chr->chr_write_unblocked(chr->handler_opaque);
+}
+
+static ssize_t tcp_chr_write_nb(CharDriverState *chr, const uint8_t *buf,
+size_t len)
+{
+TCPCharDriver *s = chr->opaque;
+ssize_t ret;
+
+if (!s->connected) {
+/* XXX: indicate an error? */
+return len;
+}
+
+ret = send_all(s->fd, buf, len, true);
+if (ret == -EAGAIN) {
+chr->write_blocked = true;
+qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
+ tcp_chr_read, tcp_chr_write_unblocked, chr);
+}
+return ret;
+}
+
 static void tcp_chr_connect(void *opaque)
 {
 CharDriverState *chr = opaque;
 TCPCharDriver *s = chr->opaque;
+int flags;
+bool nonblock;
+
+flags = fcntl(s->fd, F_GETFL);
+if (flags == -1) {
+flags = 0;
+}
+nonblock = flags & O_NONBLOCK;
+
+chr->nonblock = false;
+if (nonblock && chr->chr_write_unblocked) {
+chr->nonblock = true;
+}
 
+chr->write_blocked = false;
 s->connected = 1;
 qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
  tcp_chr_read, NULL, chr);
@@ -2266,6 +2312,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts 
*opts)
 
 chr->opaque = s;
 chr->chr_write = tcp_chr_write;
+chr->chr_write_nb = tcp_chr_write_nb;
 chr->chr_close = tcp_chr_close;
 chr->get_msgfd = tcp_get_msgfd;
 
-- 
1.6.2.5





[Qemu-devel] [PATCH v7 6/6] virtio-console: Throttle virtio-serial-bus if we can't consume any more guest data

2010-05-05 Thread Amit Shah
If the char device we're connected to is overwhelmed with data and it
can't accept any more, signal to the virtio-serial-bus to stop sending
us more data till we tell otherwise.

If the current buffer being processed hasn't been completely written out
to the char device, we have to keep it around and re-try sending it
since the virtio-serial-bus code assumes we consume the entire buffer.

Allow the chardev backends to return -EAGAIN; we're ready with a
callback handler that will flush the remainder of the buffer.

Also register with savevm so that we save/restore such a buffer across
migration.

Signed-off-by: Amit Shah 
---
 hw/virtio-console.c |  120 +-
 1 files changed, 117 insertions(+), 3 deletions(-)

diff --git a/hw/virtio-console.c b/hw/virtio-console.c
index 862a431..b0b4351 100644
--- a/hw/virtio-console.c
+++ b/hw/virtio-console.c
@@ -13,18 +13,90 @@
 #include "qemu-char.h"
 #include "virtio-serial.h"
 
+typedef struct Buffer {
+uint8_t *buf;
+size_t rem_len;
+size_t offset;
+} Buffer;
+
 typedef struct VirtConsole {
 VirtIOSerialPort port;
 CharDriverState *chr;
+Buffer unflushed_buf;
 } VirtConsole;
 
+static void add_unflushed_buf(VirtConsole *vcon, const uint8_t *buf, size_t 
len)
+{
+assert(!vcon->unflushed_buf.buf);
+
+vcon->unflushed_buf.buf = qemu_malloc(len);
+
+memcpy(vcon->unflushed_buf.buf, buf, len);
+vcon->unflushed_buf.rem_len = len;
+vcon->unflushed_buf.offset = 0;
+}
+
+static void free_unflushed_buf(VirtConsole *vcon)
+{
+if (vcon->unflushed_buf.buf) {
+qemu_free(vcon->unflushed_buf.buf);
+vcon->unflushed_buf.buf = NULL;
+}
+}
+
+static ssize_t buffered_write_to_chardev(VirtConsole *vcon, const uint8_t *buf,
+ size_t len)
+{
+size_t written;
+ssize_t ret;
+
+written = 0;
+do {
+ret = qemu_chr_write_nb(vcon->chr, buf + written, len - written);
+if (ret < 0) {
+if (vcon->unflushed_buf.buf) {
+vcon->unflushed_buf.offset += written;
+vcon->unflushed_buf.rem_len -= written;
+} else {
+virtio_serial_throttle_port(&vcon->port, true);
+add_unflushed_buf(vcon, buf + written, len - written);
+}
+return ret;
+}
+written += ret;
+} while (written != len);
+
+return 0;
+}
+
+/* Callback function called when the chardev can accept more data */
+static void chr_write_unblocked(void *opaque)
+{
+VirtConsole *vcon = opaque;
+
+if (vcon->unflushed_buf.buf) {
+ssize_t ret;
+
+ret = buffered_write_to_chardev(vcon, vcon->unflushed_buf.buf
+  + vcon->unflushed_buf.offset,
+vcon->unflushed_buf.rem_len);
+if (ret < 0) {
+return;
+}
+free_unflushed_buf(vcon);
+}
+virtio_serial_throttle_port(&vcon->port, false);
+}
 
 /* Callback function that's called when the guest sends us data */
 static void flush_buf(VirtIOSerialPort *port, const uint8_t *buf, size_t len)
 {
 VirtConsole *vcon = DO_UPCAST(VirtConsole, port, port);
 
-qemu_chr_write(vcon->chr, buf, len);
+/* If a previous write was incomplete, we should've been throttled. */
+assert(!vcon->unflushed_buf.buf);
+
+buffered_write_to_chardev(vcon, buf, len);
 }
 
 /* Readiness of the guest to accept data on a port */
@@ -48,19 +120,58 @@ static void chr_event(void *opaque, int event)
 VirtConsole *vcon = opaque;
 
 switch (event) {
-case CHR_EVENT_OPENED: {
+case CHR_EVENT_OPENED:
 virtio_serial_open(&vcon->port);
 break;
-}
+
 case CHR_EVENT_CLOSED:
+free_unflushed_buf(vcon);
 virtio_serial_close(&vcon->port);
 break;
 }
 }
 
+static void virtio_console_port_save(QEMUFile *f, void *opaque)
+{
+VirtConsole *vcon = opaque;
+uint32_t have_buffer;
+
+have_buffer = vcon->unflushed_buf.buf ? true : false;
+
+qemu_put_be32s(f, &have_buffer);
+if (have_buffer) {
+qemu_put_be64s(f, &vcon->unflushed_buf.rem_len);
+qemu_put_buffer(f, vcon->unflushed_buf.buf
+   + vcon->unflushed_buf.offset,
+vcon->unflushed_buf.rem_len);
+}
+}
+
+static int virtio_console_port_load(QEMUFile *f, void *opaque, int version_id)
+{
+VirtConsole *vcon = opaque;
+uint32_t have_buffer;
+
+if (version_id > 1) {
+return -EINVAL;
+}
+
+qemu_get_be32s(f, &have_buffer);
+if (have_buffer) {
+qemu_get_be64s(f, &vcon->unflushed_buf.rem_len);
+vcon->unflushed_buf.buf = qemu_malloc(vcon->unflushed_buf.rem_len);
+vcon->unflushed_buf.offset = 0;
+
+qemu_get_buffer(f, vcon->unflushed_buf.buf,
+vcon->unflushed_buf.rem_len);
+}
+return 0;
+}
+
 static const QemuChrHan

[Qemu-devel] Re: [PATCH v7 0/6] char: non-blocking writes, virtio-console flow control

2010-05-05 Thread Amit Shah
[Fix subject]

Amit




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Anthony Liguori

On 05/04/2010 03:44 PM, Reinhard Max wrote:

Hi,

On Tue, 4 May 2010 at 11:23, Anthony Liguori wrote:


0.11.0 is pretty old.  Please update your patch against the latest git.


Ok, will do.


I'm not sure we're doing the wrong thing right now.


Well, I think it just can't be correct, that an IPv6-enabled process 
running on a dual-stack machine when it gets told to listen on 
"localhost", ends up only listening on ::1


My understanding is that for the majority of systems, 127.0.0.1 should 
still connect to ::1.  The reason we have the ipv4 and ipv6 options is 
because this doesn't work 100% of the time.  Maybe your configuration is 
an example of this.


Honestly, I don't understand how ipv4 compat is supposed to work outside 
of qemu so I'm looking for some additional input.


Regards,

Anthony Liguori


and doesn't accept connections to 127.0.0.1.


cu
Reinhard






Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Gerd Hoffmann

On 05/04/10 18:23, Anthony Liguori wrote:

On 05/04/2010 08:49 AM, Reinhard Max wrote:

Hi,

I am maintaining the tightvnc package for openSUSE and was recently
confronted with an alleged vnc problem with QWMU that turned out to be
a shortcoming in QEMU's code for handling TCP server sockets, which is
used by the vnc and char modules.

The problem occurs when the address to listen on is given as a name
which resolves to multiple IP addresses the most prominent example
being "localhost" resolving to 127.0.0.1 and ::1 .


My tigervnc (tightvnc successor) has IPv6 support and handles this just 
fine.  There is also the option to force qemu to listen on ipv4 (or 
ipv6) only.



The existing code stopped walking the list of addresses returned by
getaddrinfo() as soon as one socket was successfully opened and bound.
The result was that a qemu instance started with "-vnc localhost:42"
only listened on ::1, wasn't reachable through 127.0.0.1. The fact
that the code set the IPV6_V6ONLY socket option didn't help, because
that option only works when the socket gets bound to the IPv6 wildcard
address (::), but is useless for explicit address bindings.


Indeed.


But that said, I'm not sure we're doing the wrong thing right now. Gerd,
what do you think about this behavior?


Reinhard is correct.  If a hostname resolves to multiple addresses like 
this ...


   zweiblum kraxel ~# host zweiblum
   zweiblum.home.kraxel.org has address 192.168.2.101
   zweiblum.home.kraxel.org has IPv6 address 
2001:6f8:1cb3:2:216:41ff:fee1:3d40


... qemu should listen on all addresses returned.  Which in turn 
requires multiple listening sockets.


Changing this is a big deal though, thats why I've taken the somewhat 
unclean shortcut to listen on the first match only when implementing 
this.  Clients are supposed to try to connect to all addresses returned 
by the lookup (and they do if they got IPv6 support), thus this usually 
doesn't cause trouble in practice.


When going for multiple listening sockets in qemu we have to figure how 
we'll handle this in a number of places as there is no single listening 
address any more.  Reporting the vnc server address in QMP is one.  I'm 
sure there are more.


cheers,
  Gerd





Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Anthony Liguori

On 05/04/2010 04:47 PM, Gerd Hoffmann wrote:

On 05/04/10 18:23, Anthony Liguori wrote:

On 05/04/2010 08:49 AM, Reinhard Max wrote:

Hi,

I am maintaining the tightvnc package for openSUSE and was recently
confronted with an alleged vnc problem with QWMU that turned out to be
a shortcoming in QEMU's code for handling TCP server sockets, which is
used by the vnc and char modules.

The problem occurs when the address to listen on is given as a name
which resolves to multiple IP addresses the most prominent example
being "localhost" resolving to 127.0.0.1 and ::1 .


My tigervnc (tightvnc successor) has IPv6 support and handles this 
just fine.  There is also the option to force qemu to listen on ipv4 
(or ipv6) only.



The existing code stopped walking the list of addresses returned by
getaddrinfo() as soon as one socket was successfully opened and bound.
The result was that a qemu instance started with "-vnc localhost:42"
only listened on ::1, wasn't reachable through 127.0.0.1. The fact
that the code set the IPV6_V6ONLY socket option didn't help, because
that option only works when the socket gets bound to the IPv6 wildcard
address (::), but is useless for explicit address bindings.


Indeed.


But that said, I'm not sure we're doing the wrong thing right now. Gerd,
what do you think about this behavior?


Reinhard is correct.  If a hostname resolves to multiple addresses 
like this ...


   zweiblum kraxel ~# host zweiblum
   zweiblum.home.kraxel.org has address 192.168.2.101
   zweiblum.home.kraxel.org has IPv6 address 
2001:6f8:1cb3:2:216:41ff:fee1:3d40


... qemu should listen on all addresses returned.  Which in turn 
requires multiple listening sockets.


Changing this is a big deal though, thats why I've taken the somewhat 
unclean shortcut to listen on the first match only when implementing 
this.  Clients are supposed to try to connect to all addresses 
returned by the lookup (and they do if they got IPv6 support), thus 
this usually doesn't cause trouble in practice.


When going for multiple listening sockets in qemu we have to figure 
how we'll handle this in a number of places as there is no single 
listening address any more.  Reporting the vnc server address in QMP 
is one.  I'm sure there are more.


Okay, that makes sense.  Personally, I'm inclined to agree that this is 
a client problem.


Regards,

Anthony Liguori


cheers,
  Gerd







Re: [Qemu-devel] [PATCH 1/2] qemu-error: Introduce get_errno_name()

2010-05-05 Thread Anthony Liguori

On 05/04/2010 03:30 PM, Luiz Capitulino wrote:


  StateVmSaveFailed is not like CommandFailed, there are five errors
in do_savevm() and StateVmSaveFailed happens to be one of them.

  But I understand what you mean and I have considered doing something
like it, one of the problems though is that I'm not sure 'source' is
enough to determine where the error has happened.

  Consider do_savevm() again. We have three 'operations' that might
fail: delete an existing snapshot, save the VM state and create the
snapshot. All those operations can return -EIO as an error.
   


Maybe those three operations should return distinct errnos?

That way, we can make more useful QErrors.


  So, the first question is: would you map EIO to an QError? Just like
you did for SocketIOError? If so, how would you know which operation
has failed? Would you put its name in source? Or have an additional
'operation' key?

  A related problem is not to degrade the current set of error messages
we offer to the users. For do_savevm()'s 'save state' operation, current
message is:

"Error -7 while writing VM"

  With StateVmSaveFailed it becomes:

"Failed to save VM state ("EIO")"
   


I don't think this is that useful to preserve because as you said, it 
could be one of three things.


Regards,

Anthony Liguori


  Given the current implementation of QError, I'm not sure how we can have
such a good error message if our QErrors are not 'operation based'.
   






Re: [Qemu-devel] [PATCH 1/2] QMP: Introduce commands doc

2010-05-05 Thread Luiz Capitulino
On Mon, 03 May 2010 18:24:11 +0200
Markus Armbruster  wrote:

> > +
> > +Example:
> > +
> > +{ "execute": "migrate", "arguments": { "uri": "tcp:0:4446" } }
> > +
> > +Notes:
> > +
> > +(1) The 'query-migrate' command should be used to check migration's 
> > progress
> > +and final result (this information is provided by the 'status' member)
> > +(2) All boolean arguments default to false
> 
> Don't they always default to false?

 Yes, but I'm not sure if we should make this such a general rule, maybe
it should be possible to say what the default is. And we can do that for
new commands if we want to.

> > +(3) The user Monitor's "detach" argument is invalid in QMP and should not
> > +be used
> 
> Then why do we accept it?

 It's a bug, nevertheless I think it's worth noting it's not accepted.




[Qemu-devel] Re: [PATCH] pflash_cfi01: add device ID read command

2010-05-05 Thread Michael Walle

Ping for this patch. Anything wrong with it?

Am Saturday 01 May 2010 19:34:06 schrieb Michael Walle:
> Add support to read manufacturer and device ID. For everything else (eg.
> lock bits) 0 is returned.
>
> Signed-off-by: Michael Walle 
> ---
>  hw/pflash_cfi01.c |   20 
>  1 files changed, 20 insertions(+), 0 deletions(-)
>
> diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c
> index 6b2adba..bc901e6 100644
> --- a/hw/pflash_cfi01.c
> +++ b/hw/pflash_cfi01.c
> @@ -166,6 +166,22 @@ static uint32_t pflash_read (pflash_t *pfl,
> target_phys_addr_t offset, ret = pfl->status;
>  DPRINTF("%s: status %x\n", __func__, ret);
>  break;
> +case 0x90:
> +switch (boff) {
> +case 0:
> +ret = pfl->ident[0] << 8 | pfl->ident[1];
> +DPRINTF("%s: Manufacturer Code %04x\n", __func__, ret);
> +break;
> +case 1:
> +ret = pfl->ident[2] << 8 | pfl->ident[3];
> +DPRINTF("%s: Device ID Code %04x\n", __func__, ret);
> +break;
> +default:
> +DPRINTF("%s: Read Device Information boff=%x\n", __func__,
> boff); +ret = 0;
> +break;
> +}
> +break;
>  case 0x98: /* Query mode */
>  if (boff > pfl->cfi_len)
>  ret = 0;
> @@ -283,6 +299,10 @@ static void pflash_write(pflash_t *pfl,
> target_phys_addr_t offset, DPRINTF("%s: Read status register\n", __func__);
>  pfl->cmd = cmd;
>  return;
> +case 0x90: /* Read Device ID */
> +DPRINTF("%s: Read Device information\n", __func__);
> +pfl->cmd = cmd;
> +return;
>  case 0x98: /* CFI query */
>  DPRINTF("%s: CFI query\n", __func__);
>  break;

-- 
wkr michael




[Qemu-devel] VNC heap corruption when display width is not a multiple of 16

2010-05-05 Thread Andrew Lutomirski
Hi all-

qemu-kvm quite reliably crashes when running with a VNC viewer
connected at 1400x1050.  (The crash happens when changing resolution
*from* 1400x1050 or disconnecting and reconnecting a client.)

The problem is that  vnc_refresh_server_surface overruns server->data
and corrupts heap metadata, causing glibc to explode when the buffer
is freed.

The patch below (obviously only for testing) demonstrates the problem
and prevents the crash, but it introduces a black line 8 pixels wide
on the right when running at 1400x1050.  I'm not sure what's going on
-- either I messed something up (entirely possible) or there's another
bug somewhere else.

Note: I've only reproduced this on Avi's qemu-kvm and on Fedora's
packages -- upstream qemu crashes somewhere else.  The code in
question looks the same upstream, though.

--Andy


diff --git a/vnc.c b/vnc.c
index e678fcc..f9d0ad3 100644
--- a/vnc.c
+++ b/vnc.c
@@ -527,6 +527,15 @@ static void vnc_dpy_resize(DisplayState *ds)
 vd->server->data = qemu_mallocz(vd->server->linesize *
 vd->server->height);

+printf("vnc_dpy_resize: linesize = %d, height = %d, bpp = %d, "
+  "width = %d, height = %d, width%%16 = %d, data = %p\n",
+  vd->server->linesize, vd->server->height,
+  ds_get_bytes_per_pixel(ds),
+  ds_get_width(ds), ds_get_height(ds),
+  ds_get_width(ds) % 16,
+  vd->server->data);
+
+
 /* guest surface */
 if (!vd->guest.ds)
 vd->guest.ds = qemu_mallocz(sizeof(*vd->guest.ds));
@@ -1740,7 +1749,7 @@ static void framebuffer_update_request(VncState
*vs, int incremental,
 vs->force_update = 1;
 for (i = 0; i < h; i++) {
 vnc_set_bits(vs->dirty[y_position + i],
- (ds_get_width(vs->ds) / 16), VNC_DIRTY_WORDS);
+ ((ds_get_width(vs->ds) + 15) / 16), VNC_DIRTY_WORDS);
 }
 }
 }
@@ -2320,8 +2329,7 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
  * Check and copy modified bits from guest to server surface.
  * Update server dirty map.
  */
-vnc_set_bits(width_mask, (ds_get_width(vd->ds) / 16), VNC_DIRTY_WORDS);
-cmp_bytes = 16 * ds_get_bytes_per_pixel(vd->ds);
+vnc_set_bits(width_mask, ((ds_get_width(vd->ds) + 15) / 16),
VNC_DIRTY_WORDS);
 guest_row  = vd->guest.ds->data;
 server_row = vd->server->data;
 for (y = 0; y < vd->guest.ds->height; y++) {
@@ -2332,12 +2340,20 @@ static int vnc_refresh_server_surface(VncDisplay *vd)

 guest_ptr  = guest_row;
 server_ptr = server_row;
+int bpp = ds_get_bytes_per_pixel(vd->ds);
+cmp_bytes = 16 * bpp;

 for (x = 0; x < vd->guest.ds->width;
 x += 16, guest_ptr += cmp_bytes, server_ptr += cmp_bytes) {
 if (!vnc_get_bit(vd->guest.dirty[y], (x / 16)))
 continue;
 vnc_clear_bit(vd->guest.dirty[y], (x / 16));
+
+   // If this happens, we'll overrun the line.  If it
+   // happens on the last row, we'll corrupt the heap.
+   if (cmp_bytes > bpp * (vd->guest.ds->width - x))
+   cmp_bytes = bpp * (vd->guest.ds->width - x);
+   
 if (memcmp(server_ptr, guest_ptr, cmp_bytes) == 0)
 continue;
 memcpy(server_ptr, guest_ptr, cmp_bytes);




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Reinhard Max

On Tue, 4 May 2010 at 16:50, Anthony Liguori wrote:


Personally, I'm inclined to agree that this is a client problem.


That would be a violation of the "be liberal in what you accept and 
conservative in what you produce" principle and there are plenty of 
scenarios where even a client that Does It Right[tm] would fail, one 
example being that both ends are in networks that use public IPv6 
addresses, but have no IPv6 routing to each other (and yes, I've seen 
such networks).



cu
Reinhard




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Reinhard Max

On Tue, 4 May 2010 at 23:47, Gerd Hoffmann wrote:

My tigervnc (tightvnc successor) has IPv6 support and handles this 
just fine.


Well, as I wrote, the code is not just used for vnc, but also for 
server sockets that (if I got that right) could be connected from 
telnet or netcat implementations that don't speak IPv6.


When going for multiple listening sockets in qemu we have to figure 
how we'll handle this in a number of places as there is no single 
listening address any more.


Well, that's what my patch is about. Did you take a look at it?


Reporting the vnc server address in QMP is one.


Not sure what QMP is (this was the first time I looked at QEMU's 
internals), but I think my patch only leaves one place TODO where I 
chose to report only the first address for now, but it shouldn't be 
too hard to fix that as well.


BTW, in some places I circumvented the need for reporting multiple 
addresses by simply reporting the name that was passed to QEMU 
instead.



cu
Reinhard




[Qemu-devel] [PATCH 1/2] Fix clearing modifiers on focus loss

2010-05-05 Thread Brad Jorsch
It seems there is an intention to clear all modifier keys (in the SDL
front-end) when the window loses focus, but it seems that it doesn't
work right. Let's just explicitly do it when we lose input focus.

Signed-off-by: Brad Jorsch 
---
 sdl.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/sdl.c b/sdl.c
index f6fabf1..c155113 100644
--- a/sdl.c
+++ b/sdl.c
@@ -728,6 +728,9 @@ static void sdl_refresh(DisplayState *ds)
 }
 break;
 case SDL_ACTIVEEVENT:
+if (ev->active.state == SDL_APPINPUTFOCUS && !ev->active.gain) {
+reset_keys();
+}
 if (gui_grab && ev->active.state == SDL_APPINPUTFOCUS &&
 !ev->active.gain && !gui_fullscreen_initial_grab) {
 sdl_grab_end();
-- 
1.7.1





[Qemu-devel] [PATCH 0/2] Release modifier keys (including Meta) on loss of focus

2010-05-05 Thread Brad Jorsch
I got tired of my guests thinking that the logo key was being constantly
held down whenever I used Meta-Tab to switch away from the qemu window.

It turns out that both the "clear modifiers on focus loss" feature in
the SDL front-end was broken and Meta (i.e. the "logo" keys) wasn't
being considered a modifier.





[Qemu-devel] [PATCH 2/2] Count "logo" keys as modifier keys

2010-05-05 Thread Brad Jorsch
These are often mapped to Meta on Linux. And I for one use Meta-Tab as
the equivalent of Windows's Alt-Tab, so not having it marked as a
modifier leaves the guest thinking it's permanently pressed.

Signed-off-by: Brad Jorsch 
---
 sdl.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/sdl.c b/sdl.c
index c155113..cc7b1bb 100644
--- a/sdl.c
+++ b/sdl.c
@@ -385,6 +385,8 @@ static void sdl_process_key(SDL_KeyboardEvent *ev)
 case 0x9d:  /* Right CTRL */
 case 0x38:  /* Left ALT */
 case 0xb8: /* Right ALT */
+case 0xdb:  /* Left META */
+case 0xdc: /* Right META */
 if (ev->type == SDL_KEYUP)
 modifiers_state[keycode] = 0;
 else
-- 
1.7.1





Re: [Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Rusty Russell
On Wed, 5 May 2010 05:47:05 am Jamie Lokier wrote:
> Jens Axboe wrote:
> > On Tue, May 04 2010, Rusty Russell wrote:
> > > ISTR someone mentioning a desire for such an API years ago, so CC'ing the
> > > usual I/O suspects...
> > 
> > It would be nice to have a more fuller API for this, but the reality is
> > that only the flush approach is really workable. Even just strict
> > ordering of requests could only be supported on SCSI, and even there the
> > kernel still lacks proper guarantees on error handling to prevent
> > reordering there.
> 
> There's a few I/O scheduling differences that might be useful:
> 
> 1. The I/O scheduler could freely move WRITEs before a FLUSH but not
>before a BARRIER.  That might be useful for time-critical WRITEs,
>and those issued by high I/O priority.

This is only because noone actually wants flushes or barriers, though
I/O people seem to only offer that.  We really want " must
occur before ".  That offers maximum choice to the I/O subsystem
and potentially to smart (virtual?) disks.

> 2. The I/O scheduler could move WRITEs after a FLUSH if the FLUSH is
>only for data belonging to a particular file (e.g. fdatasync with
>no file size change, even on btrfs if O_DIRECT was used for the
>writes being committed).  That would entail tagging FLUSHes and
>WRITEs with a fs-specific identifier (such as inode number), opaque
>to the scheduler which only checks equality.

This is closer.  In userspace I'd be happy with a "all prior writes to this
struct file before all future writes".  Even if the original guarantees were
stronger (ie. inode basis).  We currently implement transactions using 4 fsync
/msync pairs.

write_recovery_data(fd);
fsync(fd);
msync(mmap);
write_recovery_header(fd);
fsync(fd);
msync(mmap);
overwrite_with_new_data(fd);
fsync(fd);
msync(mmap);
remove_recovery_header(fd);
fsync(fd);
msync(mmap);

Yet we really only need ordering, not guarantees about it actually hitting
disk before returning.

> In other words, FLUSH can be more relaxed than BARRIER inside the
> kernel.  It's ironic that we think of fsync as stronger than
> fbarrier outside the kernel :-)

It's an implementation detail; barrier has less flexibility because it has
less information about what is required. I'm saying I want to give you as
much information as I can, even if you don't use it yet.

Thanks,
Rusty.




[Qemu-devel] Re: [PATCH] [RESEND] Make char muxer more robust wrt small FIFOs

2010-05-05 Thread Jan Kiszka
Jan Kiszka wrote:
> Anthony Liguori wrote:
>> On 05/04/2010 11:01 AM, Alexander Graf wrote:
>>> Am 04.05.2010 um 16:34 schrieb Anthony Liguori :
>>>
 On 05/04/2010 09:30 AM, Alexander Graf wrote:
> Am 04.05.2010 um 15:44 schrieb Anthony Liguori :
>
>> On 04/20/2010 11:56 AM, Alexander Graf wrote:
>>> Virtio-Console can only process one character at a time. Using it 
>>> on S390
>>> gave me strage "lags" where I got the character I pressed before when
>>> pressing one. So I typed in "abc" and only received "a", then 
>>> pressed "d"
>>> but the guest received "b" and so on.
>>>
>>> While the stdio driver calls a poll function that just processes 
>>> on its
>>> queue in case virtio-console can't take multiple characters at 
>>> once, the
>>> muxer does not have such callbacks, so it can't empty its queue.
>>>
>>> To work around that limitation, I introduced a new timer that only 
>>> gets
>>> active when the guest can not receive any more characters. In that 
>>> case
>>> it polls again after a while to check if the guest is now 
>>> receiving input.
>>>
>>> This patch fixes input when using -nographic on s390 for me.
>>>
>> I think this is really a kvm issue.  I assume it's because s390 
>> idles in the kernel so you never drop to userspace to repoll the 
>> descriptor.
> There is no polling for the muxer. That's why it never knows when 
> virtio-console can receive again.
 Maybe I'm missing something simple, but it looks to me like the muxer 
 is polling.  mux_chr_can_read() is going to eventually poll the muxed 
 devices to figure this out.

 If the root of the problem is that mux_chr_can_read() isn't being 
 invoked for a prolonged period of time, the real issue is the problem 
 I described.
>>> The problem is that the select list of fds includes the stdio fd, so 
>>> that gets notified and is coupled with virtio-console, but there's 
>>> nothing passing that on to mux and I don't think it'd be clever to 
>>> expose internal data to the muxer to tell it about the backend fds.
>> When stdio is readable, it should invoke qemu_chr_read() with the read 
>> data which in turn ought to invoke mux_chr_read().
>>
>> I'm not sure I understand what signalling is missing.  Jan, does the 
>> problem Alex describes ring a bell?  I seem to recall you saying that 
>> mux was still fundamentally broken but ought to work most of the time...
> 
> That problem was (and still is) that the muxer needs to accept
> characters even if the active front-end device is not in order to filter
> out control sequences. Once its queue is full, it will start dropping
> those the active device would not if directly connected. Could only be
> solved via some peek service on pending front-end data.
> 
> I think Alex' problem can be addressed by registering
> qemu_set_fd_handler2(..., backend->read_poll, mux_chr_read, ...). That
> means the backend has to tell us about its read poll handler (if any).

Nonsense.

In fact, the problem is the former issue: As the muxer reads the
character the front-end is currently unable to receive, polling may stop
until as the back-end has some further chars to deliver.

But interestingly, the stdio back-end has a (single-byte) fifo as well.
It just drives it a bit differently.

Alex, does this help as well?

diff --git a/qemu-char.c b/qemu-char.c
index ac65a1c..2b115a4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -404,6 +404,8 @@ static int mux_chr_can_read(void *opaque)
 MuxDriver *d = chr->opaque;
 int m = d->focus;
 
+mux_chr_accept_input(opaque);
+
 if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE)
 return 1;
 if (d->chr_can_read[m])
@@ -418,8 +420,6 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, 
int size)
 int m = d->focus;
 int i;
 
-mux_chr_accept_input (opaque);
-
 for(i = 0; i < size; i++)
 if (mux_proc_byte(chr, d, buf[i])) {
 if (d->prod[m] == d->cons[m] &&


I'm trying to reproduce in parallel.

Jan



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] Re: [PATCH] virtio-spec: document block CMD and FLUSH

2010-05-05 Thread Rusty Russell
On Wed, 5 May 2010 04:24:59 am Christoph Hellwig wrote:
> On Fri, Feb 19, 2010 at 12:22:20AM +0200, Michael S. Tsirkin wrote:
> > I took a stub at documenting CMD and FLUSH request types in virtio
> > block.  Christoph, could you look over this please?
> > 
> > I note that the interface seems full of warts to me,
> > this might be a first step to cleaning them.
> 
> The whole virtio-blk interface is full of warts.  It has been
> extended rather ad-hoc, so that is rather expected.
> 
> > One issue I struggled with especially is how type
> > field mixes bits and non-bit values. I ended up
> > simply defining all legal values, so that we have
> > CMD = 2, CMD_OUT = 3 and so on.
> 
> It's basically a complete mess without much logic behind it.
> 
> > +\change_unchanged
> > +the high bit
> > +\change_inserted 0 1266497301
> > + (VIRTIO_BLK_T_BARRIER)
> > +\change_unchanged
> > + indicates that this request acts as a barrier and that all preceeding 
> > requests
> > + must be complete before this one, and all following requests must not be
> > + started until this is complete.
> > +
> > +\change_inserted 0 1266504385
> > + Note that a barrier does not flush caches in the underlying backend device
> > + in host, and thus does not serve as data consistency guarantee.
> > + Driver must use FLUSH request to flush the host cache.
> > +\change_unchanged
> 
> I'm not sure it's even worth documenting it.  I can't see any way to
> actually implement safe behaviour with the VIRTIO_BLK_T_BARRIER-style
> barriers.
> 
> 
> Btw, did I mention that .lyx is a a really horrible format to review
> diffs for?  Plain latex would be a lot better..

Yeah, or export as text post that diff for content review.  I do like the
patches to the lyx source though (I check all versions into revision control,
before and after merging changes, which makes it easy to produce annotated
versions).

Cheers,
Rusty.

> 




[Qemu-devel] Re: VNC heap corruption when display width is not a multiple of 16

2010-05-05 Thread Andrew Lutomirski
On Tue, May 4, 2010 at 6:12 PM, Andrew Lutomirski  wrote:
> Hi all-
>

> The patch below (obviously only for testing) demonstrates the problem
> and prevents the crash, but it introduces a black line 8 pixels wide
> on the right when running at 1400x1050.  I'm not sure what's going on
> -- either I messed something up (entirely possible) or there's another
> bug somewhere else.

Nope -- the black line is there in unpatched qemu as well.  I just
didn't notice it because qemu was too busy crashing :)

--Andy




[Qemu-devel] Re: [PATCH] [RESEND] Make char muxer more robust wrt small FIFOs

2010-05-05 Thread Jan Kiszka
Jan Kiszka wrote:
> Alex, does this help as well?
> 
> diff --git a/qemu-char.c b/qemu-char.c
> index ac65a1c..2b115a4 100644
> --- a/qemu-char.c
> +++ b/qemu-char.c
> @@ -404,6 +404,8 @@ static int mux_chr_can_read(void *opaque)
>  MuxDriver *d = chr->opaque;
>  int m = d->focus;
>  
> +mux_chr_accept_input(opaque);
> +
>  if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE)
>  return 1;
>  if (d->chr_can_read[m])
> @@ -418,8 +420,6 @@ static void mux_chr_read(void *opaque, const uint8_t 
> *buf, int size)
>  int m = d->focus;
>  int i;
>  
> -mux_chr_accept_input (opaque);
> -
>  for(i = 0; i < size; i++)
>  if (mux_proc_byte(chr, d, buf[i])) {
>  if (d->prod[m] == d->cons[m] &&
> 
> 
> I'm trying to reproduce in parallel.

Works for me. Will post as proper patch later.

Jan



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [Commit 20d97356] Fix OpenBSD build

2010-05-05 Thread Kevin Wolf
Hi Blue Swirl,

did I miss the patch posted for this commit? I can't find it on the
mailing list. I think the BlockDriver declarations should really stay at
the end of the source file of each driver.

What about using a different way to get rid of the static forward
declaration, namely using bdrv_find_format() instead of referencing it
directly? If you agree I'll post patches to revert your change and use
this solution instead.

Kevin




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Daniel P. Berrange
On Tue, May 04, 2010 at 03:49:50PM +0200, Reinhard Max wrote:
> Hi,
> 
> I am maintaining the tightvnc package for openSUSE and was recently 
> confronted with an alleged vnc problem with QWMU that turned out to be 
> a shortcoming in QEMU's code for handling TCP server sockets, which is 
> used by the vnc and char modules.
> 
> The problem occurs when the address to listen on is given as a name 
> which resolves to multiple IP addresses the most prominent example 
> being "localhost" resolving to 127.0.0.1 and ::1 .
> 
> The existing code stopped walking the list of addresses returned by 
> getaddrinfo() as soon as one socket was successfully opened and bound. 
> The result was that a qemu instance started with "-vnc localhost:42" 
> only listened on ::1, wasn't reachable through 127.0.0.1. The fact 
> that the code set the IPV6_V6ONLY socket option didn't help, because 
> that option only works when the socket gets bound to the IPv6 wildcard 
> address (::), but is useless for explicit address bindings.

This seems to be something we overlooked in the initial impl. I don't
see any alternative but to make QEMU listen on multiple sockets in
the scenario you describe. An even clear example is to consider binding
QEMU to a machine with multiple non-localhost addresses, eg

   myserver.com resolving to 192.168.122.41 + 2a00:123:456::1

because there's no way that the kernel can know/decide to automatically
listen on 192.168.122.41, when given the address 2a00:123:456::1 and if
the machine has many NICs, you can't assume the wildcard address is 
suitable either.

> The attached patch against QEMU 0.11.0 extends inet_listen() to create 
> sockets for as many addresses from the address list as possible and 
> adapts its callers and their data structures to deal with a linked 
> list of socket FDs rather than a single file descriptor.

The approach looks reasonable, though the patch is somewhat mangled
by the mix of tabs + spaces

Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Gerd Hoffmann

  Hi,


When going for multiple listening sockets in qemu we have to figure
how we'll handle this in a number of places as there is no single
listening address any more.


Well, that's what my patch is about.


Sure.


Did you take a look at it?


Briefly, yes.  Overall it looks sensible to me.  Devil is in the details 
though, see below.


Noticed that it probably should get a few helper functions to handle 
FdLists to avoid the quite simliar open-coded loop-over-all-fds loops 
all over the place.



Reporting the vnc server address in QMP is one.


Not sure what QMP is (this was the first time I looked at QEMU's
internals),


You'll run into qmp for sure when forward-porting the patches to the 
latest qemu bits.  It is the machine-readable version of the monitor 
protocol (in qemu 0.12+).



but I think my patch only leaves one place TODO where I
chose to report only the first address for now, but it shouldn't be too
hard to fix that as well.


Yea.  I've noticed that TODO ;)


BTW, in some places I circumvented the need for reporting multiple
addresses by simply reporting the name that was passed to QEMU instead.


This is one of the issues which needs to be addressed somehow.

First I think qemu should be self-consistent here, i.e. either report 
the (single) name or the list of addressed everythere.


Second we have to care about the current users (especially libvirt). 
Today qemu usually reports the address I think.  Thus I tend to stick to 
addresses to keep them happy.


We'll have a externally visible change in any case though.  Either the 
switch from the address to the name or the switch from a single address 
to a list of addresses.  Both changes might break existing users.


cheers,
  Gerd




[Qemu-devel] [PATCH] block: Avoid unchecked casts for AIOCBs

2010-05-05 Thread Kevin Wolf
Use container_of for one direction and &acb->common for the other one.

Signed-off-by: Kevin Wolf 
---
 block.c  |3 ++-
 block/blkdebug.c |4 ++--
 block/qcow.c |2 +-
 block/qcow2.c|2 +-
 block/vdi.c  |2 +-
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 7e3552d..10a2b31 100644
--- a/block.c
+++ b/block.c
@@ -2119,7 +2119,8 @@ typedef struct BlockDriverAIOCBSync {
 
 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
 {
-BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
+BlockDriverAIOCBSync *acb =
+container_of(blockacb, BlockDriverAIOCBSync, common);
 qemu_bh_delete(acb->bh);
 acb->bh = NULL;
 qemu_aio_release(acb);
diff --git a/block/blkdebug.c b/block/blkdebug.c
index bb4a91a..8325f75 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -320,7 +320,7 @@ static void error_callback_bh(void *opaque)
 
 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
 {
-BlkdebugAIOCB *acb = (BlkdebugAIOCB*) blockacb;
+BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
 qemu_aio_release(acb);
 }
 
@@ -347,7 +347,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
 acb->bh = bh;
 qemu_bh_schedule(bh);
 
-return (BlockDriverAIOCB*) acb;
+return &acb->common;
 }
 
 static BlockDriverAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
diff --git a/block/qcow.c b/block/qcow.c
index 2883c40..449858f 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -502,7 +502,7 @@ typedef struct QCowAIOCB {
 
 static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
 {
-QCowAIOCB *acb = (QCowAIOCB *)blockacb;
+QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
 if (acb->hd_aiocb)
 bdrv_aio_cancel(acb->hd_aiocb);
 qemu_aio_release(acb);
diff --git a/block/qcow2.c b/block/qcow2.c
index 21ed6f8..40bd32a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -340,7 +340,7 @@ typedef struct QCowAIOCB {
 
 static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
 {
-QCowAIOCB *acb = (QCowAIOCB *)blockacb;
+QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
 if (acb->hd_aiocb)
 bdrv_aio_cancel(acb->hd_aiocb);
 qemu_aio_release(acb);
diff --git a/block/vdi.c b/block/vdi.c
index 1d257b4..2b4d2c2 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -469,7 +469,7 @@ static int vdi_is_allocated(BlockDriverState *bs, int64_t 
sector_num,
 static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
 {
 /* TODO: This code is untested. How can I get it executed? */
-VdiAIOCB *acb = (VdiAIOCB *)blockacb;
+VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
 logout("\n");
 if (acb->hd_aiocb) {
 bdrv_aio_cancel(acb->hd_aiocb);
-- 
1.6.6.1





[Qemu-devel] [PATCH] ide: Fix ide_dma_cancel

2010-05-05 Thread Kevin Wolf
When cancelling a request, bdrv_aio_cancel may decide that it waits for
completion of a request rather than for cancellation. IDE therefore can't
abandon its DMA status before calling bdrv_aio_cancel; otherwise the callback
of a completed request would use invalid data.

Signed-off-by: Kevin Wolf 
---
 hw/ide/core.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 0757528..3cd55e3 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2838,10 +2838,6 @@ static void ide_dma_restart(IDEState *s, int is_read)
 void ide_dma_cancel(BMDMAState *bm)
 {
 if (bm->status & BM_STATUS_DMAING) {
-bm->status &= ~BM_STATUS_DMAING;
-/* cancel DMA request */
-bm->unit = -1;
-bm->dma_cb = NULL;
 if (bm->aiocb) {
 #ifdef DEBUG_AIO
 printf("aio_cancel\n");
@@ -2849,6 +2845,10 @@ void ide_dma_cancel(BMDMAState *bm)
 bdrv_aio_cancel(bm->aiocb);
 bm->aiocb = NULL;
 }
+bm->status &= ~BM_STATUS_DMAING;
+/* cancel DMA request */
+bm->unit = -1;
+bm->dma_cb = NULL;
 }
 }
 
-- 
1.6.6.1





Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Reinhard Max

Hi,

On Wed, 5 May 2010 at 10:53, Gerd Hoffmann wrote:

Noticed that it probably should get a few helper functions to handle 
FdLists to avoid the quite simliar open-coded loop-over-all-fds 
loops all over the place.


indeed, thanks for the hint. I now have functions to create a new list 
element from an fd number and to destroy a list. Not sure if straight 
loops over existing lists can be further optimized, though.


You'll run into qmp for sure when forward-porting the patches to the 
latest qemu bits.  It is the machine-readable version of the monitor 
protocol (in qemu 0.12+).


I guess that's the qemu_opt_set() calls at the end of 
inet_listen_opts()?


First I think qemu should be self-consistent here, i.e. either 
report the (single) name or the list of addressed everythere.


Yes, this mixture wasn't meant to be final, but it helped me getting 
the initial patch done with a minimal set of changes.



Second we have to care about the current users (especially libvirt).


Wouldn't the users of that bit of information run it through 
getaddrinfo() anyways when trying to connect? So to them it shouldn't 
matter whether the name or an ASCII representation of the address is 
used.


Today qemu usually reports the address I think.  Thus I tend to 
stick to addresses to keep them happy.


But wouldn't going from single address to multiple addresses be a 
bigger change for the users (and likely break them all) while going 
from address to name would only break those that were not using 
getaddrinfo() to translate the address into its binary representation.


OTOH, going for multiple addresses would also allow starting qemu with 
more than a single -vnc option, which doesn't seem to be possible 
right now, and wich might come handy in situations when the set of 
addresses a qemu instance should be listening on is not available as a 
single DNS name.



cu
Reinhard




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Gerd Hoffmann

  Hi,


You'll run into qmp for sure when forward-porting the patches to the
latest qemu bits. It is the machine-readable version of the monitor
protocol (in qemu 0.12+).


I guess that's the qemu_opt_set() calls at the end of inet_listen_opts()?


See docs in QMP/*, the changes in monitor.c and q${type}.[ch]

qemu_opt_set() in inet_listen_opts() is only slightly related.  It is 
used to report back the address we've actually bound to.  Used by 'info 
chardev' and I think vnc too.  Yes, that has to be changed somehow ...



Second we have to care about the current users (especially libvirt).


Wouldn't the users of that bit of information run it through
getaddrinfo() anyways when trying to connect? So to them it shouldn't
matter whether the name or an ASCII representation of the address is used.


I don't know how it is used.


Today qemu usually reports the address I think. Thus I tend to stick
to addresses to keep them happy.


But wouldn't going from single address to multiple addresses be a bigger
change for the users (and likely break them all) while going from
address to name would only break those that were not using getaddrinfo()
to translate the address into its binary representation.


It is probably best to bring this up on the libvirt list, this is the
most important user of those interfaces and I think other virtual machine
management folks are reading there too.

I personally don't care too much which way we pick.


OTOH, going for multiple addresses would also allow starting qemu with
more than a single -vnc option, which doesn't seem to be possible right
now, and wich might come handy in situations when the set of addresses a
qemu instance should be listening on is not available as a single DNS name.


Good point.

cheers,
  Gerd




[Qemu-devel] question on virtio

2010-05-05 Thread Michael S. Tsirkin
Hi!
I see this in virtio_ring.c:

/* Put entry in available array (but don't update avail->idx *
   until they do sync). */

Why is it done this way?
It seems that updating the index straight away would be simpler, while
this might allow the host to specilatively look up the buffer and handle
it, without waiting for the kick.

-- 
MST




[Qemu-devel] Re: [PATCH v7 0/6]

2010-05-05 Thread Juan Quintela
Amit Shah  wrote:
> Hello,
>
> This series lets interested callers ask for an -EAGAIN return from the
> chardev backends (only unix and tcp sockets as of now) to implement
> their own flow control.
>
> A new call, qemu_chr_write_nb() is added, that will fallback to
> qemu_chr_write() if the backend file isn't non-blocking or if no
> callback was supplied.
>
> Support for other backend types is easy to add and will be done in
> later patches.
>
> Please apply.
>
> v7:
> - constify handlers (Blue Swirl)
> - remove 'write_cb', a leftover from previous design (Juan Quintela)
> - return ret instead of -EAGAIN in virtio-console (Juan)
> - use pre-allocated meta-buffer instead of allocating one each time
>   (Juan)
>
> v6:
> - Continue write operation on EINTR instead of returning partial
>   writes (which was a change from prev. behaviour) (Gerd)
>
> v5:
> - Fix bug pointed out by Gerd
> - Convert to using a struct for passing on handlers to
>   qemu_chr_add_handlers() instead of passing each one
>   individually. Simplifies patches. (Inspired by Juan's comment)
> - Re-arranged patches
>
> Amit Shah (6):
>   virtio-console: Factor out common init between console and generic
> ports
>   char: Add a QemuChrHandlers struct to initialise chardev handlers
>   char: Let writers know how much data was written in case of errors
>   char: Add qemu_chr_write_nb() for nonblocking writes
>   char: unix/tcp: Add a non-blocking write handler
>   virtio-console: Throttle virtio-serial-bus if we can't consume any
> more guest data

Acked-by: Juan Quintela 

Testing the porting to vmstate of this.

Later, Juan.




Re: [Qemu-devel] [PATCH, RFC] block: separate raw images from the file protocol

2010-05-05 Thread Kevin Wolf
Am 04.05.2010 22:58, schrieb Stefan Weil:
> Am 08.04.2010 11:50, schrieb Kevin Wolf:
>> Am 07.04.2010 22:30, schrieb Christoph Hellwig:
>>
>>> We're running into various problems because the "raw" file access, which
>>> is used internally by the various image formats is entangled with the
>>> "raw" image format, which maps the VM view 1:1 to a file system.
>>>
>>> This patch renames the raw file backends to the file protocol which
>>> is treated like other protocols (e.g. nbd and http) and adds a new
>>> "raw" image format which is just a wrapper around calls to the underlying
>>> protocol.
>>>  
>> As you know and as I mentioned in previous discussions this approach is
>> exactly what I think we need in the block layer.
>>
>> You provided a nice long patch description that covers almost
>> everything, so I think I can put the greatest part of my comments there.
>>
>>
>>> The patch is surprisingly simple, besides changing the probing logical
>>> in block.c to only look for image formats when using bdrv_open and
>>> renaming of the old raw protocols to file there's almost nothing in there.
>>>
>>> One thing that looks suspicious in the patch is moving the actual
>>> posix file creation from raw-posix into the new raw image.  This is
>>> a layering violation, but exactly the same as done by all other image
>>> formats implementing the create operations, and not easily fixable
>>> without a major API change in this area.
>>>  
>> This is not only a layering violation, but also buggy in this case.
>> raw-win32.c has a different implementation of raw_create which wouldn't
>> be called any more.
>>
>> The two solutions that I see are making raw_create a wrapper that calls
>> the create function of the protocol, or do make the step and use bdrv_*
>> in the create functions of the drivers. I think the former is what could
>> be done to keep this patch simple, but the latter is what we should aim
>> for longer term.
>>
>>
>>> The only issues still open are in the handling of the host devices.
>>> Firstly in current qemu we can specifiy the host* format names
>>> on various command line acceping images, but the new code can't
>>> do that without adding some translation.  Second the layering breaks
>>> the no_zero_init flag in the BlockDriver used by qemu-img.  I'm not
>>> happy how this is done per-driver instead of per-state so I'll
>>> prepare a separate patch to clean this up.
>>>  
>> Hm, I don't like that very much, but there's probably no sane way around
>> it. It's clearly a property of the protocol and not of a single device,
>> but protocols might be stacked and just checking the first one doesn't
>> give the right result.
>>
>> Anyway, before merging this patch we obviously need to fix this kind of
>> things (is it caught by qemu-iotests, by the way?). I'm not sure if we
>> should add a compatibility translation of host_device =>  raw or if we
>> should just remove support for that completely. It would be helpful to
>> know if this is actually used.
>>
>>
>>> There's some more cleanup opportunity after this patch, e.g. using
>>> separate lists and registration functions for image formats vs
>>> protocols and maybe even host drivers, but this can be done at a
>>> later stage.
>>>
>>> Also there's a check for protocol in bdrv_open for the BDRV_O_SNAPSHOT
>>> case that I don't quite understand, but which I fear won't work as
>>> expected - possibly even before this patch.
>>>  
>> You mean that is_protocol thing? It comes into play when you do strange
>> things like qemu -hda fat:/tmp/testdir -snapshot and I think it actually
>> does work.
>>
>> Hm, apropos vvfat... Should vvfat actually be implemented as raw backed
>> by vvfat now instead of using vvfat directly? We could then forbid
>> protocols to be used directly.
>>
>>
>>> Note that this patch requires various recent block patches from Kevin
>>> and me, which should all be in his block queue.
>>>
>>> Signed-off-by: Christoph Hellwig
>>>
>>> Index: qemu/Makefile.objs
>>> ===
>>> --- qemu.orig/Makefile.objs 2010-04-07 13:56:27.429254145 +0200
>>> +++ qemu/Makefile.objs  2010-04-07 22:01:24.974284455 +0200
>>> @@ -12,7 +12,7 @@ block-obj-y += nbd.o block.o aio.o aes.o
>>>   block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
>>>   block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>>>
>>> -block-nested-y += cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o 
>>> vvfat.o
>>> +block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o 
>>> vpc.o vvfat.o
>>>   block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o 
>>> qcow2-snapshot.o
>>>   block-nested-y += parallels.o nbd.o
>>>   block-nested-$(CONFIG_WIN32) += raw-win32.o
>>>  
>> This hunk only applies with fuzz on my queue (caused by blkdebug.o). Can
>> you make sure to rebase the final version on the queue?
>>
>>
>>> @@ -1026,12 +985,12 @@ static int hdev_create(const char *filen
>>>

[Qemu-devel] [PATCH 0/3] local cursor patches.

2010-05-05 Thread Gerd Hoffmann
  Hi,

Local cursor bits, rehashed after discussions with anthony.

cheers,
  Gerd





[Qemu-devel] [PATCH 2/3] use new cursor struct + functions for vmware vga and sdl.

2010-05-05 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann 
---
 hw/vmware_vga.c |   40 +++-
 sdl.c   |   52 +---
 2 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c
index e709369..bf2a699 100644
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c
@@ -477,13 +477,43 @@ struct vmsvga_cursor_definition_s {
 static inline void vmsvga_cursor_define(struct vmsvga_state_s *s,
 struct vmsvga_cursor_definition_s *c)
 {
-int i;
-for (i = SVGA_BITMAP_SIZE(c->width, c->height) - 1; i >= 0; i --)
-c->mask[i] = ~c->mask[i];
+QEMUCursor *qc;
+int i, pixels;
+
+qc = cursor_alloc(c->width, c->height);
+qc->hot_x = c->hot_x;
+qc->hot_y = c->hot_y;
+switch (c->bpp) {
+case 1:
+cursor_set_mono(qc, 0xff, 0x00, (void*)c->image,
+1, (void*)c->mask);
+#ifdef DEBUG
+cursor_print_ascii_art(qc, "vmware/mono");
+#endif
+break;
+case 32:
+/* fill alpha channel from mask, set color to zero */
+cursor_set_mono(qc, 0x00, 0x00, (void*)c->mask,
+1, (void*)c->mask);
+/* add in rgb values */
+pixels = c->width * c->height;
+for (i = 0; i < pixels; i++) {
+qc->data[i] |= c->image[i] & 0xff;
+}
+#ifdef DEBUG
+cursor_print_ascii_art(qc, "vmware/32bit");
+#endif
+break;
+default:
+fprintf(stderr, "%s: unhandled bpp %d, using fallback cursor\n",
+__FUNCTION__, c->bpp);
+cursor_put(qc);
+qc = cursor_builtin_left_ptr();
+}
 
 if (s->vga.ds->cursor_define)
-s->vga.ds->cursor_define(c->width, c->height, c->bpp, c->hot_x, 
c->hot_y,
-(uint8_t *) c->image, (uint8_t *) c->mask);
+s->vga.ds->cursor_define(qc);
+cursor_put(qc);
 }
 #endif
 
diff --git a/sdl.c b/sdl.c
index 16a48e9..7c9ddbf 100644
--- a/sdl.c
+++ b/sdl.c
@@ -778,49 +778,23 @@ static void sdl_mouse_warp(int x, int y, int on)
 guest_x = x, guest_y = y;
 }
 
-static void sdl_mouse_define(int width, int height, int bpp,
- int hot_x, int hot_y,
- uint8_t *image, uint8_t *mask)
+static void sdl_mouse_define(QEMUCursor *c)
 {
-uint8_t sprite[256], *line;
-int x, y, dst, bypl, src = 0;
+uint8_t *image, *mask;
+int bpl;
+
 if (guest_sprite)
 SDL_FreeCursor(guest_sprite);
 
-memset(sprite, 0, 256);
-bypl = ((width * bpp + 31) >> 5) << 2;
-for (y = 0, dst = 0; y < height; y ++, image += bypl) {
-line = image;
-for (x = 0; x < width; x ++, dst ++) {
-switch (bpp) {
-case 32:
-src = *(line ++); src |= *(line ++); src |= *(line ++); line++;
-break;
-case 24:
-src = *(line ++); src |= *(line ++); src |= *(line ++);
-break;
-case 16:
-case 15:
-src = *(line ++); src |= *(line ++);
-break;
-case 8:
-src = *(line ++);
-break;
-case 4:
-src = 0xf & (line[x >> 1] >> ((x & 1)) << 2);
-break;
-case 2:
-src = 3 & (line[x >> 2] >> ((x & 3)) << 1);
-break;
-case 1:
-src = 1 & (line[x >> 3] >> (x & 7));
-break;
-}
-if (!src)
-sprite[dst >> 3] |= (1 << (~dst & 7)) & mask[dst >> 3];
-}
-}
-guest_sprite = SDL_CreateCursor(sprite, mask, width, height, hot_x, hot_y);
+bpl = cursor_get_mono_bpl(c);
+image = qemu_mallocz(bpl * c->height);
+mask  = qemu_mallocz(bpl * c->height);
+cursor_get_mono_image(c, 0x00, image);
+cursor_get_mono_mask(c, 0, mask);
+guest_sprite = SDL_CreateCursor(image, mask, c->width, c->height,
+c->hot_x, c->hot_y);
+qemu_free(image);
+qemu_free(mask);
 
 if (guest_cursor &&
 (gui_grab || kbd_mouse_is_absolute() || absolute_enabled))
-- 
1.6.6.1





[Qemu-devel] [PATCH 1/3] cursor: add cursor functions.

2010-05-05 Thread Gerd Hoffmann
Add a new cursor type to console.h and a bunch of functions to
deal with cursors the (new) cursor.c file.

Signed-off-by: Gerd Hoffmann 
---
 Makefile.objs |3 +-
 console.h |   24 ++-
 cursor.c  |  208 +
 3 files changed, 232 insertions(+), 3 deletions(-)
 create mode 100644 cursor.c

diff --git a/Makefile.objs b/Makefile.objs
index ecdd53e..1ee6e9d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -48,7 +48,8 @@ common-obj-y = $(block-obj-y)
 common-obj-y += $(net-obj-y)
 common-obj-y += $(qobject-obj-y)
 common-obj-$(CONFIG_LINUX) += $(fsdev-obj-$(CONFIG_LINUX))
-common-obj-y += readline.o console.o async.o qemu-error.o
+common-obj-y += readline.o console.o cursor.o async.o qemu-error.o
+
 common-obj-y += tcg-runtime.o host-utils.o
 common-obj-y += irq.o ioport.o input.o
 common-obj-$(CONFIG_PTIMER) += ptimer.o
diff --git a/console.h b/console.h
index 6def115..88861cb 100644
--- a/console.h
+++ b/console.h
@@ -126,6 +126,27 @@ struct DisplaySurface {
 struct PixelFormat pf;
 };
 
+/* cursor data format is 32bit RGBA */
+typedef struct QEMUCursor {
+int width, height;
+int hot_x, hot_y;
+int refcount;
+uint32_tdata[];
+} QEMUCursor;
+
+QEMUCursor *cursor_alloc(int width, int height);
+void cursor_get(QEMUCursor *c);
+void cursor_put(QEMUCursor *c);
+QEMUCursor *cursor_builtin_hidden(void);
+QEMUCursor *cursor_builtin_left_ptr(void);
+void cursor_print_ascii_art(QEMUCursor *c, const char *prefix);
+int cursor_get_mono_bpl(QEMUCursor *c);
+void cursor_set_mono(QEMUCursor *c,
+ uint32_t foreground, uint32_t background, uint8_t *image,
+ int transparent, uint8_t *mask);
+void cursor_get_mono_image(QEMUCursor *c, int foreground, uint8_t *mask);
+void cursor_get_mono_mask(QEMUCursor *c, int transparent, uint8_t *mask);
+
 struct DisplayChangeListener {
 int idle;
 uint64_t gui_timer_interval;
@@ -158,8 +179,7 @@ struct DisplayState {
 struct DisplayChangeListener* listeners;
 
 void (*mouse_set)(int x, int y, int on);
-void (*cursor_define)(int width, int height, int bpp, int hot_x, int hot_y,
-  uint8_t *image, uint8_t *mask);
+void (*cursor_define)(QEMUCursor *cursor);
 
 struct DisplayState *next;
 };
diff --git a/cursor.c b/cursor.c
new file mode 100644
index 000..3995a31
--- /dev/null
+++ b/cursor.c
@@ -0,0 +1,208 @@
+#include "qemu-common.h"
+#include "console.h"
+
+static const char cursor_hidden_32[32*32];
+static const char cursor_left_ptr_32[32*32] = {
+""
+" X  "
+" XX "
+" X.X"
+" X..X   "
+" X...X  "
+" XX "
+" X.X"
+" X..X   "
+" X...X  "
+" XX "
+" X.X"
+" X..X..X"
+" X.X X..X   "
+" XX  X..X   "
+" XX..X  "
+"  X..X  "
+"   X..X "
+"   X..X "
+"XX  "
+""
+};
+
+/* for creating built-in cursors */
+static void cursor_parse_ascii_art(QEMUCursor *c, const char *ptr)
+{
+int i, pixels;
+
+pixels = c->width * c->height;
+for (i = 0; i < pixels; i++) {
+switch (ptr[i]) {
+case 'X': /* black */
+c->data[i] = 0xff00;
+break;
+case '.': /* white */
+c->data[i] = 0x;
+break;
+case ' ': /* transparent */
+default:
+c->data[i] = 0x;
+break;
+}
+}
+}
+
+/* nice for debugging */
+void cursor_print_ascii_art(QEMUCursor *c, const char *prefix)
+{
+uint32_t *data = c->data;
+int x,y;
+
+for (y = 0; y < c->height; y++) {
+fprintf(stderr, "%s: %2d: |", prefix, y);
+for (x = 0; x < c->width; x++, data++) {
+if ((*data & 0xff00) != 0xff00) {
+fprintf(stderr, " "); /* transparent */
+} else if ((*data & 0x00ff) == 0x00ff) {
+fprintf(stderr, "."); /* white */
+} else if ((*data & 0x00ff) == 0x) {
+fprintf(stderr, "X"); /* black */
+} else {
+fprintf(stderr, "o"); /* other */
+}
+}
+fprintf(stderr, "|\n");
+}
+}
+
+QEMUCursor *cursor_builtin_hidden(void)
+{
+QEMUCursor *c;
+
+c = cursor_alloc(32, 32);
+cursor_parse_ascii_art(c, cursor_hidden_32);
+return c;
+}
+
+QEMUCursor *cursor_builtin_left_

[Qemu-devel] [PATCH 3/3] vnc: rich cursor support.

2010-05-05 Thread Gerd Hoffmann
Uses VNC_ENCODING_RICH_CURSOR.  Adding XCURSOR support should be
possible without much trouble.  Shouldn't be needed though as
RICH_CURSOR is a superset of XCURSOR.

Signed-off-by: Gerd Hoffmann 
---
 vnc.c|   67 +++---
 vnc.h|8 ++-
 vnchextile.h |7 +++--
 3 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/vnc.c b/vnc.c
index b1a3fdb..b97eae7 100644
--- a/vnc.c
+++ b/vnc.c
@@ -554,7 +554,8 @@ static void vnc_dpy_resize(DisplayState *ds)
 }
 
 /* fastest code */
-static void vnc_write_pixels_copy(VncState *vs, void *pixels, int size)
+static void vnc_write_pixels_copy(VncState *vs, struct PixelFormat *pf,
+  void *pixels, int size)
 {
 vnc_write(vs, pixels, size);
 }
@@ -604,12 +605,12 @@ void vnc_convert_pixel(VncState *vs, uint8_t *buf, 
uint32_t v)
 }
 }
 
-static void vnc_write_pixels_generic(VncState *vs, void *pixels1, int size)
+static void vnc_write_pixels_generic(VncState *vs, struct PixelFormat *pf,
+ void *pixels1, int size)
 {
 uint8_t buf[4];
-VncDisplay *vd = vs->vd;
 
-if (vd->server->pf.bytes_per_pixel == 4) {
+if (pf->bytes_per_pixel == 4) {
 uint32_t *pixels = pixels1;
 int n, i;
 n = size >> 2;
@@ -617,7 +618,7 @@ static void vnc_write_pixels_generic(VncState *vs, void 
*pixels1, int size)
 vnc_convert_pixel(vs, buf, pixels[i]);
 vnc_write(vs, buf, vs->clientds.pf.bytes_per_pixel);
 }
-} else if (vd->server->pf.bytes_per_pixel == 2) {
+} else if (pf->bytes_per_pixel == 2) {
 uint16_t *pixels = pixels1;
 int n, i;
 n = size >> 1;
@@ -625,7 +626,7 @@ static void vnc_write_pixels_generic(VncState *vs, void 
*pixels1, int size)
 vnc_convert_pixel(vs, buf, pixels[i]);
 vnc_write(vs, buf, vs->clientds.pf.bytes_per_pixel);
 }
-} else if (vd->server->pf.bytes_per_pixel == 1) {
+} else if (pf->bytes_per_pixel == 1) {
 uint8_t *pixels = pixels1;
 int n, i;
 n = size;
@@ -646,7 +647,7 @@ void vnc_raw_send_framebuffer_update(VncState *vs, int x, 
int y, int w, int h)
 
 row = vd->server->data + y * ds_get_linesize(vs->ds) + x * 
ds_get_bytes_per_pixel(vs->ds);
 for (i = 0; i < h; i++) {
-vs->write_pixels(vs, row, w * ds_get_bytes_per_pixel(vs->ds));
+vs->write_pixels(vs, &vd->server->pf, row, w * 
ds_get_bytes_per_pixel(vs->ds));
 row += ds_get_linesize(vs->ds);
 }
 }
@@ -752,6 +753,50 @@ static void vnc_dpy_copy(DisplayState *ds, int src_x, int 
src_y, int dst_x, int
 }
 }
 
+static void vnc_mouse_set(int x, int y, int visible)
+{
+/* can we ask the client(s) to move the pointer ??? */
+}
+
+static int vnc_cursor_define(VncState *vs)
+{
+QEMUCursor *c = vs->vd->cursor;
+PixelFormat pf = qemu_default_pixelformat(32);
+int isize;
+
+if (vnc_has_feature(vs, VNC_FEATURE_RICH_CURSOR)) {
+vnc_write_u8(vs,  VNC_MSG_SERVER_FRAMEBUFFER_UPDATE);
+vnc_write_u8(vs,  0);  /*  padding */
+vnc_write_u16(vs, 1);  /*  # of rects  */
+vnc_framebuffer_update(vs, c->hot_x, c->hot_y, c->width, c->height,
+   VNC_ENCODING_RICH_CURSOR);
+isize = c->width * c->height * vs->clientds.pf.bytes_per_pixel;
+vnc_write_pixels_generic(vs, &pf, c->data, isize);
+vnc_write(vs, vs->vd->cursor_mask, vs->vd->cursor_msize);
+return 0;
+}
+return -1;
+}
+
+static void vnc_dpy_cursor_define(QEMUCursor *c)
+{
+VncDisplay *vd = vnc_display;
+VncState *vs;
+
+cursor_put(vd->cursor);
+qemu_free(vd->cursor_mask);
+
+vd->cursor = c;
+cursor_get(vd->cursor);
+vd->cursor_msize = cursor_get_mono_bpl(c) * c->height;
+vd->cursor_mask = qemu_mallocz(vd->cursor_msize);
+cursor_get_mono_mask(c, 0, vd->cursor_mask);
+
+QTAILQ_FOREACH(vs, &vd->clients, next) {
+vnc_cursor_define(vs);
+}
+}
+
 static int find_and_clear_dirty_height(struct VncState *vs,
int y, int last_x, int x)
 {
@@ -1622,6 +1667,9 @@ static void set_encodings(VncState *vs, int32_t 
*encodings, size_t n_encodings)
 case VNC_ENCODING_POINTER_TYPE_CHANGE:
 vs->features |= VNC_FEATURE_POINTER_TYPE_CHANGE_MASK;
 break;
+case VNC_ENCODING_RICH_CURSOR:
+vs->features |= VNC_FEATURE_RICH_CURSOR_MASK;
+break;
 case VNC_ENCODING_EXT_KEY_EVENT:
 send_ext_key_event_ack(vs);
 break;
@@ -1642,8 +1690,9 @@ static void set_encodings(VncState *vs, int32_t 
*encodings, size_t n_encodings)
 break;
 }
 }
-
 check_pointer_type_change(&vs->mouse_mode_notifier);
+if (vs->vd->cursor)
+vnc_cursor_define(vs);
 }
 
 static void set_pixel_conversion(VncState *vs)
@@ -2288,6 +2337,8 @@ void vnc_displ

[Qemu-devel] Re: [PATCH v7 2/6] char: Add a QemuChrHandlers struct to initialise chardev handlers

2010-05-05 Thread Anthony Liguori

On 05/04/2010 04:39 PM, Amit Shah wrote:

Instead of passing each handler in the qemu_add_handlers() function,
create a struct of handlers that can be passed to the function instead.

Signed-off-by: Amit Shah
---
  gdbstub.c|9 +++--
  hw/debugcon.c|2 +-
  hw/escc.c|9 +++--
  hw/etraxfs_ser.c |   13 +
  hw/mcf_uart.c|9 +++--
  hw/pl011.c   |9 +++--
  hw/pxa2xx.c  |   13 +
  hw/serial.c  |9 +++--
  hw/sh_serial.c   |   12 +---
  hw/syborg_serial.c   |9 +++--
  hw/usb-serial.c  |9 +++--
  hw/virtio-console.c  |9 +++--
  hw/xen_console.c |   16 +++-
  hw/xilinx_uartlite.c |   11 +--
  monitor.c|   19 +++
  net/slirp.c  |8 ++--
  qemu-char.c  |   27 ++-
  qemu-char.h  |   12 
  18 files changed, 151 insertions(+), 54 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 93c4850..ae29db1 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2621,6 +2621,12 @@ static void gdb_sigterm_handler(int signal)
  }
  #endif

+static const QemuChrHandlers gdb_handlers = {
+.fd_can_read = gdb_chr_can_receive,
+.fd_read = gdb_chr_receive,
+.fd_event = gdb_chr_event,
+};
+
  int gdbserver_start(const char *device)
  {
  GDBState *s;
@@ -2650,8 +2656,7 @@ int gdbserver_start(const char *device)
  if (!chr)
  return -1;

-qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive,
-  gdb_chr_event, NULL);
+qemu_chr_add_handlers(chr,&gdb_handlers, NULL);
  }
   


This sort of change isn't a bad one but if we're making this change, we 
should change the signature of these functions to also take a 
QemuChrHandlers *.


Also, let's drop the qemu_ and Qemu prefixes while we're at it.

Regards,

Anthony Liguori


  s = gdbserver_state;
diff --git a/hw/debugcon.c b/hw/debugcon.c
index 5ee6821..e79a595 100644
--- a/hw/debugcon.c
+++ b/hw/debugcon.c
@@ -73,7 +73,7 @@ static void debugcon_init_core(DebugconState *s)
  exit(1);
  }

-qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, s);
+qemu_chr_add_handlers(s->chr, NULL, s);
  }

  static int debugcon_isa_initfn(ISADevice *dev)
diff --git a/hw/escc.c b/hw/escc.c
index 6d2fd36..4057bb4 100644
--- a/hw/escc.c
+++ b/hw/escc.c
@@ -890,6 +890,12 @@ void slavio_serial_ms_kbd_init(target_phys_addr_t base, 
qemu_irq irq,
  sysbus_mmio_map(s, 0, base);
  }

+static const QemuChrHandlers serial_handlers = {
+.fd_can_read = serial_can_receive,
+.fd_read = serial_receive1,
+.fd_event = serial_event,
+};
+
  static int escc_init1(SysBusDevice *dev)
  {
  SerialState *s = FROM_SYSBUS(SerialState, dev);
@@ -903,8 +909,7 @@ static int escc_init1(SysBusDevice *dev)
  s->chn[i].chn = 1 - i;
  s->chn[i].clock = s->frequency / 2;
  if (s->chn[i].chr) {
-qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive,
-  serial_receive1, serial_event,&s->chn[i]);
+qemu_chr_add_handlers(s->chn[i].chr,&serial_handlers,&s->chn[i]);
  }
  }
  s->chn[0].otherchn =&s->chn[1];
diff --git a/hw/etraxfs_ser.c b/hw/etraxfs_ser.c
index e1f9615..0c0c485 100644
--- a/hw/etraxfs_ser.c
+++ b/hw/etraxfs_ser.c
@@ -161,6 +161,12 @@ static void serial_event(void *opaque, int event)

  }

+static const QemuChrHandlers serial_handlers = {
+.fd_can_read = serial_can_receive,
+.fd_read = serial_receive,
+.fd_event = serial_event,
+};
+
  static int etraxfs_ser_init(SysBusDevice *dev)
  {
  struct etrax_serial *s = FROM_SYSBUS(typeof (*s), dev);
@@ -174,10 +180,9 @@ static int etraxfs_ser_init(SysBusDevice *dev)
  ser_regs = cpu_register_io_memory(ser_read, ser_write, s);
  sysbus_init_mmio(dev, R_MAX * 4, ser_regs);
  s->chr = qdev_init_chardev(&dev->qdev);
-if (s->chr)
-qemu_chr_add_handlers(s->chr,
-  serial_can_receive, serial_receive,
-  serial_event, s);
+if (s->chr) {
+qemu_chr_add_handlers(s->chr,&serial_handlers, s);
+}
  return 0;
  }

diff --git a/hw/mcf_uart.c b/hw/mcf_uart.c
index d16bac7..d2ce5f6 100644
--- a/hw/mcf_uart.c
+++ b/hw/mcf_uart.c
@@ -268,6 +268,12 @@ static void mcf_uart_receive(void *opaque, const uint8_t 
*buf, int size)
  mcf_uart_push_byte(s, buf[0]);
  }

+static const QemuChrHandlers mcf_uart_handlers = {
+.fd_can_read = mcf_uart_can_receive,
+.fd_read = mcf_uart_receive,
+.fd_event = mcf_uart_event,
+};
+
  void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
  {
  mcf_uart_state *s;
@@ -276,8 +282,7 @@ void *mcf_uart_init(qemu_irq irq, CharDriverState *chr)
  s->chr = chr;
  s->irq = irq;
  if (chr) {
-qemu_chr_add_handlers(chr, mcf_uart_can_receive, mcf_uart

[Qemu-devel] Re: [PATCH v7 3/6] char: Let writers know how much data was written in case of errors

2010-05-05 Thread Anthony Liguori

On 05/04/2010 04:39 PM, Amit Shah wrote:

On writing errors, we just returned -1 even if some bytes were already
written out. Ensure we return the number of bytes written before we
return the error (on a subsequent call to qemu_chr_write()).

Signed-off-by: Amit Shah
---
  qemu-char.c |   12 +++-
  1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 76ad12c..decf687 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -507,6 +507,9 @@ int send_all(int fd, const void *buf, int len1)
  while (len>  0) {
  ret = send(fd, buf, len, 0);
  if (ret<  0) {
+if (len1 - len) {
+return len1 - len;
+}
  errno = WSAGetLastError();
  if (errno != WSAEWOULDBLOCK) {
  return -1;
@@ -531,8 +534,15 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
  while (len>  0) {
  ret = write(fd, buf, len);
  if (ret<  0) {
-if (errno != EINTR&&  errno != EAGAIN)
+if (errno == EINTR) {
+continue;
+}
+if (len1 - len) {
+return len1 - len;
+}
+if (errno != EAGAIN) {
  return -1;
+}
  } else if (ret == 0) {
  break;
  } else {
   


This will break lots of things.  The contract for send_all and 
unix_write is that the transmit all data.


Regards,

Anthony Liguori






[Qemu-devel] Re: [PATCH v7 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Anthony Liguori

On 05/04/2010 04:39 PM, Amit Shah wrote:

For char devices whose backing files are open in non-blocking mode,
non-blocking writes can now be made using qemu_chr_write_nb().

For non-blocking chardevs, we can return -EAGAIN to callers of
qemu_chr_write_nb(). When the backend is ready to accept more data,
we can let the caller know via a callback.

-EAGAIN is returned only when the backend's file is non-blocking
and a callback is registered by the caller when invoking
qemu_chr_add_handlers().

In case a backend doesn't support non-blocking writes,
qemu_chr_write_nb() invokes qemu_chr_write().

Individual callers can update their code to add a callback handler,
call qemu_chr_write_nb() instead of qemu_chr_write() if they wish to
receive -EAGAIN notifications.

No backend currently supports non-blocking writes.

Signed-off-by: Amit Shah
---
  net/socket.c  |4 ++--
  qemu-char.c   |   31 ---
  qemu-char.h   |8 
  qemu_socket.h |3 ++-
  4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 1c4e153..8a401e6 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -56,8 +56,8 @@ static ssize_t net_socket_receive(VLANClientState *nc, const 
uint8_t *buf, size_
  uint32_t len;
  len = htonl(size);

-send_all(s->fd, (const uint8_t *)&len, sizeof(len));
-return send_all(s->fd, buf, size);
+send_all(s->fd, (const uint8_t *)&len, sizeof(len), false);
+return send_all(s->fd, buf, size, false);
  }

  static ssize_t net_socket_receive_dgram(VLANClientState *nc, const uint8_t 
*buf, size_t size)
diff --git a/qemu-char.c b/qemu-char.c
index decf687..5e4dec3 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -145,6 +145,16 @@ int qemu_chr_write(CharDriverState *s, const uint8_t *buf, 
int len)
  return s->chr_write(s, buf, len);
  }

+ssize_t qemu_chr_write_nb(CharDriverState *s, const uint8_t *buf, size_t len)
+{
+if (!s->nonblock) {
+/* Fallback to blocking write if no callback registered */
+return qemu_chr_write(s, buf, len);
+}
+
+return s->chr_write_nb(s, buf, len);
+}
   


I really dislike the idea of adding another function for this.  Can you 
explain why you need this functionality for virtio-console and why this 
functionality isn't needed for everything else?


Regards,

Anthony Liguori


  int qemu_chr_ioctl(CharDriverState *s, int cmd, void *arg)
  {
  if (!s->chr_ioctl)
@@ -203,11 +213,15 @@ void qemu_chr_add_handlers(CharDriverState *s,
  }
  s->chr_can_read = handlers->fd_can_read;
  s->chr_read = handlers->fd_read;
+s->chr_write_unblocked = handlers->fd_write_unblocked;
  s->chr_event = handlers->fd_event;
  s->handler_opaque = opaque;
  if (s->chr_update_read_handler)
  s->chr_update_read_handler(s);

+/* We'll set this at connect-time */
+s->nonblock = false;
+
  /* We're connecting to an already opened device, so let's make sure we
 also get the open event */
  if (s->opened) {
@@ -499,7 +513,7 @@ static CharDriverState *qemu_chr_open_mux(CharDriverState 
*drv)


  #ifdef _WIN32
-int send_all(int fd, const void *buf, int len1)
+int send_all(int fd, const void *buf, int len1, bool nonblock)
  {
  int ret, len;

@@ -526,7 +540,7 @@ int send_all(int fd, const void *buf, int len1)

  #else

-static int unix_write(int fd, const uint8_t *buf, int len1)
+static int unix_write(int fd, const uint8_t *buf, int len1, bool nonblock)
  {
  int ret, len;

@@ -540,6 +554,9 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
  if (len1 - len) {
  return len1 - len;
  }
+if (errno == EAGAIN&&  nonblock) {
+return -EAGAIN;
+}
  if (errno != EAGAIN) {
  return -1;
  }
@@ -553,9 +570,9 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
  return len1 - len;
  }

-int send_all(int fd, const void *buf, int len1)
+int send_all(int fd, const void *buf, int len1, bool nonblock)
  {
-return unix_write(fd, buf, len1);
+return unix_write(fd, buf, len1, nonblock);
  }
  #endif /* !_WIN32 */

@@ -572,7 +589,7 @@ static int stdio_nb_clients = 0;
  static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
  {
  FDCharDriver *s = chr->opaque;
-return send_all(s->fd_out, buf, len);
+return send_all(s->fd_out, buf, len, false);
  }

  static int fd_chr_read_poll(void *opaque)
@@ -884,7 +901,7 @@ static int pty_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
  pty_chr_update_read_handler(chr);
  return 0;
  }
-return send_all(s->fd, buf, len);
+return send_all(s->fd, buf, len, false);
  }

  static int pty_chr_read_poll(void *opaque)
@@ -1949,7 +1966,7 @@ static int tcp_chr_write(CharDriverState *chr, const 
uint8_t *buf, int len)
  {
  TCPCharDriver *s = chr->opaque;
  if (s->connected) {
-return s

Re: [Qemu-devel] [PATCH 1/6] bochs: use pread

2010-05-05 Thread Anthony Liguori

On 05/04/2010 05:44 AM, Christoph Hellwig wrote:

Use pread instead of lseek + read in preparation of using the qemu
block API.

Signed-off-by: Christoph Hellwig
   


Looks good to me.

Reviewed-by: Anthony Liguori 

Regards,

Anthony Liguori


Index: qemu-kevin/block/bochs.c
===
--- qemu-kevin.orig/block/bochs.c   2010-05-03 12:58:53.419012621 +0200
+++ qemu-kevin/block/bochs.c2010-05-03 12:59:13.873005360 +0200
@@ -125,7 +125,7 @@ static int bochs_open(BlockDriverState *

  s->fd = fd;

-if (read(fd,&bochs, sizeof(bochs)) != sizeof(bochs)) {
+if (pread(fd,&bochs, sizeof(bochs), 0) != sizeof(bochs)) {
  goto fail;
  }

@@ -144,14 +144,10 @@ static int bochs_open(BlockDriverState *
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
  }

-if (lseek(s->fd, le32_to_cpu(bochs.header), SEEK_SET) == (off_t)-1) {
-goto fail;
-}
-
  s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
  s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
-if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
-   s->catalog_size * 4)
+if (pread(s->fd, s->catalog_bitmap, s->catalog_size * 4,
+  le32_to_cpu(bochs.header)) != s->catalog_size * 4)
goto fail;
  for (i = 0; i<  s->catalog_size; i++)
le32_to_cpus(&s->catalog_bitmap[i]);
@@ -169,54 +165,35 @@ static int bochs_open(BlockDriverState *
  return -1;
  }

-static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
  {
  BDRVBochsState *s = bs->opaque;
  int64_t offset = sector_num * 512;
-int64_t extent_index, extent_offset, bitmap_offset, block_offset;
+int64_t extent_index, extent_offset, bitmap_offset;
  char bitmap_entry;

  // seek to sector
  extent_index = offset / s->extent_size;
  extent_offset = (offset % s->extent_size) / 512;

-if (s->catalog_bitmap[extent_index] == 0x)
-{
-// fprintf(stderr, "page not allocated [%x - %x:%x]\n",
-// sector_num, extent_index, extent_offset);
-   return -1; // not allocated
+if (s->catalog_bitmap[extent_index] == 0x) {
+   return -1; /* not allocated */
  }

  bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
-block_offset = bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));

-//fprintf(stderr, "sect: %x [ext i: %x o: %x] ->  %x bitmap: %x block: 
%x\n",
-// sector_num, extent_index, extent_offset,
-// le32_to_cpu(s->catalog_bitmap[extent_index]),
-// bitmap_offset, block_offset);
-
-// read in bitmap for current extent
-if (lseek(s->fd, bitmap_offset + (extent_offset / 8), SEEK_SET) ==
-(off_t)-1) {
+/* read in bitmap for current extent */
+if (pread(s->fd,&bitmap_entry, 1, bitmap_offset + (extent_offset / 8))
+!= 1) {
  return -1;
  }

-if (read(s->fd,&bitmap_entry, 1) != 1)
-return -1;
-
-if (!((bitmap_entry>>  (extent_offset % 8))&  1))
-{
-// fprintf(stderr, "sector (%x) in bitmap not allocated\n",
-// sector_num);
-   return -1; // not allocated
+if (!((bitmap_entry>>  (extent_offset % 8))&  1)) {
+   return -1; /* not allocated */
  }

-if (lseek(s->fd, block_offset, SEEK_SET) == (off_t)-1) {
-return -1;
-}
-
-return 0;
+return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
  }

  static int bochs_read(BlockDriverState *bs, int64_t sector_num,
@@ -226,13 +203,13 @@ static int bochs_read(BlockDriverState *
  int ret;

  while (nb_sectors>  0) {
-   if (!seek_to_sector(bs, sector_num))
-   {
-   ret = read(s->fd, buf, 512);
-   if (ret != 512)
-   return -1;
-   }
-   else
+int64_t block_offset = seek_to_sector(bs, sector_num);
+if (block_offset>= 0) {
+ret = pread(s->fd, buf, 512, block_offset);
+if (ret != 512) {
+return -1;
+}
+} else
  memset(buf, 0, 512);
  nb_sectors--;
  sector_num++;



   






[Qemu-devel] Re: [PATCH] [RESEND] Make char muxer more robust wrt small FIFOs

2010-05-05 Thread Alexander Graf


Am 05.05.2010 um 10:08 schrieb Jan Kiszka :


Jan Kiszka wrote:

Alex, does this help as well?

diff --git a/qemu-char.c b/qemu-char.c
index ac65a1c..2b115a4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -404,6 +404,8 @@ static int mux_chr_can_read(void *opaque)
MuxDriver *d = chr->opaque;
int m = d->focus;

+mux_chr_accept_input(opaque);
+
if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE)
return 1;
if (d->chr_can_read[m])
@@ -418,8 +420,6 @@ static void mux_chr_read(void *opaque, const  
uint8_t *buf, int size)

int m = d->focus;
int i;

-mux_chr_accept_input (opaque);
-
for(i = 0; i < size; i++)
if (mux_proc_byte(chr, d, buf[i])) {
if (d->prod[m] == d->cons[m] &&


I'm trying to reproduce in parallel.


Works for me. Will post as proper patch later.


I'll try it out next week, after my vacation is over :)

Alex








[Qemu-devel] Re: [PATCH v7 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Amit Shah
On (Wed) May 05 2010 [08:16:37], Anthony Liguori wrote:
> On 05/04/2010 04:39 PM, Amit Shah wrote:
>> For char devices whose backing files are open in non-blocking mode,
>> non-blocking writes can now be made using qemu_chr_write_nb().
>>
>> For non-blocking chardevs, we can return -EAGAIN to callers of
>> qemu_chr_write_nb(). When the backend is ready to accept more data,
>> we can let the caller know via a callback.
>>
>> -EAGAIN is returned only when the backend's file is non-blocking
>> and a callback is registered by the caller when invoking
>> qemu_chr_add_handlers().
>>
>> In case a backend doesn't support non-blocking writes,
>> qemu_chr_write_nb() invokes qemu_chr_write().
>>
>> Individual callers can update their code to add a callback handler,
>> call qemu_chr_write_nb() instead of qemu_chr_write() if they wish to
>> receive -EAGAIN notifications.
>>
>> No backend currently supports non-blocking writes.
>>
>> Signed-off-by: Amit Shah
>>
>> +ssize_t qemu_chr_write_nb(CharDriverState *s, const uint8_t *buf, size_t 
>> len)
>> +{
>> +if (!s->nonblock) {
>> +/* Fallback to blocking write if no callback registered */
>> +return qemu_chr_write(s, buf, len);
>> +}
>> +
>> +return s->chr_write_nb(s, buf, len);
>> +}
>>
>
> I really dislike the idea of adding another function for this.

This suggestion came from Gerd to separate out a write() that blocks and
a write_nb() that doesn't block on -EAGAIN.

>  Can you  
> explain why you need this functionality for virtio-console and why this  
> functionality isn't needed for everything else?

I don't know about everything else; but for virtio-console, a fast guest
could swamp a host chardev with data and while the host chardev blocks
to flush out all the data, a notification to ask the guest to stop is
needed so that we don't lose any data.

(virtio-console asks virtio-serial to throttle any more data, and
virtio-serial doesn't send any more data to this port till it signals
otherwise. The guest, in the meantime, keeps filling its queue till the
queue is full. Once that happens, writes in guests return with -EAGAIN.)

Amit




[Qemu-devel] Re: [PATCH v7 3/6] char: Let writers know how much data was written in case of errors

2010-05-05 Thread Amit Shah
On (Wed) May 05 2010 [08:15:19], Anthony Liguori wrote:
> On 05/04/2010 04:39 PM, Amit Shah wrote:
>> On writing errors, we just returned -1 even if some bytes were already
>> written out. Ensure we return the number of bytes written before we
>> return the error (on a subsequent call to qemu_chr_write()).
>>
>> Signed-off-by: Amit Shah
>> ---
>>   qemu-char.c |   12 +++-
>>   1 files changed, 11 insertions(+), 1 deletions(-)
>>
>> diff --git a/qemu-char.c b/qemu-char.c
>> index 76ad12c..decf687 100644
>> --- a/qemu-char.c
>> +++ b/qemu-char.c
>> @@ -507,6 +507,9 @@ int send_all(int fd, const void *buf, int len1)
>>   while (len>  0) {
>>   ret = send(fd, buf, len, 0);
>>   if (ret<  0) {
>> +if (len1 - len) {
>> +return len1 - len;
>> +}
>>   errno = WSAGetLastError();
>>   if (errno != WSAEWOULDBLOCK) {
>>   return -1;
>> @@ -531,8 +534,15 @@ static int unix_write(int fd, const uint8_t *buf, int 
>> len1)
>>   while (len>  0) {
>>   ret = write(fd, buf, len);
>>   if (ret<  0) {
>> -if (errno != EINTR&&  errno != EAGAIN)
>> +if (errno == EINTR) {
>> +continue;
>> +}
>> +if (len1 - len) {
>> +return len1 - len;
>> +}
>> +if (errno != EAGAIN) {
>>   return -1;
>> +}
>>   } else if (ret == 0) {
>>   break;
>>   } else {
>>
>
> This will break lots of things.  The contract for send_all and  
> unix_write is that the transmit all data.

The current behaviour remains unchanged for all the users. Only callers
of qemu_chr_write_nb() will get an -EAGAIN return.

Amit




[Qemu-devel] Re: [PATCH v7 2/6] char: Add a QemuChrHandlers struct to initialise chardev handlers

2010-05-05 Thread Amit Shah
On (Wed) May 05 2010 [08:13:58], Anthony Liguori wrote:
> On 05/04/2010 04:39 PM, Amit Shah wrote:
>> Instead of passing each handler in the qemu_add_handlers() function,
>> create a struct of handlers that can be passed to the function instead.
>>
>> Signed-off-by: Amit Shah


>> -qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive,
>> -  gdb_chr_event, NULL);
>> +qemu_chr_add_handlers(chr,&gdb_handlers, NULL);
>>   }
>>
>
> This sort of change isn't a bad one but if we're making this change, we  
> should change the signature of these functions to also take a  
> QemuChrHandlers *.

What do you mean?

> Also, let's drop the qemu_ and Qemu prefixes while we're at it.

In a later series, please. -ETOOMANYRESENDS already..

Amit




[Qemu-devel] Re: [PATCH v7 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Paul Brook
> I really dislike the idea of adding another function for this.  Can you
> explain why you need this functionality for virtio-console and why this
> functionality isn't needed for everything else?

This functionality should (in principle) be used by all serial port 
implementations.

Physical serial ports are sufficiently crufty and low-performance that noone 
actually uses them nowadays.  I expect that the only significant real-world 
use is for serial consoles, which never send enough data to care that writes 
stall the whole machine.

With virtio-serial we've made serial ports a viable solution to a whole range 
of problems.  It's likely that applications that may send nontrivial amounts 
of data, or clients will not be ready to process the data immediately.

Paul




[Qemu-devel] vgabios + qemu: issues and plans.

2010-05-05 Thread Gerd Hoffmann

  Hi,

Today we have two vgabios versions in qemu:  The standard one 
(vgabios.bin) and the cirrus one (vgabios-cirrus.bin).


The cirrus vgabios is a PCI ROM.  We can (and do) load it into the ROM 
PCI bar.  The vgabios checks the pci config space to figure where the 
linear framebuffer (for vesa graphics) is mapped to.  It knows how to 
program the cirrus.


The standard bios isn't a PCI ROM.  We have to load it using the seabios 
firmware interface.  It expects to find the linear framebuffer at the 
magic address 0xe000.  It uses the bochs extentions to implement 
vesa graphics support.


So, what is wrong with this?

First, I'd like to be able to load the vgabios via PCI ROM bar on all 
pci vga cards (stdvga, vmware, soon qxl).  The PCI ID in the bios has to 
match the PCI ID of the card, so we'll need a bunch of vga bios 
binaries, all identical except for the PCI ID.  Or we need some kind of 
binary patching.  Otherwise seabios will not load them from the PCI ROM bar.


Second, I want to get rid of the magic address 0xe000 (except for 
isa-vga).  This is basically just a matter of updating to vgabios 
version 0.6c.  And this needs one vga bios binary per vga card too as 
the PCI ID is used to lookup the card (and then the framebuffer address) 
in PCI config space.


Comments?  Especially on the binary patching?  Worth it?  Or just build 
a bunch of binaries?  They are not *that* big after all ...


cheers,
  Gerd




[Qemu-devel] Re: [PATCH v7 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Anthony Liguori

On 05/05/2010 08:34 AM, Paul Brook wrote:

I really dislike the idea of adding another function for this.  Can you
explain why you need this functionality for virtio-console and why this
functionality isn't needed for everything else?
 

This functionality should (in principle) be used by all serial port
implementations.

Physical serial ports are sufficiently crufty and low-performance that noone
actually uses them nowadays.  I expect that the only significant real-world
use is for serial consoles, which never send enough data to care that writes
stall the whole machine.
   


We don't implement control flow in the character driver layer today.  
Different backends use different policies.  Some drop data (like pty) 
while other block (like tcp).


This patch adds optional control flow in a pretty crufty way to *some* 
backends but not all.  This just adds a bunch of complexity that will 
certainly introduce bugs.


If we're going to make the char drivers implement control flow, then I 
think we should do it universally--not as an optional feature.  For 
devices that can't participate in control flow, we should decide where 
the policy should be implemented (front-end or back-end) and in either 
approach, it's easy enough to make dropping data or blocking a choice.


Regards,

Anthony Liguori


With virtio-serial we've made serial ports a viable solution to a whole range
of problems.  It's likely that applications that may send nontrivial amounts
of data, or clients will not be ready to process the data immediately.

Paul
   






[Qemu-devel] Re: [PATCH v7 3/6] char: Let writers know how much data was written in case of errors

2010-05-05 Thread Anthony Liguori

On 05/05/2010 08:23 AM, Amit Shah wrote:

On (Wed) May 05 2010 [08:15:19], Anthony Liguori wrote:
   

On 05/04/2010 04:39 PM, Amit Shah wrote:
 

On writing errors, we just returned -1 even if some bytes were already
written out. Ensure we return the number of bytes written before we
return the error (on a subsequent call to qemu_chr_write()).

Signed-off-by: Amit Shah
---
   qemu-char.c |   12 +++-
   1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index 76ad12c..decf687 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -507,6 +507,9 @@ int send_all(int fd, const void *buf, int len1)
   while (len>   0) {
   ret = send(fd, buf, len, 0);
   if (ret<   0) {
+if (len1 - len) {
+return len1 - len;
+}
   errno = WSAGetLastError();
   if (errno != WSAEWOULDBLOCK) {
   return -1;
@@ -531,8 +534,15 @@ static int unix_write(int fd, const uint8_t *buf, int len1)
   while (len>   0) {
   ret = write(fd, buf, len);
   if (ret<   0) {
-if (errno != EINTR&&   errno != EAGAIN)
+if (errno == EINTR) {
+continue;
+}
+if (len1 - len) {
+return len1 - len;
+}
+if (errno != EAGAIN) {
   return -1;
+}
   } else if (ret == 0) {
   break;
   } else {

   

This will break lots of things.  The contract for send_all and
unix_write is that the transmit all data.
 

The current behaviour remains unchanged for all the users. Only callers
of qemu_chr_write_nb() will get an -EAGAIN return.
   


No, send_all used to send all data.  After your change, it only sends 
what it can the first time.  The same with unix_write.


Regards,

Anthony Liguori


Amit
   






[Qemu-devel] Re: [PATCH v7 2/6] char: Add a QemuChrHandlers struct to initialise chardev handlers

2010-05-05 Thread Anthony Liguori

On 05/05/2010 08:25 AM, Amit Shah wrote:

On (Wed) May 05 2010 [08:13:58], Anthony Liguori wrote:
   

On 05/04/2010 04:39 PM, Amit Shah wrote:
 

Instead of passing each handler in the qemu_add_handlers() function,
create a struct of handlers that can be passed to the function instead.

Signed-off-by: Amit Shah
   


   

-qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive,
-  gdb_chr_event, NULL);
+qemu_chr_add_handlers(chr,&gdb_handlers, NULL);
   }

   

This sort of change isn't a bad one but if we're making this change, we
should change the signature of these functions to also take a
QemuChrHandlers *.
 

What do you mean?
   


-typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+typedef void IOReadHandler(QemuChrHandlers *handler, const uint8_t 
*buf, int size);


Then you can embed the structure in the state structure and use 
container_of to get the state.  It's more type safe and it avoids adding 
a global variable.



Also, let's drop the qemu_ and Qemu prefixes while we're at it.
 

In a later series, please. -ETOOMANYRESENDS already..
   


It's part of CODING_STYLE...  I think it's important to change the 
signatures so a new series will be required.


Regards,

Anthony Liguori


Amit
   






[Qemu-devel] [PATCH v2 5/5] lsi: Handle removal of selected devices

2010-05-05 Thread Jan Kiszka
We must not store references to selected devices as they may be
hot-removed. Instead, look up the device based on its tag right before
using it. If the device disappeared, throw an interrupt and disconnect.

Signed-off-by: Jan Kiszka 
---

Changes in v2:
 - Fixed incorrect tag->id conversion (missing LSI_TAG_VALID masking)

 hw/lsi53c895a.c |   60 +++
 1 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index f088d06..f5a91ba 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -175,7 +175,6 @@ do { fprintf(stderr, "lsi_scsi: error: " fmt , ## 
__VA_ARGS__);} while (0)
 
 typedef struct lsi_request {
 uint32_t tag;
-SCSIDevice *dev;
 uint32_t dma_len;
 uint8_t *dma_buf;
 uint32_t pending;
@@ -202,7 +201,6 @@ typedef struct {
  * 3 if a DMA operation is in progress.  */
 int waiting;
 SCSIBus bus;
-SCSIDevice *select_dev;
 int current_lun;
 /* The tag is a combination of the device ID and the SCSI tag.  */
 uint32_t select_tag;
@@ -518,11 +516,25 @@ static void lsi_resume_script(LSIState *s)
 }
 }
 
+static void lsi_disconnect(LSIState *s)
+{
+s->scntl1 &= ~LSI_SCNTL1_CON;
+s->sstat1 &= ~PHASE_MASK;
+}
+
+static void lsi_bad_selection(LSIState *s, uint32_t id)
+{
+DPRINTF("Selected absent target %d\n", id);
+lsi_script_scsi_interrupt(s, 0, LSI_SIST1_STO);
+lsi_disconnect(s);
+}
+
 /* Initiate a SCSI layer data transfer.  */
 static void lsi_do_dma(LSIState *s, int out)
 {
-uint32_t count;
+uint32_t count, id;
 target_phys_addr_t addr;
+SCSIDevice *dev;
 
 assert(s->current);
 if (!s->current->dma_len) {
@@ -531,6 +543,13 @@ static void lsi_do_dma(LSIState *s, int out)
 return;
 }
 
+id = (s->current->tag >> 8) & 0xf;
+dev = s->bus.devs[id];
+if (!dev) {
+lsi_bad_selection(s, id);
+return;
+}
+
 count = s->dbc;
 if (count > s->current->dma_len)
 count = s->current->dma_len;
@@ -550,8 +569,7 @@ static void lsi_do_dma(LSIState *s, int out)
 s->dbc -= count;
 
 if (s->current->dma_buf == NULL) {
-s->current->dma_buf = s->current->dev->info->get_buf(s->current->dev,
- s->current->tag);
+s->current->dma_buf = dev->info->get_buf(dev, s->current->tag);
 }
 
 /* ??? Set SFBR to first data byte.  */
@@ -565,10 +583,10 @@ static void lsi_do_dma(LSIState *s, int out)
 s->current->dma_buf = NULL;
 if (out) {
 /* Write the data.  */
-s->current->dev->info->write_data(s->current->dev, 
s->current->tag);
+dev->info->write_data(dev, s->current->tag);
 } else {
 /* Request any remaining data.  */
-s->current->dev->info->read_data(s->current->dev, s->current->tag);
+dev->info->read_data(dev, s->current->tag);
 }
 } else {
 s->current->dma_buf += count;
@@ -715,7 +733,9 @@ static void lsi_command_complete(SCSIBus *bus, int reason, 
uint32_t tag,
 
 static void lsi_do_command(LSIState *s)
 {
+SCSIDevice *dev;
 uint8_t buf[16];
+uint32_t id;
 int n;
 
 DPRINTF("Send command len=%d\n", s->dbc);
@@ -725,19 +745,24 @@ static void lsi_do_command(LSIState *s)
 s->sfbr = buf[0];
 s->command_complete = 0;
 
+id = (s->select_tag >> 8) & 0xf;
+dev = s->bus.devs[id];
+if (!dev) {
+lsi_bad_selection(s, id);
+return;
+}
+
 assert(s->current == NULL);
 s->current = qemu_mallocz(sizeof(lsi_request));
 s->current->tag = s->select_tag;
-s->current->dev = s->select_dev;
 
-n = s->current->dev->info->send_command(s->current->dev, s->current->tag, 
buf,
-s->current_lun);
+n = dev->info->send_command(dev, s->current->tag, buf, s->current_lun);
 if (n > 0) {
 lsi_set_phase(s, PHASE_DI);
-s->current->dev->info->read_data(s->current->dev, s->current->tag);
+dev->info->read_data(dev, s->current->tag);
 } else if (n < 0) {
 lsi_set_phase(s, PHASE_DO);
-s->current->dev->info->write_data(s->current->dev, s->current->tag);
+dev->info->write_data(dev, s->current->tag);
 }
 
 if (!s->command_complete) {
@@ -771,12 +796,6 @@ static void lsi_do_status(LSIState *s)
 lsi_add_msg_byte(s, 0); /* COMMAND COMPLETE */
 }
 
-static void lsi_disconnect(LSIState *s)
-{
-s->scntl1 &= ~LSI_SCNTL1_CON;
-s->sstat1 &= ~PHASE_MASK;
-}
-
 static void lsi_do_msgin(LSIState *s)
 {
 int len;
@@ -1092,9 +,7 @@ again:
 s->sstat0 |= LSI_SSTAT0_WOA;
 s->scntl1 &= ~LSI_SCNTL1_IARB;
 if (id >= LSI_MAX_DEVS || !s->bus.devs[id]) {
-DPRINTF("Selected absent target %d\n", id);
-lsi_script_scsi_interrupt(s, 0, LSI_SIST1_STO);
-  

[Qemu-devel] Re: [PATCH v7 3/6] char: Let writers know how much data was written in case of errors

2010-05-05 Thread Amit Shah
On (Wed) May 05 2010 [08:54:48], Anthony Liguori wrote:
> On 05/05/2010 08:23 AM, Amit Shah wrote:
>> On (Wed) May 05 2010 [08:15:19], Anthony Liguori wrote:
>>
>>> On 05/04/2010 04:39 PM, Amit Shah wrote:
>>>  
 On writing errors, we just returned -1 even if some bytes were already
 written out. Ensure we return the number of bytes written before we
 return the error (on a subsequent call to qemu_chr_write()).

 Signed-off-by: Amit Shah
 ---
qemu-char.c |   12 +++-
1 files changed, 11 insertions(+), 1 deletions(-)

 diff --git a/qemu-char.c b/qemu-char.c
 index 76ad12c..decf687 100644
 --- a/qemu-char.c
 +++ b/qemu-char.c
 @@ -507,6 +507,9 @@ int send_all(int fd, const void *buf, int len1)
while (len>   0) {
ret = send(fd, buf, len, 0);
if (ret<   0) {
 +if (len1 - len) {
 +return len1 - len;
 +}
errno = WSAGetLastError();
if (errno != WSAEWOULDBLOCK) {
return -1;
 @@ -531,8 +534,15 @@ static int unix_write(int fd, const uint8_t *buf, int 
 len1)
while (len>   0) {
ret = write(fd, buf, len);
if (ret<   0) {
 -if (errno != EINTR&&   errno != EAGAIN)
 +if (errno == EINTR) {
 +continue;
 +}
 +if (len1 - len) {
 +return len1 - len;
 +}
 +if (errno != EAGAIN) {
return -1;
 +}
} else if (ret == 0) {
break;
} else {


>>> This will break lots of things.  The contract for send_all and
>>> unix_write is that the transmit all data.
>>>  
>> The current behaviour remains unchanged for all the users. Only callers
>> of qemu_chr_write_nb() will get an -EAGAIN return.
>>
>
> No, send_all used to send all data.  After your change, it only sends  
> what it can the first time.  The same with unix_write.

Where do you see this?

Amit




[Qemu-devel] Re: [PATCH v7 4/6] char: Add qemu_chr_write_nb() for nonblocking writes

2010-05-05 Thread Paul Brook
> On 05/05/2010 08:34 AM, Paul Brook wrote:
> >> I really dislike the idea of adding another function for this.  Can you
> >> explain why you need this functionality for virtio-console and why this
> >> functionality isn't needed for everything else?
> > 
> > This functionality should (in principle) be used by all serial port
> > implementations.
> > 
> > Physical serial ports are sufficiently crufty and low-performance that
> > noone actually uses them nowadays.  I expect that the only significant
> > real-world use is for serial consoles, which never send enough data to
> > care that writes stall the whole machine.
> 
> We don't implement control flow in the character driver layer today.
> Different backends use different policies.  Some drop data (like pty)
> while other block (like tcp).

Really? I thought data was only dropped when no client was connected, and that 
there was a user visible option to control this.  Either way, I agree that 
this should be done consistently.
 
> This patch adds optional control flow in a pretty crufty way to *some*
> backends but not all.  This just adds a bunch of complexity that will
> certainly introduce bugs.

I admit I've only really looked at the device emulation side of the interface, 
not the chardev backend implementation.

Paul




Re: [Qemu-devel] [PATCH 5/6] parallels: use pread

2010-05-05 Thread Kevin Wolf
Am 04.05.2010 12:45, schrieb Christoph Hellwig:
> Use pread instead of lseek + read in preparation of using the qemu
> block API.
> 
> Signed-off-by: Christoph Hellwig 
> 
> Index: qemu-kevin/block/parallels.c
> ===
> --- qemu-kevin.orig/block/parallels.c 2010-05-03 13:00:09.711253925 +0200
> +++ qemu-kevin/block/parallels.c  2010-05-03 13:04:15.686033993 +0200
> @@ -83,7 +83,7 @@ static int parallels_open(BlockDriverSta
>  
>  s->fd = fd;
>  
> -if (read(fd, &ph, sizeof(ph)) != sizeof(ph))
> +if (pread(fd, &ph, sizeof(ph), 0) != sizeof(ph))
>  goto fail;
>  
>  if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
> @@ -93,14 +93,11 @@ static int parallels_open(BlockDriverSta
>  
>  bs->total_sectors = le32_to_cpu(ph.nb_sectors);
>  
> -if (lseek(s->fd, 64, SEEK_SET) != 64)
> - goto fail;
> -
>  s->tracks = le32_to_cpu(ph.tracks);
>  
>  s->catalog_size = le32_to_cpu(ph.catalog_entries);
>  s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
> -if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
> +if (pread(s->fd, s->catalog_bitmap, s->catalog_size * 4, 64) !=
>   s->catalog_size * 4)
>   goto fail;
>  for (i = 0; i < s->catalog_size; i++)
> @@ -114,28 +111,18 @@ fail:
>  return -1;
>  }
>  
> -static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
> +static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
>  {
>  BDRVParallelsState *s = bs->opaque;
>  uint32_t index, offset;
> -uint64_t position;
>  
>  index = sector_num / s->tracks;
>  offset = sector_num % s->tracks;
>  
> -// not allocated
> +/* not allocated */
>  if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
>   return -1;
> -
> -position = (uint64_t)(s->catalog_bitmap[index] + offset) * 512;
> -
> -//fprintf(stderr, "sector: %llx index=%x offset=%x pointer=%x 
> position=%x\n",
> -//   sector_num, index, offset, s->catalog_bitmap[index], position);
> -
> -if (lseek(s->fd, position, SEEK_SET) != position)
> - return -1;
> -
> -return 0;
> +return (uint64_t)(s->catalog_bitmap[index] + offset) * 512;
>  }
>  
>  static int parallels_read(BlockDriverState *bs, int64_t sector_num,
> @@ -144,11 +131,13 @@ static int parallels_read(BlockDriverSta
>  BDRVParallelsState *s = bs->opaque;
>  
>  while (nb_sectors > 0) {
> - if (!seek_to_sector(bs, sector_num)) {
> - if (read(s->fd, buf, 512) != 512)
> - return -1;
> - } else
> +uint64_t position = seek_to_sector(bs, sector_num);
> +if (position >= 0) {

position should be a signed int64_t, otherwise the condition is always true.

Kevin




Re: [Qemu-devel] simple block driver cleanups

2010-05-05 Thread Kevin Wolf
Am 04.05.2010 15:22, schrieb Christoph Hellwig:
> On Tue, May 04, 2010 at 02:38:07PM +0200, Kevin Wolf wrote:
>> Am 04.05.2010 12:43, schrieb Christoph Hellwig:
>>> This series cleans up the simple read-only block drivers to use the
>>> qemu block device API to access their backing devices, making the code
>>> simpler and usable over nbd/curl.  I've not touched dmg yet as it's even
>>> more bitrot than usual and deserves it's own series.
>>
>> Have you already added something to qemu-iotests locally to test this or
>> did you test manually?
> 
> I tested cloop manually, I haven't found usable creation tools for the
> others yet.

Okay, so I have reviewed the patches now and except for that one thing
in parallels they look good.

For cloop I trust your test, and for bochs I did a very basic test
myself (however, I doubt that anyone uses this driver, considering how
hard it is to create such an image...). For parallels I still need to
find out how to create an image.

I've applied cloop and bochs to the block branch, and I'll wait for a v2
with the parallels patches.

Kevin




Re: [Qemu-devel] [PATCH 1/2] qemu-error: Introduce get_errno_name()

2010-05-05 Thread Luiz Capitulino
On Tue, 04 May 2010 16:56:19 -0500
Anthony Liguori  wrote:

> On 05/04/2010 03:30 PM, Luiz Capitulino wrote:
> >
> >   StateVmSaveFailed is not like CommandFailed, there are five errors
> > in do_savevm() and StateVmSaveFailed happens to be one of them.
> >
> >   But I understand what you mean and I have considered doing something
> > like it, one of the problems though is that I'm not sure 'source' is
> > enough to determine where the error has happened.
> >
> >   Consider do_savevm() again. We have three 'operations' that might
> > fail: delete an existing snapshot, save the VM state and create the
> > snapshot. All those operations can return -EIO as an error.
> >
> 
> Maybe those three operations should return distinct errnos?

 I don't think this is possible, as we would have to guarantee that no
function called by a handler return the same errno.

 Taking the block layer as an example. Most block drivers handlers check
if the driver really exist (-ENOMEDIUM) and if the driver supports the
operation being requested (-ENOTSUP).

 How can we have unique errnos in this case?

 Also remember that we're only talking about the surface. The call
chain is deep. We have almost a hundred handlers, they use functions
from almost all subsystems.

> That way, we can make more useful QErrors.

 Perhaps, the question boils down to how QErrors should be done.

 Today, we're doing it like this, consider handler foo(), it does the following:

  1. connect somewhere
  2. send some data
  3. close

 All operations performed can fail and we want to report that. Currently,
afaiu we're doing the following (Markus correct me if I'm wrong):

  1. ConnectFailed
  2. SendFailed
  3. CloseFailed

 An obvious problem is that we don't say why it has failed. But this is
what errno is for and I thought we could use it someway. The advantage
of this approach is that, errors are high-level. It's easy to identify
what went wrong and we can have very good error messages for them.

 Now, if I got it right, you're suggesting we should do:

  1. BadFileDescriptor, Interrupted, NoPermission ...
 (or anything connect() may return)
  2. IOError ...
  3. IOError, BadFileDescriptor

 This makes sense, but if I'm a user (or a QMP client) I don't want this:

(qemu) savevm foobar
Bad file descriptor

 I'd prefer this instead:

(qemu) savevm foobar
Can't delete 'foobar': Bad file descriptor

 Or:

(qemu) savevm foobar
Can't save VM state: I/O Error

> >   So, the first question is: would you map EIO to an QError? Just like
> > you did for SocketIOError? If so, how would you know which operation
> > has failed? Would you put its name in source? Or have an additional
> > 'operation' key?
> >
> >   A related problem is not to degrade the current set of error messages
> > we offer to the users. For do_savevm()'s 'save state' operation, current
> > message is:
> >
> > "Error -7 while writing VM"
> >
> >   With StateVmSaveFailed it becomes:
> >
> > "Failed to save VM state ("EIO")"
> >
> 
> I don't think this is that useful to preserve because as you said, it 
> could be one of three things.

 But as I said above, we have to say which operation has failed, otherwise
it's very difficult to diagnose the problem.




Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Daniel P. Berrange
On Tue, May 04, 2010 at 04:50:34PM -0500, Anthony Liguori wrote:
> On 05/04/2010 04:47 PM, Gerd Hoffmann wrote:
> >On 05/04/10 18:23, Anthony Liguori wrote:
> >>On 05/04/2010 08:49 AM, Reinhard Max wrote:
> >>>Hi,
> >>>
> >>>I am maintaining the tightvnc package for openSUSE and was recently
> >>>confronted with an alleged vnc problem with QWMU that turned out to be
> >>>a shortcoming in QEMU's code for handling TCP server sockets, which is
> >>>used by the vnc and char modules.
> >>>
> >>>The problem occurs when the address to listen on is given as a name
> >>>which resolves to multiple IP addresses the most prominent example
> >>>being "localhost" resolving to 127.0.0.1 and ::1 .
> >
> >My tigervnc (tightvnc successor) has IPv6 support and handles this 
> >just fine.  There is also the option to force qemu to listen on ipv4 
> >(or ipv6) only.
> >
> >>>The existing code stopped walking the list of addresses returned by
> >>>getaddrinfo() as soon as one socket was successfully opened and bound.
> >>>The result was that a qemu instance started with "-vnc localhost:42"
> >>>only listened on ::1, wasn't reachable through 127.0.0.1. The fact
> >>>that the code set the IPV6_V6ONLY socket option didn't help, because
> >>>that option only works when the socket gets bound to the IPv6 wildcard
> >>>address (::), but is useless for explicit address bindings.
> >
> >Indeed.
> >
> >>But that said, I'm not sure we're doing the wrong thing right now. Gerd,
> >>what do you think about this behavior?
> >
> >Reinhard is correct.  If a hostname resolves to multiple addresses 
> >like this ...
> >
> >   zweiblum kraxel ~# host zweiblum
> >   zweiblum.home.kraxel.org has address 192.168.2.101
> >   zweiblum.home.kraxel.org has IPv6 address 
> >2001:6f8:1cb3:2:216:41ff:fee1:3d40
> >
> >... qemu should listen on all addresses returned.  Which in turn 
> >requires multiple listening sockets.
> >
> >Changing this is a big deal though, thats why I've taken the somewhat 
> >unclean shortcut to listen on the first match only when implementing 
> >this.  Clients are supposed to try to connect to all addresses 
> >returned by the lookup (and they do if they got IPv6 support), thus 
> >this usually doesn't cause trouble in practice.
> >
> >When going for multiple listening sockets in qemu we have to figure 
> >how we'll handle this in a number of places as there is no single 
> >listening address any more.  Reporting the vnc server address in QMP 
> >is one.  I'm sure there are more.
> 
> Okay, that makes sense.  Personally, I'm inclined to agree that this is 
> a client problem.

It isn't a client problem if QEMU is not listening on all the required
addresses. In Gerd's case 

> >   zweiblum.home.kraxel.org has address 192.168.2.101
> >   zweiblum.home.kraxel.org has IPv6 address
> >2001:6f8:1cb3:2:216:41ff:fee1:3d40

If QEMU only listens on 2001:6f8:1cb3:2:216:41ff:fee1:3d40, and a VNC
client that only knows IPv4 tries to connect it will fail. There's nothing
the client can do to solve this. QEMU has to be listening on all addresses 
associated with a name for the dual-stack setup to provide correct fallback 
for clients.


Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




[Qemu-devel] [PATCH] char: Flush read buffer in mux_chr_can_read

2010-05-05 Thread Jan Kiszka
Move the buffer flush from mux_chr_read to mux_chr_can_read. While the
latter is called periodically, the former will only be invoked when new
characters arrive at the back-end. This caused problems to front-end
drivers whenever they were unable to read data immediately, e.g.
virtio-console attached to stdio.

Signed-off-by: Jan Kiszka 
---
 qemu-char.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/qemu-char.c b/qemu-char.c
index ac65a1c..2b115a4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -404,6 +404,8 @@ static int mux_chr_can_read(void *opaque)
 MuxDriver *d = chr->opaque;
 int m = d->focus;
 
+mux_chr_accept_input(opaque);
+
 if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE)
 return 1;
 if (d->chr_can_read[m])
@@ -418,8 +420,6 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, 
int size)
 int m = d->focus;
 int i;
 
-mux_chr_accept_input (opaque);
-
 for(i = 0; i < size; i++)
 if (mux_proc_byte(chr, d, buf[i])) {
 if (d->prod[m] == d->cons[m] &&




Re: [Qemu-devel] [PATCH, RFC] block: separate raw images from the file protocol

2010-05-05 Thread Stefan Weil

Am 05.05.2010 14:45, schrieb Kevin Wolf:

Am 04.05.2010 22:58, schrieb Stefan Weil:

This patch (commit 84a12e6648444f517055138a7d7f25a22d7e1029)
breaks QEMU for Win32:

QEMU can no longer access \\.\PhysicalDrive0 - a feature I use quite 
often.


Found by git bisect, tested like this: qemu \\.\PhysicalDrive0


Does the attached patch fix the problem?

Kevin



Yes, the patch fixes the problem.

It needs a small modification to compile for non-win32 systems.

Stefan





Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Daniel P. Berrange
On Wed, May 05, 2010 at 10:53:00AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> >>When going for multiple listening sockets in qemu we have to figure
> >>how we'll handle this in a number of places as there is no single
> >>listening address any more.
> >
> >Well, that's what my patch is about.
> 
> Sure.
> 
> >Did you take a look at it?
> 
> Briefly, yes.  Overall it looks sensible to me.  Devil is in the details 
> though, see below.
> 
> Noticed that it probably should get a few helper functions to handle 
> FdLists to avoid the quite simliar open-coded loop-over-all-fds loops 
> all over the place.
> 
> >>Reporting the vnc server address in QMP is one.
> >
> >Not sure what QMP is (this was the first time I looked at QEMU's
> >internals),
> 
> You'll run into qmp for sure when forward-porting the patches to the 
> latest qemu bits.  It is the machine-readable version of the monitor 
> protocol (in qemu 0.12+).
> 
> >but I think my patch only leaves one place TODO where I
> >chose to report only the first address for now, but it shouldn't be too
> >hard to fix that as well.
> 
> Yea.  I've noticed that TODO ;)
> 
> >BTW, in some places I circumvented the need for reporting multiple
> >addresses by simply reporting the name that was passed to QEMU instead.
> 
> This is one of the issues which needs to be addressed somehow.
> 
> First I think qemu should be self-consistent here, i.e. either report 
> the (single) name or the list of addressed everythere.
> 
> Second we have to care about the current users (especially libvirt). 
> Today qemu usually reports the address I think.  Thus I tend to stick to 
> addresses to keep them happy.

>From a libvirt POV the only place we use the address is in the graphics
notification event(s). For the event we know which specific address out 
of the list to report - the one we just did accept() on. libvirt does not 
use the 'info vnc' or 'query-vnc' output at all, so whether that reports 
IP address of hostname doesn't matter to us. It is probably easiest to 
just make it report the same info that was provided on the CLI to -vnc 


Regards,
Daniel
-- 
|: Red Hat, Engineering, London-o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org-o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




[Qemu-devel] [PATCH 00/12] Cleanup linux-user/elfload.c, v2

2010-05-05 Thread Richard Henderson
Changes v1->v2:
  * Drop VDSO loading for the moment; let's concentrate on getting the
basic elf loading in shape first.
  * Fix SH4-EB ELF_DATA definition
  * Re-base after pbrook's guest_base changes for the main executable.


r~



Richard Henderson (12):
  linux-user: Handle filesz < memsz for any PT_LOAD segment.
  Add more DT_* and AT_* constants to qemu's copy of elf.h.
  linux-user: Reindent elfload.c.
  linux-user: Reduce lseek+reads while loading elf files.
  linux-user: Define ELF_DATA generically.
  linux-user: Clean up byte-swapping in elfload.c.
  linux-user: Load symbols from the interpreter.
  linux-user: Improve consistency checking in elf headers.
  linux-user: Put the stack guard page at the top.
  linux-user: Remove partial support for a.out interpreters.
  linux-user: Extract load_elf_image from load_elf_interp.
  linux-user: Re-use load_elf_image for the main binary.

 elf.h  |   44 ++
 linux-user/elfload.c   | 1789 
 linux-user/linuxload.c |   17 +-
 linux-user/qemu.h  |7 +-
 4 files changed, 793 insertions(+), 1064 deletions(-)





[Qemu-devel] [PATCH 02/12] Add more DT_* and AT_* constants to qemu's copy of elf.h.

2010-05-05 Thread Richard Henderson
Moving some PPC AT_* constants from elfload.c at the same time.

Signed-off-by: Richard Henderson 
---
 elf.h|   44 
 linux-user/elfload.c |9 -
 2 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/elf.h b/elf.h
index c84c8ab..012532c 100644
--- a/elf.h
+++ b/elf.h
@@ -146,8 +146,37 @@ typedef int64_t  Elf64_Sxword;
 #define DT_DEBUG   21
 #define DT_TEXTREL 22
 #define DT_JMPREL  23
+#define DT_BINDNOW 24
+#define DT_INIT_ARRAY  25
+#define DT_FINI_ARRAY  26
+#define DT_INIT_ARRAYSZ27
+#define DT_FINI_ARRAYSZ28
+#define DT_RUNPATH 29
+#define DT_FLAGS   30
+#define DT_LOOS0x600d
+#define DT_HIOS0x6000
 #define DT_LOPROC  0x7000
 #define DT_HIPROC  0x7fff
+
+/* DT_ entries which fall between DT_VALRNGLO and DT_VALRNDHI use
+   the d_val field of the Elf*_Dyn structure.  I.e. they contain scalars.  */
+#define DT_VALRNGLO0x6d00
+#define DT_VALRNGHI0x6dff
+
+/* DT_ entries which fall between DT_ADDRRNGLO and DT_ADDRRNGHI use
+   the d_ptr field of the Elf*_Dyn structure.  I.e. they contain pointers.  */
+#define DT_ADDRRNGLO   0x6e00
+#define DT_ADDRRNGHI   0x6eff
+
+#defineDT_VERSYM   0x6ff0
+#define DT_RELACOUNT   0x6ff9
+#define DT_RELCOUNT0x6ffa
+#define DT_FLAGS_1 0x6ffb
+#define DT_VERDEF  0x6ffc
+#define DT_VERDEFNUM   0x6ffd
+#define DT_VERNEED 0x6ffe
+#define DT_VERNEEDNUM  0x6fff
+
 #define DT_MIPS_RLD_VERSION0x7001
 #define DT_MIPS_TIME_STAMP 0x7002
 #define DT_MIPS_ICHECKSUM  0x7003
@@ -206,6 +235,21 @@ typedef int64_t  Elf64_Sxword;
 #define AT_PLATFORM 15  /* string identifying CPU for optimizations */
 #define AT_HWCAP  16/* arch dependent hints at CPU capabilities */
 #define AT_CLKTCK 17   /* frequency at which times() increments */
+#define AT_FPUCW  18   /* info about fpu initialization by kernel */
+#define AT_DCACHEBSIZE 19  /* data cache block size */
+#define AT_ICACHEBSIZE 20  /* instruction cache block size */
+#define AT_UCACHEBSIZE 21  /* unified cache block size */
+#define AT_IGNOREPPC   22  /* ppc only; entry should be ignored */
+#define AT_SECURE  23  /* boolean, was exec suid-like? */
+#define AT_BASE_PLATFORM 24/* string identifying real platforms */
+#define AT_RANDOM  25  /* address of 16 random bytes */
+#define AT_EXECFN  31  /* filename of the executable */
+#define AT_SYSINFO 32  /* address of kernel entry point */
+#define AT_SYSINFO_EHDR33  /* address of kernel vdso */
+#define AT_L1I_CACHESHAPE 34   /* shapes of the caches: */
+#define AT_L1D_CACHESHAPE 35   /*   bits 0-3: cache associativity.  */
+#define AT_L2_CACHESHAPE  36   /*   bits 4-7: log2 of line size.  */
+#define AT_L3_CACHESHAPE  37   /*   val&~255: cache size.  */
 
 typedef struct dynamic{
   Elf32_Sword d_tag;
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 58728ff..edd852f 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -480,15 +480,6 @@ static uint32_t get_elf_hwcap(void)
 }
 
 /*
- * We need to put in some extra aux table entries to tell glibc what
- * the cache block size is, so it can use the dcbz instruction safely.
- */
-#define AT_DCACHEBSIZE  19
-#define AT_ICACHEBSIZE  20
-#define AT_UCACHEBSIZE  21
-/* A special ignored type value for PPC, for glibc compatibility.  */
-#define AT_IGNOREPPC22
-/*
  * The requirements here are:
  * - keep the final alignment of sp (sp & 0xf)
  * - make sure the 32-bit value at the first 16 byte aligned position of
-- 
1.7.0.1





[Qemu-devel] [PATCH 10/12] linux-user: Remove partial support for a.out interpreters.

2010-05-05 Thread Richard Henderson
At the bottom of the a.out support was the unimplemented load_aout_interp
function.  There were other portions of the support that didn't look
right; when I went to look in the Linux kernel for clarification, I found
that the support for such interpreters has been removed from binfmt_elf.
There doesn't seem to be any reason to keep this broken support in qemu.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c |   79 ++
 1 files changed, 9 insertions(+), 70 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index bd4b8fc..77267a4 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -819,10 +819,6 @@ struct exec
 #define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned 
long)(TARGET_ELF_EXEC_PAGESIZE-1))
 #define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1))
 
-#define INTERPRETER_NONE 0
-#define INTERPRETER_AOUT 1
-#define INTERPRETER_ELF 2
-
 #define DLINFO_ITEMS 12
 
 static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
@@ -830,8 +826,6 @@ static inline void memcpy_fromfs(void * to, const void * 
from, unsigned long n)
 memcpy(to, from, n);
 }
 
-static int load_aout_interp(void * exptr, int interp_fd);
-
 #ifdef BSWAP_NEEDED
 static void bswap_ehdr(struct elfhdr *ehdr)
 {
@@ -1069,7 +1063,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
struct elfhdr * exec,
abi_ulong load_addr,
abi_ulong load_bias,
-   abi_ulong interp_load_addr, int ibcs,
+   abi_ulong interp_load_addr,
struct image_info *info)
 {
 abi_ulong sp;
@@ -1099,7 +1093,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
 size += DLINFO_ARCH_ITEMS * 2;
 #endif
 size += envc + argc + 2;
-size += (!ibcs ? 3 : 1);/* argc itself */
+size += 1;  /* argc itself */
 size *= n;
 if (size & 15)
 sp -= 16 - (size & 15);
@@ -1141,7 +1135,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
 
 info->saved_auxv = sp;
 
-sp = loader_build_argptr(envc, argc, sp, p, !ibcs);
+sp = loader_build_argptr(envc, argc, sp, p, 0);
 return sp;
 }
 
@@ -1394,11 +1388,9 @@ int load_elf_binary(struct linux_binprm * bprm, struct 
target_pt_regs * regs,
 {
 struct elfhdr elf_ex;
 struct elfhdr interp_elf_ex;
-struct exec interp_ex;
 int interpreter_fd = -1; /* avoid warning */
 abi_ulong load_addr, load_bias;
 int load_addr_set = 0;
-unsigned int interpreter_type = INTERPRETER_NONE;
 unsigned char ibcs2_interpreter;
 int i;
 abi_ulong mapped_addr;
@@ -1412,7 +1404,6 @@ int load_elf_binary(struct linux_binprm * bprm, struct 
target_pt_regs * regs,
 abi_ulong start_code, end_code, start_data, end_data;
 abi_ulong reloc_func_desc = 0;
 abi_ulong elf_stack;
-char passed_fileno[6];
 
 ibcs2_interpreter = 0;
 status = 0;
@@ -1462,7 +1453,6 @@ int load_elf_binary(struct linux_binprm * bprm, struct 
target_pt_regs * regs,
 end_code = 0;
 start_data = 0;
 end_data = 0;
-interp_ex.a_info = 0;
 
 elf_ppnt = elf_phdata;
 for(i=0;i < elf_ex.e_phnum; i++) {
@@ -1527,59 +1517,22 @@ int load_elf_binary(struct linux_binprm * bprm, struct 
target_pt_regs * regs,
 memset(bprm->buf, 0, BPRM_BUF_SIZE - retval);
 }
 
-interp_ex = *((struct exec *) bprm->buf); /* aout exec-header */
-interp_elf_ex = *((struct elfhdr *) bprm->buf); /* elf exec-header 
*/
+interp_elf_ex = *((struct elfhdr *) bprm->buf);
 }
 elf_ppnt++;
 }
 
 /* Some simple consistency checks for the interpreter */
-if (elf_interpreter){
-interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
-
-/* Now figure out which format our binary is */
-if ((N_MAGIC(interp_ex) != OMAGIC) && (N_MAGIC(interp_ex) != ZMAGIC) &&
-(N_MAGIC(interp_ex) != QMAGIC)) {
-interpreter_type = INTERPRETER_ELF;
-}
-
+if (elf_interpreter) {
 if (!elf_check_ident(&interp_elf_ex)) {
-interpreter_type &= ~INTERPRETER_ELF;
-}
-
-if (!interpreter_type) {
 free(elf_interpreter);
 free(elf_phdata);
 close(bprm->fd);
+close(interpreter_fd);
 return -ELIBBAD;
 }
 }
 
-/* OK, we are done with that, now set up the arg stuff,
-   and then start this sucker up */
-
-{
-char * passed_p;
-
-if (interpreter_type == INTERPRETER_AOUT) {
-snprintf(passed_fileno, sizeof(passed_fileno), "%d", bprm->fd);
-passed_p = passed_fileno;
-
-if (elf_interpreter) {
-bprm->p = copy_elf_strings(1,&passed_p,bprm->pag

Re: [Qemu-devel] simple block driver cleanups

2010-05-05 Thread Christoph Hellwig
On Wed, May 05, 2010 at 04:28:08PM +0200, Kevin Wolf wrote:
> For cloop I trust your test, and for bochs I did a very basic test
> myself (however, I doubt that anyone uses this driver, considering how
> hard it is to create such an image...). For parallels I still need to
> find out how to create an image.

Btw, I did a bit more testing than my trivial qemu-io testing before,
and it seem that cloop is utterly broken in qemu mainline already.
Just booting with a small cloop compressed filesystem image attached
is enough to give me tons of I/O errors.  When using ide I can even
crash qemu by trying to mount it.





[Qemu-devel] [PATCH 12/12] linux-user: Re-use load_elf_image for the main binary.

2010-05-05 Thread Richard Henderson
This requires moving the PT_INTERP extraction and GUEST_BASE
handling into load_elf_image.  Key this off a non-null pointer
argument to receive the interpreter name.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c |  380 ++
 1 files changed, 103 insertions(+), 277 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 0167414..d435564 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -811,9 +811,6 @@ struct exec
 #define ZMAGIC 0413
 #define QMAGIC 0314
 
-/* max code+data+bss+brk space allocated to ET_DYN executables */
-#define ET_DYN_MAP_SIZE (128 * 1024 * 1024)
-
 /* Necessary parameters */
 #define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE
 #define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned 
long)(TARGET_ELF_EXEC_PAGESIZE-1))
@@ -1150,7 +1147,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
On return: INFO values will be filled in, as necessary or available.  */
 
 static void load_elf_image(const char *image_name, int image_fd,
-   struct image_info *info,
+   struct image_info *info, char **pinterp_name,
char bprm_buf[BPRM_BUF_SIZE])
 {
 struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
@@ -1210,6 +1207,67 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 if (load_addr == -1) {
 goto exit_perror;
 }
+} else if (pinterp_name != NULL) {
+/* This is the main executable.  Make sure that the low
+   address does not conflict with MMAP_MIN_ADDR or the
+   QEMU application itself.  */
+#if defined(CONFIG_USE_GUEST_BASE)
+/*
+ * In case where user has not explicitly set the guest_base, we
+ * probe here that should we set it automatically.
+ */
+if (!have_guest_base) {
+unsigned long host_start, real_start, host_size;
+
+/* Round addresses to page boundaries.  */
+loaddr &= qemu_host_page_mask;
+hiaddr = HOST_PAGE_ALIGN(hiaddr);
+
+if (loaddr < mmap_min_addr) {
+host_start = HOST_PAGE_ALIGN(mmap_min_addr);
+} else {
+host_start = loaddr;
+if (host_start != loaddr) {
+errmsg = "Address overflow loading ELF binary";
+goto exit_errmsg;
+}
+}
+host_size = hiaddr - loaddr;
+while (1) {
+/* Do not use mmap_find_vma here because that is limited to the
+   guest address space.  We are going to make the
+   guest address space fit whatever we're given.  */
+real_start = (unsigned long)
+mmap((void *)host_start, host_size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
+if (real_start == (unsigned long)-1) {
+goto exit_perror;
+}
+if (real_start == host_start) {
+break;
+}
+/* That address didn't work.  Unmap and try a different one.
+   The address the host picked because is typically right at
+   the top of the host address space and leaves the guest with
+   no usable address space.  Resort to a linear search.  We
+   already compensated for mmap_min_addr, so this should not
+   happen often.  Probably means we got unlucky and host
+   address space randomization put a shared library somewhere
+   inconvenient.  */
+munmap((void *)real_start, host_size);
+host_start += qemu_host_page_size;
+if (host_start == loaddr) {
+/* Theoretically possible if host doesn't have any suitably
+   aligned areas.  Normally the first mmap will fail.  */
+errmsg = "Unable to find space for application";
+goto exit_errmsg;
+}
+}
+qemu_log("Relocating guest address space from 0x"
+ TARGET_ABI_FMT_lx " to 0x%lx\n", loaddr, real_start);
+guest_base = real_start - loaddr;
+}
+#endif
 }
 load_bias = load_addr - loaddr;
 
@@ -1271,6 +1329,33 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 info->brk = vaddr_em;
 }
 }
+} else if (eppnt->p_type == PT_INTERP && pinterp_name) {
+char *interp_name;
+
+if (*pinterp_name) {
+errmsg = "Multiple PT_INTERP entries";
+goto exit_errmsg;
+}
+interp_name = malloc(eppnt->p_filesz);
+if (!interp_name) {
+goto exit_perror;
+}

[Qemu-devel] [PATCH 05/12] linux-user: Define ELF_DATA generically.

2010-05-05 Thread Richard Henderson
The only consideration on this value is the target endianness.
The existing defines were incorrect for alpha and sh4eb.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c |   30 ++
 1 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index dec86d6..8732bc0 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -95,6 +95,12 @@ enum {
 #define ELIBBAD 80
 #endif
 
+#ifdef TARGET_WORDS_BIGENDIAN
+#define ELF_DATAELFDATA2MSB
+#else
+#define ELF_DATAELFDATA2LSB
+#endif
+
 typedef target_ulongtarget_elf_greg_t;
 #ifdef USE_UID16
 typedef uint16_ttarget_uid_t;
@@ -132,7 +138,6 @@ static uint32_t get_elf_hwcap(void)
 #define elf_check_arch(x) ( ((x) == ELF_ARCH) )
 
 #define ELF_CLASS  ELFCLASS64
-#define ELF_DATA   ELFDATA2LSB
 #define ELF_ARCH   EM_X86_64
 
 static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
@@ -196,7 +201,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUState *env)
  * These are used to set parameters in the core dumps.
  */
 #define ELF_CLASS   ELFCLASS32
-#define ELF_DATAELFDATA2LSB
 #define ELF_ARCHEM_386
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -259,11 +263,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUState *env)
 #define elf_check_arch(x) ( (x) == EM_ARM )
 
 #define ELF_CLASS   ELFCLASS32
-#ifdef TARGET_WORDS_BIGENDIAN
-#define ELF_DATAELFDATA2MSB
-#else
-#define ELF_DATAELFDATA2LSB
-#endif
 #define ELF_ARCHEM_ARM
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -352,7 +351,6 @@ enum
 #endif
 
 #define ELF_CLASS   ELFCLASS64
-#define ELF_DATAELFDATA2MSB
 #define ELF_ARCHEM_SPARCV9
 
 #define STACK_BIAS  2047
@@ -382,7 +380,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 #define elf_check_arch(x) ( (x) == EM_SPARC )
 
 #define ELF_CLASS   ELFCLASS32
-#define ELF_DATAELFDATA2MSB
 #define ELF_ARCHEM_SPARC
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -416,11 +413,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-#define ELF_DATAELFDATA2MSB
-#else
-#define ELF_DATAELFDATA2LSB
-#endif
 #define ELF_ARCHEM_PPC
 
 /* Feature masks for the Aux Vector Hardware Capabilities (AT_HWCAP).
@@ -554,11 +546,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUState *env)
 #else
 #define ELF_CLASS   ELFCLASS32
 #endif
-#ifdef TARGET_WORDS_BIGENDIAN
-#define ELF_DATAELFDATA2MSB
-#else
-#define ELF_DATAELFDATA2LSB
-#endif
 #define ELF_ARCHEM_MIPS
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -626,7 +613,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUState *env)
 #define elf_check_arch(x) ( (x) == EM_XILINX_MICROBLAZE )
 
 #define ELF_CLASS   ELFCLASS32
-#define ELF_DATAELFDATA2MSB
 #define ELF_ARCHEM_XILINX_MICROBLAZE
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -648,7 +634,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 #define elf_check_arch(x) ( (x) == EM_SH )
 
 #define ELF_CLASS ELFCLASS32
-#define ELF_DATA  ELFDATA2LSB
 #define ELF_ARCH  EM_SH
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -704,7 +689,6 @@ static inline void elf_core_copy_regs(target_elf_gregset_t 
*regs,
 #define elf_check_arch(x) ( (x) == EM_CRIS )
 
 #define ELF_CLASS ELFCLASS32
-#define ELF_DATA  ELFDATA2LSB
 #define ELF_ARCH  EM_CRIS
 
 static inline void init_thread(struct target_pt_regs *regs,
@@ -724,7 +708,6 @@ static inline void init_thread(struct target_pt_regs *regs,
 #define elf_check_arch(x) ( (x) == EM_68K )
 
 #define ELF_CLASS   ELFCLASS32
-#define ELF_DATAELFDATA2MSB
 #define ELF_ARCHEM_68K
 
 /* ??? Does this need to do anything?
@@ -778,7 +761,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUState *env)
 #define elf_check_arch(x) ( (x) == ELF_ARCH )
 
 #define ELF_CLASS  ELFCLASS64
-#define ELF_DATA   ELFDATA2MSB
 #define ELF_ARCH   EM_ALPHA
 
 static inline void init_thread(struct target_pt_regs *regs,
-- 
1.7.0.1





[Qemu-devel] [PATCH 07/12] linux-user: Load symbols from the interpreter.

2010-05-05 Thread Richard Henderson
First, adjust load_symbols to accept a load_bias parameter.  At the same
time, read the entire section header table in one go, use pread instead
f lseek+read for the symbol and string tables, and properly free
allocated structures on error exit paths.

Second, adjust load_elf_interp to compute load_bias.  This requires
finding out the built-in load addresses.  Which allows us to honor a
pre-linked interpreter image when possible, and eliminate the hard-coded
INTERP_MAP_SIZE value.

Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c |  189 +++---
 1 files changed, 101 insertions(+), 88 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f12161c..723e956 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -811,9 +811,6 @@ struct exec
 #define ZMAGIC 0413
 #define QMAGIC 0314
 
-/* max code+data+bss space allocated to elf interpreter */
-#define INTERP_MAP_SIZE (32 * 1024 * 1024)
-
 /* max code+data+bss+brk space allocated to ET_DYN executables */
 #define ET_DYN_MAP_SIZE (128 * 1024 * 1024)
 
@@ -902,6 +899,7 @@ static inline void bswap_sym(struct elf_sym *sym) { }
 #ifdef USE_ELF_CORE_DUMP
 static int elf_core_dump(int, const CPUState *);
 #endif /* USE_ELF_CORE_DUMP */
+static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias);
 
 /*
  * 'copy_elf_strings()' copies argument/envelope strings from user
@@ -1127,15 +1125,11 @@ static abi_ulong load_elf_interp(struct elfhdr * 
interp_elf_ex,
  char bprm_buf[BPRM_BUF_SIZE])
 {
 struct elf_phdr *elf_phdata  =  NULL;
-struct elf_phdr *eppnt;
-abi_ulong load_addr = 0;
-int load_addr_set = 0;
+abi_ulong load_addr, load_bias, loaddr, hiaddr;
 int retval;
 abi_ulong error;
 int i;
 
-error = 0;
-
 bswap_ehdr(interp_elf_ex);
 /* First of all, some simple consistency checks */
 if ((interp_elf_ex->e_type != ET_EXEC &&
@@ -1144,7 +1138,6 @@ static abi_ulong load_elf_interp(struct elfhdr * 
interp_elf_ex,
 return ~((abi_ulong)0UL);
 }
 
-
 /* Now read in all of the header information */
 
 if (sizeof(struct elf_phdr) * interp_elf_ex->e_phnum > TARGET_PAGE_SIZE)
@@ -1177,41 +1170,56 @@ static abi_ulong load_elf_interp(struct elfhdr * 
interp_elf_ex,
 }
 bswap_phdr(elf_phdata, interp_elf_ex->e_phnum);
 
+/* Find the maximum size of the image and allocate an appropriate
+   amount of memory to handle that.  */
+loaddr = -1, hiaddr = 0;
+for (i = 0; i < interp_elf_ex->e_phnum; ++i) {
+if (elf_phdata[i].p_type == PT_LOAD) {
+abi_ulong a = elf_phdata[i].p_vaddr;
+if (a < loaddr) {
+loaddr = a;
+}
+a += elf_phdata[i].p_memsz;
+if (a > hiaddr) {
+hiaddr = a;
+}
+}
+}
+
+load_addr = loaddr;
 if (interp_elf_ex->e_type == ET_DYN) {
-/* in order to avoid hardcoding the interpreter load
-   address in qemu, we allocate a big enough memory zone */
-error = target_mmap(0, INTERP_MAP_SIZE,
-PROT_NONE, MAP_PRIVATE | MAP_ANON,
--1, 0);
-if (error == -1) {
+/* The image indicates that it can be loaded anywhere.  Find a
+   location that can hold the memory space required.  If the
+   image is pre-linked, LOADDR will be non-zero.  Since we do
+   not supply MAP_FIXED here we'll use that address if and
+   only if it remains available.  */
+load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE,
+MAP_PRIVATE | MAP_ANON | MAP_NORESERVE,
+-1, 0);
+if (load_addr == -1) {
 perror("mmap");
 exit(-1);
 }
-load_addr = error;
-load_addr_set = 1;
 }
+load_bias = load_addr - loaddr;
 
-eppnt = elf_phdata;
-for(i=0; ie_phnum; i++, eppnt++)
+for (i = 0; i < interp_elf_ex->e_phnum; i++) {
+struct elf_phdr *eppnt = elf_phdata + i;
 if (eppnt->p_type == PT_LOAD) {
-int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
+abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em;
 int elf_prot = 0;
-abi_ulong vaddr = 0;
 
 if (eppnt->p_flags & PF_R) elf_prot =  PROT_READ;
 if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
 if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
-if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) {
-elf_type |= MAP_FIXED;
-vaddr = eppnt->p_vaddr;
-}
-error = target_mmap(load_addr+TARGET_ELF_PAGESTART(vaddr),
-eppnt->p_filesz + 
TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr),
-elf_prot,
-elf_type,
-

Re: [Qemu-devel] Patch to improve handling of server sockets

2010-05-05 Thread Reinhard Max

Hi,

I've attached a revised version of the patch against git as of this 
morning.


It has even more FIXMEs in it than the previous version, because of 
the changes that went into QEMU since 0.11.0 and I think I'll have to 
leave those to you guys who are more familiar with QEMUs internals 
than I am, but I hope my pactch can serve as a starting point getting 
the socket handling in QEMU more correct and flexible.


On Wed, 5 May 2010 at 09:29, Daniel P. Berrange wrote:

The approach looks reasonable, though the patch is somewhat mangled 
by the mix of tabs + spaces


Well, that's what XEmacs gives me by default. ;)

How about adding magic comments for vim and emacs to the source files 
that bring the editors in line with the project's coding style to make 
it easier for new contributors to follow the style?


BTW, while converting the tabs to spaces in my changes, I found that 
there are also quite a few tabs in the code already.



cu
Reinharddiff --git a/qemu-char.c b/qemu-char.c
index ac65a1c..aeb5afb 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -1915,7 +1915,8 @@ return_err:
 /* TCP Net console */
 
 typedef struct {
-int fd, listen_fd;
+int fd;
+FdList *listen_fds;
 int connected;
 int max_size;
 int do_telnetopt;
@@ -2068,6 +2069,7 @@ static void tcp_chr_read(void *opaque)
 TCPCharDriver *s = chr->opaque;
 uint8_t buf[READ_BUF_LEN];
 int len, size;
+FdList *fdl;
 
 if (!s->connected || s->max_size <= 0)
 return;
@@ -2078,9 +2080,8 @@ static void tcp_chr_read(void *opaque)
 if (size == 0) {
 /* connection closed */
 s->connected = 0;
-if (s->listen_fd >= 0) {
-qemu_set_fd_handler(s->listen_fd, tcp_chr_accept, NULL, chr);
-}
+for (fdl = s->listen_fds; fdl != NULL; fdl = fdl->next)
+qemu_set_fd_handler(fdl->fd, tcp_chr_accept, NULL, fdl);
 qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
 closesocket(s->fd);
 s->fd = -1;
@@ -2127,7 +2128,8 @@ static void socket_set_nodelay(int fd)
 
 static void tcp_chr_accept(void *opaque)
 {
-CharDriverState *chr = opaque;
+FdList *fds = opaque;
+CharDriverState *chr = fds->opaque;
 TCPCharDriver *s = chr->opaque;
 struct sockaddr_in saddr;
 #ifndef _WIN32
@@ -2148,7 +2150,7 @@ static void tcp_chr_accept(void *opaque)
 	len = sizeof(saddr);
 	addr = (struct sockaddr *)&saddr;
 	}
-fd = qemu_accept(s->listen_fd, addr, &len);
+fd = qemu_accept(fds->fd, addr, &len);
 if (fd < 0 && errno != EINTR) {
 return;
 } else if (fd >= 0) {
@@ -2161,21 +2163,24 @@ static void tcp_chr_accept(void *opaque)
 if (s->do_nodelay)
 socket_set_nodelay(fd);
 s->fd = fd;
-qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
+for (fds = s->listen_fds; fds != NULL; fds = fds->next)
+qemu_set_fd_handler(fds->fd, NULL, NULL, NULL);
 tcp_chr_connect(chr);
 }
 
 static void tcp_chr_close(CharDriverState *chr)
 {
 TCPCharDriver *s = chr->opaque;
+FdList *fds;
 if (s->fd >= 0) {
 qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
 closesocket(s->fd);
 }
-if (s->listen_fd >= 0) {
-qemu_set_fd_handler(s->listen_fd, NULL, NULL, NULL);
-closesocket(s->listen_fd);
+for (fds = s->listen_fds; fds != NULL; fds = fds->next) {
+qemu_set_fd_handler(fds->fd, NULL, NULL, NULL);
+closesocket(fds->fd);
 }
+fdlist_free(fds);
 qemu_free(s);
 qemu_chr_event(chr, CHR_EVENT_CLOSED);
 }
@@ -2190,6 +2195,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 int do_nodelay;
 int is_unix;
 int is_telnet;
+FdList *socks = NULL;
 
 is_listen  = qemu_opt_get_bool(opts, "server", 0);
 is_waitconnect = qemu_opt_get_bool(opts, "wait", 1);
@@ -2205,17 +2211,20 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 if (is_unix) {
 if (is_listen) {
 fd = unix_listen_opts(opts);
+if (fd <= 0)
+socks = fdlist_new(fd);
 } else {
 fd = unix_connect_opts(opts);
 }
 } else {
 if (is_listen) {
-fd = inet_listen_opts(opts, 0);
+socks = inet_listen_opts(opts, 0);
 } else {
 fd = inet_connect_opts(opts);
 }
 }
-if (fd < 0)
+
+if (socks == NULL && fd < 0)
 goto fail;
 
 if (!is_waitconnect)
@@ -2223,7 +2232,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 
 s->connected = 0;
 s->fd = -1;
-s->listen_fd = -1;
+s->listen_fds = NULL;
 s->msgfd = -1;
 s->is_unix = is_unix;
 s->do_nodelay = do_nodelay && !is_unix;
@@ -2234,8 +2243,11 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 chr->get_msgfd = tcp_get_msgfd;
 
 if (is_listen) {
-s->listen_fd = fd;
-qemu_set_fd_handler(s->listen_fd, tcp_chr_accept, NULL, chr

  1   2   >