Thanks, looks good. Some minor comments below, On Thu, May 15, 2014 at 03:16:47PM +0800, Jason Wang wrote: > It's hard to track all mac addresses and their configurations (e.g > vlan or ipv6) in qemu. Without those informations, it's impossible to
s/those informations/this information/ > build proper garp packet after migration. The only possible solution > to this is let guest (who knew all configurations) to do this. s/knew/knows/ > > So, this patch introduces a new readonly config status bit of virtio-net, > VIRTIO_NET_S_ANNOUNCE which is used to notify guest to announce > presence of its link through config update interrupt.When guest has > done the announcement, it should ack the notification through > VIRTIO_NET_CTRL_ANNOUNCE_ACK cmd. This feature is negotiated by a new > feature bit VIRTIO_NET_F_ANNOUNCE (which has already been supported by > Linux guest). > > During load, a counter of announcing rounds were set so that the after s/were/is/ s/the after/after/ > the vm is running it can trigger rounds of config interrupts to notify > the guest to build and send the correct garps. > > Tested with ping to guest with vlan during migration. Without the > patch, lots of the packets were lost after migration. With the patch, > could not notice packet loss after migration. below changelog should go after ---, until the ack list. > > Reference: > RFC v2: https://lists.gnu.org/archive/html/qemu-devel/2014-04/msg01750.html > RFC v1: https://lists.gnu.org/archive/html/qemu-devel/2014-03/msg02648.html > V7: https://lists.gnu.org/archive/html/qemu-devel/2013-03/msg01127.html > > Changes from RFC v2: > - use QEMU_CLOCK_VIRTUAL instead of QEMU_CLOCK_REALTIME > - compat self announce for 2.0 machine type > > Changes from RFC v1: > - clean VIRTIO_NET_S_ANNOUNCE bit during reset > - free announce timer during clean > - make announce work for non-vhost case > > Changes from V7: > - Instead of introducing a global method for each kind of nic, this > version limits the changes to virtio-net itself. > > Cc: Liuyongan <liuyon...@huawei.com> > Cc: Amos Kong <ak...@redhat.com> > Signed-off-by: Jason Wang <jasow...@redhat.com> > --- > hw/net/virtio-net.c | 48 > ++++++++++++++++++++++++++++++++++++++++ > include/hw/i386/pc.h | 5 ++++ > include/hw/virtio/virtio-net.h | 16 +++++++++++++ > 3 files changed, 69 insertions(+), 0 deletions(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 940a7cf..98d59e9 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -25,6 +25,7 @@ > #include "monitor/monitor.h" > > #define VIRTIO_NET_VM_VERSION 11 > +#define VIRTIO_NET_ANNOUNCE_ROUNDS 3 > > #define MAC_TABLE_ENTRIES 64 > #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ I would make it 5 to be consistent with SELF_ANNOUNCE_ROUNDS in savevm.c in fact, let's move SELF_ANNOUNCE_ROUNDS to include/migration/vmstate.h and reuse it. > @@ -99,6 +100,25 @@ static bool virtio_net_started(VirtIONet *n, uint8_t > status) > (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running; > } > > +static void virtio_net_announce_timer(void *opaque) > +{ > + VirtIONet *n = opaque; > + VirtIODevice *vdev = VIRTIO_DEVICE(n); > + > + if (n->announce && I would make it > 0 here, just in case it becomes negative as a result of some bug. > + virtio_net_started(n, vdev->status) && > + vdev->guest_features & (0x1 << VIRTIO_NET_F_GUEST_ANNOUNCE) && > + vdev->guest_features & (0x1 << VIRTIO_NET_F_CTRL_VQ)) { > + > + n->announce--; > + n->status |= VIRTIO_NET_S_ANNOUNCE; > + virtio_notify_config(vdev); > + } else { > + timer_del(n->announce_timer); why do this here? > + n->announce = 0; why is this here? > + } > +} > + > static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) > { > VirtIODevice *vdev = VIRTIO_DEVICE(n); > @@ -147,6 +167,8 @@ static void virtio_net_set_status(struct VirtIODevice > *vdev, uint8_t status) > > virtio_net_vhost_status(n, status); > > + virtio_net_announce_timer(n); > + why do this here? why not right after we set announce counter? > for (i = 0; i < n->max_queues; i++) { > q = &n->vqs[i]; > > @@ -322,6 +344,9 @@ static void virtio_net_reset(VirtIODevice *vdev) > n->nobcast = 0; > /* multiqueue is disabled by default */ > n->curr_queues = 1; > + timer_del(n->announce_timer); > + n->announce = 0; > + n->status &= ~VIRTIO_NET_S_ANNOUNCE; > > /* Flush any MAC and VLAN filter table state */ > n->mac_table.in_use = 0; > @@ -731,6 +756,22 @@ static int virtio_net_handle_vlan_table(VirtIONet *n, > uint8_t cmd, > return VIRTIO_NET_OK; > } > > +static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, > + struct iovec *iov, unsigned int > iov_cnt) > +{ > + if (cmd == VIRTIO_NET_CTRL_ANNOUNCE_ACK) { > + n->status &= ~VIRTIO_NET_S_ANNOUNCE; > + if (n->announce) { I would make it > 0 here, just in case it becomes negative as a result of some bug. > + timer_mod(n->announce_timer, > + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 50 + > + (VIRTIO_NET_ANNOUNCE_ROUNDS - n->announce - 1) * 100); savevm.c has this code, and a comment: /* delay 50ms, 150ms, 250ms, ... */ 50 + (SELF_ANNOUNCE_ROUNDS - count - 1) * 100 how about an API in include/migration/vmstate.h static inline int64_t self_announce_delay(int round) { assert(round < SELF_ANNOUNCE_ROUNDS && round > 0); /* delay 50ms, 150ms, 250ms, ... */ return 50 + (SELF_ANNOUNCE_ROUNDS - round - 1) * 100; } or something to this end. > + } > + return VIRTIO_NET_OK; > + } else { > + return VIRTIO_NET_ERR; > + } > +} > + > static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, > struct iovec *iov, unsigned int iov_cnt) > { > @@ -794,6 +835,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, > VirtQueue *vq) > status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); > } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { > status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); > + } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { > + status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); > } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { > status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); > } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { > @@ -1451,6 +1494,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, > int version_id) > qemu_get_subqueue(n->nic, i)->link_down = link_down; > } > > + n->announce = VIRTIO_NET_ANNOUNCE_ROUNDS; Well if virtio_net_handle_announce runs now it will start timer in the past, right? This seems wrong. > return 0; > } > > @@ -1562,6 +1606,8 @@ static void virtio_net_device_realize(DeviceState *dev, > Error **errp) > qemu_macaddr_default_if_unset(&n->nic_conf.macaddr); > memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac)); > n->status = VIRTIO_NET_S_LINK_UP; > + n->announce_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, > + virtio_net_announce_timer, n); > > if (n->netclient_type) { > /* > @@ -1642,6 +1688,8 @@ static void virtio_net_device_unrealize(DeviceState > *dev, Error **errp) > } > } > > + timer_del(n->announce_timer); > + timer_free(n->announce_timer); > g_free(n->vqs); > qemu_del_nic(n->nic); > virtio_cleanup(vdev); > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index 32a7687..f93b427 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -271,6 +271,11 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t > *); > .driver = "apic",\ > .property = "version",\ > .value = stringify(0x11),\ > + },\ > + {\ > + .driver = "virtio-net-pci",\ > + .property = "guest_announce",\ > + .value = "off",\ > } > > #define PC_COMPAT_1_7 \ > diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h > index 4b32440..ca1a9c5 100644 > --- a/include/hw/virtio/virtio-net.h > +++ b/include/hw/virtio/virtio-net.h > @@ -49,12 +49,14 @@ > #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ > #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ > #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ > +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce itself */ > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > * Steering */ > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ > +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ > > #define TX_TIMER_INTERVAL 150000 /* 150 us */ > > @@ -193,6 +195,8 @@ typedef struct VirtIONet { > char *netclient_name; > char *netclient_type; > uint64_t curr_guest_offloads; > + QEMUTimer *announce_timer; > + int announce; rename announce_counter pls. > } VirtIONet; > > #define VIRTIO_NET_CTRL_MAC 1 > @@ -213,6 +217,17 @@ typedef struct VirtIONet { > #define VIRTIO_NET_CTRL_VLAN_DEL 1 > > /* > + * Control link announce acknowledgement > + * bunch of typos and vagueness below > + * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that > + * driver has recevied the notification and device would clear the > + * VIRTIO_NET_S_ANNOUNCE bit in the status filed after it received > + * this command. fixed version: VIRTIO_NET_S_ANNOUNCE bit in the status field requests link announcement from guest driver. The driver is notified by config space change interrupt. The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that the driver has received the notification. It makes the device clear the bit VIRTIO_NET_S_ANNOUNCE in the status field. > + */ > +#define VIRTIO_NET_CTRL_ANNOUNCE 3 > + #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 > + > +/* > * Control Multiqueue > * > * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET > @@ -251,6 +266,7 @@ struct virtio_net_ctrl_mq { > DEFINE_PROP_BIT("guest_tso6", _state, _field, > VIRTIO_NET_F_GUEST_TSO6, true), \ > DEFINE_PROP_BIT("guest_ecn", _state, _field, VIRTIO_NET_F_GUEST_ECN, > true), \ > DEFINE_PROP_BIT("guest_ufo", _state, _field, VIRTIO_NET_F_GUEST_UFO, > true), \ > + DEFINE_PROP_BIT("guest_announce", _state, _field, > VIRTIO_NET_F_GUEST_ANNOUNCE, true), \ > DEFINE_PROP_BIT("host_tso4", _state, _field, VIRTIO_NET_F_HOST_TSO4, > true), \ > DEFINE_PROP_BIT("host_tso6", _state, _field, VIRTIO_NET_F_HOST_TSO6, > true), \ > DEFINE_PROP_BIT("host_ecn", _state, _field, VIRTIO_NET_F_HOST_ECN, > true), \ > -- > 1.7.1