On Tue, Feb 19, 2019 at 02:24:11PM +0800, Jason Wang wrote: > > On 2019/2/19 上午1:07, Wei Xu wrote: > >On Mon, Feb 18, 2019 at 03:27:21PM +0800, Jason Wang wrote: > >>On 2019/2/14 下午12:26, w...@redhat.com wrote: > >>>From: Wei Xu <w...@redhat.com> > >>> > >>>Add packed ring headcount check. > >>> > >>>Common part of split/packed ring are kept. > >>> > >>>Signed-off-by: Wei Xu <w...@redhat.com> > >>>--- > >>> hw/virtio/virtio.c | 197 > >>> ++++++++++++++++++++++++++++++++++++++++++++++++----- > >>> 1 file changed, 179 insertions(+), 18 deletions(-) > >>> > >>>diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c > >>>index f2ff980..832287b 100644 > >>>--- a/hw/virtio/virtio.c > >>>+++ b/hw/virtio/virtio.c > >>>@@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq) > >>> return vq->vring.avail != 0; > >>> } > >>>+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc > >>>*desc, > >>>+ MemoryRegionCache *cache, int i) > >>>+{ > >>>+ address_space_read_cached(cache, i * sizeof(VRingPackedDesc), > >>>+ desc, sizeof(VRingPackedDesc)); > >>>+ virtio_tswap16s(vdev, &desc->flags); > >>>+ virtio_tswap64s(vdev, &desc->addr); > >>>+ virtio_tswap32s(vdev, &desc->len); > >>>+ virtio_tswap16s(vdev, &desc->id); > >>>+} > >>>+ > >>> static void vring_packed_desc_read_flags(VirtIODevice *vdev, > >>> VRingPackedDesc *desc, MemoryRegionCache *cache, int > >>> i) > >>> { > >>>@@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice > >>>*vdev, VRingDesc *desc, > >>> return VIRTQUEUE_READ_DESC_MORE; > >>> } > >>>-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, > >>>- unsigned int *out_bytes, > >>>- unsigned max_in_bytes, unsigned > >>>max_out_bytes) > >>>+static void virtqueue_split_get_avail_bytes(VirtQueue *vq, > >>>+ unsigned int *in_bytes, unsigned int > >>>*out_bytes, > >>>+ unsigned max_in_bytes, unsigned max_out_bytes) > >>> { > >>> VirtIODevice *vdev = vq->vdev; > >>> unsigned int max, idx; > >>>@@ -679,27 +690,12 @@ void 
virtqueue_get_avail_bytes(VirtQueue *vq, > >>>unsigned int *in_bytes, > >>> int64_t len = 0; > >>> int rc; > >>>- if (unlikely(!vq->vring.desc)) { > >>>- if (in_bytes) { > >>>- *in_bytes = 0; > >>>- } > >>>- if (out_bytes) { > >>>- *out_bytes = 0; > >>>- } > >>>- return; > >>>- } > >>>- > >>> rcu_read_lock(); > >>> idx = vq->last_avail_idx; > >>> total_bufs = in_total = out_total = 0; > >>> max = vq->vring.num; > >>> caches = vring_get_region_caches(vq); > >>>- if (caches->desc.len < max * sizeof(VRingDesc)) { > >>>- virtio_error(vdev, "Cannot map descriptor ring"); > >>>- goto err; > >>>- } > >>>- > >>> while ((rc = virtqueue_num_heads(vq, idx)) > 0) { > >>> MemoryRegionCache *desc_cache = &caches->desc; > >>> unsigned int num_bufs; > >>>@@ -792,6 +788,171 @@ err: > >>> goto done; > >>> } > >>>+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, > >>>+ unsigned int *in_bytes, unsigned int > >>>*out_bytes, > >>>+ unsigned max_in_bytes, unsigned max_out_bytes) > >>>+{ > >>>+ VirtIODevice *vdev = vq->vdev; > >>>+ unsigned int max, idx; > >>>+ unsigned int total_bufs, in_total, out_total; > >>>+ MemoryRegionCache *desc_cache; > >>>+ VRingMemoryRegionCaches *caches; > >>>+ MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; > >>>+ int64_t len = 0; > >>>+ VRingPackedDesc desc; > >>>+ bool wrap_counter; > >>>+ > >>>+ rcu_read_lock(); > >>>+ idx = vq->last_avail_idx; > >>>+ wrap_counter = vq->last_avail_wrap_counter; > >>>+ total_bufs = in_total = out_total = 0; > >>>+ > >>>+ max = vq->vring.num; > >>>+ caches = vring_get_region_caches(vq); > >>>+ desc_cache = &caches->desc; > >>>+ vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx); > >>>+ while (is_desc_avail(&desc, wrap_counter)) { > >>>+ unsigned int num_bufs; > >>>+ unsigned int i = 0; > >>>+ > >>>+ num_bufs = total_bufs; > >>>+ > >>>+ /* Make sure flags has been read before all the fields. 
*/ > >>>+ smp_rmb(); > >>>+ vring_packed_desc_read(vdev, &desc, desc_cache, idx); > >> > >>It's better to have a single function to deal with reading flags and > >>descriptors and check its availability like packed ring. > >There is something different between split and packed ring here. > >For split ring, 'avail_idx' and descriptor are separately used so the > >interfaces of them are straightforward, while the flag and data fields > >of the descriptors for packed ring are mixed and independent accesses to > >them have been brought in, it is good to use them the way they are supposed > >to work. :) > > > >Another neat way is to pack the two operations into a new one, but it would > >introduce memory cache parameter passing. > > > >So personally I prefer to keep it unchanged, still want to sort it out? > > > It's as simple as another helper that calls read_flags() and desc_read()?
OK, will try one. > > Btw, it's better to have consistent naming for the function like > vring_packed_flags_read(). OK, thanks. Wei > > Thanks > > > > > >>
*/ > >>>+ if (++num_bufs > max) { > >>>+ virtio_error(vdev, "Looped descriptor"); > >>>+ goto err; > >>>+ } > >>>+ > >>>+ if (desc.flags & VRING_DESC_F_WRITE) { > >>>+ in_total += desc.len; > >>>+ } else { > >>>+ out_total += desc.len; > >>>+ } > >>>+ if (in_total >= max_in_bytes && out_total >= max_out_bytes) { > >>>+ goto done; > >>>+ } > >>>+ > >>>+ if (desc_cache == &indirect_desc_cache) { > >>>+ if (++i >= max) { > >>>+ break; > >>>+ } > >>>+ vring_packed_desc_read(vdev, &desc, desc_cache, i); > >>>+ } else { > >>>+ if (++idx >= vq->vring.num) { > >>>+ idx -= vq->vring.num; > >>>+ wrap_counter ^= 1; > >>>+ } > >>>+ vring_packed_desc_read(vdev, &desc, desc_cache, idx); > >>>+ } > >>>+ /* Make sure flags has been read after all the other fields */ > >>>+ smp_rmb(); > >> > >>I don't think we need this according to the spec: > >> > >>" > >> > >>The driver always makes the first descriptor in the list > >>available after the rest of the list has been written out into > >>the ring. This guarantees that the device will never observe a > >>partial scatter/gather list in the ring. > >> > >>" > >> > >>So when the first is available, the rest of the chain should be available, > >>otherwise device may see partial chain. > >As always, I will remove it, thanks a lot. 
> > > >Wei > > > >>Thanks > >> > >> > >>>+ } while (desc.flags & VRING_DESC_F_NEXT); > >>>+ > >>>+ if (desc_cache == &indirect_desc_cache) { > >>>+ address_space_cache_destroy(&indirect_desc_cache); > >>>+ total_bufs++; > >>>+ /* We missed one step on for indirect desc */ > >>>+ idx++; > >>>+ if (++idx >= vq->vring.num) { > >>>+ idx -= vq->vring.num; > >>>+ wrap_counter ^= 1; > >>>+ } > >>>+ } else { > >>>+ total_bufs = num_bufs; > >>>+ } > >>>+ > >>>+ desc_cache = &caches->desc; > >>>+ vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx); > >>>+ } > >>>+ > >>>+ /* Record the index and wrap counter for a kick we want */ > >>>+ vq->shadow_avail_idx = idx; > >>>+ vq->avail_wrap_counter = wrap_counter; > >>>+done: > >>>+ address_space_cache_destroy(&indirect_desc_cache); > >>>+ if (in_bytes) { > >>>+ *in_bytes = in_total; > >>>+ } > >>>+ if (out_bytes) { > >>>+ *out_bytes = out_total; > >>>+ } > >>>+ rcu_read_unlock(); > >>>+ return; > >>>+ > >>>+err: > >>>+ in_total = out_total = 0; > >>>+ goto done; > >>>+} > >>>+ > >>>+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, > >>>+ unsigned int *out_bytes, > >>>+ unsigned max_in_bytes, unsigned > >>>max_out_bytes) > >>>+{ > >>>+ uint16_t desc_size; > >>>+ VRingMemoryRegionCaches *caches; > >>>+ > >>>+ if (unlikely(!vq->vring.desc)) { > >>>+ goto err; > >>>+ } > >>>+ > >>>+ caches = vring_get_region_caches(vq); > >>>+ desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? 
> >>>+ sizeof(VRingPackedDesc) : > >>>sizeof(VRingDesc); > >>>+ if (caches->desc.len < vq->vring.num * desc_size) { > >>>+ virtio_error(vq->vdev, "Cannot map descriptor ring"); > >>>+ goto err; > >>>+ } > >>>+ > >>>+ if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { > >>>+ virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes, > >>>+ max_in_bytes, max_out_bytes); > >>>+ } else { > >>>+ virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, > >>>+ max_in_bytes, max_out_bytes); > >>>+ } > >>>+ > >>>+ return; > >>>+err: > >>>+ if (in_bytes) { > >>>+ *in_bytes = 0; > >>>+ } > >>>+ if (out_bytes) { > >>>+ *out_bytes = 0; > >>>+ } > >>>+} > >>>+ > >>> int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, > >>> unsigned int out_bytes) > >>> {