While working back and forth with dlg@ on his idea of adding async task queues to vmd devices, he introduced a cleaner way to handle virtqueues. In short, we can track the host virtual address of each virtqueue and read/write that memory directly.
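The shape of it, as a rough sketch (names mirror the diff below; hvaddr_mem() is the translation helper the diff adds, and the types here are trimmed stand-ins, not the real vmd declarations):

#include <stddef.h>
#include <stdint.h>

#define VIRTIO_PAGE_SIZE	4096

struct virtio_vq_info {
	uint64_t	 q_gpa;		/* guest physical address of virtq */
	void		*q_hva;		/* host virtual address of virtq */
};

/* From the diff: gpa -> hva translation with a contiguity check. */
void	*hvaddr_mem(uint64_t, size_t);
size_t	 vring_size(unsigned int);

/*
 * When the guest programs a queue's page frame number, translate the
 * guest physical address once and cache the host virtual address.
 * Notify handlers can then touch the vring in place instead of
 * copying the whole ring in and out with read_mem()/write_mem().
 */
void
vq_update_qa(struct virtio_vq_info *vq, uint32_t pfn, unsigned int qsz)
{
	vq->q_gpa = (uint64_t)pfn * VIRTIO_PAGE_SIZE;
	vq->q_hva = hvaddr_mem(vq->q_gpa, vring_size(qsz));
}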
This has the benefit of cleaning up a good chunk of code, mostly in the vioscsi device. If my count is correct, it's close to 100 lines removed. Testing on a variety of systems, I see anywhere from a small (5%) to a large (50%) gain in things like vionet performance depending on the host hardware and the guest OS. (Linux guests push packets faster, on average.) If/when this lands, it'll be easier to introduce the async taskq diff and also clean up more of the virtio codebase. Lots of duplicate logic in basic virtio-pci config handling, like updating queue address or size. Testing is appreciated, but also looking for OKs. diff refs/heads/master refs/heads/vmd-vq commit - 9f38b5b2272f7ca113158ec7943c959ae9b22683 commit + 509bbee70e5a11cfb69b93202a24fb032c72ac43 blob - aea2fc1d5e8903c97cb7076bfb9e5f265f416378 blob + 3a9af7790b24914fb6e7716a049fd0428803af9b --- usr.sbin/vmd/vioscsi.c +++ usr.sbin/vmd/vioscsi.c @@ -81,6 +81,7 @@ vioscsi_next_ring_item(struct vioscsi_dev *dev, struct { used->ring[used->idx & VIOSCSI_QUEUE_MASK].id = idx; used->ring[used->idx & VIOSCSI_QUEUE_MASK].len = desc->len; + __sync_synchronize(); used->idx++; dev->vq[dev->cfg.queue_notify].last_avail = @@ -157,7 +158,7 @@ vioscsi_reg_name(uint8_t reg) switch (reg) { case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; - case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address"; + case VIRTIO_CONFIG_QUEUE_PFN: return "queue pfn"; case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; @@ -1672,8 +1673,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint DPRINTF("%s: guest feature set to %u", __func__, dev->cfg.guest_feature); break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - dev->cfg.queue_address = *data; + case VIRTIO_CONFIG_QUEUE_PFN: + dev->cfg.queue_pfn = *data; vioscsi_update_qa(dev); break; case VIRTIO_CONFIG_QUEUE_SELECT: @@ -1692,7 +1693,7 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint if (dev->cfg.device_status == 0) { log_debug("%s: device reset", __func__); dev->cfg.guest_feature = 0; - dev->cfg.queue_address = 0; + dev->cfg.queue_pfn = 0; vioscsi_update_qa(dev); dev->cfg.queue_size = 0; vioscsi_update_qs(dev); @@ -1988,8 +1989,8 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint case VIRTIO_CONFIG_GUEST_FEATURES: *data = dev->cfg.guest_feature; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - *data = dev->cfg.queue_address; + case VIRTIO_CONFIG_QUEUE_PFN: + *data = dev->cfg.queue_pfn; break; case VIRTIO_CONFIG_QUEUE_SIZE: if (sz == 4) @@ -2033,25 +2034,38 @@ vioscsi_update_qs(struct vioscsi_dev *dev) void vioscsi_update_qs(struct vioscsi_dev *dev) { + struct virtio_vq_info *vq_info; + /* Invalid queue? */ if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) { dev->cfg.queue_size = 0; return; } - /* Update queue address/size based on queue select */ - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; + vq_info = &dev->vq[dev->cfg.queue_select]; + + /* Update queue pfn/size based on queue select */ + dev->cfg.queue_pfn = vq_info->q_gpa >> 12; + dev->cfg.queue_size = vq_info->qs; } void vioscsi_update_qa(struct vioscsi_dev *dev) { + struct virtio_vq_info *vq_info; + void *hva = NULL; + /* Invalid queue? 
*/ if (dev->cfg.queue_select >= VIRTIO_MAX_QUEUES) return; - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; + vq_info = &dev->vq[dev->cfg.queue_select]; + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; + + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOSCSI_QUEUE_SIZE)); + if (hva == NULL) + fatal("vioscsi_update_qa"); + vq_info->q_hva = hva; } /* @@ -2067,13 +2081,12 @@ vioscsi_notifyq(struct vioscsi_dev *dev) int vioscsi_notifyq(struct vioscsi_dev *dev) { - uint64_t q_gpa; - uint32_t vr_sz; - int cnt, ret; + int cnt, ret = 0; char *vr; struct virtio_scsi_req_hdr req; struct virtio_scsi_res_hdr resp; struct virtio_vq_acct acct; + struct virtio_vq_info *vq_info; ret = 0; @@ -2081,34 +2094,21 @@ vioscsi_notifyq(struct vioscsi_dev *dev) if (dev->cfg.queue_notify >= VIRTIO_MAX_QUEUES) return (ret); - vr_sz = vring_size(VIOSCSI_QUEUE_SIZE); - q_gpa = dev->vq[dev->cfg.queue_notify].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; + vq_info = &dev->vq[dev->cfg.queue_notify]; + vr = vq_info->q_hva; + if (vr == NULL) + fatalx("%s: null vring", __func__); - vr = calloc(1, vr_sz); - if (vr == NULL) { - log_warn("%s: calloc error getting vioscsi ring", __func__); - return (ret); - } - - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error reading gpa 0x%llx", __func__, q_gpa); - goto out; - } - /* Compute offsets in ring of descriptors, avail ring, and used ring */ acct.desc = (struct vring_desc *)(vr); - acct.avail = (struct vring_avail *)(vr + - dev->vq[dev->cfg.queue_notify].vq_availoffset); - acct.used = (struct vring_used *)(vr + - dev->vq[dev->cfg.queue_notify].vq_usedoffset); + acct.avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); + acct.used = (struct vring_used *)(vr + vq_info->vq_usedoffset); - acct.idx = - dev->vq[dev->cfg.queue_notify].last_avail & VIOSCSI_QUEUE_MASK; + acct.idx = vq_info->last_avail & VIOSCSI_QUEUE_MASK; if ((acct.avail->idx & VIOSCSI_QUEUE_MASK) == acct.idx) { - log_warnx("%s:nothing to do?", __func__); - goto out; + log_debug("%s - nothing to do?", __func__); + return (0); } cnt = 0; @@ -2180,14 +2180,10 @@ vioscsi_notifyq(struct vioscsi_dev *dev) ret = 1; dev->cfg.isr_status = 1; - /* Move ring indexes */ + + /* Move ring indexes (updates the used ring index) */ vioscsi_next_ring_item(dev, acct.avail, acct.used, acct.req_desc, acct.req_idx); - - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } goto next_msg; } @@ -2202,138 +2198,48 @@ vioscsi_notifyq(struct vioscsi_dev *dev) case TEST_UNIT_READY: case START_STOP: ret = vioscsi_handle_tur(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case PREVENT_ALLOW: ret = vioscsi_handle_prevent_allow(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case READ_TOC: ret = vioscsi_handle_read_toc(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case READ_CAPACITY: ret = vioscsi_handle_read_capacity(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case READ_CAPACITY_16: ret = vioscsi_handle_read_capacity_16(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case READ_COMMAND: ret = vioscsi_handle_read_6(dev, &req, 
&acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case READ_10: ret = vioscsi_handle_read_10(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case INQUIRY: ret = vioscsi_handle_inquiry(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case MODE_SENSE: ret = vioscsi_handle_mode_sense(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case MODE_SENSE_BIG: ret = vioscsi_handle_mode_sense_big(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case GET_EVENT_STATUS_NOTIFICATION: ret = vioscsi_handle_gesn(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case READ_DISC_INFORMATION: ret = vioscsi_handle_read_disc_info(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case GET_CONFIGURATION: ret = vioscsi_handle_get_config(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case MECHANISM_STATUS: ret = vioscsi_handle_mechanism_status(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; case REPORT_LUNS: ret = vioscsi_handle_report_luns(dev, &req, &acct); - if (ret) { - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("%s: error writing vioring", - __func__); - } - } break; default: log_warnx("%s: unsupported opcode 0x%02x,%s", @@ -2348,6 +2254,5 @@ out: acct.idx = (acct.idx + 1) & VIOSCSI_QUEUE_MASK; } out: - free(vr); return (ret); } blob - c1739b9bbbf35804c4e85fa470497fea3a2fff36 blob + 5d91da231b855b670970ed040657497086461636 --- usr.sbin/vmd/virtio.c +++ usr.sbin/vmd/virtio.c @@ -89,7 +89,7 @@ virtio_reg_name(uint8_t reg) switch (reg) { case VIRTIO_CONFIG_DEVICE_FEATURES: return "device feature"; case VIRTIO_CONFIG_GUEST_FEATURES: return "guest feature"; - case VIRTIO_CONFIG_QUEUE_ADDRESS: return "queue address"; + case VIRTIO_CONFIG_QUEUE_PFN: return "queue address"; case VIRTIO_CONFIG_QUEUE_SIZE: return "queue size"; case VIRTIO_CONFIG_QUEUE_SELECT: return "queue select"; case VIRTIO_CONFIG_QUEUE_NOTIFY: return "queue notify"; @@ -123,40 +123,52 @@ viornd_update_qs(void) void viornd_update_qs(void) { + struct virtio_vq_info *vq_info; + /* Invalid queue? */ if (viornd.cfg.queue_select > 0) { viornd.cfg.queue_size = 0; return; } - /* Update queue address/size based on queue select */ - viornd.cfg.queue_address = viornd.vq[viornd.cfg.queue_select].qa; - viornd.cfg.queue_size = viornd.vq[viornd.cfg.queue_select].qs; + vq_info = &viornd.vq[viornd.cfg.queue_select]; + + /* Update queue pfn/size based on queue select */ + viornd.cfg.queue_pfn = vq_info->q_gpa >> 12; + viornd.cfg.queue_size = vq_info->qs; } /* Update queue address */ void viornd_update_qa(void) { + struct virtio_vq_info *vq_info; + void *hva = NULL; + /* Invalid queue? 
*/ if (viornd.cfg.queue_select > 0) return; - viornd.vq[viornd.cfg.queue_select].qa = viornd.cfg.queue_address; + vq_info = &viornd.vq[viornd.cfg.queue_select]; + vq_info->q_gpa = (uint64_t)viornd.cfg.queue_pfn * VIRTIO_PAGE_SIZE; + + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIORND_QUEUE_SIZE)); + if (hva == NULL) + fatal("viornd_update_qa"); + vq_info->q_hva = hva; } int viornd_notifyq(void) { - uint64_t q_gpa; - uint32_t vr_sz; size_t sz; int dxx, ret; uint16_t aidx, uidx; - char *buf, *rnd_data; + char *vr, *rnd_data; struct vring_desc *desc; struct vring_avail *avail; struct vring_used *used; + struct virtio_vq_info *vq_info; ret = 0; @@ -164,27 +176,15 @@ viornd_notifyq(void) if (viornd.cfg.queue_notify > 0) return (0); - vr_sz = vring_size(VIORND_QUEUE_SIZE); - q_gpa = viornd.vq[viornd.cfg.queue_notify].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; + vq_info = &viornd.vq[viornd.cfg.queue_notify]; + vr = vq_info->q_hva; + if (vr == NULL) + fatalx("%s: null vring", __func__); - buf = calloc(1, vr_sz); - if (buf == NULL) { - log_warn("calloc error getting viornd ring"); - return (0); - } + desc = (struct vring_desc *)(vr); + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); + used = (struct vring_used *)(vr + vq_info->vq_usedoffset); - if (read_mem(q_gpa, buf, vr_sz)) { - free(buf); - return (0); - } - - desc = (struct vring_desc *)(buf); - avail = (struct vring_avail *)(buf + - viornd.vq[viornd.cfg.queue_notify].vq_availoffset); - used = (struct vring_used *)(buf + - viornd.vq[viornd.cfg.queue_notify].vq_usedoffset); - aidx = avail->idx & VIORND_QUEUE_MASK; uidx = used->idx & VIORND_QUEUE_MASK; @@ -209,18 +209,13 @@ viornd_notifyq(void) viornd.cfg.isr_status = 1; used->ring[uidx].id = dxx; used->ring[uidx].len = sz; + __sync_synchronize(); used->idx++; - - if (write_mem(q_gpa, buf, vr_sz)) { - log_warnx("viornd: error writing vio ring"); - } } free(rnd_data); } else fatal("memory allocation error for viornd data"); - free(buf); - return (ret); } @@ -241,8 +236,8 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u case VIRTIO_CONFIG_GUEST_FEATURES: viornd.cfg.guest_feature = *data; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - viornd.cfg.queue_address = *data; + case VIRTIO_CONFIG_QUEUE_PFN: + viornd.cfg.queue_pfn = *data; viornd_update_qa(); break; case VIRTIO_CONFIG_QUEUE_SELECT: @@ -266,8 +261,8 @@ virtio_rnd_io(int dir, uint16_t reg, uint32_t *data, u case VIRTIO_CONFIG_GUEST_FEATURES: *data = viornd.cfg.guest_feature; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - *data = viornd.cfg.queue_address; + case VIRTIO_CONFIG_QUEUE_PFN: + *data = viornd.cfg.queue_pfn; break; case VIRTIO_CONFIG_QUEUE_SIZE: *data = viornd.cfg.queue_size; @@ -294,25 +289,38 @@ vioblk_update_qa(struct vioblk_dev *dev) void vioblk_update_qa(struct vioblk_dev *dev) { + struct virtio_vq_info *vq_info; + void *hva = NULL; + /* Invalid queue? */ if (dev->cfg.queue_select > 0) return; - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; + vq_info = &dev->vq[dev->cfg.queue_select]; + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; + + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIOBLK_QUEUE_SIZE)); + if (hva == NULL) + fatal("vioblk_update_qa"); + vq_info->q_hva = hva; } void vioblk_update_qs(struct vioblk_dev *dev) { + struct virtio_vq_info *vq_info; + /* Invalid queue? 
*/ if (dev->cfg.queue_select > 0) { dev->cfg.queue_size = 0; return; } - /* Update queue address/size based on queue select */ - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; + vq_info = &dev->vq[dev->cfg.queue_select]; + + /* Update queue pfn/size based on queue select */ + dev->cfg.queue_pfn = vq_info->q_gpa >> 12; + dev->cfg.queue_size = vq_info->qs; } static void @@ -424,55 +432,41 @@ vioblk_notifyq(struct vioblk_dev *dev) int vioblk_notifyq(struct vioblk_dev *dev) { - uint64_t q_gpa; - uint32_t vr_sz; uint16_t idx, cmd_desc_idx, secdata_desc_idx, ds_desc_idx; uint8_t ds; - int cnt, ret; + int cnt; off_t secbias; char *vr; struct vring_desc *desc, *cmd_desc, *secdata_desc, *ds_desc; struct vring_avail *avail; struct vring_used *used; struct virtio_blk_req_hdr cmd; + struct virtio_vq_info *vq_info; - ret = 0; - /* Invalid queue? */ if (dev->cfg.queue_notify > 0) return (0); - vr_sz = vring_size(VIOBLK_QUEUE_SIZE); - q_gpa = dev->vq[dev->cfg.queue_notify].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; + vq_info = &dev->vq[dev->cfg.queue_notify]; + vr = vq_info->q_hva; + if (vr == NULL) + fatalx("%s: null vring", __func__); - vr = calloc(1, vr_sz); - if (vr == NULL) { - log_warn("calloc error getting vioblk ring"); - return (0); - } - - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("error reading gpa 0x%llx", q_gpa); - goto out; - } - /* Compute offsets in ring of descriptors, avail ring, and used ring */ desc = (struct vring_desc *)(vr); - avail = (struct vring_avail *)(vr + - dev->vq[dev->cfg.queue_notify].vq_availoffset); - used = (struct vring_used *)(vr + - dev->vq[dev->cfg.queue_notify].vq_usedoffset); + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); + used = (struct vring_used *)(vr + vq_info->vq_usedoffset); - idx = dev->vq[dev->cfg.queue_notify].last_avail & VIOBLK_QUEUE_MASK; + idx = vq_info->last_avail & VIOBLK_QUEUE_MASK; if ((avail->idx & VIOBLK_QUEUE_MASK) == idx) { - log_warnx("vioblk queue notify - nothing to do?"); - goto out; + log_debug("%s - nothing to do?", __func__); + return (0); } while (idx != (avail->idx & VIOBLK_QUEUE_MASK)) { + ds = VIRTIO_BLK_S_IOERR; cmd_desc_idx = avail->ring[idx] & VIOBLK_QUEUE_MASK; cmd_desc = &desc[cmd_desc_idx]; @@ -691,22 +685,17 @@ vioblk_notifyq(struct vioblk_dev *dev) goto out; } - ret = 1; dev->cfg.isr_status = 1; used->ring[used->idx & VIOBLK_QUEUE_MASK].id = cmd_desc_idx; used->ring[used->idx & VIOBLK_QUEUE_MASK].len = cmd_desc->len; + __sync_synchronize(); used->idx++; - dev->vq[dev->cfg.queue_notify].last_avail = avail->idx & - VIOBLK_QUEUE_MASK; - if (write_mem(q_gpa, vr, vr_sz)) - log_warnx("%s: error writing vio ring", __func__); - + vq_info->last_avail = avail->idx & VIOBLK_QUEUE_MASK; idx = (idx + 1) & VIOBLK_QUEUE_MASK; } out: - free(vr); - return (ret); + return (1); } int @@ -729,8 +718,8 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u case VIRTIO_CONFIG_GUEST_FEATURES: dev->cfg.guest_feature = *data; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - dev->cfg.queue_address = *data; + case VIRTIO_CONFIG_QUEUE_PFN: + dev->cfg.queue_pfn = *data; vioblk_update_qa(dev); break; case VIRTIO_CONFIG_QUEUE_SELECT: @@ -747,7 +736,7 @@ virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u if (dev->cfg.device_status == 0) { log_debug("%s: device reset", __func__); dev->cfg.guest_feature = 0; - dev->cfg.queue_address = 0; + dev->cfg.queue_pfn = 0; vioblk_update_qa(dev); dev->cfg.queue_size = 0; vioblk_update_qs(dev); @@ -890,8 +879,8 @@ 
virtio_blk_io(int dir, uint16_t reg, uint32_t *data, u case VIRTIO_CONFIG_GUEST_FEATURES: *data = dev->cfg.guest_feature; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - *data = dev->cfg.queue_address; + case VIRTIO_CONFIG_QUEUE_PFN: + *data = dev->cfg.queue_pfn; break; case VIRTIO_CONFIG_QUEUE_SIZE: if (sz == 4) @@ -951,8 +940,8 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u case VIRTIO_CONFIG_GUEST_FEATURES: dev->cfg.guest_feature = *data; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - dev->cfg.queue_address = *data; + case VIRTIO_CONFIG_QUEUE_PFN: + dev->cfg.queue_pfn = *data; vionet_update_qa(dev); break; case VIRTIO_CONFIG_QUEUE_SELECT: @@ -969,7 +958,7 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u if (dev->cfg.device_status == 0) { log_debug("%s: device reset", __func__); dev->cfg.guest_feature = 0; - dev->cfg.queue_address = 0; + dev->cfg.queue_pfn = 0; vionet_update_qa(dev); dev->cfg.queue_size = 0; vionet_update_qs(dev); @@ -1003,8 +992,8 @@ virtio_net_io(int dir, uint16_t reg, uint32_t *data, u case VIRTIO_CONFIG_GUEST_FEATURES: *data = dev->cfg.guest_feature; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - *data = dev->cfg.queue_address; + case VIRTIO_CONFIG_QUEUE_PFN: + *data = dev->cfg.queue_pfn; break; case VIRTIO_CONFIG_QUEUE_SIZE: *data = dev->cfg.queue_size; @@ -1036,11 +1025,20 @@ vionet_update_qa(struct vionet_dev *dev) void vionet_update_qa(struct vionet_dev *dev) { + struct virtio_vq_info *vq_info; + void *hva = NULL; + /* Invalid queue? */ if (dev->cfg.queue_select > 1) return; - dev->vq[dev->cfg.queue_select].qa = dev->cfg.queue_address; + vq_info = &dev->vq[dev->cfg.queue_select]; + vq_info->q_gpa = (uint64_t)dev->cfg.queue_pfn * VIRTIO_PAGE_SIZE; + + hva = hvaddr_mem(vq_info->q_gpa, vring_size(VIONET_QUEUE_SIZE)); + if (hva == NULL) + fatal("vionet_update_qa"); + vq_info->q_hva = hva; } /* @@ -1049,15 +1047,19 @@ vionet_update_qs(struct vionet_dev *dev) void vionet_update_qs(struct vionet_dev *dev) { + struct virtio_vq_info *vq_info; + /* Invalid queue? 
*/ if (dev->cfg.queue_select > 1) { dev->cfg.queue_size = 0; return; } - /* Update queue address/size based on queue select */ - dev->cfg.queue_address = dev->vq[dev->cfg.queue_select].qa; - dev->cfg.queue_size = dev->vq[dev->cfg.queue_select].qs; + vq_info = &dev->vq[dev->cfg.queue_select]; + + /* Update queue pfn/size based on queue select */ + dev->cfg.queue_pfn = vq_info->q_gpa >> 12; + dev->cfg.queue_size = vq_info->qs; } /* @@ -1073,17 +1075,14 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_ int vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_t sz, int *spc) { - uint64_t q_gpa; - uint32_t vr_sz; uint16_t dxx, idx, hdr_desc_idx, chain_hdr_idx; - int ret = 0; char *vr = NULL; size_t bufsz = 0, off = 0, pkt_offset = 0, chunk_size = 0; size_t chain_len = 0; struct vring_desc *desc, *pkt_desc, *hdr_desc; struct vring_avail *avail; struct vring_used *used; - struct vring_used_elem *ue; + struct virtio_vq_info *vq_info; struct virtio_net_hdr hdr; size_t hdr_sz; @@ -1095,33 +1094,23 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_ hdr_sz = sizeof(hdr); if (!(dev->cfg.device_status & VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK)) - return ret; - - vr_sz = vring_size(VIONET_QUEUE_SIZE); - q_gpa = dev->vq[RXQ].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; - - vr = calloc(1, vr_sz); - if (vr == NULL) { - log_warn("rx enq: calloc error getting vionet ring"); return (0); - } - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("rx enq: error reading gpa 0x%llx", q_gpa); - goto out; - } + vq_info = &dev->vq[RXQ]; + vr = vq_info->q_hva; + if (vr == NULL) + fatalx("%s: null vring", __func__); /* Compute offsets in ring of descriptors, avail ring, and used ring */ desc = (struct vring_desc *)(vr); - avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); - used = (struct vring_used *)(vr + dev->vq[RXQ].vq_usedoffset); + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); + used = (struct vring_used *)(vr + vq_info->vq_usedoffset); - idx = dev->vq[RXQ].last_avail & VIONET_QUEUE_MASK; - if ((dev->vq[RXQ].notified_avail & VIONET_QUEUE_MASK) == idx) { + idx = vq_info->last_avail & VIONET_QUEUE_MASK; + if ((vq_info->notified_avail & VIONET_QUEUE_MASK) == idx) { log_debug("%s: insufficient available buffer capacity, " "dropping packet.", __func__); - goto out; + return (0); } hdr_desc_idx = avail->ring[idx] & VIONET_QUEUE_MASK; @@ -1138,7 +1127,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_ if (!(pkt_desc->flags & VRING_DESC_F_WRITE)) { log_warnx("%s: invalid descriptor, not writable", __func__); - goto out; + return (0); } /* How much data do we get to write? */ @@ -1158,7 +1147,7 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_ pkt + pkt_offset, chunk_size)) { log_warnx("%s: failed to write to buffer 0x%llx", __func__, pkt_desc->addr); - goto out; + return (0); } chain_len += chunk_size + off; @@ -1168,19 +1157,8 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_ dxx = pkt_desc->next & VIONET_QUEUE_MASK; } while (bufsz < sz && pkt_desc->flags & VRING_DESC_F_NEXT); - /* Update the list of used buffers. 
*/ - ue = &used->ring[(used->idx) & VIONET_QUEUE_MASK]; - ue->id = chain_hdr_idx; - ue->len = chain_len; - off = ((char *)ue - vr); - if (write_mem(q_gpa + off, ue, sizeof(*ue))) { - log_warnx("%s: error updating rx used ring", __func__); - goto out; - } - /* Move our marker in the ring...*/ - used->idx++; - dev->vq[RXQ].last_avail = (dev->vq[RXQ].last_avail + 1) & + vq_info->last_avail = (vq_info->last_avail + 1) & VIONET_QUEUE_MASK; /* Prepend the virtio net header in the first buffer. */ @@ -1189,23 +1167,21 @@ vionet_enq_rx(struct vionet_dev *dev, char *pkt, size_ if (write_mem(hdr_desc->addr, &hdr, hdr_sz)) { log_warnx("vionet: rx enq header write_mem error @ 0x%llx", hdr_desc->addr); - goto out; + return (0); } /* Update the index field in the used ring. This must be done last. */ dev->cfg.isr_status = 1; - off = (char *)&used->idx - vr; - *spc = (dev->vq[RXQ].notified_avail - dev->vq[RXQ].last_avail) & - VIONET_QUEUE_MASK; + *spc = (vq_info->notified_avail - vq_info->last_avail) + & VIONET_QUEUE_MASK; - if (write_mem(q_gpa + off, &used->idx, sizeof(used->idx))) - log_warnx("vionet: error writing vio ring"); + /* Update the list of used buffers. */ + used->ring[used->idx & VIONET_QUEUE_MASK].id = chain_hdr_idx; + used->ring[used->idx & VIONET_QUEUE_MASK].len = chain_len; + __sync_synchronize(); + used->idx++; - ret = 1; - -out: - free(vr); - return (ret); + return (1); } /* @@ -1277,33 +1253,18 @@ vionet_notify_rx(struct vionet_dev *dev) void vionet_notify_rx(struct vionet_dev *dev) { - uint64_t q_gpa; - uint32_t vr_sz; char *vr; struct vring_avail *avail; + struct virtio_vq_info *vq_info; - vr_sz = vring_size(VIONET_QUEUE_SIZE); - q_gpa = dev->vq[RXQ].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; + vq_info = &dev->vq[RXQ]; + vr = vq_info->q_hva; + if (vr == NULL) + fatalx("%s: null vring", __func__); - vr = malloc(vr_sz); - if (vr == NULL) { - log_warn("malloc error getting vionet ring"); - return; - } - - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("error reading gpa 0x%llx", q_gpa); - free(vr); - return; - } - /* Compute offset into avail ring */ - avail = (struct vring_avail *)(vr + dev->vq[RXQ].vq_availoffset); - - dev->vq[RXQ].notified_avail = avail->idx - 1; - - free(vr); + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); + vq_info->notified_avail = avail->idx - 1; } /* @@ -1312,12 +1273,11 @@ vionet_notifyq(struct vionet_dev *dev) int vionet_notifyq(struct vionet_dev *dev) { - int ret; + int ret = 0; switch (dev->cfg.queue_notify) { case RXQ: vionet_notify_rx(dev); - ret = 0; break; case TXQ: ret = vionet_notify_tx(dev); @@ -1329,7 +1289,6 @@ vionet_notifyq(struct vionet_dev *dev) */ log_debug("%s: notify for unimplemented queue ID %d", __func__, dev->cfg.queue_notify); - ret = 0; break; } @@ -1342,49 +1301,34 @@ vionet_notify_tx(struct vionet_dev *dev) int vionet_notify_tx(struct vionet_dev *dev) { - uint64_t q_gpa; - uint32_t vr_sz; uint16_t idx, pkt_desc_idx, hdr_desc_idx, dxx, cnt; size_t pktsz, chunk_size = 0; - ssize_t dhcpsz; - int ret, num_enq, ofs, spc; - char *vr, *pkt, *dhcppkt; + ssize_t dhcpsz = 0; + int num_enq, ofs, spc = 0; + char *vr = NULL, *pkt = NULL, *dhcppkt = NULL; struct vring_desc *desc, *pkt_desc, *hdr_desc; struct vring_avail *avail; struct vring_used *used; + struct virtio_vq_info *vq_info; struct ether_header *eh; - dhcpsz = 0; - vr = pkt = dhcppkt = NULL; - ret = spc = 0; + vq_info = &dev->vq[TXQ]; + vr = vq_info->q_hva; + if (vr == NULL) + fatalx("%s: null vring", __func__); - vr_sz = vring_size(VIONET_QUEUE_SIZE); - q_gpa = 
dev->vq[TXQ].qa; - q_gpa = q_gpa * VIRTIO_PAGE_SIZE; - - vr = calloc(1, vr_sz); - if (vr == NULL) { - log_warn("calloc error getting vionet ring"); - goto out; - } - - if (read_mem(q_gpa, vr, vr_sz)) { - log_warnx("error reading gpa 0x%llx", q_gpa); - goto out; - } - /* Compute offsets in ring of descriptors, avail ring, and used ring */ desc = (struct vring_desc *)(vr); - avail = (struct vring_avail *)(vr + dev->vq[TXQ].vq_availoffset); - used = (struct vring_used *)(vr + dev->vq[TXQ].vq_usedoffset); + avail = (struct vring_avail *)(vr + vq_info->vq_availoffset); + used = (struct vring_used *)(vr + vq_info->vq_usedoffset); num_enq = 0; - idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; + idx = vq_info->last_avail & VIONET_QUEUE_MASK; if ((avail->idx & VIONET_QUEUE_MASK) == idx) { - log_warnx("vionet tx queue notify - nothing to do?"); - goto out; + log_debug("%s - nothing to do?", __func__); + return (0); } while ((avail->idx & VIONET_QUEUE_MASK) != idx) { @@ -1502,36 +1446,29 @@ vionet_notify_tx(struct vionet_dev *dev) } drop_packet: - ret = 1; dev->cfg.isr_status = 1; used->ring[used->idx & VIONET_QUEUE_MASK].id = hdr_desc_idx; used->ring[used->idx & VIONET_QUEUE_MASK].len = hdr_desc->len; + __sync_synchronize(); used->idx++; - dev->vq[TXQ].last_avail++; + vq_info->last_avail = avail->idx & VIONET_QUEUE_MASK; + idx = (idx + 1) & VIONET_QUEUE_MASK; + num_enq++; - idx = dev->vq[TXQ].last_avail & VIONET_QUEUE_MASK; - free(pkt); pkt = NULL; } - if (write_mem(q_gpa, vr, vr_sz)) { - log_warnx("vionet: tx error writing vio ring"); - } + if (dhcpsz > 0) + vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc); - if (dhcpsz > 0) { - if (vionet_enq_rx(dev, dhcppkt, dhcpsz, &spc)) - ret = 1; - } - out: - free(vr); free(pkt); free(dhcppkt); - return (ret); + return (1); } int @@ -1662,8 +1599,8 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_ case VIRTIO_CONFIG_GUEST_FEATURES: vmmci.cfg.guest_feature = *data; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - vmmci.cfg.queue_address = *data; + case VIRTIO_CONFIG_QUEUE_PFN: + vmmci.cfg.queue_pfn = *data; break; case VIRTIO_CONFIG_QUEUE_SELECT: vmmci.cfg.queue_select = *data; @@ -1703,8 +1640,8 @@ vmmci_io(int dir, uint16_t reg, uint32_t *data, uint8_ case VIRTIO_CONFIG_GUEST_FEATURES: *data = vmmci.cfg.guest_feature; break; - case VIRTIO_CONFIG_QUEUE_ADDRESS: - *data = vmmci.cfg.queue_address; + case VIRTIO_CONFIG_QUEUE_PFN: + *data = vmmci.cfg.queue_pfn; break; case VIRTIO_CONFIG_QUEUE_SIZE: *data = vmmci.cfg.queue_size; blob - 128b469018aefa35f451c1f92be986fc691b26c9 blob + c51f80b6d188a1eef7fd22976dd3073ce2de2d24 --- usr.sbin/vmd/virtio.h +++ usr.sbin/vmd/virtio.h @@ -64,13 +64,18 @@ #define VIRTIO_MAX_QUEUES 3 /* + * Rename the address config register to be more descriptive. + */ +#define VIRTIO_CONFIG_QUEUE_PFN VIRTIO_CONFIG_QUEUE_ADDRESS + +/* * This struct stores notifications from a virtio driver. There is * one such struct per virtio device. 
*/ struct virtio_io_cfg { uint32_t device_feature; uint32_t guest_feature; - uint32_t queue_address; + uint32_t queue_pfn; uint16_t queue_size; uint16_t queue_select; uint16_t queue_notify; @@ -93,8 +98,11 @@ struct virtio_vq_info { */ struct virtio_vq_info { /* Guest physical address of virtq */ - uint32_t qa; + uint64_t q_gpa; + /* Host virtual address of virtq */ + void *q_hva; + /* Queue size: number of queue entries in virtq */ uint32_t qs; blob - f1d9b97741c11f8cc4faa3f79658cd87135d2b29 blob + 383bd66220116cee194d4c7147fdb6b72410b8d2 --- usr.sbin/vmd/vm.c +++ usr.sbin/vmd/vm.c @@ -1991,6 +1991,47 @@ read_mem(paddr_t src, void *buf, size_t len) } /* + * hvaddr_mem + * + * Translate a guest physical address to a host virtual address, checking the + * provided memory range length to confirm it's contiguous within the same + * guest memory range (vm_mem_range). + * + * Parameters: + * gpa: guest physical address to translate + * len: number of bytes in the intended range + * + * Return values: + * void* to host virtual memory on success + * NULL on error, setting errno to: + * EFAULT: gpa falls outside guest memory ranges + * EINVAL: requested len extends beyond memory range + */ +void * +hvaddr_mem(paddr_t gpa, size_t len) +{ + struct vm_mem_range *vmr; + size_t off; + + vmr = find_gpa_range(¤t_vm->vm_params.vmc_params, gpa, len); + if (vmr == NULL) { + log_warnx("%s: failed - invalid gpa: 0x%lx\n", __func__, gpa); + errno = EFAULT; + return (NULL); + } + + off = gpa - vmr->vmr_gpa; + if (len > (vmr->vmr_size - off)) { + log_warnx("%s: failed - invalid memory range: gpa=0x%lx, " + "len=%zu", __func__, gpa, len); + errno = EINVAL; + return (NULL); + } + + return ((char *)vmr->vmr_va + off); +} + +/* * vcpu_assert_pic_irq * * Injects the specified IRQ on the supplied vcpu/vm blob - c27d03df7336a2c8ede22ec0d83b74d327fb3244 blob + 17750811887dc4ae88c8919986d0708228f81169 --- usr.sbin/vmd/vmd.h +++ usr.sbin/vmd/vmd.h @@ -460,6 +460,7 @@ int write_mem(paddr_t, const void *buf, size_t); void vm_pipe_send(struct vm_dev_pipe *, enum pipe_msg_type); enum pipe_msg_type vm_pipe_recv(struct vm_dev_pipe *); int write_mem(paddr_t, const void *buf, size_t); +void* hvaddr_mem(paddr_t, size_t); /* config.c */ int config_init(struct vmd *);
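One note on the __sync_synchronize() calls sprinkled through the diff: now that the used ring is written in place in guest memory, each ring entry has to be visible before the used index moves, since the guest consumes entries as soon as it observes the new idx. Condensed, the completion path is (again a sketch, not part of the patch; the mask is the per-device ring mask, e.g. VIOSCSI_QUEUE_MASK):

#include <stdint.h>

struct vring_used_elem {
	uint32_t	id;	/* head index of the completed chain */
	uint32_t	len;	/* bytes written into the chain */
};

struct vring_used {
	uint16_t		flags;
	uint16_t		idx;
	struct vring_used_elem	ring[];
};

static void
vq_push_used(struct vring_used *used, uint16_t mask, uint32_t id,
    uint32_t len)
{
	used->ring[used->idx & mask].id = id;
	used->ring[used->idx & mask].len = len;
	/* Publish the entry before the index update the guest polls. */
	__sync_synchronize();
	used->idx++;
}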