[PATCH 2/6] vmbus: keep pointer to ring buffer page
Avoid going from struct page to virt address (and back) by just keeping pointer to the allocated pages instead of virt address. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 20 +--- drivers/uio/uio_hv_generic.c | 5 +++-- include/linux/hyperv.h | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 33e6db02dbab..56ec0d96d876 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, unsigned long flags; int ret, err = 0; struct page *page; + unsigned int order; if (send_ringbuffer_size % PAGE_SIZE || recv_ringbuffer_size % PAGE_SIZE) return -EINVAL; + order = get_order(send_ringbuffer_size + recv_ringbuffer_size); + spin_lock_irqsave(&newchannel->lock, flags); if (newchannel->state == CHANNEL_OPEN_STATE) { newchannel->state = CHANNEL_OPENING_STATE; @@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, /* Allocate the ring buffer */ page = alloc_pages_node(cpu_to_node(newchannel->target_cpu), - GFP_KERNEL|__GFP_ZERO, - get_order(send_ringbuffer_size + - recv_ringbuffer_size)); + GFP_KERNEL|__GFP_ZERO, order); if (!page) - page = alloc_pages(GFP_KERNEL|__GFP_ZERO, - get_order(send_ringbuffer_size + -recv_ringbuffer_size)); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order); if (!page) { err = -ENOMEM; goto error_set_chnstate; } - newchannel->ringbuffer_pages = page_address(page); + newchannel->ringbuffer_page = page; newchannel->ringbuffer_pagecount = (send_ringbuffer_size + recv_ringbuffer_size) >> PAGE_SHIFT; @@ -239,8 +238,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, error_free_pages: hv_ringbuffer_cleanup(&newchannel->outbound); hv_ringbuffer_cleanup(&newchannel->inbound); - __free_pages(page, -get_order(send_ringbuffer_size + recv_ringbuffer_size)); + __free_pages(page, order); error_set_chnstate: 
newchannel->state = CHANNEL_OPEN_STATE; return err; @@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) hv_ringbuffer_cleanup(&channel->outbound); hv_ringbuffer_cleanup(&channel->inbound); - free_pages((unsigned long)channel->ringbuffer_pages, - get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); + __free_pages(channel->ringbuffer_page, +get_order(channel->ringbuffer_pagecount << PAGE_SHIFT)); out: return ret; diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index a08860260f55..ba67a5267557 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, = container_of(kobj, struct vmbus_channel, kobj); struct hv_device *dev = channel->primary_channel->device_obj; u16 q_idx = channel->offermsg.offer.sub_channel_index; + void *ring_buffer = page_address(channel->ringbuffer_page); dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", q_idx, vma_pages(vma), vma->vm_pgoff); - return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages), + return vm_iomap_memory(vma, virt_to_phys(ring_buffer), channel->ringbuffer_pagecount << PAGE_SHIFT); } @@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev, /* mem resources */ pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings"; pdata->info.mem[TXRX_RING_MAP].addr - = (uintptr_t)dev->channel->ringbuffer_pages; + = (uintptr_t)page_address(dev->channel->ringbuffer_page); pdata->info.mem[TXRX_RING_MAP].size = dev->channel->ringbuffer_pagecount << PAGE_SHIFT; pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6c4575c7f46b..a6c32d2d090b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -739,7 +739,7 @@ struct vmbus_channel { u32 ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ - void *ringbuffer_pages; + struc
[PATCH 1/6] vmbus: pass channel to hv_process_channel_removal
Rather than passing relid and then looking up the channel. Pass the channel directly, since caller already knows it. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 3 +-- drivers/hv/channel_mgmt.c | 17 + drivers/hv/vmbus_drv.c| 3 +-- include/linux/hyperv.h| 2 +- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 741857d80da1..33e6db02dbab 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel) wait_for_completion(&cur_channel->rescind_event); mutex_lock(&vmbus_connection.channel_mutex); vmbus_close_internal(cur_channel); - hv_process_channel_removal( - cur_channel->offermsg.child_relid); + hv_process_channel_removal(cur_channel); } else { mutex_lock(&vmbus_connection.channel_mutex); vmbus_close_internal(cur_channel); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 0f0e091c117c..b7c48ebdf6a1 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid) trace_vmbus_release_relid(&msg, ret); } -void hv_process_channel_removal(u32 relid) +void hv_process_channel_removal(struct vmbus_channel *channel) { + struct vmbus_channel *primary_channel; unsigned long flags; - struct vmbus_channel *primary_channel, *channel; BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - /* -* Make sure channel is valid as we may have raced. 
-*/ - channel = relid2channel(relid); - if (!channel) - return; - BUG_ON(!channel->rescind); + if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid) cpumask_clear_cpu(channel->target_cpu, &primary_channel->alloced_cpus_in_node); - vmbus_release_relid(relid); + vmbus_release_relid(channel->offermsg.child_relid); free_channel(channel); } @@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * The channel is currently not open; * it is safe for us to cleanup the channel. */ - hv_process_channel_removal(rescind->child_relid); + hv_process_channel_removal(channel); } else { complete(&channel->rescind_event); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e6d8fdac6d8b..007ee8e5986a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device) struct vmbus_channel *channel = hv_dev->channel; mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel->offermsg.child_relid); + hv_process_channel_removal(channel); mutex_unlock(&vmbus_connection.channel_mutex); kfree(hv_dev); - } /* The one and only one */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2c3798bcb01c..6c4575c7f46b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_process_channel_removal(u32 relid); +void hv_process_channel_removal(struct vmbus_channel *channel); void vmbus_setevent(struct vmbus_channel *channel); /* -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 5/6] hv_uio_generic: map ringbuffer phys addr
The ring buffer is a contiguous IOVA region and is mapped via its physical address for the sysfs file. Use the same method for the UIO mapping. Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index ba67a5267557..53f5610c6065 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -224,10 +224,10 @@ hv_uio_probe(struct hv_device *dev, /* mem resources */ pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings"; pdata->info.mem[TXRX_RING_MAP].addr - = (uintptr_t)page_address(dev->channel->ringbuffer_page); + = (uintptr_t)virt_to_phys(page_address(dev->channel->ringbuffer_page)); pdata->info.mem[TXRX_RING_MAP].size = dev->channel->ringbuffer_pagecount << PAGE_SHIFT; - pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL; + pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA; pdata->info.mem[INT_PAGE_MAP].name = "int_page"; pdata->info.mem[INT_PAGE_MAP].addr -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 4/6] uio: introduce UIO_MEM_IOVA
Introduce the concept of mapping physical memory locations that are normal memory. The new type UIO_MEM_IOVA is similar to the existing UIO_MEM_PHYS, but the backing memory is not marked as uncached. Also, indent the related switch statement to the currently used style. Signed-off-by: Stephen Hemminger --- drivers/uio/uio.c | 24 +--- include/linux/uio_driver.h | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 0ffb324aa038..e601bd3fbae1 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma) return -EINVAL; vma->vm_ops = &uio_physical_vm_ops; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (idev->info->mem[mi].memtype == UIO_MEM_PHYS) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); /* * We cannot use the vm_iomap_memory() helper here, @@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) } switch (idev->info->mem[mi].memtype) { - case UIO_MEM_PHYS: - ret = uio_mmap_physical(vma); - break; - case UIO_MEM_LOGICAL: - case UIO_MEM_VIRTUAL: - ret = uio_mmap_logical(vma); - break; - default: - ret = -EINVAL; + case UIO_MEM_IOVA: + case UIO_MEM_PHYS: + ret = uio_mmap_physical(vma); + break; + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + ret = uio_mmap_logical(vma); + break; + default: + ret = -EINVAL; } -out: + out: mutex_unlock(&idev->info_lock); return ret; } diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 6f8b68cd460f..a3cd7cb67a69 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info); #define UIO_MEM_PHYS 1 #define UIO_MEM_LOGICAL2 #define UIO_MEM_VIRTUAL 3 +#define UIO_MEM_IOVA 4 /* defines for uio_port->porttype */ #define UIO_PORT_NONE 0 -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org 
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 6/6] uio_hv_generic: defer opening vmbus until first use
This fixes two design flaws in hv_uio_generic. Since hv_uio_probe is called from vmbus_probe with lock held it potentially can cause sleep in an atomic section because vmbus_open will wait for response from host. The hv_uio_generic driver could not handle applications exiting and restarting because the vmbus channel was persistent. Change the semantics so that the buffers are allocated on probe, but not attached to host until device is opened. Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 102 --- 1 file changed, 72 insertions(+), 30 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 53f5610c6065..9bd837accdb5 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -55,6 +55,7 @@ enum hv_uio_map { struct hv_uio_private_data { struct uio_info info; struct hv_device *device; + atomic_t refcnt; void*recv_buf; u32 recv_gpadl; @@ -128,12 +129,10 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, { struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); - struct hv_device *dev = channel->primary_channel->device_obj; - u16 q_idx = channel->offermsg.offer.sub_channel_index; void *ring_buffer = page_address(channel->ringbuffer_page); - dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", - q_idx, vma_pages(vma), vma->vm_pgoff); + if (channel->state != CHANNEL_OPENED_STATE) + return -ENODEV; return vm_iomap_memory(vma, virt_to_phys(ring_buffer), channel->ringbuffer_pagecount << PAGE_SHIFT); @@ -176,57 +175,101 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) } } +/* free the reserved buffers for send and receive */ static void hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata) { - if (pdata->send_gpadl) + if (pdata->send_gpadl) { vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl); - vfree(pdata->send_buf); + pdata->send_gpadl = 0; + vfree(pdata->send_buf); + } - if (pdata->recv_gpadl) + if (pdata->recv_gpadl) 
{ vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl); - vfree(pdata->recv_buf); + pdata->recv_gpadl = 0; + vfree(pdata->recv_buf); + } +} + +/* VMBus primary channel is opened on first use */ +static int +hv_uio_open(struct uio_info *info, struct inode *inode) +{ + struct hv_uio_private_data *pdata + = container_of(info, struct hv_uio_private_data, info); + struct hv_device *dev = pdata->device; + int ret; + + if (atomic_inc_return(&pdata->refcnt) != 1) + return 0; + + ret = vmbus_connect_ring(dev->channel, +hv_uio_channel_cb, dev->channel); + + if (ret == 0) + dev->channel->inbound.ring_buffer->interrupt_mask = 1; + + return ret; +} + +/* VMBus primary channel is closed on last close */ +static int +hv_uio_release(struct uio_info *info, struct inode *inode) +{ + struct hv_uio_private_data *pdata + = container_of(info, struct hv_uio_private_data, info); + struct hv_device *dev = pdata->device; + int ret = 0; + + if (atomic_dec_and_test(&pdata->refcnt)) + ret = vmbus_disconnect_ring(dev->channel); + + return ret; } static int hv_uio_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { + struct vmbus_channel *channel = dev->channel; struct hv_uio_private_data *pdata; + void *ring_buffer; int ret; + /* Communicating with host has to be via shared memory not hypercall */ + if (!channel->offermsg.monitor_allocated) { + dev_err(&dev->device, "vmbus channel requires hypercall\n"); + return -ENOTSUPP; + } + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE, -HV_RING_SIZE * PAGE_SIZE, NULL, 0, -hv_uio_channel_cb, dev->channel); + ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE, + HV_RING_SIZE * PAGE_SIZE); if (ret) goto fail; - /* Communicating with host has to be via shared memory not hypercall */ - if (!dev->channel->offermsg.monitor_allocated) { - dev_err(&dev->device, "vmbus channel requires hypercall\n"); - ret = -ENOTSUPP; - goto fail_c
[PATCH 0/6] fix Hyper-V uio restart
This set of patches fixes the problem where DPDK applications using the hv_uio_generic driver cannot be successfully restarted. Getting this working required a small change to uio to allow mapping memory without marking it non-cached, and a refactoring of how the ring buffer is set up in the vmbus code. It could be backported as a fix to 4.19, but that is not an LTS release, so it is probably not worth it. Stephen Hemminger (6): vmbus: pass channel to hv_process_channel_removal vmbus: keep pointer to ring buffer page vmbus: split ring buffer allocation from open uio: introduce UIO_MEM_IOVA hv_uio_generic: map ringbuffer phys addr uio_hv_generic: defer opening vmbus until first use drivers/hv/channel.c | 276 --- drivers/hv/channel_mgmt.c| 17 +-- drivers/hv/ring_buffer.c | 1 + drivers/hv/vmbus_drv.c | 3 +- drivers/uio/uio.c| 24 +-- drivers/uio/uio_hv_generic.c | 107 ++ include/linux/hyperv.h | 13 +- include/linux/uio_driver.h | 1 + 8 files changed, 262 insertions(+), 180 deletions(-) -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 3/6] vmbus: split ring buffer allocation from open
The UIO driver needs the ring buffer to be persistent(reused) across open/close. Split the allocation and setup of ring buffer out of vmbus_open. For normal usage vmbus_open/vmbus_close there are no changes; only impacts uio_hv_generic which needs to keep ring buffer memory and reuse when application restarts. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 267 ++- drivers/hv/ring_buffer.c | 1 + include/linux/hyperv.h | 9 ++ 3 files changed, 162 insertions(+), 115 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56ec0d96d876..ddadb7efd1cc 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel) } EXPORT_SYMBOL_GPL(vmbus_setevent); -/* - * vmbus_open - Open the specified channel. - */ -int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, -u32 recv_ringbuffer_size, void *userdata, u32 userdatalen, -void (*onchannelcallback)(void *context), void *context) +/* vmbus_free_ring - drop mapping of ring buffer */ +void vmbus_free_ring(struct vmbus_channel *channel) { - struct vmbus_channel_open_channel *open_msg; - struct vmbus_channel_msginfo *open_info = NULL; - unsigned long flags; - int ret, err = 0; - struct page *page; - unsigned int order; + hv_ringbuffer_cleanup(&channel->outbound); + hv_ringbuffer_cleanup(&channel->inbound); - if (send_ringbuffer_size % PAGE_SIZE || - recv_ringbuffer_size % PAGE_SIZE) - return -EINVAL; + if (channel->ringbuffer_page) { + __free_pages(channel->ringbuffer_page, +get_order(channel->ringbuffer_pagecount + << PAGE_SHIFT)); + channel->ringbuffer_page = NULL; + } +} +EXPORT_SYMBOL_GPL(vmbus_free_ring); - order = get_order(send_ringbuffer_size + recv_ringbuffer_size); +/* vmbus_alloc_ring - allocate and map pages for ring buffer */ +int vmbus_alloc_ring(struct vmbus_channel *newchannel, +u32 send_size, u32 recv_size) +{ + struct page *page; + int order; - spin_lock_irqsave(&newchannel->lock, flags); - if 
(newchannel->state == CHANNEL_OPEN_STATE) { - newchannel->state = CHANNEL_OPENING_STATE; - } else { - spin_unlock_irqrestore(&newchannel->lock, flags); + if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE) return -EINVAL; - } - spin_unlock_irqrestore(&newchannel->lock, flags); - - newchannel->onchannel_callback = onchannelcallback; - newchannel->channel_callback_context = context; /* Allocate the ring buffer */ + order = get_order(send_size + recv_size); page = alloc_pages_node(cpu_to_node(newchannel->target_cpu), GFP_KERNEL|__GFP_ZERO, order); if (!page) page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order); - if (!page) { - err = -ENOMEM; - goto error_set_chnstate; - } + if (!page) + return -ENOMEM; newchannel->ringbuffer_page = page; - newchannel->ringbuffer_pagecount = (send_ringbuffer_size + - recv_ringbuffer_size) >> PAGE_SHIFT; + newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT; + newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT; - ret = hv_ringbuffer_init(&newchannel->outbound, page, -send_ringbuffer_size >> PAGE_SHIFT); + return 0; +} +EXPORT_SYMBOL_GPL(vmbus_alloc_ring); - if (ret != 0) { - err = ret; - goto error_free_pages; - } +static int __vmbus_open(struct vmbus_channel *newchannel, + void *userdata, u32 userdatalen, + void (*onchannelcallback)(void *context), void *context) +{ + struct vmbus_channel_open_channel *open_msg; + struct vmbus_channel_msginfo *open_info = NULL; + struct page *page = newchannel->ringbuffer_page; + u32 send_pages, recv_pages; + unsigned long flags; + int err; - ret = hv_ringbuffer_init(&newchannel->inbound, -&page[send_ringbuffer_size >> PAGE_SHIFT], -recv_ringbuffer_size >> PAGE_SHIFT); - if (ret != 0) { - err = ret; - goto error_free_pages; + if (userdatalen > MAX_USER_DEFINED_BYTES) + return -EINVAL; + + send_pages = newchannel->ringbuffer_send_offset; + recv_pages = newchannel
[PATCH 0/6] fix Hyper-V uio restart
This set of patches fixes the problem where DPDK applications using the hv_uio_generic driver cannot be successfully restarted. Getting this working required a small change to uio to allow mapping memory without marking it non-cached, and a refactoring of how the ring buffer is set up in the vmbus code. It could be backported as a fix to 4.19, but that is not an LTS release, so it is probably not worth it. v2 - add refcount unwind in hv_uio_generic open in case of error Stephen Hemminger (6): vmbus: pass channel to hv_process_channel_removal vmbus: keep pointer to ring buffer page vmbus: split ring buffer allocation from open uio: introduce UIO_MEM_IOVA hv_uio_generic: map ringbuffer phys addr uio_hv_generic: defer opening vmbus until first use drivers/hv/channel.c | 276 --- drivers/hv/channel_mgmt.c| 17 +-- drivers/hv/ring_buffer.c | 1 + drivers/hv/vmbus_drv.c | 3 +- drivers/uio/uio.c| 24 +-- drivers/uio/uio_hv_generic.c | 109 ++ include/linux/hyperv.h | 13 +- include/linux/uio_driver.h | 1 + 8 files changed, 264 insertions(+), 180 deletions(-) -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 3/6] vmbus: split ring buffer allocation from open
The UIO driver needs the ring buffer to be persistent(reused) across open/close. Split the allocation and setup of ring buffer out of vmbus_open. For normal usage vmbus_open/vmbus_close there are no changes; only impacts uio_hv_generic which needs to keep ring buffer memory and reuse when application restarts. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 267 ++- drivers/hv/ring_buffer.c | 1 + include/linux/hyperv.h | 9 ++ 3 files changed, 162 insertions(+), 115 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56ec0d96d876..ddadb7efd1cc 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel) } EXPORT_SYMBOL_GPL(vmbus_setevent); -/* - * vmbus_open - Open the specified channel. - */ -int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, -u32 recv_ringbuffer_size, void *userdata, u32 userdatalen, -void (*onchannelcallback)(void *context), void *context) +/* vmbus_free_ring - drop mapping of ring buffer */ +void vmbus_free_ring(struct vmbus_channel *channel) { - struct vmbus_channel_open_channel *open_msg; - struct vmbus_channel_msginfo *open_info = NULL; - unsigned long flags; - int ret, err = 0; - struct page *page; - unsigned int order; + hv_ringbuffer_cleanup(&channel->outbound); + hv_ringbuffer_cleanup(&channel->inbound); - if (send_ringbuffer_size % PAGE_SIZE || - recv_ringbuffer_size % PAGE_SIZE) - return -EINVAL; + if (channel->ringbuffer_page) { + __free_pages(channel->ringbuffer_page, +get_order(channel->ringbuffer_pagecount + << PAGE_SHIFT)); + channel->ringbuffer_page = NULL; + } +} +EXPORT_SYMBOL_GPL(vmbus_free_ring); - order = get_order(send_ringbuffer_size + recv_ringbuffer_size); +/* vmbus_alloc_ring - allocate and map pages for ring buffer */ +int vmbus_alloc_ring(struct vmbus_channel *newchannel, +u32 send_size, u32 recv_size) +{ + struct page *page; + int order; - spin_lock_irqsave(&newchannel->lock, flags); - if 
(newchannel->state == CHANNEL_OPEN_STATE) { - newchannel->state = CHANNEL_OPENING_STATE; - } else { - spin_unlock_irqrestore(&newchannel->lock, flags); + if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE) return -EINVAL; - } - spin_unlock_irqrestore(&newchannel->lock, flags); - - newchannel->onchannel_callback = onchannelcallback; - newchannel->channel_callback_context = context; /* Allocate the ring buffer */ + order = get_order(send_size + recv_size); page = alloc_pages_node(cpu_to_node(newchannel->target_cpu), GFP_KERNEL|__GFP_ZERO, order); if (!page) page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order); - if (!page) { - err = -ENOMEM; - goto error_set_chnstate; - } + if (!page) + return -ENOMEM; newchannel->ringbuffer_page = page; - newchannel->ringbuffer_pagecount = (send_ringbuffer_size + - recv_ringbuffer_size) >> PAGE_SHIFT; + newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT; + newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT; - ret = hv_ringbuffer_init(&newchannel->outbound, page, -send_ringbuffer_size >> PAGE_SHIFT); + return 0; +} +EXPORT_SYMBOL_GPL(vmbus_alloc_ring); - if (ret != 0) { - err = ret; - goto error_free_pages; - } +static int __vmbus_open(struct vmbus_channel *newchannel, + void *userdata, u32 userdatalen, + void (*onchannelcallback)(void *context), void *context) +{ + struct vmbus_channel_open_channel *open_msg; + struct vmbus_channel_msginfo *open_info = NULL; + struct page *page = newchannel->ringbuffer_page; + u32 send_pages, recv_pages; + unsigned long flags; + int err; - ret = hv_ringbuffer_init(&newchannel->inbound, -&page[send_ringbuffer_size >> PAGE_SHIFT], -recv_ringbuffer_size >> PAGE_SHIFT); - if (ret != 0) { - err = ret; - goto error_free_pages; + if (userdatalen > MAX_USER_DEFINED_BYTES) + return -EINVAL; + + send_pages = newchannel->ringbuffer_send_offset; + recv_pages = newchannel
[PATCH 5/6] hv_uio_generic: map ringbuffer phys addr
The ring buffer is a contiguous IOVA region and is mapped via its physical address for the sysfs file. Use the same method for the UIO mapping. Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index ba67a5267557..53f5610c6065 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -224,10 +224,10 @@ hv_uio_probe(struct hv_device *dev, /* mem resources */ pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings"; pdata->info.mem[TXRX_RING_MAP].addr - = (uintptr_t)page_address(dev->channel->ringbuffer_page); + = (uintptr_t)virt_to_phys(page_address(dev->channel->ringbuffer_page)); pdata->info.mem[TXRX_RING_MAP].size = dev->channel->ringbuffer_pagecount << PAGE_SHIFT; - pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL; + pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA; pdata->info.mem[INT_PAGE_MAP].name = "int_page"; pdata->info.mem[INT_PAGE_MAP].addr -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 2/6] vmbus: keep pointer to ring buffer page
Avoid going from struct page to virt address (and back) by just keeping pointer to the allocated pages instead of virt address. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 20 +--- drivers/uio/uio_hv_generic.c | 5 +++-- include/linux/hyperv.h | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 33e6db02dbab..56ec0d96d876 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, unsigned long flags; int ret, err = 0; struct page *page; + unsigned int order; if (send_ringbuffer_size % PAGE_SIZE || recv_ringbuffer_size % PAGE_SIZE) return -EINVAL; + order = get_order(send_ringbuffer_size + recv_ringbuffer_size); + spin_lock_irqsave(&newchannel->lock, flags); if (newchannel->state == CHANNEL_OPEN_STATE) { newchannel->state = CHANNEL_OPENING_STATE; @@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, /* Allocate the ring buffer */ page = alloc_pages_node(cpu_to_node(newchannel->target_cpu), - GFP_KERNEL|__GFP_ZERO, - get_order(send_ringbuffer_size + - recv_ringbuffer_size)); + GFP_KERNEL|__GFP_ZERO, order); if (!page) - page = alloc_pages(GFP_KERNEL|__GFP_ZERO, - get_order(send_ringbuffer_size + -recv_ringbuffer_size)); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order); if (!page) { err = -ENOMEM; goto error_set_chnstate; } - newchannel->ringbuffer_pages = page_address(page); + newchannel->ringbuffer_page = page; newchannel->ringbuffer_pagecount = (send_ringbuffer_size + recv_ringbuffer_size) >> PAGE_SHIFT; @@ -239,8 +238,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, error_free_pages: hv_ringbuffer_cleanup(&newchannel->outbound); hv_ringbuffer_cleanup(&newchannel->inbound); - __free_pages(page, -get_order(send_ringbuffer_size + recv_ringbuffer_size)); + __free_pages(page, order); error_set_chnstate: 
newchannel->state = CHANNEL_OPEN_STATE; return err; @@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) hv_ringbuffer_cleanup(&channel->outbound); hv_ringbuffer_cleanup(&channel->inbound); - free_pages((unsigned long)channel->ringbuffer_pages, - get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); + __free_pages(channel->ringbuffer_page, +get_order(channel->ringbuffer_pagecount << PAGE_SHIFT)); out: return ret; diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index a08860260f55..ba67a5267557 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, = container_of(kobj, struct vmbus_channel, kobj); struct hv_device *dev = channel->primary_channel->device_obj; u16 q_idx = channel->offermsg.offer.sub_channel_index; + void *ring_buffer = page_address(channel->ringbuffer_page); dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", q_idx, vma_pages(vma), vma->vm_pgoff); - return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages), + return vm_iomap_memory(vma, virt_to_phys(ring_buffer), channel->ringbuffer_pagecount << PAGE_SHIFT); } @@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev, /* mem resources */ pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings"; pdata->info.mem[TXRX_RING_MAP].addr - = (uintptr_t)dev->channel->ringbuffer_pages; + = (uintptr_t)page_address(dev->channel->ringbuffer_page); pdata->info.mem[TXRX_RING_MAP].size = dev->channel->ringbuffer_pagecount << PAGE_SHIFT; pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6c4575c7f46b..a6c32d2d090b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -739,7 +739,7 @@ struct vmbus_channel { u32 ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ - void *ringbuffer_pages; + struc
[PATCH 1/6] vmbus: pass channel to hv_process_channel_removal
Rather than passing relid and then looking up the channel. Pass the channel directly, since caller already knows it. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 3 +-- drivers/hv/channel_mgmt.c | 17 + drivers/hv/vmbus_drv.c| 3 +-- include/linux/hyperv.h| 2 +- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 741857d80da1..33e6db02dbab 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel) wait_for_completion(&cur_channel->rescind_event); mutex_lock(&vmbus_connection.channel_mutex); vmbus_close_internal(cur_channel); - hv_process_channel_removal( - cur_channel->offermsg.child_relid); + hv_process_channel_removal(cur_channel); } else { mutex_lock(&vmbus_connection.channel_mutex); vmbus_close_internal(cur_channel); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 0f0e091c117c..b7c48ebdf6a1 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid) trace_vmbus_release_relid(&msg, ret); } -void hv_process_channel_removal(u32 relid) +void hv_process_channel_removal(struct vmbus_channel *channel) { + struct vmbus_channel *primary_channel; unsigned long flags; - struct vmbus_channel *primary_channel, *channel; BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - /* -* Make sure channel is valid as we may have raced. 
-*/ - channel = relid2channel(relid); - if (!channel) - return; - BUG_ON(!channel->rescind); + if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid) cpumask_clear_cpu(channel->target_cpu, &primary_channel->alloced_cpus_in_node); - vmbus_release_relid(relid); + vmbus_release_relid(channel->offermsg.child_relid); free_channel(channel); } @@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * The channel is currently not open; * it is safe for us to cleanup the channel. */ - hv_process_channel_removal(rescind->child_relid); + hv_process_channel_removal(channel); } else { complete(&channel->rescind_event); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e6d8fdac6d8b..007ee8e5986a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device) struct vmbus_channel *channel = hv_dev->channel; mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel->offermsg.child_relid); + hv_process_channel_removal(channel); mutex_unlock(&vmbus_connection.channel_mutex); kfree(hv_dev); - } /* The one and only one */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2c3798bcb01c..6c4575c7f46b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_process_channel_removal(u32 relid); +void hv_process_channel_removal(struct vmbus_channel *channel); void vmbus_setevent(struct vmbus_channel *channel); /* -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH 6/6] uio_hv_generic: defer opening vmbus until first use
This fixes two design flaws in hv_uio_generic. Since hv_uio_probe is called from vmbus_probe with lock held it potentially can cause sleep in an atomic section because vmbus_open will wait for response from host. The hv_uio_generic driver could not handle applications exiting and restarting because the vmbus channel was persistent. Change the semantics so that the buffers are allocated on probe, but not attached to host until device is opened. Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 104 +-- 1 file changed, 74 insertions(+), 30 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 53f5610c6065..f2ec981d66cb 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -55,6 +55,7 @@ enum hv_uio_map { struct hv_uio_private_data { struct uio_info info; struct hv_device *device; + atomic_t refcnt; void*recv_buf; u32 recv_gpadl; @@ -128,12 +129,10 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, { struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); - struct hv_device *dev = channel->primary_channel->device_obj; - u16 q_idx = channel->offermsg.offer.sub_channel_index; void *ring_buffer = page_address(channel->ringbuffer_page); - dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", - q_idx, vma_pages(vma), vma->vm_pgoff); + if (channel->state != CHANNEL_OPENED_STATE) + return -ENODEV; return vm_iomap_memory(vma, virt_to_phys(ring_buffer), channel->ringbuffer_pagecount << PAGE_SHIFT); @@ -176,57 +175,103 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) } } +/* free the reserved buffers for send and receive */ static void hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata) { - if (pdata->send_gpadl) + if (pdata->send_gpadl) { vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl); - vfree(pdata->send_buf); + pdata->send_gpadl = 0; + vfree(pdata->send_buf); + } - if (pdata->recv_gpadl) + if (pdata->recv_gpadl) 
{ vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl); - vfree(pdata->recv_buf); + pdata->recv_gpadl = 0; + vfree(pdata->recv_buf); + } +} + +/* VMBus primary channel is opened on first use */ +static int +hv_uio_open(struct uio_info *info, struct inode *inode) +{ + struct hv_uio_private_data *pdata + = container_of(info, struct hv_uio_private_data, info); + struct hv_device *dev = pdata->device; + int ret; + + if (atomic_inc_return(&pdata->refcnt) != 1) + return 0; + + ret = vmbus_connect_ring(dev->channel, +hv_uio_channel_cb, dev->channel); + + if (ret == 0) + dev->channel->inbound.ring_buffer->interrupt_mask = 1; + else + atomic_dec(&pdata->refcount); + + return ret; +} + +/* VMBus primary channel is closed on last close */ +static int +hv_uio_release(struct uio_info *info, struct inode *inode) +{ + struct hv_uio_private_data *pdata + = container_of(info, struct hv_uio_private_data, info); + struct hv_device *dev = pdata->device; + int ret = 0; + + if (atomic_dec_and_test(&pdata->refcnt)) + ret = vmbus_disconnect_ring(dev->channel); + + return ret; } static int hv_uio_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { + struct vmbus_channel *channel = dev->channel; struct hv_uio_private_data *pdata; + void *ring_buffer; int ret; + /* Communicating with host has to be via shared memory not hypercall */ + if (!channel->offermsg.monitor_allocated) { + dev_err(&dev->device, "vmbus channel requires hypercall\n"); + return -ENOTSUPP; + } + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE, -HV_RING_SIZE * PAGE_SIZE, NULL, 0, -hv_uio_channel_cb, dev->channel); + ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE, + HV_RING_SIZE * PAGE_SIZE); if (ret) goto fail; - /* Communicating with host has to be via shared memory not hypercall */ - if (!dev->channel->offermsg.monitor_allocated) { - dev_err(&dev->device, "vmbus channel requires hypercall\n
[PATCH 4/6] uio: introduce UIO_MEM_IOVA
Introduce the concept of mapping physical memory locations that are normal memory. The new type UIO_MEM_IOVA is similar to the existing UIO_MEM_PHYS but the backing memory is not marked as uncached. Also, indent related switch to the currently used style. Signed-off-by: Stephen Hemminger --- drivers/uio/uio.c | 24 +--- include/linux/uio_driver.h | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 0ffb324aa038..e601bd3fbae1 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma) return -EINVAL; vma->vm_ops = &uio_physical_vm_ops; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (idev->info->mem[mi].memtype == UIO_MEM_PHYS) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); /* * We cannot use the vm_iomap_memory() helper here, @@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) } switch (idev->info->mem[mi].memtype) { - case UIO_MEM_PHYS: - ret = uio_mmap_physical(vma); - break; - case UIO_MEM_LOGICAL: - case UIO_MEM_VIRTUAL: - ret = uio_mmap_logical(vma); - break; - default: - ret = -EINVAL; + case UIO_MEM_IOVA: + case UIO_MEM_PHYS: + ret = uio_mmap_physical(vma); + break; + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + ret = uio_mmap_logical(vma); + break; + default: + ret = -EINVAL; } -out: + out: mutex_unlock(&idev->info_lock); return ret; } diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 6f8b68cd460f..a3cd7cb67a69 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info); #define UIO_MEM_PHYS 1 #define UIO_MEM_LOGICAL2 #define UIO_MEM_VIRTUAL 3 +#define UIO_MEM_IOVA 4 /* defines for uio_port->porttype */ #define UIO_PORT_NONE 0 -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org 
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v3 4/6] uio: introduce UIO_MEM_IOVA
Introduce the concept of mapping physical memory locations that are normal memory. The new type UIO_MEM_IOVA is similar to the existing UIO_MEM_PHYS but the backing memory is not marked as uncached. Also, indent related switch to the currently used style. Signed-off-by: Stephen Hemminger --- drivers/uio/uio.c | 24 +--- include/linux/uio_driver.h | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 0ffb324aa038..e601bd3fbae1 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma) return -EINVAL; vma->vm_ops = &uio_physical_vm_ops; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (idev->info->mem[mi].memtype == UIO_MEM_PHYS) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); /* * We cannot use the vm_iomap_memory() helper here, @@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) } switch (idev->info->mem[mi].memtype) { - case UIO_MEM_PHYS: - ret = uio_mmap_physical(vma); - break; - case UIO_MEM_LOGICAL: - case UIO_MEM_VIRTUAL: - ret = uio_mmap_logical(vma); - break; - default: - ret = -EINVAL; + case UIO_MEM_IOVA: + case UIO_MEM_PHYS: + ret = uio_mmap_physical(vma); + break; + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + ret = uio_mmap_logical(vma); + break; + default: + ret = -EINVAL; } -out: + out: mutex_unlock(&idev->info_lock); return ret; } diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 6f8b68cd460f..a3cd7cb67a69 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info); #define UIO_MEM_PHYS 1 #define UIO_MEM_LOGICAL2 #define UIO_MEM_VIRTUAL 3 +#define UIO_MEM_IOVA 4 /* defines for uio_port->porttype */ #define UIO_PORT_NONE 0 -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org 
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v3 5/6] hv_uio_generic: map ringbuffer phys addr
The ring buffer is contiguous IOVA and is mapped via phys addr for sysfs file. Use same method for the UIO mapping. Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index ba67a5267557..53f5610c6065 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -224,10 +224,10 @@ hv_uio_probe(struct hv_device *dev, /* mem resources */ pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings"; pdata->info.mem[TXRX_RING_MAP].addr - = (uintptr_t)page_address(dev->channel->ringbuffer_page); + = (uintptr_t)virt_to_phys(page_address(dev->channel->ringbuffer_page)); pdata->info.mem[TXRX_RING_MAP].size = dev->channel->ringbuffer_pagecount << PAGE_SHIFT; - pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL; + pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA; pdata->info.mem[INT_PAGE_MAP].name = "int_page"; pdata->info.mem[INT_PAGE_MAP].addr -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v3 6/6] uio_hv_generic: defer opening vmbus until first use
This fixes two design flaws in hv_uio_generic. Since hv_uio_probe is called from vmbus_probe with lock held it potentially can cause sleep in an atomic section because vmbus_open will wait for response from host. The hv_uio_generic driver could not handle applications exiting and restarting because the vmbus channel was persistent. Change the semantics so that the buffers are allocated on probe, but not attached to host until device is opened. Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 104 +-- 1 file changed, 74 insertions(+), 30 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 53f5610c6065..c2493d011225 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -55,6 +55,7 @@ enum hv_uio_map { struct hv_uio_private_data { struct uio_info info; struct hv_device *device; + atomic_t refcnt; void*recv_buf; u32 recv_gpadl; @@ -128,12 +129,10 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, { struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj); - struct hv_device *dev = channel->primary_channel->device_obj; - u16 q_idx = channel->offermsg.offer.sub_channel_index; void *ring_buffer = page_address(channel->ringbuffer_page); - dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", - q_idx, vma_pages(vma), vma->vm_pgoff); + if (channel->state != CHANNEL_OPENED_STATE) + return -ENODEV; return vm_iomap_memory(vma, virt_to_phys(ring_buffer), channel->ringbuffer_pagecount << PAGE_SHIFT); @@ -176,57 +175,103 @@ hv_uio_new_channel(struct vmbus_channel *new_sc) } } +/* free the reserved buffers for send and receive */ static void hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata) { - if (pdata->send_gpadl) + if (pdata->send_gpadl) { vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl); - vfree(pdata->send_buf); + pdata->send_gpadl = 0; + vfree(pdata->send_buf); + } - if (pdata->recv_gpadl) + if (pdata->recv_gpadl) 
{ vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl); - vfree(pdata->recv_buf); + pdata->recv_gpadl = 0; + vfree(pdata->recv_buf); + } +} + +/* VMBus primary channel is opened on first use */ +static int +hv_uio_open(struct uio_info *info, struct inode *inode) +{ + struct hv_uio_private_data *pdata + = container_of(info, struct hv_uio_private_data, info); + struct hv_device *dev = pdata->device; + int ret; + + if (atomic_inc_return(&pdata->refcnt) != 1) + return 0; + + ret = vmbus_connect_ring(dev->channel, +hv_uio_channel_cb, dev->channel); + + if (ret == 0) + dev->channel->inbound.ring_buffer->interrupt_mask = 1; + else + atomic_dec(&pdata->refcnt); + + return ret; +} + +/* VMBus primary channel is closed on last close */ +static int +hv_uio_release(struct uio_info *info, struct inode *inode) +{ + struct hv_uio_private_data *pdata + = container_of(info, struct hv_uio_private_data, info); + struct hv_device *dev = pdata->device; + int ret = 0; + + if (atomic_dec_and_test(&pdata->refcnt)) + ret = vmbus_disconnect_ring(dev->channel); + + return ret; } static int hv_uio_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { + struct vmbus_channel *channel = dev->channel; struct hv_uio_private_data *pdata; + void *ring_buffer; int ret; + /* Communicating with host has to be via shared memory not hypercall */ + if (!channel->offermsg.monitor_allocated) { + dev_err(&dev->device, "vmbus channel requires hypercall\n"); + return -ENOTSUPP; + } + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE, -HV_RING_SIZE * PAGE_SIZE, NULL, 0, -hv_uio_channel_cb, dev->channel); + ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE, + HV_RING_SIZE * PAGE_SIZE); if (ret) goto fail; - /* Communicating with host has to be via shared memory not hypercall */ - if (!dev->channel->offermsg.monitor_allocated) { - dev_err(&dev->device, "vmbus channel requires hypercall\n");
[PATCH v3 2/6] vmbus: keep pointer to ring buffer page
Avoid going from struct page to virt address (and back) by just keeping pointer to the allocated pages instead of virt address. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 20 +--- drivers/uio/uio_hv_generic.c | 5 +++-- include/linux/hyperv.h | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 33e6db02dbab..56ec0d96d876 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, unsigned long flags; int ret, err = 0; struct page *page; + unsigned int order; if (send_ringbuffer_size % PAGE_SIZE || recv_ringbuffer_size % PAGE_SIZE) return -EINVAL; + order = get_order(send_ringbuffer_size + recv_ringbuffer_size); + spin_lock_irqsave(&newchannel->lock, flags); if (newchannel->state == CHANNEL_OPEN_STATE) { newchannel->state = CHANNEL_OPENING_STATE; @@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, /* Allocate the ring buffer */ page = alloc_pages_node(cpu_to_node(newchannel->target_cpu), - GFP_KERNEL|__GFP_ZERO, - get_order(send_ringbuffer_size + - recv_ringbuffer_size)); + GFP_KERNEL|__GFP_ZERO, order); if (!page) - page = alloc_pages(GFP_KERNEL|__GFP_ZERO, - get_order(send_ringbuffer_size + -recv_ringbuffer_size)); + page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order); if (!page) { err = -ENOMEM; goto error_set_chnstate; } - newchannel->ringbuffer_pages = page_address(page); + newchannel->ringbuffer_page = page; newchannel->ringbuffer_pagecount = (send_ringbuffer_size + recv_ringbuffer_size) >> PAGE_SHIFT; @@ -239,8 +238,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, error_free_pages: hv_ringbuffer_cleanup(&newchannel->outbound); hv_ringbuffer_cleanup(&newchannel->inbound); - __free_pages(page, -get_order(send_ringbuffer_size + recv_ringbuffer_size)); + __free_pages(page, order); error_set_chnstate: 
newchannel->state = CHANNEL_OPEN_STATE; return err; @@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel) hv_ringbuffer_cleanup(&channel->outbound); hv_ringbuffer_cleanup(&channel->inbound); - free_pages((unsigned long)channel->ringbuffer_pages, - get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); + __free_pages(channel->ringbuffer_page, +get_order(channel->ringbuffer_pagecount << PAGE_SHIFT)); out: return ret; diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index a08860260f55..ba67a5267557 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj, = container_of(kobj, struct vmbus_channel, kobj); struct hv_device *dev = channel->primary_channel->device_obj; u16 q_idx = channel->offermsg.offer.sub_channel_index; + void *ring_buffer = page_address(channel->ringbuffer_page); dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n", q_idx, vma_pages(vma), vma->vm_pgoff); - return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages), + return vm_iomap_memory(vma, virt_to_phys(ring_buffer), channel->ringbuffer_pagecount << PAGE_SHIFT); } @@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev, /* mem resources */ pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings"; pdata->info.mem[TXRX_RING_MAP].addr - = (uintptr_t)dev->channel->ringbuffer_pages; + = (uintptr_t)page_address(dev->channel->ringbuffer_page); pdata->info.mem[TXRX_RING_MAP].size = dev->channel->ringbuffer_pagecount << PAGE_SHIFT; pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6c4575c7f46b..a6c32d2d090b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -739,7 +739,7 @@ struct vmbus_channel { u32 ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ - void *ringbuffer_pages; + struc
[PATCH v3 1/6] vmbus: pass channel to hv_process_channel_removal
Rather than passing relid and then looking up the channel. Pass the channel directly, since caller already knows it. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 3 +-- drivers/hv/channel_mgmt.c | 17 + drivers/hv/vmbus_drv.c| 3 +-- include/linux/hyperv.h| 2 +- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 741857d80da1..33e6db02dbab 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel) wait_for_completion(&cur_channel->rescind_event); mutex_lock(&vmbus_connection.channel_mutex); vmbus_close_internal(cur_channel); - hv_process_channel_removal( - cur_channel->offermsg.child_relid); + hv_process_channel_removal(cur_channel); } else { mutex_lock(&vmbus_connection.channel_mutex); vmbus_close_internal(cur_channel); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 0f0e091c117c..b7c48ebdf6a1 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid) trace_vmbus_release_relid(&msg, ret); } -void hv_process_channel_removal(u32 relid) +void hv_process_channel_removal(struct vmbus_channel *channel) { + struct vmbus_channel *primary_channel; unsigned long flags; - struct vmbus_channel *primary_channel, *channel; BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - - /* -* Make sure channel is valid as we may have raced. 
-*/ - channel = relid2channel(relid); - if (!channel) - return; - BUG_ON(!channel->rescind); + if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid) cpumask_clear_cpu(channel->target_cpu, &primary_channel->alloced_cpus_in_node); - vmbus_release_relid(relid); + vmbus_release_relid(channel->offermsg.child_relid); free_channel(channel); } @@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * The channel is currently not open; * it is safe for us to cleanup the channel. */ - hv_process_channel_removal(rescind->child_relid); + hv_process_channel_removal(channel); } else { complete(&channel->rescind_event); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e6d8fdac6d8b..007ee8e5986a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device) struct vmbus_channel *channel = hv_dev->channel; mutex_lock(&vmbus_connection.channel_mutex); - hv_process_channel_removal(channel->offermsg.child_relid); + hv_process_channel_removal(channel); mutex_unlock(&vmbus_connection.channel_mutex); kfree(hv_dev); - } /* The one and only one */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2c3798bcb01c..6c4575c7f46b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_process_channel_removal(u32 relid); +void hv_process_channel_removal(struct vmbus_channel *channel); void vmbus_setevent(struct vmbus_channel *channel); /* -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v3 0/6] fix hv_uio_generic open/close
This set of patches fixes the problem where DPDK applications using hv_uio_generic driver can not be successfully restarted. In order to get this working it required small change to uio to allow for mapping without no-cache. And refactoring of how ring buffer is setup in vmbus code. It could be backported as a fix, to 4.19 but that is not an LTS so probably not worth it. v3 - fix typo (sent wrong version for v2) v2 - fix refcount when hv_uio_open fails Stephen Hemminger (6): vmbus: pass channel to hv_process_channel_removal vmbus: keep pointer to ring buffer page vmbus: split ring buffer allocation from open uio: introduce UIO_MEM_IOVA hv_uio_generic: map ringbuffer phys addr uio_hv_generic: defer opening vmbus until first use drivers/hv/channel.c | 276 --- drivers/hv/channel_mgmt.c| 17 +-- drivers/hv/ring_buffer.c | 1 + drivers/hv/vmbus_drv.c | 3 +- drivers/uio/uio.c| 24 +-- drivers/uio/uio_hv_generic.c | 109 ++ include/linux/hyperv.h | 13 +- include/linux/uio_driver.h | 1 + 8 files changed, 264 insertions(+), 180 deletions(-) -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v3 3/6] vmbus: split ring buffer allocation from open
The UIO driver needs the ring buffer to be persistent(reused) across open/close. Split the allocation and setup of ring buffer out of vmbus_open. For normal usage vmbus_open/vmbus_close there are no changes; only impacts uio_hv_generic which needs to keep ring buffer memory and reuse when application restarts. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 267 ++- drivers/hv/ring_buffer.c | 1 + include/linux/hyperv.h | 9 ++ 3 files changed, 162 insertions(+), 115 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 56ec0d96d876..ddadb7efd1cc 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel) } EXPORT_SYMBOL_GPL(vmbus_setevent); -/* - * vmbus_open - Open the specified channel. - */ -int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, -u32 recv_ringbuffer_size, void *userdata, u32 userdatalen, -void (*onchannelcallback)(void *context), void *context) +/* vmbus_free_ring - drop mapping of ring buffer */ +void vmbus_free_ring(struct vmbus_channel *channel) { - struct vmbus_channel_open_channel *open_msg; - struct vmbus_channel_msginfo *open_info = NULL; - unsigned long flags; - int ret, err = 0; - struct page *page; - unsigned int order; + hv_ringbuffer_cleanup(&channel->outbound); + hv_ringbuffer_cleanup(&channel->inbound); - if (send_ringbuffer_size % PAGE_SIZE || - recv_ringbuffer_size % PAGE_SIZE) - return -EINVAL; + if (channel->ringbuffer_page) { + __free_pages(channel->ringbuffer_page, +get_order(channel->ringbuffer_pagecount + << PAGE_SHIFT)); + channel->ringbuffer_page = NULL; + } +} +EXPORT_SYMBOL_GPL(vmbus_free_ring); - order = get_order(send_ringbuffer_size + recv_ringbuffer_size); +/* vmbus_alloc_ring - allocate and map pages for ring buffer */ +int vmbus_alloc_ring(struct vmbus_channel *newchannel, +u32 send_size, u32 recv_size) +{ + struct page *page; + int order; - spin_lock_irqsave(&newchannel->lock, flags); - if 
(newchannel->state == CHANNEL_OPEN_STATE) { - newchannel->state = CHANNEL_OPENING_STATE; - } else { - spin_unlock_irqrestore(&newchannel->lock, flags); + if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE) return -EINVAL; - } - spin_unlock_irqrestore(&newchannel->lock, flags); - - newchannel->onchannel_callback = onchannelcallback; - newchannel->channel_callback_context = context; /* Allocate the ring buffer */ + order = get_order(send_size + recv_size); page = alloc_pages_node(cpu_to_node(newchannel->target_cpu), GFP_KERNEL|__GFP_ZERO, order); if (!page) page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order); - if (!page) { - err = -ENOMEM; - goto error_set_chnstate; - } + if (!page) + return -ENOMEM; newchannel->ringbuffer_page = page; - newchannel->ringbuffer_pagecount = (send_ringbuffer_size + - recv_ringbuffer_size) >> PAGE_SHIFT; + newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT; + newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT; - ret = hv_ringbuffer_init(&newchannel->outbound, page, -send_ringbuffer_size >> PAGE_SHIFT); + return 0; +} +EXPORT_SYMBOL_GPL(vmbus_alloc_ring); - if (ret != 0) { - err = ret; - goto error_free_pages; - } +static int __vmbus_open(struct vmbus_channel *newchannel, + void *userdata, u32 userdatalen, + void (*onchannelcallback)(void *context), void *context) +{ + struct vmbus_channel_open_channel *open_msg; + struct vmbus_channel_msginfo *open_info = NULL; + struct page *page = newchannel->ringbuffer_page; + u32 send_pages, recv_pages; + unsigned long flags; + int err; - ret = hv_ringbuffer_init(&newchannel->inbound, -&page[send_ringbuffer_size >> PAGE_SHIFT], -recv_ringbuffer_size >> PAGE_SHIFT); - if (ret != 0) { - err = ret; - goto error_free_pages; + if (userdatalen > MAX_USER_DEFINED_BYTES) + return -EINVAL; + + send_pages = newchannel->ringbuffer_send_offset; + recv_pages = newchannel
[PATCH v2 0/2] hv_netvsc: associate VF and PV device by serial number
The Hyper-V implementation of the PCI controller has the concept of a 32-bit serial number (not to be confused with the PCI-E serial number). This value is sent in the protocol from the host to indicate that an SR-IOV VF device is attached to a synthetic NIC. Using the serial number (instead of MAC address) to associate the two devices avoids lots of potential problems when there are duplicate MAC addresses from tunnels or layered devices. The patch set is broken into two parts, one is for the PCI controller and the other is for the netvsc device. Normally, these go through different trees, but they are sent together here for better review. The PCI changes were submitted previously, but the main review comment was "why do you need this?". This is why. v2 - slot name can be shorter. remove locking when creating pci_slots; see comment for explanation Stephen Hemminger (2): PCI: hv: support reporting serial number as slot information hv_netvsc: pair VF based on serial number drivers/net/hyperv/netvsc.c | 3 ++ drivers/net/hyperv/netvsc_drv.c | 58 - drivers/pci/controller/pci-hyperv.c | 37 ++ 3 files changed, 73 insertions(+), 25 deletions(-) -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v2 2/2] hv_netvsc: pair VF based on serial number
Matching network device based on MAC address is problematic since a non VF network device can be created with a duplicate MAC address causing confusion and problems. The VMBus API does provide a serial number that is a better matching method. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 3 ++ drivers/net/hyperv/netvsc_drv.c | 58 +++-- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 31c3d77b4733..fe01e141c8f8 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1203,6 +1203,9 @@ static void netvsc_send_vf(struct net_device *ndev, net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; + netdev_info(ndev, "VF slot %u %s\n", + net_device_ctx->vf_serial, + net_device_ctx->vf_alloc ? "added" : "removed"); } static void netvsc_receive_inband(struct net_device *ndev, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1121a1ec407c..9dedc1463e88 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1894,20 +1894,6 @@ static void netvsc_link_change(struct work_struct *w) rtnl_unlock(); } -static struct net_device *get_netvsc_bymac(const u8 *mac) -{ - struct net_device_context *ndev_ctx; - - list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { - struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx); - - if (ether_addr_equal(mac, dev->perm_addr)) - return dev; - } - - return NULL; -} - static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) { struct net_device_context *net_device_ctx; @@ -2036,26 +2022,48 @@ static void netvsc_vf_setup(struct work_struct *w) rtnl_unlock(); } +/* Find netvsc by VMBus serial number. + * The PCI hyperv controller records the serial number as the slot. 
+ */ +static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) +{ + struct device *parent = vf_netdev->dev.parent; + struct net_device_context *ndev_ctx; + struct pci_dev *pdev; + + if (!parent || !dev_is_pci(parent)) + return NULL; /* not a PCI device */ + + pdev = to_pci_dev(parent); + if (!pdev->slot) { + netdev_notice(vf_netdev, "no PCI slot information\n"); + return NULL; + } + + list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) { + if (!ndev_ctx->vf_alloc) + continue; + + if (ndev_ctx->vf_serial == pdev->slot->number) + return hv_get_drvdata(ndev_ctx->device_ctx); + } + + netdev_notice(vf_netdev, + "no netdev found for slot %u\n", pdev->slot->number); + return NULL; +} + static int netvsc_register_vf(struct net_device *vf_netdev) { - struct net_device *ndev; struct net_device_context *net_device_ctx; - struct device *pdev = vf_netdev->dev.parent; struct netvsc_device *netvsc_dev; + struct net_device *ndev; int ret; if (vf_netdev->addr_len != ETH_ALEN) return NOTIFY_DONE; - if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev)) - return NOTIFY_DONE; - - /* -* We will use the MAC address to locate the synthetic interface to -* associate with the VF interface. If we don't find a matching -* synthetic interface, move on. -*/ - ndev = get_netvsc_bymac(vf_netdev->perm_addr); + ndev = get_netvsc_byslot(vf_netdev); if (!ndev) return NOTIFY_DONE; -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH v2 1/2] PCI: hv: support reporting serial number as slot information
The Hyper-V host API for PCI provides a unique "serial number" which can be used as the basis for the sysfs PCI slot table. This can be useful for cases where userspace wants to find the PCI device based on serial number. When an SR-IOV NIC is added, the host sends an attach message with the serial number. The kernel doesn't use the serial number, but it is useful when doing the same thing in a userspace driver such as the DPDK. By having /sys/bus/pci/slots/N it provides a direct way to find the matching PCI device. There may be some cases where the serial number is not unique, such as when using GPUs. But the PCI slot infrastructure will handle that. This has a side effect which may also be useful. The common udev network device naming policy uses the slot information (rather than PCI address). Signed-off-by: Stephen Hemminger --- drivers/pci/controller/pci-hyperv.c | 37 + 1 file changed, 37 insertions(+) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index c00f82cc54aa..ee80e79db21a 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -89,6 +89,9 @@ static enum pci_protocol_version_t pci_protocol_version; #define STATUS_REVISION_MISMATCH 0xC059 +/* space for 32bit serial number as string */ +#define SLOT_NAME_SIZE 11 + /* * Message Types */ @@ -494,6 +497,7 @@ struct hv_pci_dev { struct list_head list_entry; refcount_t refs; enum hv_pcichild_state state; + struct pci_slot *pci_slot; struct pci_function_description desc; bool reported_missing; struct hv_pcibus_device *hbus; @@ -1457,6 +1461,34 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus) spin_unlock_irqrestore(&hbus->device_list_lock, flags); } +/* + * Assign entries in sysfs pci slot directory. + * + * Note that this function does not need to lock the children list + * because it is called from pci_devices_present_work which + * is serialized with hv_eject_device_work because they are on the + * same ordered workqueue. 
Therefore hbus->children list will not change + * even when pci_create_slot sleeps. + */ +static void hv_pci_assign_slots(struct hv_pcibus_device *hbus) +{ + struct hv_pci_dev *hpdev; + char name[SLOT_NAME_SIZE]; + int slot_nr; + + list_for_each_entry(hpdev, &hbus->children, list_entry) { + if (hpdev->pci_slot) + continue; + + slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot)); + snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser); + hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr, + name, NULL); + if (!hpdev->pci_slot) + pr_warn("pci_create slot %s failed\n", name); + } +} + /** * create_root_hv_pci_bus() - Expose a new root PCI bus * @hbus: Root PCI bus, as understood by this driver @@ -1480,6 +1512,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus) pci_lock_rescan_remove(); pci_scan_child_bus(hbus->pci_bus); pci_bus_assign_resources(hbus->pci_bus); + hv_pci_assign_slots(hbus); pci_bus_add_devices(hbus->pci_bus); pci_unlock_rescan_remove(); hbus->state = hv_pcibus_installed; @@ -1742,6 +1775,7 @@ static void pci_devices_present_work(struct work_struct *work) */ pci_lock_rescan_remove(); pci_scan_child_bus(hbus->pci_bus); + hv_pci_assign_slots(hbus); pci_unlock_rescan_remove(); break; @@ -1858,6 +1892,9 @@ static void hv_eject_device_work(struct work_struct *work) list_del(&hpdev->list_entry); spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags); + if (hpdev->pci_slot) + pci_destroy_slot(hpdev->pci_slot); + memset(&ctxt, 0, sizeof(ctxt)); ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message; ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE; -- 2.18.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next 17/22] hv_netvsc: fix return type of ndo_start_xmit function
On Thu, 20 Sep 2018 20:33:01 +0800 YueHaibing wrote: > The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', > which is a typedef for an enum type, so make sure the implementation in > this driver has returns 'netdev_tx_t' value, and change the function > return type to netdev_tx_t. > > Found by coccinelle. > > Signed-off-by: YueHaibing > --- > drivers/net/hyperv/netvsc_drv.c | 10 +++--- > 1 file changed, 7 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c > index 3af6d8d..056c472 100644 > --- a/drivers/net/hyperv/netvsc_drv.c > +++ b/drivers/net/hyperv/netvsc_drv.c > @@ -511,7 +511,8 @@ static int netvsc_vf_xmit(struct net_device *net, struct > net_device *vf_netdev, > return rc; > } > > -static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) > +static netdev_tx_t > +netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) > { > struct net_device_context *net_device_ctx = netdev_priv(net); > struct hv_netvsc_packet *packet = NULL; > @@ -528,8 +529,11 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct > net_device *net) >*/ > vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev); > if (vf_netdev && netif_running(vf_netdev) && > - !netpoll_tx_running(net)) > - return netvsc_vf_xmit(net, vf_netdev, skb); > + !netpoll_tx_running(net)) { > + ret = netvsc_vf_xmit(net, vf_netdev, skb); > + if (ret) > + return NETDEV_TX_BUSY; > + } Sorry, the new code is wrong. It will fall through if ret == 0 (NETDEV_TX_OK) Please review and test your patches. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH v2 0/2] hv_netvsc: associate VF and PV device by serial number
On Thu, 20 Sep 2018 15:18:20 +0100 Lorenzo Pieralisi wrote: > On Fri, Sep 14, 2018 at 12:54:55PM -0700, Stephen Hemminger wrote: > > The Hyper-V implementation of PCI controller has concept of 32 bit serial > > number > > (not to be confused with PCI-E serial number). This value is sent in the > > protocol > > from the host to indicate SR-IOV VF device is attached to a synthetic NIC. > > > > Using the serial number (instead of MAC address) to associate the two > > devices > > avoids lots of potential problems when there are duplicate MAC addresses > > from > > tunnels or layered devices. > > > > The patch set is broken into two parts, one is for the PCI controller > > and the other is for the netvsc device. Normally, these go through different > > trees but sending them together here for better review. The PCI changes > > were submitted previously, but the main review comment was "why do you > > need this?". This is why. > > The question was more whether we should convert this serial number into > a PCI slot number (that has user space visibility and that is what you are > after) to improve the current matching, I do not question why you need > it, just for the records. The name "slot" is way overloaded in this context. There is: (1) the windows slot number, which comes from Hyper-V; (2) the PCI address slot, which pci-hyperv sets from the windows slot; and (3) the PCI slot API value, which for normal devices comes from ACPI, and which this patch gets from the serial number. The netvsc driver needed to be able to find a PCI device based on the serial number. The serial number was not visible in any current PCI-hyperv controller values. The windows slot (wslot) is not the same as the serial number. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
On Thu, 20 Sep 2018 17:06:59 + Haiyang Zhang wrote: > +static inline void rsc_add_data > + (struct netvsc_channel *nvchan, > + const struct ndis_pkt_8021q_info *vlan, > + const struct ndis_tcp_ip_checksum_info *csum_info, > + void *data, u32 len) > +{ Could this be changed to look more like a function and skip the inline? The compiler will end up inlining it anyway. static void rsc_add_data(struct netvsc_channel *nvchan, ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
On Thu, 20 Sep 2018 20:56:46 + Haiyang Zhang wrote: > > -Original Message- > > From: Stephen Hemminger > > Sent: Thursday, September 20, 2018 4:48 PM > > To: Haiyang Zhang > > Cc: Haiyang Zhang ; da...@davemloft.net; > > net...@vger.kernel.org; o...@aepfle.de; linux-ker...@vger.kernel.org; > > de...@linuxdriverproject.org; vkuznets > > Subject: Re: [PATCH net-next, 1/3] hv_netvsc: Add support for LRO/RSC in the > > vSwitch > > > > On Thu, 20 Sep 2018 17:06:59 + > > Haiyang Zhang wrote: > > > > > +static inline void rsc_add_data > > > + (struct netvsc_channel *nvchan, > > > + const struct ndis_pkt_8021q_info *vlan, > > > + const struct ndis_tcp_ip_checksum_info *csum_info, > > > + void *data, u32 len) > > > +{ > > > > Could this be changed to look more like a function and skip the inline. > > The compiler will end up inlining it anyway. > > > > static void rsc_add_data(struct netvsc_channel *nvchan, > > How about this? > static inline > void rsc_add_data(struct netvsc_channel *nvchan, > Sure that matches other code in that file ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
On Fri, 21 Sep 2018 18:20:35 + Haiyang Zhang wrote: Overall, this looks good. Some minor suggestions. > +struct nvsc_rsc { > + const struct ndis_pkt_8021q_info *vlan; > + const struct ndis_tcp_ip_checksum_info *csum_info; > + u8 is_last; /* last RNDIS msg in a vmtransfer_page */ > + u32 cnt; /* #fragments in an RSC packet */ > + u32 pktlen; /* Full packet length */ > + void *data[NVSP_RSC_MAX]; > + u32 len[NVSP_RSC_MAX]; > +}; > + This new state structure is state on a per-channel basis. Do you really need this to be persistent across packets? Could this be on stack or do you need it to handle split packets arriving in different polls? Or is the stack space a problem? Also, maybe data and length could be in one structure since they are related. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
On Fri, 21 Sep 2018 18:51:54 + Haiyang Zhang wrote: > > -Original Message- > > From: Stephen Hemminger > > Sent: Friday, September 21, 2018 2:37 PM > > To: Haiyang Zhang > > Cc: Haiyang Zhang ; da...@davemloft.net; > > net...@vger.kernel.org; o...@aepfle.de; linux-ker...@vger.kernel.org; > > de...@linuxdriverproject.org; vkuznets > > Subject: Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in > > the vSwitch > > > > On Fri, 21 Sep 2018 18:20:35 + > > Haiyang Zhang wrote: > > > > Overall, this looks good. Some minor suggestions. > > > > > +struct nvsc_rsc { > > > + const struct ndis_pkt_8021q_info *vlan; > > > + const struct ndis_tcp_ip_checksum_info *csum_info; > > > + u8 is_last; /* last RNDIS msg in a vmtransfer_page */ > > > + u32 cnt; /* #fragments in an RSC packet */ > > > + u32 pktlen; /* Full packet length */ > > > + void *data[NVSP_RSC_MAX]; > > > + u32 len[NVSP_RSC_MAX]; > > > +}; > > > + > > > > This new state structure is state on a per-channel basis. > > Do you really need this to be persistent across packets? > > > > Could this be on stack or do you need it to handle split packets arriving in > > different polls? Or is the stack space a problem? > > > > Also, maybe data and length could be in one structure since they are > > related. > > The stack space is a problem. NVSP_RSC_MAX is 562, which is defined by host. > It will be too large for limited stack space. > > struct nvsc_rsc includes the data, len, cnt, chksum, vlan for one RSC packet. > They > are all related to construction of one SKB and its meta data. So I put them in > one structure. > > Thanks, > - Haiyang > That makes sense. How big is sizeof(struct net_device) + netdev_priv now? Need to make sure it doesn't become an order 2 (ie keep it less than 4K). ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next] hv_netvsc: fix return type of ndo_start_xmit function
On Wed, 26 Sep 2018 18:25:10 +0800 YueHaibing wrote: > The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', > which is a typedef for an enum type, so make sure the implementation in > this driver has returns 'netdev_tx_t' value, and change the function > return type to netdev_tx_t. > > As suggestion from Haiyang Zhang , if netvsc_vf_xmit > fails, We are not sure if the error can go away after retrying, returning > NETDEV_TX_BUSY may cause infinite retry from the upper layer. > so just return NETDEV_TX_OK at there. > > Found by coccinelle. > > Signed-off-by: YueHaibing > --- > drivers/net/hyperv/netvsc_drv.c | 9 ++--- > 1 file changed, 6 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c > index ec69974..a1d44b4 100644 > --- a/drivers/net/hyperv/netvsc_drv.c > +++ b/drivers/net/hyperv/netvsc_drv.c > @@ -511,7 +511,8 @@ static int netvsc_vf_xmit(struct net_device *net, struct > net_device *vf_netdev, > return rc; > } > > -static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) > +static netdev_tx_t > +netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) > { > struct net_device_context *net_device_ctx = netdev_priv(net); > struct hv_netvsc_packet *packet = NULL; > @@ -528,8 +529,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct > net_device *net) >*/ > vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev); > if (vf_netdev && netif_running(vf_netdev) && > - !netpoll_tx_running(net)) > - return netvsc_vf_xmit(net, vf_netdev, skb); > + !netpoll_tx_running(net)) { > + netvsc_vf_xmit(net, vf_netdev, skb); > + return NETDEV_TX_OK; > + } > > /* We will atmost need two pages to describe the rndis >* header. We can only transmit MAX_PAGE_BUFFER_COUNT number Your patch loses the possible return values of netvsc_vf_xmit. A suggested better fix would be to make netvsc_vf_xmit return netdev_tx_t type. 
And this means the return value of dev_queue_xmit needs to be netdev_tx_t. Please don't merge this as is. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] hv_netvsc: Make sure out channel is fully opened on send
On Thu, 27 Sep 2018 10:57:05 +0200 Mohammed Gamal wrote: > On Wed, 2018-09-26 at 17:13 +, Haiyang Zhang wrote: > > > -Original Message- > > > From: Mohammed Gamal > > > Sent: Wednesday, September 26, 2018 12:34 PM > > > To: Stephen Hemminger ; netdev@vger.kernel. > > > org > > > Cc: KY Srinivasan ; Haiyang Zhang > > > ; vkuznets ; > > > ot...@redhat.com; cavery ; linux- > > > ker...@vger.kernel.org; de...@linuxdriverproject.org; Mohammed > > > Gamal > > > > > > Subject: [PATCH] hv_netvsc: Make sure out channel is fully opened > > > on send > > > > > > Dring high network traffic changes to network interface parameters > > > such as > > > number of channels or MTU can cause a kernel panic with a NULL > > > pointer > > > dereference. This is due to netvsc_device_remove() being called and > > > deallocating the channel ring buffers, which can then be accessed > > > by > > > netvsc_send_pkt() before they're allocated on calling > > > netvsc_device_add() > > > > > > The patch fixes this problem by checking the channel state and > > > returning > > > ENODEV if not yet opened. We also move the call to > > > hv_ringbuf_avail_percent() > > > which may access the uninitialized ring buffer. 
> > > > > > Signed-off-by: Mohammed Gamal > > > --- > > >  drivers/net/hyperv/netvsc.c | 7 ++- > > >  1 file changed, 6 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/net/hyperv/netvsc.c > > > b/drivers/net/hyperv/netvsc.c index > > > fe01e14..75f1b31 100644 > > > --- a/drivers/net/hyperv/netvsc.c > > > +++ b/drivers/net/hyperv/netvsc.c > > > @@ -825,7 +825,12 @@ static inline int netvsc_send_pkt( > > >  struct netdev_queue *txq = netdev_get_tx_queue(ndev, > > > packet->q_idx); > > >  u64 req_id; > > >  int ret; > > > - u32 ring_avail = > > > hv_get_avail_to_write_percent(&out_channel- > > > > outbound); > > > > > > + u32 ring_avail; > > > + > > > + if (out_channel->state != CHANNEL_OPENED_STATE) > > > + return -ENODEV; > > > + > > > + ring_avail = hv_get_avail_to_write_percent(&out_channel- > > > >outbound); > > > > When you reproducing the NULL ptr panic, does your kernel include the > > following patch? > > hv_netvsc: common detach logic > > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/c > > ommit/?id=7b2ee50c0cd513a176a26a71f2989facdd75bfea > > > Yes it is included. And the commit did reduce the occurrence of this > race condition, but it still nevertheless occurs albeit rarely. > > > We call netif_tx_disable(ndev) and netif_device_detach(ndev) before > > doing the changes > > on MTU or #channels. So there should be no call to start_xmit() when > > channel is not ready. > > > > If you see the check for CHANNEL_OPENED_STATE is still necessary on > > upstream kernel (including > > the patch " common detach logic "), we should debug further on the > > code and find out the > > root cause. > > > > Thanks, > > - Haiyang > > > ___ > devel mailing list > de...@linuxdriverproject.org > http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel Is there some workload, that can be used to reproduce this? The stress test from Vitaly with changing parameters while running network traffic passes now. 
Can you reproduce this with the current upstream kernel? Adding the check in start_xmit is still racy, and won't cure the problem. Another solution would be to add a grace period in the netvsc detach logic. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next, v2] hv_netvsc: fix vf serial matching with pci slot info
On Fri, 12 Oct 2018 20:55:15 + Haiyang Zhang wrote: Thanks for fixing this. > + if (kstrtou32(kobject_name(&pdev->slot->kobj), 10, &serial)) { > + netdev_notice(vf_netdev, "Invalid vf serial:%s\n", > + pdev->slot->kobj.name); > + return NULL; > + } Shouldn't this use kobject_name() in the message as well? Looking at the pci.h code, there is already an API to get the name from a slot, pci_slot_name() (it uses kobject_name()). So please use that one. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next, v3] hv_netvsc: fix vf serial matching with pci slot info
On Mon, 15 Oct 2018 19:06:15 + Haiyang Zhang wrote: > From: Haiyang Zhang > > The VF device's serial number is saved as a string in PCI slot's > kobj name, not the slot->number. This patch corrects the netvsc > driver, so the VF device can be successfully paired with synthetic > NIC. > > Fixes: 00d7ddba1143 ("hv_netvsc: pair VF based on serial number") > Reported-by: Vitaly Kuznetsov > Signed-off-by: Haiyang Zhang Reviewed-by: Stephen Hemminger ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH V2 3/4] vmbus: add per-channel sysfs info
On Thu, 18 Oct 2018 17:19:53 +0200 Olaf Hering wrote: > Am Sun, 17 Sep 2017 20:54:18 -0700 > schrieb k...@exchange.microsoft.com: > > > This extends existing vmbus related sysfs structure to provide per-channel > > state information. This is useful when diagnosing issues with multiple > > queues in networking and storage. > > > +++ b/drivers/hv/vmbus_drv.c > > +static ssize_t write_avail_show(const struct vmbus_channel *channel, char > > *buf) > > +{ > > + const struct hv_ring_buffer_info *rbi = &channel->outbound; > > + > > + return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi)); > > +} > > +VMBUS_CHAN_ATTR_RO(write_avail); > > This is upstream since a year. > > But I wonder how this can work if vmbus_device_register is called, > and then something reads the populated sysfs files before vmbus_open returns. > Nothing protects rbi->ring_buffer in this case, which remains NULL > until vmbus_open populates it. > > A simple reproduce, with a modular kernel, is to boot with init=/bin/bash > head /sys/bus/vmbus/devices/*/channels/*/* > > Olaf Good catch, actually the problem goes across all of the ring buffer sysfs files so it existed long before that. The channel ring buffer could be missing. I am less worried about the open from init case, and more worried about issues when channels are closed (as happens when changing number of channels on a net device). As Al has pointed out for years, sysfs is riddled with dangling reference issues. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH V2 3/4] vmbus: add per-channel sysfs info
On Thu, 18 Oct 2018 15:32:35 + Michael Kelley wrote: > From Olaf Hering Sent: Thursday, October 18, 2018 8:20 AM > > > > > This extends existing vmbus related sysfs structure to provide per-channel > > > state information. This is useful when diagnosing issues with multiple > > > queues in networking and storage. > > > > > +++ b/drivers/hv/vmbus_drv.c > > > +static ssize_t write_avail_show(const struct vmbus_channel *channel, > > > char *buf) > > > +{ > > > + const struct hv_ring_buffer_info *rbi = &channel->outbound; > > > + > > > + return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi)); > > > +} > > > +VMBUS_CHAN_ATTR_RO(write_avail); > > > > This is upstream since a year. > > > > But I wonder how this can work if vmbus_device_register is called, > > and then something reads the populated sysfs files before vmbus_open > > returns. > > Nothing protects rbi->ring_buffer in this case, which remains NULL > > until vmbus_open populates it. > > > > A simple reproduce, with a modular kernel, is to boot with init=/bin/bash > > head /sys/bus/vmbus/devices/*/channels/*/* > > > > There are multiple race conditions with this and other VMbus sysfs > information. > There's a race on the close path as well. I've got an action on my list to > get it > cleaned up. > > Michael > There is also a bunch of issues with code like: static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, char *buf) { struct hv_device *hv_dev = device_to_hv_device(dev); if (!hv_dev->channel) return -ENODEV; return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid); } Which should be using ACCESS_ONCE on hv_dev->channel or doing proper RCU. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH] hyperv: replace mutex_is_locked with lockdep
lockdep_assert_held is better at checking for locking requirements since it doesn't get confused if someone else is holding the mutex. Inspired by changes in network drivers by Lance Roy. Signed-off-by: Stephen Hemminger --- drivers/hv/channel_mgmt.c | 2 +- drivers/hv/connection.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 6277597d3d58..abdaf8ac0002 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -385,7 +385,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel) struct vmbus_channel *primary_channel; unsigned long flags; - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + lockdep_assert_held(&vmbus_connection.channel_mutex); BUG_ON(!channel->rescind); if (channel->target_cpu != get_cpu()) { diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index f4d08c8ac7f8..0adaec0db85a 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -307,7 +307,7 @@ struct vmbus_channel *relid2channel(u32 relid) struct list_head *cur, *tmp; struct vmbus_channel *cur_sc; - BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + lockdep_assert_held(&vmbus_connection.channel_mutex); list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->offermsg.child_relid == relid) { -- 2.19.2 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH] vmbus: fix subchannel removal
The changes to split ring allocation from open/close, broke the cleanup of subchannels. This resulted in problems using uio on network devices because the subchannel was left behind when the network device was unbound. The cause was in the disconnect logic which used list splice to move the subchannel list into a local variable. This won't work because the subchannel list is needed later during the process of the rescind messages (relid2channel). The fix is to just leave the subchannel list in place which is what the original code did. The list is cleaned up later when the host rescind is processed. Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open") Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fe00b12e4417..bea4c9850247 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -701,20 +701,12 @@ static int vmbus_close_internal(struct vmbus_channel *channel) int vmbus_disconnect_ring(struct vmbus_channel *channel) { struct vmbus_channel *cur_channel, *tmp; - unsigned long flags; - LIST_HEAD(list); int ret; if (channel->primary_channel != NULL) return -EINVAL; - /* Snapshot the list of subchannels */ - spin_lock_irqsave(&channel->lock, flags); - list_splice_init(&channel->sc_list, &list); - channel->num_sc = 0; - spin_unlock_irqrestore(&channel->lock, flags); - - list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) { + list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) { if (cur_channel->rescind) wait_for_completion(&cur_channel->rescind_event); -- 2.19.2 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH] uio_hv_generic: set callbacks on open
This fixes the problem where the uio application was unable to use multiple queues on restart. The root cause is that the callbacks are cleared on disconnect. Change to setting up callbacks every time in open. Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use") Reported-by: Mohammed Gamal Signed-off-by: Stephen Hemminger --- drivers/uio/uio_hv_generic.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index c2493d011225..3c5169eb23f5 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -204,9 +204,11 @@ hv_uio_open(struct uio_info *info, struct inode *inode) if (atomic_inc_return(&pdata->refcnt) != 1) return 0; + vmbus_set_chn_rescind_callback(dev->channel, hv_uio_rescind); + vmbus_set_sc_create_callback(dev->channel, hv_uio_new_channel); + ret = vmbus_connect_ring(dev->channel, hv_uio_channel_cb, dev->channel); - if (ret == 0) dev->channel->inbound.ring_buffer->interrupt_mask = 1; else @@ -334,9 +336,6 @@ hv_uio_probe(struct hv_device *dev, goto fail_close; } - vmbus_set_chn_rescind_callback(channel, hv_uio_rescind); - vmbus_set_sc_create_callback(channel, hv_uio_new_channel); - ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr); if (ret) dev_notice(&dev->device, -- 2.19.2 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels
On Thu, 13 Dec 2018 16:35:43 + Dexuan Cui wrote: > Before 98f4c651762c, we returned zeros for unopened channels. > With 98f4c651762c, we started to return random on-stack values. > > We'd better return -EINVAL instead. > > Fixes: 98f4c651762c ("hv: move ringbuffer bus attributes to dev_groups") > Cc: sta...@vger.kernel.org > Cc: K. Y. Srinivasan > Cc: Haiyang Zhang > Cc: Stephen Hemminger > Signed-off-by: Dexuan Cui The concept looks fine, but maybe it would be simpler to move it into hv_ringbuffer_get_debuginfo and have it return an error code. Since so much of the code is repeated, I would probably make a macro which generates the code as well. Something like this: >From c6bbdbcde933c85098f7b3e71650a8479d52810c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 17 Dec 2018 09:13:24 -0800 Subject: [PATCH] hv: vmbus: check for ring in debug info --- drivers/hv/ring_buffer.c | 31 +- drivers/hv/vmbus_drv.c | 71 ++-- include/linux/hyperv.h | 5 +-- 3 files changed, 79 insertions(+), 28 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 64d0c85d5161..1f1a55e07733 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -164,26 +164,25 @@ hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, } /* Get various debug metrics for the specified ring buffer. 
*/ -void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, -struct hv_ring_buffer_debug_info *debug_info) +int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info) { u32 bytes_avail_towrite; u32 bytes_avail_toread; - if (ring_info->ring_buffer) { - hv_get_ringbuffer_availbytes(ring_info, - &bytes_avail_toread, - &bytes_avail_towrite); - - debug_info->bytes_avail_toread = bytes_avail_toread; - debug_info->bytes_avail_towrite = bytes_avail_towrite; - debug_info->current_read_index = - ring_info->ring_buffer->read_index; - debug_info->current_write_index = - ring_info->ring_buffer->write_index; - debug_info->current_interrupt_mask = - ring_info->ring_buffer->interrupt_mask; - } + if (!ring_info->ring_buffer) + return -EINVAL; + + hv_get_ringbuffer_availbytes(ring_info, +&bytes_avail_toread, +&bytes_avail_towrite); + debug_info->bytes_avail_toread = bytes_avail_toread; + debug_info->bytes_avail_towrite = bytes_avail_towrite; + debug_info->current_read_index = ring_info->ring_buffer->read_index; + debug_info->current_write_index = ring_info->ring_buffer->write_index; + debug_info->current_interrupt_mask + = ring_info->ring_buffer->interrupt_mask; + return 0; } EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 283d184280af..403fee01572c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -313,10 +313,16 @@ static ssize_t out_intr_mask_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; + return sprintf(buf, "%d\n", outbound.current_interrupt_mask); } static DEVICE_ATTR_RO(out_intr_mask); @@ -326,10 +332,15 @@ 
static ssize_t out_read_index_show(struct device *dev, { struct hv_device *hv_dev = device_to_hv_device(dev); struct hv_ring_buffer_debug_info outbound; + int ret; if (!hv_dev->channel) return -ENODEV; - hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound); + + ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, + &outbound); + if (ret < 0) + return ret; return sprintf(buf, "%d\n", outbound.current_read_index); } static DEVICE_ATTR_RO(out_read_index); @@ -340,10 +351,15 @@ static ssize_t out_write_index_show(struct device *dev, { struct hv_device *hv_dev = devi
Re: [PATCH] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels
On Mon, 17 Dec 2018 18:00:29 + Dexuan Cui wrote: > > From: Stephen Hemminger > > On Thu, 13 Dec 2018 16:35:43 + > > Dexuan Cui wrote: > > > > > Before 98f4c651762c, we returned zeros for unopened channels. > > > With 98f4c651762c, we started to return random on-stack values. > > > > > > We'd better return -EINVAL instead. > > > > The concept looks fine, but maybe it would be simpler to move it into > > hv_ringbuffer_get_debuginfo and have it return an error code. > > > > Since so much of the code is repeated, I would probably make a > > macro which generates the code as well. > > > > Something like this: > > Thanks, Stephen! Now the patch has been in char-misc's char-misc-linus > branch, so IMO we may as well leave it as is (considering the code here is > unlikely to be frqeuencly changed), and we have a smaller patch this way. :-) > > But, yes, I agree with you that generally we should make a common > function to avoid duplicate code. > > Thanks, > -- Dexuan The old code was risky because it would silently return stack garbage. Having an error check in get_debuginfo would eliminate that. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels
On Mon, 17 Dec 2018 18:44:12 + Dexuan Cui wrote: > > From: devel On Behalf Of > > Dexuan Cui > > Sent: Monday, December 17, 2018 10:31 AM > > > From: Stephen Hemminger > > > > > > The old code was risky because it would silently return stack garbage. > > > Having an error check in get_debuginfo would eliminate that. > > > > OK, then let me make another patch based on the latest char-misc-linus. > > > > -- Dexuan > > Hi Stephen, your patch can apply cleanly. Let me rebase your patch to > char-misc-linus, do a test, and then post it with your Signed-off-by and > mine: > I assume you're Ok with this. Please let me know in case it's not. :-) > > Thanks, > -- Dexuan Sure. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH v3] Drivers: hv: vmbus: Expose counters for interrupts and full conditions
> +static ssize_t channel_intr_in_full_show(const struct vmbus_channel > *channel, > + char *buf) > +{ > + return sprintf(buf, "%llu\n", channel->intr_in_full); > +} intr_in_full is u64, which is not the same as unsigned long long. to be correct you need a cast here. > > diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h > > index dcb6977afce9..7e5239123276 100644 > > --- a/include/linux/hyperv.h > > +++ b/include/linux/hyperv.h > > @@ -751,6 +751,27 @@ struct vmbus_channel { > > u64 interrupts; /* Host to Guest interrupts */ > > u64 sig_events; /* Guest to Host events */ > > > > + /* Interrupt counts for 2 types of Guest to Host interrupts */ > > + u64 intr_in_full; /* in ring buffer, full to not full */ > > + u64 intr_out_empty; /* out ring buffer, empty to not empty */ > > + > > + /* > > +* The total number of write operations that encountered a full > > +* outbound ring buffer. > > +*/ > > + u64 out_full_total; > > + /* > > +* The number of write operations that were the first to encounter a > > +* full outbound ring buffer. > > +*/ > > + u64 out_full_first; Adding more fields changes cache layout which can cause additional cache miss in the hot path. > > + /* > > +* Indicates that a full outbound ring buffer was encountered. The flag > > +* is set to true when a full outbound ring buffer is encountered and > > +* set to false when a write to the outbound ring buffer is completed. > > +*/ > > + bool out_full_flag; Discussion on kernel mailing list. Recommends against putting bool in structures since that pads to full sizeof(int). Could this be part of a bitfield? 
> > /* Channel callback's invoked in softirq context */ > > struct tasklet_struct callback_event; > > void (*onchannel_callback)(void *context); > > @@ -936,6 +957,23 @@ static inline void *get_per_channel_state(struct > > vmbus_channel *c) > > static inline void set_channel_pending_send_size(struct vmbus_channel *c, > > u32 size) > > { > > + unsigned long flags; > > + > > + spin_lock_irqsave(&c->outbound.ring_lock, flags); > > + > > + if (size) { > > + ++c->out_full_total; > > + > > + if (!c->out_full_flag) { > > + ++c->out_full_first; > > + c->out_full_flag = true; > > + } > > + } else { > > + c->out_full_flag = false; > > + } > > + > > + spin_unlock_irqrestore(&c->outbound.ring_lock, flags); If this is called often, the additional locking will impact performance. > > c->outbound.ring_buffer->pending_send_sz = size; > > } > > Could I propose another alternative? It might be more useful to count the guest to host interaction events rather than the ring buffer. For example, the number of calls to: vmbus_set_event, which means a host exit call; vmbus_setevent, the fastpath using sync_set_bit; and calls to ringbuffer_write that returned -EAGAIN. These would require less locking, reuse existing code paths and not require additional state. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] PCI: hv: Add hv_pci_remove_slots() when we unload the driver
On Thu, 7 Feb 2019 20:36:32 + Dexuan Cui wrote: > When we unload pci-hyperv, the host doesn't send us a PCI_EJECT message. > In this case we also need to make sure the sysfs pci slot directory > is removed, otherwise "cat /sys/bus/pci/slots/2/address" will trigger > "BUG: unable to handle kernel paging request". And, if we unload/reload > the driver several times, we'll have multiple pci slot directories in > /sys/bus/pci/slots/ like this: > > root@localhost:~# ls -rtl /sys/bus/pci/slots/ > total 0 > drwxr-xr-x 2 root root 0 Feb 7 10:49 2 > drwxr-xr-x 2 root root 0 Feb 7 10:49 2-1 > drwxr-xr-x 2 root root 0 Feb 7 10:51 2-2 > > The patch adds the missing code, and in hv_eject_device_work() it also > moves pci_destroy_slot() to an earlier place where we hold the pci lock. > > Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot > information") > Signed-off-by: Dexuan Cui > Cc: sta...@vger.kernel.org > Cc: Stephen Hemminger Acked-by: Stephen Hemminger ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 1/2] Drivers: hv: vmbus: Change server monitor_pages index to 0
On Fri, 8 Feb 2019 04:58:52 -0500 Kimberly Brown wrote: > Change the monitor_pages index in server_monitor_pending_show() to '0'. > '0' is the correct monitor_pages index for the server. A comment for the > monitor_pages field in the vmbus_connection struct definition indicates > that the 1st page is for parent->child notifications. In addition, the > server_monitor_latency_show() and server_monitor_conn_id_show() > functions use monitor_pages index '0'. > > Signed-off-by: Kimberly Brown > --- > drivers/hv/vmbus_drv.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c > index 403fee01572c..f2a79f5129d7 100644 > --- a/drivers/hv/vmbus_drv.c > +++ b/drivers/hv/vmbus_drv.c > @@ -234,7 +234,7 @@ static ssize_t server_monitor_pending_show(struct device > *dev, > return -ENODEV; > return sprintf(buf, "%d\n", > channel_pending(hv_dev->channel, > -vmbus_connection.monitor_pages[1])); > +vmbus_connection.monitor_pages[0])); > } > static DEVICE_ATTR_RO(server_monitor_pending); Looks good. I wonder if ever gets used though since it returned incorrect data... Acked-by: Stephen Hemminger ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 2/2] Drivers: hv: vmbus: Display nothing in sysfs if monitor_allocated not set
On Fri, 8 Feb 2019 05:01:12 -0500 Kimberly Brown wrote: You are right, the current behavior is broken. It would be good to add a description of the conditions under which the monitor is not used. Is this some part of a project emulating Hyper-V? > + > + if (!hv_dev->channel->offermsg.monitor_allocated) > + return sprintf(buf, "\n"); If the monitor is not used, why not return an error instead of empty data? Any program (or user) would have to handle that already. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 2/2] Drivers: hv: vmbus: Display nothing in sysfs if monitor_allocated not set
On Mon, 11 Feb 2019 02:01:18 -0500 Kimberly Brown wrote: > On Fri, Feb 08, 2019 at 02:32:09PM -0800, Stephen Hemminger wrote: > > On Fri, 8 Feb 2019 05:01:12 -0500 > > Kimberly Brown wrote: > > > > You are right, the current behavior is broken. > > It would be good to add a description of under what conditions > > monitor is not used. Is this some part of a project emulating > > Hyper-V? > > > > I'm not sure which conditions determine whether the monitor mechanism is > used. I've searched the Hypervisor TLFS, and I couldn't find any > information. If you have any suggestions for where I can find this > information, please let me know. The monitor page stuff pre-dates my involvement with Hyper-V. KY might know. But based on comments it looks like it was added to avoid hypercalls for each message. It probably showed up in Windows Server 2012 timeframe. To test you might want to dig up Windows Server 2008. > No, I'm not working on a project emulating Hyper-V. OK, I had heard that KVM project was doing something with QEMU. > > > > > + > > > + if (!hv_dev->channel->offermsg.monitor_allocated) > > > + return sprintf(buf, "\n"); > > > > If monitor is not used, why not return an error instead of empty > > data. Any program (or user) would have to handle that already. > > I think that returning an error instead is fine. I'll make this change > in the next version of the patch. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 2/2] Drivers: hv: vmbus: Display nothing in sysfs if monitor_allocated not set
On Thu, 14 Feb 2019 01:11:03 -0500 Kimberly Brown wrote: > On Mon, Feb 11, 2019 at 10:02:47AM -0800, Stephen Hemminger wrote: > > On Mon, 11 Feb 2019 02:01:18 -0500 > > Kimberly Brown wrote: > > > > > On Fri, Feb 08, 2019 at 02:32:09PM -0800, Stephen Hemminger wrote: > > > > On Fri, 8 Feb 2019 05:01:12 -0500 > > > > Kimberly Brown wrote: > > > > > > > > You are right, the current behavior is broken. > > > > It would be good to add a description of under what conditions > > > > monitor is not used. Is this some part of a project emulating > > > > Hyper-V? > > > > > > > > > > I'm not sure which conditions determine whether the monitor mechanism is > > > used. I've searched the Hypervisor TLFS, and I couldn't find any > > > information. If you have any suggestions for where I can find this > > > information, please let me know. > > > > The monitor page stuff pre-dates my involvement with Hyper-V. KY might know. > > But based on comments it looks like it was added to avoid hypercalls > > for each message. It probably showed up in Windows Server 2012 timeframe. > > > > To test you might want to dig up Windows Server 2008. > > > > It looks like the monitor mechanism has always been used. It's present in the > earliest commit that I can find: 3e7ee4902fe6 ("add the Hyper-V virtual bus") > from 2009. > > I propose that the following sentences be added to the sysfs documentation for > the affected attributes: > > "The monitor page mechanism is used for performance critical channels > (storage, > network, etc.). Channels that do not use the monitor page mechanism will > return > EINVAL." > > I think that this provides sufficient information for a user to understand why > opening an affected file can return EINVAL. What do you think? Thanks for following up. I agree with you EINVAL works as a solution. My understanding is that there are two ways a channel can work. The first one is for the guest to send a hypercall to the host to indicate when data is available.
The other is for the guest to indicate by setting a bit in shared memory with host. The shared memory approach reduces host/guest overhead and allows for more opportunities for batching in the ring. The host checks the shared memory on a polling interval defined in the latency field. The hypercall method does not use the monitor page. It has lower latency (no polling). ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
RE: [PATCH V3 1/10] X86/Hyper-V: Add parameter offset for hyperv_fill_flush_guest_mapping_list()
int hyperv_fill_flush_guest_mapping_list( struct hv_guest_mapping_flush_list *flush, - u64 start_gfn, u64 pages) + int offset, u64 start_gfn, u64 pages) { u64 cur = start_gfn; u64 additional_pages; - int gpa_n = 0; + int gpa_n = offset; do { /* Do you mean to support negative offsets here? Maybe unsigned would be better? ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 0/3] pci-hyperv: fix memory leak and add pci_destroy_slot()
On Mon, 4 Mar 2019 21:34:47 + Dexuan Cui wrote: > Patch #1 fixes a memory leak caused by incorrectly-maintained hpdev->refs. > > Patch #2 and #3 make sure the "slot" is removed in all the scenarios. > Without them, in the quick hot-add/hot-remove test, systemd-dev may easily > crash when trying to access a dangling sys file in /sys/bus/pci/slots/: > "BUG: unable to handle kernel paging request". > > BTW, Patch #2 was posted on Feb 7, 2019, and this is the v2: the change > to hv_eject_device_work() in v1 is removed, as the change is only needed > when we hot-remove the device and remove the pci-hyperv driver at the > same time. It looks more work is required to make this scenaro work > correctly, and since removing the driver is not really a "usual" usage, > we can address this scenario in the future. > > Please review the patchset. > > Dexuan Cui (3): > PCI: hv: Fix a memory leak in hv_eject_device_work() > PCI: hv: Add hv_pci_remove_slots() when we unload the driver > PCI: hv: Add pci_destroy_slot() in pci_devices_present_work(), if > necessary > > drivers/pci/controller/pci-hyperv.c | 23 +++ > 1 file changed, 23 insertions(+) Thanks for fixing this. Reviewed-by: Stephen Hemminger ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] isdn: hysdn: Fix error spaces around '*'
On Fri, 2 Aug 2019 19:56:02 + Jose Carlos Cazarin Filho wrote: > Fix checkpath error: > CHECK: spaces preferred around that '*' (ctx:WxV) > +extern hysdn_card *card_root;/* pointer to first card */ > > Signed-off-by: Jose Carlos Cazarin Filho Read the TODO, these drivers are scheduled for removal, so changes are not helpful at this time. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] isdn: hysdn: fix code style error from checkpatch
On Fri, 2 Aug 2019 19:50:17 + Ricardo Bruno Lopes da Silva wrote: > Fix error bellow from checkpatch. > > WARNING: Block comments use * on subsequent lines > +/*** > + > > Signed-off-by: Ricardo Bruno Lopes da Silva Read the TODO, these drivers are scheduled for removal, so changes are not helpful at this time. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net] netvsc: increase default receive buffer size
The default receive buffer size was reduced by recent change to a value which was appropriate for 10G and Windows Server 2016. But the value is too small for full performance with 40G on Azure. Increase the default back to maximum supported by host. Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size") Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c538a4f15f3b..d4902ee5f260 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -49,7 +49,7 @@ #define NETVSC_MIN_TX_SECTIONS 10 #define NETVSC_DEFAULT_TX 192 /* ~1M */ #define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ -#define NETVSC_DEFAULT_RX 2048/* ~4M */ +#define NETVSC_DEFAULT_RX 10485 /* Max ~16M */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net] netvsc: increase default receive buffer size
On Thu, 14 Sep 2017 10:02:03 -0700 (PDT) David Miller wrote: > From: Stephen Hemminger > Date: Thu, 14 Sep 2017 09:31:07 -0700 > > > The default receive buffer size was reduced by recent change > > to a value which was appropriate for 10G and Windows Server 2016. > > But the value is too small for full performance with 40G on Azure. > > Increase the default back to maximum supported by host. > > > > Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size") > > Signed-off-by: Stephen Hemminger > > What other side effects are there to making this buffer so large? > > Just curious... It increases latency and exercises bufferbloat avoidance on TCP. The problem was the smaller buffer caused regressions in UDP benchmarks on 40G Azure. One could argue that this is not a reasonable benchmark but people run it. Apparently, Windows already went through the same thing and uses an even bigger buffer. Longer term there will be more internal discussion with different teams about what the receive latency and buffering need to be. Also, the issue goes away when accelerated networking (SR-IOV) is more widely used. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 02/16] hyper-v: trace vmbus_on_message()
On Thu, 21 Sep 2017 10:17:18 +0200 Vitaly Kuznetsov wrote: > Steven Rostedt writes: > > > On Wed, 20 Sep 2017 19:21:53 +0200 > > Vitaly Kuznetsov wrote: > > > >> diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h > >> index 9a29ef55477d..72911dfc9682 100644 > >> --- a/drivers/hv/hv_trace.h > >> +++ b/drivers/hv/hv_trace.h > >> @@ -14,6 +14,14 @@ TRACE_EVENT(vmbus_on_msg_dpc, > >>TP_printk("message %u received", __entry->msgtype) > >>); > >> > >> +TRACE_EVENT(vmbus_on_message, > >> + TP_PROTO(const struct vmbus_channel_message_header *hdr), > >> + TP_ARGS(hdr), > >> + TP_STRUCT__entry(__field(unsigned int, msgtype)), > >> + TP_fast_assign(__entry->msgtype = hdr->msgtype), > >> + TP_printk("processing message %u", __entry->msgtype) > >> + ); > > > > Whenever you have two trace events with everything the same but the > > TP_printk(), you can save a little space by using DEFINE_EVENT_PRINT() > > > > DECLARE_EVENT_CLASS(vmbus_hdr_msg, > > TP_PROTO(const struct vmbus_channel_message_header *hdr), > > TP_ARGS(hdr), > > TP_STRUCT__entry(__field(unsigned int, msgtype), > > TP_fast_assign(__entry->msg = hdr->msgtype;), > > TP_printk("msgtype=%d", __entry->msgtype) > > ); > > > > DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_msg_dpc, > > TP_PROTO(const struct vmbus_channel_message_header *hdr), > > TP_ARGS(hdr), > > TP_printk("message %u received", __entry->msgtype)); > > > > DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_message, > > TP_PROTO(const struct vmbus_channel_message_header *hdr), > > TP_ARGS(hdr), > > TP_printk("processing message %u", __entry->msgtype)); > > > > This will use the same functions required to save and record the > > message but will use a different function to output it to the trace. > > Oh, thanks! This seems to be useful for > vmbus_on_msg_dpc/vmbus_on_message only as all the rest needs to parse > different structures. Will use it in v2. > I just used this patch. 
Since function name is already in the trace message no need to have different print's for each one. >From ff85967810c216eb01d181789af4f56bd00dc9b9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 Oct 2017 09:24:11 -0700 Subject: [PATCH 3/4] hyperv: fix warnings in trace print This gets rid of the build warnings from unused printf format. And uses common class for print. --- drivers/hv/hv_trace.h | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h index be7762955650..4755c4640e39 100644 --- a/drivers/hv/hv_trace.h +++ b/drivers/hv/hv_trace.h @@ -11,18 +11,18 @@ DECLARE_EVENT_CLASS(vmbus_hdr_msg, TP_ARGS(hdr), TP_STRUCT__entry(__field(unsigned int, msgtype)), TP_fast_assign(__entry->msgtype = hdr->msgtype;), - TP_printk("msgtype=%d", __entry->msgtype) + TP_printk("msgtype=%u", __entry->msgtype) ); -DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_msg_dpc, +DEFINE_EVENT(vmbus_hdr_msg, vmbus_on_msg_dpc, TP_PROTO(const struct vmbus_channel_message_header *hdr), - TP_ARGS(hdr), - TP_printk("message %u received", __entry->msgtype)); + TP_ARGS(hdr) +); -DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_message, +DEFINE_EVENT(vmbus_hdr_msg, vmbus_on_message, TP_PROTO(const struct vmbus_channel_message_header *hdr), -TP_ARGS(hdr), -TP_printk("processing message %u", __entry->msgtype)); +TP_ARGS(hdr) +); TRACE_EVENT(vmbus_onoffer, TP_PROTO(const struct vmbus_channel_offer_channel *offer), -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH v2 16/16] hyper-v: trace vmbus_send_tl_connect_request()
I added an additional set of trace points for when channel gets notified or signals host. diff -urNp linux-msft/drivers/hv/channel.c msft-4.14-rc3/drivers/hv/channel.c --- linux-msft/drivers/hv/channel.c 2017-10-03 10:06:54.893209237 -0700 +++ msft-4.14-rc3/drivers/hv/channel.c 2017-10-03 10:07:35.501665114 -0700 @@ -55,6 +55,8 @@ void vmbus_setevent(struct vmbus_channel { struct hv_monitor_page *monitorpage; + trace_vmbus_setevent(channel); + /* * For channels marked as in "low latency" mode * bypass the monitor page mechanism. diff -urNp linux-msft/drivers/hv/connection.c msft-4.14-rc3/drivers/hv/connection.c --- linux-msft/drivers/hv/connection.c 2017-10-03 10:06:54.893209237 -0700 +++ msft-4.14-rc3/drivers/hv/connection.c 2017-10-03 10:07:35.501665114 -0700 @@ -322,6 +322,8 @@ void vmbus_on_event(unsigned long data) struct vmbus_channel *channel = (void *) data; unsigned long time_limit = jiffies + 2; + trace_vmbus_on_event(channel); + do { void (*callback_fn)(void *); diff -urNp linux-msft/drivers/hv/hv_trace.h msft-4.14-rc3/drivers/hv/hv_trace.h --- linux-msft/drivers/hv/hv_trace.h2017-10-03 10:08:06.514014019 -0700 +++ msft-4.14-rc3/drivers/hv/hv_trace.h 2017-10-03 10:07:35.505665159 -0700 @@ -294,6 +294,29 @@ TRACE_EVENT(vmbus_send_tl_connect_reques ) ); +DECLARE_EVENT_CLASS(vmbus_channel, + TP_PROTO(const struct vmbus_channel *channel), + TP_ARGS(channel), + TP_STRUCT__entry(__field(u32, relid)), + TP_fast_assign(__entry->relid = channel->offermsg.child_relid), + TP_printk("relid 0x%x", __entry->relid) +); + +DEFINE_EVENT(vmbus_channel, vmbus_chan_sched, + TP_PROTO(const struct vmbus_channel *channel), + TP_ARGS(channel) +); + +DEFINE_EVENT(vmbus_channel, vmbus_setevent, + TP_PROTO(const struct vmbus_channel *channel), + TP_ARGS(channel) +); + +DEFINE_EVENT(vmbus_channel, vmbus_on_event, + TP_PROTO(const struct vmbus_channel *channel), + TP_ARGS(channel) +); + #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . 
#undef TRACE_INCLUDE_FILE diff -urNp linux-msft/drivers/hv/vmbus_drv.c msft-4.14-rc3/drivers/hv/vmbus_drv.c --- linux-msft/drivers/hv/vmbus_drv.c 2017-10-03 10:06:54.897209282 -0700 +++ msft-4.14-rc3/drivers/hv/vmbus_drv.c2017-10-03 10:07:35.505665159 -0700 @@ -948,6 +948,7 @@ static void vmbus_chan_sched(struct hv_p continue; ++channel->interrupts_in; + trace_vmbus_chan_sched(channel); switch (channel->callback_mode) { case HV_CALL_ISR: ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH] vmbus: initialize reserved fields in messages
Make sure to initialize reserved fields in messages to the host, rather than passing stack junk. Signed-off-by: Stephen Hemminger --- drivers/hv/channel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index efd5db743319..9f48f454bde0 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -745,6 +745,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ desc.length8 = (u16)(packetlen_aligned >> 3); desc.transactionid = requestid; + desc.reserved = 0; desc.rangecount = pagecount; for (i = 0; i < pagecount; i++) { @@ -788,6 +789,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */ desc->length8 = (u16)(packetlen_aligned >> 3); desc->transactionid = requestid; + desc->reserved = 0; desc->rangecount = 1; bufferlist[0].iov_base = desc; -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH v3 00/17] Hyper-V: add tracing to VMBus module and trace messages/events
On Thu, 5 Oct 2017 16:50:27 +0200 Vitaly Kuznetsov wrote: > Changes since v2: > - Use DEFINE_EVENT in PATCH1/2 to avoid compile warnings [Stephen Hemminger] > - Add PATCH17 tracing channel events [Stephen Hemminger] > > Messages between guest and host are used in Hyper-V as control flow. To > simplify debugging various issues which are often hard to reproduce add > tracepoints to all message senders and handlers. This is not a performance > critical path and tracing overhead should be negligible. > > The example usage and output is: > > Enable all tracing events: > # echo 1 > /sys/kernel/debug/tracing/events/hyperv/enable > > Do something which causes messages to be sent between host and guest, e.g. > hot remove a VMBus device. > > Check events: > # cat /sys/kernel/debug/tracing/trace > > # tracer: nop > # > # _-=> irqs-off > # / _=> need-resched > #| / _---=> hardirq/softirq > #|| / _--=> preempt-depth > #||| / delay > # TASK-PID CPU# TIMESTAMP FUNCTION > # | | | | | > -0 [011] ..s. 122.981583: vmbus_on_msg_dpc: msgtype=1 > kworker/11:7-1506 [011] 122.981597: vmbus_on_message: msgtype=1 > kworker/11:7-1506 [011] 122.981598: vmbus_onoffer: child_relid > 0x10, monitorid 0x2, is_dedicated 1, connection_id 0x10010, if_type > f8615163-df3e-46c5-913f-f2d2f965ed0e, if_instance > 6676e078-e4b3-44da-8a7d-12eafb577d31, chn_flags 0x0, mmio_megabytes 0, > sub_channel_index 0 > kworker/11:7-1506 [011] 122.982130: vmbus_establish_gpadl_header: > sending child_relid 0x10, gpadl 0xe1e34, range_buflen 2056 rangecount 1, ret 0 > kworker/11:7-1506 [011] 122.982133: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982136: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982137: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982139: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 
[011] 122.982141: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982142: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982144: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982146: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > kworker/11:7-1506 [011] 122.982148: vmbus_establish_gpadl_body: > sending msgnumber 0, gpadl 0xe1e34, ret 0 > -0 [011] ..s. 122.982336: vmbus_on_msg_dpc: msgtype=10 > -0 [011] ..s. 122.982337: vmbus_ongpadl_created: > child_relid 0x10, gpadl 0xe1e34, creation_status 0 > kworker/11:7-1506 [011] 122.982351: vmbus_open: sending > child_relid 0x10, openid 16, gpadlhandle 0xe1e34, target_vp 0xb, offset 0x80, > ret 0 > kworker/3:1-214 [003] 123.015007: vmbus_setevent: relid 0x5 > -0 [011] ..s. 123.029467: vmbus_on_msg_dpc: msgtype=6 > -0 [011] ..s. 123.029470: vmbus_onopen_result: > child_relid 0x10, openid 16, status 0 > kworker/11:7-1506 [011] 123.029492: vmbus_setevent: relid 0x10 > -0 [011] d.h. 123.029533: vmbus_chan_sched: relid 0x10 > kworker/11:7-1506 [011] 123.029539: vmbus_setevent: relid 0x10 > > CHANNELMSG_UNLOAD/CHANNELMSG_UNLOAD_RESPONSE are not traced as these are > mostly used on crash. 
> > Vitaly Kuznetsov (17): > hyper-v: trace vmbus_on_msg_dpc() > hyper-v: trace vmbus_on_message() > hyper-v: trace vmbus_onoffer() > hyper-v: trace vmbus_onoffer_rescind() > hyper-v: trace vmbus_onopen_result() > hyper-v: trace vmbus_ongpadl_created() > hyper-v: trace vmbus_ongpadl_torndown() > hyper-v: trace vmbus_onversion_response() > hyper-v: trace vmbus_request_offers() > hyper-v: trace vmbus_open() > hyper-v: trace vmbus_close_internal() > hyper-v: trace vmbus_establish_gpadl() > hyper-v: trace vmbus_teardown_gpadl() > hyper-v: trace vmbus_negotiate_version() > hyper-v: trace vmbus_release_relid() > hyper-v: trace vmbus_send_tl_connect_request() > hyper-v: trace channel events > > drivers/hv/Makefile | 4 +- > drivers/hv/channel.c | 21 ++- > drivers/hv/channel_m
Re: [PATCH 2/2] uio: Prefer MSI(X) interrupts in PCI drivers
On Fri, 6 Oct 2017 13:50:44 + "Stahl, Manuel" wrote: > MSI(X) interrupts are not shared between devices. So when available > those should be preferred over legacy interrupts. > > Signed-off-by: Manuel Stahl > --- >  drivers/uio/uio_pci_dmem_genirq.c | 27 --- >  drivers/uio/uio_pci_generic.c | 24 ++-- >  2 files changed, 38 insertions(+), 13 deletions(-) The last time I tried to do MSI-X with pci-generic it got rejected by the maintainer. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 2/2] uio: Prefer MSI(X) interrupts in PCI drivers
On Fri, 20 Oct 2017 14:50:44 +0200 "gre...@linuxfoundation.org" wrote: > On Fri, Oct 06, 2017 at 07:57:00AM -0700, Stephen Hemminger wrote: > > On Fri, 6 Oct 2017 13:50:44 + > > "Stahl, Manuel" wrote: > > > > > MSI(X) interrupts are not shared between devices. So when available > > > those should be preferred over legacy interrupts. > > > > > > Signed-off-by: Manuel Stahl > > > --- > > >  drivers/uio/uio_pci_dmem_genirq.c | 27 --- > > >  drivers/uio/uio_pci_generic.c | 24 ++-- > > >  2 files changed, 38 insertions(+), 13 deletions(-) > > > > The last time I tried to do MSI-X with pci-generic it got rejected > > by the maintainer. > > Hm, yeah, this would break users today that do not have msi-x, right? > > Not good, Manuel, how well did you test this? > > thanks, > > greg k-h Look at https://patchwork.kernel.org/patch/7303021/ The objection was more that UIO developers did not like that UIO was (already) being used for DMA without IOMMU, and MSI-x has DMA because of vector table. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next 1/4] hv_netvsc: netvsc_teardown_gpadl() split
On Tue, 31 Oct 2017 14:42:01 +0100 Vitaly Kuznetsov wrote: > It was found that in some cases host refuses to teardown GPADL for send/ > receive buffers (probably when some work with these buffere is scheduled or > ongoing). Change the teardown logic to be: > 1) Send NVSP_MSG1_TYPE_REVOKE_* messages > 2) Close the channel > 3) Teardown GPADLs. > This seems to work reliably. > > Signed-off-by: Vitaly Kuznetsov > --- > drivers/net/hyperv/netvsc.c | 69 > +++-- > 1 file changed, 36 insertions(+), 33 deletions(-) > > diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c > index 5bb6a20072dd..bfc79698b8f4 100644 > --- a/drivers/net/hyperv/netvsc.c > +++ b/drivers/net/hyperv/netvsc.c > @@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struct netvsc_device > *nvdev) > call_rcu(&nvdev->rcu, free_netvsc_device); > } > > -static void netvsc_destroy_buf(struct hv_device *device) > +static void netvsc_revoke_buf(struct hv_device *device, > + struct netvsc_device *net_device) > { > struct nvsp_message *revoke_packet; > struct net_device *ndev = hv_get_drvdata(device); > - struct net_device_context *ndc = netdev_priv(ndev); > - struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); > int ret; > > /* > @@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv_device *device) > net_device->recv_section_cnt = 0; > } > > - /* Teardown the gpadl on the vsp end */ > - if (net_device->recv_buf_gpadl_handle) { > - ret = vmbus_teardown_gpadl(device->channel, > -net_device->recv_buf_gpadl_handle); > - > - /* If we failed here, we might as well return and have a leak > - * rather than continue and a bugchk > - */ > - if (ret != 0) { > - netdev_err(ndev, > -"unable to teardown receive buffer's > gpadl\n"); > - return; > - } > - net_device->recv_buf_gpadl_handle = 0; > - } > - > - if (net_device->recv_buf) { > - /* Free up the receive buffer */ > - vfree(net_device->recv_buf); > - net_device->recv_buf = NULL; > - } > - > /* Deal with the send buffer we may 
have setup. >* If we got a send section size, it means we received a >* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent > @@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv_device *device) > } > net_device->send_section_cnt = 0; > } > - /* Teardown the gpadl on the vsp end */ > +} > + > +static void netvsc_teardown_gpadl(struct hv_device *device, > + struct netvsc_device *net_device) > +{ > + struct net_device *ndev = hv_get_drvdata(device); > + int ret; > + > + if (net_device->recv_buf_gpadl_handle) { > + ret = vmbus_teardown_gpadl(device->channel, > +net_device->recv_buf_gpadl_handle); > + > + /* If we failed here, we might as well return and have a leak > + * rather than continue and a bugchk > + */ > + if (ret != 0) { > + netdev_err(ndev, > +"unable to teardown receive buffer's > gpadl\n"); > + return; > + } > + net_device->recv_buf_gpadl_handle = 0; > + } > + > + if (net_device->recv_buf) { > + /* Free up the receive buffer */ > + vfree(net_device->recv_buf); > + net_device->recv_buf = NULL; > + } > + > if (net_device->send_buf_gpadl_handle) { > ret = vmbus_teardown_gpadl(device->channel, > net_device->send_buf_gpadl_handle); > @@ -420,7 +425,8 @@ static int netvsc_init_buf(struct hv_device *device, > goto exit; > > cleanup: > - netvsc_destroy_buf(device); > + netvsc_revoke_buf(device, net_device); > + netvsc_teardown_gpadl(device, net_device); > > exit: > return ret; > @@ -539,11 +545,6 @@ static int netvsc_connect_vsp(struct hv_device *device, > return ret; > } > > -static void netvsc_disconnect_vsp(struct hv_device *device) > -{ > - netvsc_destroy_buf(device); > -} > - > /* > * netvsc_device_remove - Callback when the root bus device is removed > */ > @@ -557,7 +558,7 @@ void netvsc_device_remove(struct hv_device *device) > > cancel_work_sync(&net_device->subchan_work); > > - netvsc_disconnect_vsp(device); > + netvsc_revoke_buf(device, net_device); > > RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); > > @@ -570,6 +571,8 @@ void 
netvsc_device_remove(struct hv_device *device) > /* Now, we can close the channel safely */ > vmbus_close(device->channel
Re: [PATCH net-next 2/4] hv_netvsc: protect nvdev->extension with RCU
On Tue, 31 Oct 2017 14:42:02 +0100 Vitaly Kuznetsov wrote: > @@ -2002,7 +2002,9 @@ static int netvsc_probe(struct hv_device *dev, > device_info.recv_sections = NETVSC_DEFAULT_RX; > device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE; > > + rtnl_lock(); > nvdev = rndis_filter_device_add(dev, &device_info); > + rtnl_unlock(); rtnl is not necessary here. probe cannot be bothered by other changes. > --- a/drivers/net/hyperv/rndis_filter.c > +++ b/drivers/net/hyperv/rndis_filter.c > @@ -402,20 +402,27 @@ int rndis_filter_receive(struct net_device *ndev, >void *data, u32 buflen) > { > struct net_device_context *net_device_ctx = netdev_priv(ndev); > - struct rndis_device *rndis_dev = net_dev->extension; > + struct rndis_device *rndis_dev; > struct rndis_message *rndis_msg = data; > + int ret = 0; > + > + rcu_read_lock_bh(); > + > + rndis_dev = rcu_dereference_bh(net_dev->extension); filter_receive is already called only from NAPI and has RCU lock and soft irq disabled. This is not necessary. > - net_dev->extension = NULL; > + rcu_assign_pointer(net_dev->extension, NULL); > + > + synchronize_rcu(); rcu_assign_pointer with NULL is never a good idea. And synchronize_rcu is slow. Since net_device is already protected by RCU (for deletion) it should not be necessary. Thank you for trying to address these races. But it should be done carefully, not by just slapping RCU everywhere. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net-next 3/4] hv_netvsc: reset net_device_ctx->nvdev with rcu_assign_pointer()
On Tue, 31 Oct 2017 07:09:58 -0700 Eric Dumazet wrote: > On Tue, 2017-10-31 at 14:42 +0100, Vitaly Kuznetsov wrote: > > RCU_INIT_POINTER() is not suitable here as it doesn't give us ordering > > guarantees (see the comment in rcupdate.h). This is also not a hotpath. > > > > Signed-off-by: Vitaly Kuznetsov > > --- > > drivers/net/hyperv/netvsc.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c > > index bfc79698b8f4..12efb3e34775 100644 > > --- a/drivers/net/hyperv/netvsc.c > > +++ b/drivers/net/hyperv/netvsc.c > > @@ -560,7 +560,7 @@ void netvsc_device_remove(struct hv_device *device) > > > > netvsc_revoke_buf(device, net_device); > > > > - RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); > > + rcu_assign_pointer(net_device_ctx->nvdev, NULL); > > I see no point for this patch. > > Setting a NULL pointer needs no barrier at all. Agreed with Eric. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 10/17] hyper-v: trace vmbus_open()
On Tue, 31 Oct 2017 13:48:00 +0100 Greg KH wrote: > On Mon, Oct 30, 2017 at 10:31:34AM -0400, Steven Rostedt wrote: > > On Mon, 30 Oct 2017 11:32:20 +0100 > > Greg KH wrote: > > > > > On Mon, Oct 30, 2017 at 11:07:01AM +0100, Vitaly Kuznetsov wrote: > > > > Greg KH writes: > > > > > > > > > On Mon, Oct 30, 2017 at 09:16:19AM +0100, Vitaly Kuznetsov wrote: > > > > >> Greg KH writes: > > > > >> > > > > >> > On Sun, Oct 29, 2017 at 12:21:09PM -0700, > > > > >> > k...@exchange.microsoft.com wrote: > > > > >> >> From: Vitaly Kuznetsov > > > > >> >> > > > > >> >> Add tracepoint to CHANNELMSG_OPENCHANNEL sender. > > > > >> >> > > > > >> >> Signed-off-by: Vitaly Kuznetsov > > > > >> >> Signed-off-by: K. Y. Srinivasan > > > > >> >> --- > > > > >> >> drivers/hv/channel.c | 2 ++ > > > > >> >> drivers/hv/hv_trace.h | 27 +++ > > > > >> >> 2 files changed, 29 insertions(+) > > > > >> >> > > > > >> >> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c > > > > >> >> index a406beb10dd0..739b3fe1e0fb 100644 > > > > >> >> --- a/drivers/hv/channel.c > > > > >> >> +++ b/drivers/hv/channel.c > > > > >> >> @@ -185,6 +185,8 @@ int vmbus_open(struct vmbus_channel > > > > >> >> *newchannel, u32 send_ringbuffer_size, > > > > >> >> ret = vmbus_post_msg(open_msg, > > > > >> >>sizeof(struct vmbus_channel_open_channel), > > > > >> >> true); > > > > >> >> > > > > >> >> + trace_vmbus_open(open_msg, ret); > > > > >> > > > > > >> > Why add tracepoints for things that ftrace can handle > > > > >> > automatically? > > > > >> > > > > >> This series adds pretty prints for structures printing what is needed > > > > >> and in the right format significantly simplifying debugging. And it > > > > >> wouldn't make sense to add tracepoints to *some* messages-related > > > > >> functions and skip others where parsing is more trivial. > > > > > > > > > > Tracepoints add memory usage and take up real space. 
If you don't > > > > > need > > > > > them for something, as there are other ways to already get the > > > > > information needed, why add new ones that you now need to drag around > > > > > for all time? > > > > > > > > > > > > > Are you opposed to the series as a whole (AKA 'no tracepoints in > > > > drivers') or only to some tracepoints we add here? > > > > > > I'm opposed to adding tracepoints for things that are not needed as you > > > can get the same info already today without the tracepoint. > > > > I looked at this specific tracepoint, and I don't see how to get the > > information from the current tracing infrastructure. Maybe an eBPF > > program attached to a kprobe here might work. But the tracepoint data > > looks like this: > > > > + TP_STRUCT__entry( > > + __field(u32, child_relid) > > + __field(u32, openid) > > + __field(u32, gpadlhandle) > > + __field(u32, target_vp) > > + __field(u32, offset) > > + __field(int, ret) > > + ), > > + TP_fast_assign( > > + __entry->child_relid = msg->child_relid; > > + __entry->openid = msg->openid; > > + __entry->gpadlhandle = msg->ringbuffer_gpadlhandle; > > + __entry->target_vp = msg->target_vp; > > + __entry->offset = msg->downstream_ringbuffer_pageoffset; > > + __entry->ret = ret; > > + ), > > > > I don't see how that information can be extracted easily without a > > tracepoint here. Am I missing something? > > Wasn't one of the outcomes of the conference last week the fact that for > ftrace + ebpf we could get access to the structures of the function > parameters? Or that work would soon be showing up? > > It just feels "wrong" to add a tracepoint for a function call, like it > is a duplication of work/functionality we already have. > > thanks, > > greg k-h Just to add some context. VMBus open/close etc are not in critical path. The reason that tracing of these makes sense is that there have been bugs in the past with teardown and restart of channels, and having some information would help. 
Not sure if the detailed internals of the parameters matter that much since it has been clear what the parameters were by context. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[RFC] hv_netvsc: safer orderly shutdown
Several types of control operations require that the underlying RNDIS infrastructure be restarted. This patch changes the ordering of the shutdown to avoid race conditions. Stop all transmits before doing RNDIS halt. This involves stopping the network device transmit queues, then waiting for all outstanding sends before informing host to halt. Also, check for successful restart of the device when after the change is done. For review, not tested on Hyper-V yet. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc_drv.c | 40 ++- drivers/net/hyperv/rndis_filter.c | 23 +++--- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index da216ca4f2b2..3afa082e093d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -855,8 +855,10 @@ static int netvsc_set_channels(struct net_device *net, orig = nvdev->num_chn; was_opened = rndis_filter_opened(nvdev); - if (was_opened) + if (was_opened) { + netif_tx_disable(net); rndis_filter_close(nvdev); + } memset(&device_info, 0, sizeof(device_info)); device_info.num_chn = count; @@ -881,8 +883,13 @@ static int netvsc_set_channels(struct net_device *net, } } - if (was_opened) - rndis_filter_open(nvdev); + if (was_opened) { + ret = rndis_filter_open(nvdev); + if (ret) + netdev_err(net, "reopening device failed: %d\n", ret); + else + netif_tx_start_all_queues(net); + } /* We may have missed link change notifications */ net_device_ctx->last_reconfig = 0; @@ -971,8 +978,10 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) netif_device_detach(ndev); was_opened = rndis_filter_opened(nvdev); - if (was_opened) + if (was_opened) { + netif_tx_disable(net); rndis_filter_close(nvdev); + } memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; @@ -1004,8 +1013,13 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) } } - if (was_opened) - rndis_filter_open(nvdev); + if 
(was_opened) { + ret = rndis_filter_open(nvdev); + if (ret) + netdev_err(net, "reopening device failed: %d\n", ret); + else + netif_tx_start_all_queues(net); + } netif_device_attach(ndev); @@ -1547,8 +1561,10 @@ static int netvsc_set_ringparam(struct net_device *ndev, netif_device_detach(ndev); was_opened = rndis_filter_opened(nvdev); - if (was_opened) + if (was_opened) { + netif_tx_disable(net); rndis_filter_close(nvdev); + } rndis_filter_device_remove(hdev, nvdev); @@ -1566,8 +1582,14 @@ static int netvsc_set_ringparam(struct net_device *ndev, } } - if (was_opened) - rndis_filter_open(nvdev); + if (was_opened) { + ret = rndis_filter_open(nvdev); + if (ret) + netdev_err(net, "reopening device failed: %d\n", ret); + else + netif_tx_start_all_queues(net); + } + netif_device_attach(ndev); /* We may have missed link change notifications */ diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 0648eebda829..164f5ffe9c50 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -948,11 +948,20 @@ static void rndis_filter_halt_device(struct rndis_device *dev) struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev); + /* tell bottom half that deice is being closed */ + nvdev->destroy = true; + + /* Force flag to be ordered before waiting */ + wmb(); + + /* Wait for all send completions */ + wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev)); + /* Attempt to do a rndis device halt */ request = get_rndis_request(dev, RNDIS_MSG_HALT, RNDIS_MESSAGE_SIZE(struct rndis_halt_request)); if (!request) - goto cleanup; + return; /* Setup the rndis set */ halt = &request->request_msg.msg.halt_req; @@ -963,17 +972,7 @@ static void rndis_filter_halt_device(struct rndis_device *dev) dev->state = RNDIS_DEV_UNINITIALIZED; -cleanup: - nvdev->destroy = true; - - /* Force flag to be ordered before
Re: [PATCH] drivers: hv: balloon: remove extraneous assignment to region_start
On Wed, 18 Oct 2017 12:52:12 +0100 Colin King wrote: > From: Colin Ian King > > The variable region_start is assigned twice, the first value is > never read and redundant, so can be removed. We can clean up the > code further by assigning rg_start directly rather than using the > temporary variable region_start which can then be removed. Cleans > up the clang warning: > > drivers/hv/hv_balloon.c:976:3: warning: Value stored to 'region_start' > is never read > > Signed-off-by: Colin Ian King LGTM Acked-by: Stephen Hemminger ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 0/2] retire IPX and Netware file system
Netware has been dead for years. Time to deprecate IPX and the Novell file system. Stephen Hemminger (2): ipx: move Novell IPX protocol support into staging ncpfs: move fs/ncpfs to drivers/staging/ncpfs MAINTAINERS| 5 ++--- drivers/staging/Kconfig| 4 drivers/staging/Makefile | 2 ++ {net => drivers/staging}/ipx/Kconfig | 0 {net => drivers/staging}/ipx/Makefile | 0 {net => drivers/staging}/ipx/af_ipx.c | 0 {net => drivers/staging}/ipx/ipx_proc.c| 0 {net => drivers/staging}/ipx/ipx_route.c | 0 {net => drivers/staging}/ipx/pe2.c | 0 {net => drivers/staging}/ipx/sysctl_net_ipx.c | 0 {fs => drivers/staging}/ncpfs/Kconfig | 0 {fs => drivers/staging}/ncpfs/Makefile | 0 {fs => drivers/staging}/ncpfs/dir.c| 0 {fs => drivers/staging}/ncpfs/file.c | 0 {fs => drivers/staging}/ncpfs/getopt.c | 0 {fs => drivers/staging}/ncpfs/getopt.h | 0 {fs => drivers/staging}/ncpfs/inode.c | 0 {fs => drivers/staging}/ncpfs/ioctl.c | 0 {fs => drivers/staging}/ncpfs/mmap.c | 0 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_i.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.h | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0 {fs => drivers/staging}/ncpfs/sock.c | 0 {fs => drivers/staging}/ncpfs/symlink.c| 0 fs/Kconfig | 1 - fs/Makefile| 1 - net/Kconfig| 1 - net/Makefile | 1 - 32 files changed, 8 insertions(+), 7 deletions(-) rename {net => drivers/staging}/ipx/Kconfig (100%) rename {net => drivers/staging}/ipx/Makefile (100%) rename {net => drivers/staging}/ipx/af_ipx.c (100%) rename {net => drivers/staging}/ipx/ipx_proc.c (100%) rename {net => drivers/staging}/ipx/ipx_route.c (100%) rename {net => drivers/staging}/ipx/pe2.c (100%) rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%) rename {fs => drivers/staging}/ncpfs/Kconfig (100%) rename {fs => drivers/staging}/ncpfs/Makefile (100%) rename {fs =>
drivers/staging}/ncpfs/dir.c (100%) rename {fs => drivers/staging}/ncpfs/file.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.h (100%) rename {fs => drivers/staging}/ncpfs/inode.c (100%) rename {fs => drivers/staging}/ncpfs/ioctl.c (100%) rename {fs => drivers/staging}/ncpfs/mmap.c (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/sock.c (100%) rename {fs => drivers/staging}/ncpfs/symlink.c (100%) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 2/2] ncpfs: move fs/ncpfs to drivers/staging/ncpfs
The Netware Core Protocol is a file system that talks to Netware clients over IPX. Since IPX has been dead for many years move the file system into staging for eventual interment. Signed-off-by: Stephen Hemminger --- MAINTAINERS| 2 +- drivers/staging/Kconfig| 2 ++ drivers/staging/Makefile | 1 + {fs => drivers/staging}/ncpfs/Kconfig | 0 {fs => drivers/staging}/ncpfs/Makefile | 0 {fs => drivers/staging}/ncpfs/dir.c| 0 {fs => drivers/staging}/ncpfs/file.c | 0 {fs => drivers/staging}/ncpfs/getopt.c | 0 {fs => drivers/staging}/ncpfs/getopt.h | 0 {fs => drivers/staging}/ncpfs/inode.c | 0 {fs => drivers/staging}/ncpfs/ioctl.c | 0 {fs => drivers/staging}/ncpfs/mmap.c | 0 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_i.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.h | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0 {fs => drivers/staging}/ncpfs/sock.c | 0 {fs => drivers/staging}/ncpfs/symlink.c| 0 fs/Kconfig | 1 - fs/Makefile| 1 - 23 files changed, 4 insertions(+), 3 deletions(-) rename {fs => drivers/staging}/ncpfs/Kconfig (100%) rename {fs => drivers/staging}/ncpfs/Makefile (100%) rename {fs => drivers/staging}/ncpfs/dir.c (100%) rename {fs => drivers/staging}/ncpfs/file.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.h (100%) rename {fs => drivers/staging}/ncpfs/inode.c (100%) rename {fs => drivers/staging}/ncpfs/ioctl.c (100%) rename {fs => drivers/staging}/ncpfs/mmap.c (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%) rename {fs 
=> drivers/staging}/ncpfs/ncpsign_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/sock.c (100%) rename {fs => drivers/staging}/ncpfs/symlink.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 54c29ebcec55..5ed623ad5717 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9276,7 +9276,7 @@ F:drivers/net/ethernet/natsemi/natsemi.c NCP FILESYSTEM M: Petr Vandrovec S: Odd Fixes -F: fs/ncpfs/ +F: drivers/staging/ncpfs/ NCR 5380 SCSI DRIVERS M: Finn Thain diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 3744640bed5a..e95ab683331e 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -28,6 +28,8 @@ source "drivers/staging/irda/net/Kconfig" source "drivers/staging/ipx/Kconfig" +source "drivers/staging/ncpfs/Kconfig" + source "drivers/staging/wlan-ng/Kconfig" source "drivers/staging/comedi/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index f4c0506470ff..af8cd6a3a1f6 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -4,6 +4,7 @@ obj-y += media/ obj-y += typec/ obj-$(CONFIG_IPX) += ipx/ +obj-$(CONFIG_NCP_FS) += ncpfs/ obj-$(CONFIG_IRDA) += irda/net/ obj-$(CONFIG_IRDA) += irda/drivers/ obj-$(CONFIG_PRISM2_USB) += wlan-ng/ diff --git a/fs/ncpfs/Kconfig b/drivers/staging/ncpfs/Kconfig similarity index 100% rename from fs/ncpfs/Kconfig rename to drivers/staging/ncpfs/Kconfig diff --git a/fs/ncpfs/Makefile b/drivers/staging/ncpfs/Makefile similarity index 100% rename from fs/ncpfs/Makefile rename to drivers/staging/ncpfs/Makefile diff --git a/fs/ncpfs/dir.c b/drivers/staging/ncpfs/dir.c similarity index 100% rename from fs/ncpfs/dir.c rename to drivers/staging/ncpfs/dir.c diff --git a/fs/ncpfs/file.c b/drivers/staging/ncpfs/file.c similarity index 100% rename from fs/ncpfs/file.c rename to drivers/staging/ncpfs/file.c diff --git a/fs/ncpfs/getopt.c b/drivers/staging/ncpfs/getopt.c similarity index 100% rename from fs/ncpfs/getopt.c rename to drivers/staging/ncpfs/getopt.c diff --git 
a/fs/ncpfs/getopt.h b/drivers/staging/ncpfs/getopt.h similarity index 100% rename from fs/ncpfs/getopt.h rename to drivers/staging/ncpfs/getopt.h diff --git a/fs/ncpfs/inode.c b/drivers/staging/ncpfs/inode.c similarity index 100% rename from fs/ncpfs/inode.c rename to drivers/staging/ncpfs/inode.c diff --git a/fs/ncpfs/ioctl.c b/drivers/staging/ncpfs/ioctl.c similarity in
[PATCH net-next 1/2] ipx: move Novell IPX protocol support into staging
The Netware IPX protocol is very old and no one should still be using it. It is time to move it into staging for a while and eventually decommission it. Note: net/ipx.h has to be kept around because bonding still uses it to decode the IPX header for LAG. Signed-off-by: Stephen Hemminger --- MAINTAINERS | 3 +-- drivers/staging/Kconfig | 2 ++ drivers/staging/Makefile | 1 + {net => drivers/staging}/ipx/Kconfig | 0 {net => drivers/staging}/ipx/Makefile | 0 {net => drivers/staging}/ipx/af_ipx.c | 0 {net => drivers/staging}/ipx/ipx_proc.c | 0 {net => drivers/staging}/ipx/ipx_route.c | 0 {net => drivers/staging}/ipx/pe2.c| 0 {net => drivers/staging}/ipx/sysctl_net_ipx.c | 0 net/Kconfig | 1 - net/Makefile | 1 - 12 files changed, 4 insertions(+), 4 deletions(-) rename {net => drivers/staging}/ipx/Kconfig (100%) rename {net => drivers/staging}/ipx/Makefile (100%) rename {net => drivers/staging}/ipx/af_ipx.c (100%) rename {net => drivers/staging}/ipx/ipx_proc.c (100%) rename {net => drivers/staging}/ipx/ipx_route.c (100%) rename {net => drivers/staging}/ipx/pe2.c (100%) rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 29aa89a1837b..54c29ebcec55 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7206,9 +7206,8 @@ F:drivers/tty/ipwireless/ IPX NETWORK LAYER L: net...@vger.kernel.org S: Odd fixes -F: include/net/ipx.h F: include/uapi/linux/ipx.h -F: net/ipx/ +F: drivers/staging/ipx/ IRDA SUBSYSTEM M: Samuel Ortiz diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 554683912cff..3744640bed5a 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -26,6 +26,8 @@ if STAGING source "drivers/staging/irda/net/Kconfig" +source "drivers/staging/ipx/Kconfig" + source "drivers/staging/wlan-ng/Kconfig" source "drivers/staging/comedi/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 6e536020029a..f4c0506470ff 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@
-3,6 +3,7 @@ obj-y += media/ obj-y += typec/ +obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_IRDA) += irda/net/ obj-$(CONFIG_IRDA) += irda/drivers/ obj-$(CONFIG_PRISM2_USB) += wlan-ng/ diff --git a/net/ipx/Kconfig b/drivers/staging/ipx/Kconfig similarity index 100% rename from net/ipx/Kconfig rename to drivers/staging/ipx/Kconfig diff --git a/net/ipx/Makefile b/drivers/staging/ipx/Makefile similarity index 100% rename from net/ipx/Makefile rename to drivers/staging/ipx/Makefile diff --git a/net/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c similarity index 100% rename from net/ipx/af_ipx.c rename to drivers/staging/ipx/af_ipx.c diff --git a/net/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c similarity index 100% rename from net/ipx/ipx_proc.c rename to drivers/staging/ipx/ipx_proc.c diff --git a/net/ipx/ipx_route.c b/drivers/staging/ipx/ipx_route.c similarity index 100% rename from net/ipx/ipx_route.c rename to drivers/staging/ipx/ipx_route.c diff --git a/net/ipx/pe2.c b/drivers/staging/ipx/pe2.c similarity index 100% rename from net/ipx/pe2.c rename to drivers/staging/ipx/pe2.c diff --git a/net/ipx/sysctl_net_ipx.c b/drivers/staging/ipx/sysctl_net_ipx.c similarity index 100% rename from net/ipx/sysctl_net_ipx.c rename to drivers/staging/ipx/sysctl_net_ipx.c diff --git a/net/Kconfig b/net/Kconfig index 9dba2715919d..ff71ba0f6c27 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -212,7 +212,6 @@ source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" -source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" diff --git a/net/Makefile b/net/Makefile index 14fede520840..a6147c61b174 100644 --- a/net/Makefile +++ b/net/Makefile @@ -24,7 +24,6 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DSA) += dsa/ -obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK)+= appletalk/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB) += 
lapb/ -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next] decnet: move to staging
Support for Decnet has been orphaned for many years. In the interest of reducing the potential bug surface and pre-holiday cleaning, move the decnet protocol into staging for eventual removal. Signed-off-by: Stephen Hemminger --- MAINTAINERS | 2 +- drivers/staging/Kconfig | 5 + drivers/staging/Makefile | 1 + {net => drivers/staging}/decnet/Kconfig | 0 {net => drivers/staging}/decnet/Makefile | 0 {net => drivers/staging}/decnet/README | 0 {net => drivers/staging}/decnet/TODO | 0 {net => drivers/staging}/decnet/af_decnet.c | 0 {net => drivers/staging}/decnet/dn_dev.c | 0 {net => drivers/staging}/decnet/dn_fib.c | 0 {net => drivers/staging}/decnet/dn_neigh.c | 0 {net => drivers/staging}/decnet/dn_nsp_in.c | 0 {net => drivers/staging}/decnet/dn_nsp_out.c | 0 {net => drivers/staging}/decnet/dn_route.c | 0 {net => drivers/staging}/decnet/dn_rules.c | 0 {net => drivers/staging}/decnet/dn_table.c | 0 {net => drivers/staging}/decnet/dn_timer.c | 0 {net => drivers/staging}/decnet/netfilter/Kconfig| 0 {net => drivers/staging}/decnet/netfilter/Makefile | 0 {net => drivers/staging}/decnet/netfilter/dn_rtmsg.c | 0 {net => drivers/staging}/decnet/sysctl_net_decnet.c | 0 net/Kconfig | 2 -- net/Makefile | 1 - 23 files changed, 7 insertions(+), 4 deletions(-) rename {net => drivers/staging}/decnet/Kconfig (100%) rename {net => drivers/staging}/decnet/Makefile (100%) rename {net => drivers/staging}/decnet/README (100%) rename {net => drivers/staging}/decnet/TODO (100%) rename {net => drivers/staging}/decnet/af_decnet.c (100%) rename {net => drivers/staging}/decnet/dn_dev.c (100%) rename {net => drivers/staging}/decnet/dn_fib.c (100%) rename {net => drivers/staging}/decnet/dn_neigh.c (100%) rename {net => drivers/staging}/decnet/dn_nsp_in.c (100%) rename {net => drivers/staging}/decnet/dn_nsp_out.c (100%) rename {net => drivers/staging}/decnet/dn_route.c (100%) rename {net => drivers/staging}/decnet/dn_rules.c (100%) rename {net => drivers/staging}/decnet/dn_table.c (100%) rename {net 
=> drivers/staging}/decnet/dn_timer.c (100%) rename {net => drivers/staging}/decnet/netfilter/Kconfig (100%) rename {net => drivers/staging}/decnet/netfilter/Makefile (100%) rename {net => drivers/staging}/decnet/netfilter/dn_rtmsg.c (100%) rename {net => drivers/staging}/decnet/sysctl_net_decnet.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 29aa89a1837b..66e2d302d9eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3968,7 +3968,7 @@ W:http://linux-decnet.sourceforge.net L: linux-decnet-u...@lists.sourceforge.net S: Orphan F: Documentation/networking/decnet.txt -F: net/decnet/ +F: drivers/staging/decnet/ DECSTATION PLATFORM SUPPORT M: "Maciej W. Rozycki" diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 554683912cff..e30af73c3797 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -30,6 +30,11 @@ source "drivers/staging/wlan-ng/Kconfig" source "drivers/staging/comedi/Kconfig" +if NETFILTER +source "drivers/staging/decnet/netfilter/Kconfig" +endif +source "drivers/staging/decnet/Kconfig" + source "drivers/staging/olpc_dcon/Kconfig" source "drivers/staging/rtl8192u/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 6e536020029a..89655cc80a91 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_IRDA) += irda/net/ obj-$(CONFIG_IRDA) += irda/drivers/ obj-$(CONFIG_PRISM2_USB) += wlan-ng/ obj-$(CONFIG_COMEDI) += comedi/ +obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_FB_OLPC_DCON) += olpc_dcon/ obj-$(CONFIG_RTL8192U) += rtl8192u/ obj-$(CONFIG_RTL8192E) += rtl8192e/ diff --git a/net/decnet/Kconfig b/drivers/staging/decnet/Kconfig similarity index 100% rename from net/decnet/Kconfig rename to drivers/staging/decnet/Kconfig diff --git a/net/decnet/Makefile b/drivers/staging/decnet/Makefile similarity index 100% rename from net/decnet/Makefile rename to drivers/staging/decnet/Makefile diff --git a/net/decnet/README b/drivers/staging/decnet/README similarity index 
100% rename from net/decnet/README rename to drivers/staging/decnet/README diff --git a/net/decnet/TODO b/drivers/staging/decnet/TODO similarity index 100% rename from net/decnet/TODO rename to drivers/staging/decnet/TODO diff --git a/net/decnet/af_decnet.c b/drivers/staging/decnet/af_decnet.c similar
Re: [PATCH] vmbus: unregister device_obj->channels_kset
On Mon, 13 Nov 2017 01:53:33 + Dexuan Cui wrote: > Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info") > > Without the patch, a device can't be thoroughly destroyed, because > vmbus_device_register() -> kset_create_and_add() still holds a reference > to the hv_device's device.kobj. > > Signed-off-by: Dexuan Cui > Cc: Stephen Hemminger > Cc: K. Y. Srinivasan > --- Good catch Signed-off-by: Stephen Hemminger ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next v2] net: move decnet to staging
Support for Decnet has been orphaned for some time. In the interest of reducing the potential bug surface and pre-holiday cleaning, move the decnet protocol into staging for eventual removal. Signed-off-by: Stephen Hemminger --- Note original submission was hour or so before net-next closed. Not sure if you want to wait on this until after 4.15-rc1 v2 - update TODO and move include/net/dn.h to staging as well MAINTAINERS| 2 +- drivers/staging/Kconfig| 5 +++ drivers/staging/Makefile | 1 + {net => drivers/staging}/decnet/Kconfig| 0 {net => drivers/staging}/decnet/Makefile | 3 +- {net => drivers/staging}/decnet/README | 0 drivers/staging/decnet/TODO| 4 +++ {net => drivers/staging}/decnet/af_decnet.c| 0 {net => drivers/staging}/decnet/dn_dev.c | 0 {net => drivers/staging}/decnet/dn_fib.c | 0 {net => drivers/staging}/decnet/dn_neigh.c | 0 {net => drivers/staging}/decnet/dn_nsp_in.c| 0 {net => drivers/staging}/decnet/dn_nsp_out.c | 0 {net => drivers/staging}/decnet/dn_route.c | 0 {net => drivers/staging}/decnet/dn_rules.c | 0 {net => drivers/staging}/decnet/dn_table.c | 0 {net => drivers/staging}/decnet/dn_timer.c | 0 .../staging/decnet/include}/net/dn.h | 0 {net => drivers/staging}/decnet/netfilter/Kconfig | 0 {net => drivers/staging}/decnet/netfilter/Makefile | 3 +- .../staging}/decnet/netfilter/dn_rtmsg.c | 0 .../staging}/decnet/sysctl_net_decnet.c| 0 net/Kconfig| 2 -- net/Makefile | 1 - net/decnet/TODO| 41 -- 25 files changed, 15 insertions(+), 47 deletions(-) rename {net => drivers/staging}/decnet/Kconfig (100%) rename {net => drivers/staging}/decnet/Makefile (84%) rename {net => drivers/staging}/decnet/README (100%) create mode 100644 drivers/staging/decnet/TODO rename {net => drivers/staging}/decnet/af_decnet.c (100%) rename {net => drivers/staging}/decnet/dn_dev.c (100%) rename {net => drivers/staging}/decnet/dn_fib.c (100%) rename {net => drivers/staging}/decnet/dn_neigh.c (100%) rename {net => drivers/staging}/decnet/dn_nsp_in.c (100%) rename {net => 
drivers/staging}/decnet/dn_nsp_out.c (100%) rename {net => drivers/staging}/decnet/dn_route.c (100%) rename {net => drivers/staging}/decnet/dn_rules.c (100%) rename {net => drivers/staging}/decnet/dn_table.c (100%) rename {net => drivers/staging}/decnet/dn_timer.c (100%) rename {include => drivers/staging/decnet/include}/net/dn.h (100%) rename {net => drivers/staging}/decnet/netfilter/Kconfig (100%) rename {net => drivers/staging}/decnet/netfilter/Makefile (62%) rename {net => drivers/staging}/decnet/netfilter/dn_rtmsg.c (100%) rename {net => drivers/staging}/decnet/sysctl_net_decnet.c (100%) delete mode 100644 net/decnet/TODO diff --git a/MAINTAINERS b/MAINTAINERS index 29aa89a1837b..66e2d302d9eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3968,7 +3968,7 @@ W:http://linux-decnet.sourceforge.net L: linux-decnet-u...@lists.sourceforge.net S: Orphan F: Documentation/networking/decnet.txt -F: net/decnet/ +F: drivers/staging/decnet/ DECSTATION PLATFORM SUPPORT M: "Maciej W. Rozycki" diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 554683912cff..e30af73c3797 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -30,6 +30,11 @@ source "drivers/staging/wlan-ng/Kconfig" source "drivers/staging/comedi/Kconfig" +if NETFILTER +source "drivers/staging/decnet/netfilter/Kconfig" +endif +source "drivers/staging/decnet/Kconfig" + source "drivers/staging/olpc_dcon/Kconfig" source "drivers/staging/rtl8192u/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 6e536020029a..89655cc80a91 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_IRDA) += irda/net/ obj-$(CONFIG_IRDA) += irda/drivers/ obj-$(CONFIG_PRISM2_USB) += wlan-ng/ obj-$(CONFIG_COMEDI) += comedi/ +obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_FB_OLPC_DCON) += olpc_dcon/ obj-$(CONFIG_RTL8192U) += rtl8192u/ obj-$(CONFIG_RTL8192E) += rtl8192e/ diff --git a/net/decnet/Kconfig b/drivers/staging/decnet/Kconfig 
similarity index 100% rename from net/decnet/Kconfig rename to drivers/staging/decnet/Kconfig diff --git a/net/decnet/Makefile b/drivers/staging/decnet/Makefile similarity index 84% rename from net/decnet/Makefile rename to drivers/staging/de
Re: [RFC] hv_netvsc: safer orderly shutdown
On Mon, 13 Nov 2017 11:57:47 +0100 Vitaly Kuznetsov wrote: > Stephen Hemminger writes: > > > > > The NAPI disable is already handled by rndis close. > > Sorry, but I'm probably missing something: I can only see > netif_napi_del() call in netvsc_device_remove() but this happens much > later. And I don't see us doing napi_disable() anywhere on the path. > But I'm probably missing something. > You need to keep NAPI running to handle transmit completions. Disabling the Tx and Rx filter should keep spurious activity away until the halt is done. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH net] hv_netvsc: preserve hw_features on mtu/channels/ringparam changes
On Tue, 14 Nov 2017 16:22:05 +0100 Vitaly Kuznetsov wrote: Yes, this looks like a real issue. > + /* Query hardware capabilities if we're called from netvsc_probe() */ > + if (!net->hw_features) { > + ret = rndis_netdev_set_hwcaps(net_device, rndis_device); > + if (ret != 0) > + goto err_dev_remv; > + } > + Rather than conditional behavior in rndis_filter_device_add, it would be cleaner to make the call to get hardware capabilities there. Please respin and make the query of host a separate function. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next v2 0/2] retire IPX and NCPFS
These are both old decrepit protocols that need to be sent to pasture. Stephen Hemminger (2): ipx: move Novell IPX protocol support into staging ncpfs: move net/ncpfs to drivers/staging/ncpfs MAINTAINERS| 5 ++--- drivers/staging/Kconfig| 4 drivers/staging/Makefile | 2 ++ {net => drivers/staging}/ipx/Kconfig | 0 {net => drivers/staging}/ipx/Makefile | 0 drivers/staging/ipx/TODO | 4 {net => drivers/staging}/ipx/af_ipx.c | 0 {net => drivers/staging}/ipx/ipx_proc.c| 0 {net => drivers/staging}/ipx/ipx_route.c | 0 {net => drivers/staging}/ipx/pe2.c | 0 {net => drivers/staging}/ipx/sysctl_net_ipx.c | 0 {fs => drivers/staging}/ncpfs/Kconfig | 0 {fs => drivers/staging}/ncpfs/Makefile | 0 drivers/staging/ncpfs/TODO | 4 {fs => drivers/staging}/ncpfs/dir.c| 0 {fs => drivers/staging}/ncpfs/file.c | 0 {fs => drivers/staging}/ncpfs/getopt.c | 0 {fs => drivers/staging}/ncpfs/getopt.h | 0 {fs => drivers/staging}/ncpfs/inode.c | 0 {fs => drivers/staging}/ncpfs/ioctl.c | 0 {fs => drivers/staging}/ncpfs/mmap.c | 0 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_i.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.h | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0 {fs => drivers/staging}/ncpfs/sock.c | 0 {fs => drivers/staging}/ncpfs/symlink.c| 0 fs/Kconfig | 1 - fs/Makefile| 1 - net/Kconfig| 1 - net/Makefile | 1 - 34 files changed, 16 insertions(+), 7 deletions(-) rename {net => drivers/staging}/ipx/Kconfig (100%) rename {net => drivers/staging}/ipx/Makefile (100%) create mode 100644 drivers/staging/ipx/TODO rename {net => drivers/staging}/ipx/af_ipx.c (100%) rename {net => drivers/staging}/ipx/ipx_proc.c (100%) rename {net => drivers/staging}/ipx/ipx_route.c (100%) rename {net => drivers/staging}/ipx/pe2.c (100%) rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%) rename {fs => 
drivers/staging}/ncpfs/Kconfig (100%) rename {fs => drivers/staging}/ncpfs/Makefile (100%) create mode 100644 drivers/staging/ncpfs/TODO rename {fs => drivers/staging}/ncpfs/dir.c (100%) rename {fs => drivers/staging}/ncpfs/file.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.h (100%) rename {fs => drivers/staging}/ncpfs/inode.c (100%) rename {fs => drivers/staging}/ncpfs/ioctl.c (100%) rename {fs => drivers/staging}/ncpfs/mmap.c (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/sock.c (100%) rename {fs => drivers/staging}/ncpfs/symlink.c (100%) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next v2 1/2] ipx: move Novell IPX protocol support into staging
The Netware IPX protocol is very old and no one should still be using it. It is time to move it into staging for a while and eventually decommission it. Signed-off-by: Stephen Hemminger --- v2 add a TODO (don't touch me) file MAINTAINERS | 3 +-- drivers/staging/Kconfig | 2 ++ drivers/staging/Makefile | 1 + {net => drivers/staging}/ipx/Kconfig | 0 {net => drivers/staging}/ipx/Makefile | 0 drivers/staging/ipx/TODO | 4 {net => drivers/staging}/ipx/af_ipx.c | 0 {net => drivers/staging}/ipx/ipx_proc.c | 0 {net => drivers/staging}/ipx/ipx_route.c | 0 {net => drivers/staging}/ipx/pe2.c| 0 {net => drivers/staging}/ipx/sysctl_net_ipx.c | 0 net/Kconfig | 1 - net/Makefile | 1 - 13 files changed, 8 insertions(+), 4 deletions(-) rename {net => drivers/staging}/ipx/Kconfig (100%) rename {net => drivers/staging}/ipx/Makefile (100%) create mode 100644 drivers/staging/ipx/TODO rename {net => drivers/staging}/ipx/af_ipx.c (100%) rename {net => drivers/staging}/ipx/ipx_proc.c (100%) rename {net => drivers/staging}/ipx/ipx_route.c (100%) rename {net => drivers/staging}/ipx/pe2.c (100%) rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 29aa89a1837b..54c29ebcec55 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7206,9 +7206,8 @@ F:drivers/tty/ipwireless/ IPX NETWORK LAYER L: net...@vger.kernel.org S: Odd fixes -F: include/net/ipx.h F: include/uapi/linux/ipx.h -F: net/ipx/ +F: drivers/staging/ipx/ IRDA SUBSYSTEM M: Samuel Ortiz diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 554683912cff..3744640bed5a 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -26,6 +26,8 @@ if STAGING source "drivers/staging/irda/net/Kconfig" +source "drivers/staging/ipx/Kconfig" + source "drivers/staging/wlan-ng/Kconfig" source "drivers/staging/comedi/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 6e536020029a..f4c0506470ff 100644 --- a/drivers/staging/Makefile +++ 
b/drivers/staging/Makefile @@ -3,6 +3,7 @@ obj-y += media/ obj-y += typec/ +obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_IRDA) += irda/net/ obj-$(CONFIG_IRDA) += irda/drivers/ obj-$(CONFIG_PRISM2_USB) += wlan-ng/ diff --git a/net/ipx/Kconfig b/drivers/staging/ipx/Kconfig similarity index 100% rename from net/ipx/Kconfig rename to drivers/staging/ipx/Kconfig diff --git a/net/ipx/Makefile b/drivers/staging/ipx/Makefile similarity index 100% rename from net/ipx/Makefile rename to drivers/staging/ipx/Makefile diff --git a/drivers/staging/ipx/TODO b/drivers/staging/ipx/TODO new file mode 100644 index ..80db5d968264 --- /dev/null +++ b/drivers/staging/ipx/TODO @@ -0,0 +1,4 @@ +The ipx code will be removed soon from the kernel tree as it is old and +obsolete and broken. + +Don't worry about fixing up anything here, it's not needed. diff --git a/net/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c similarity index 100% rename from net/ipx/af_ipx.c rename to drivers/staging/ipx/af_ipx.c diff --git a/net/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c similarity index 100% rename from net/ipx/ipx_proc.c rename to drivers/staging/ipx/ipx_proc.c diff --git a/net/ipx/ipx_route.c b/drivers/staging/ipx/ipx_route.c similarity index 100% rename from net/ipx/ipx_route.c rename to drivers/staging/ipx/ipx_route.c diff --git a/net/ipx/pe2.c b/drivers/staging/ipx/pe2.c similarity index 100% rename from net/ipx/pe2.c rename to drivers/staging/ipx/pe2.c diff --git a/net/ipx/sysctl_net_ipx.c b/drivers/staging/ipx/sysctl_net_ipx.c similarity index 100% rename from net/ipx/sysctl_net_ipx.c rename to drivers/staging/ipx/sysctl_net_ipx.c diff --git a/net/Kconfig b/net/Kconfig index 9dba2715919d..ff71ba0f6c27 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -212,7 +212,6 @@ source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" -source "net/ipx/Kconfig" source "drivers/net/appletalk/Kconfig" source "net/x25/Kconfig" source "net/lapb/Kconfig" diff --git 
a/net/Makefile b/net/Makefile index 14fede520840..a6147c61b174 100644 --- a/net/Makefile +++ b/net/Makefile @@ -24,7 +24,6 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_NET_DSA) += dsa/ -obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK)+= appletalk/ obj-$(CONFIG_X25) += x25/ obj-$(CONFIG_LAPB)
[PATCH net-next v2 2/2] ncpfs: move fs/ncpfs to drivers/staging/ncpfs
The Netware Core Protocol is a file system that talks to Netware clients over IPX. Since IPX has been dead for many years move the file system into staging for eventual interment. Signed-off-by: Stephen Hemminger --- v2 add a TODO (don't touch me) file MAINTAINERS| 2 +- drivers/staging/Kconfig| 2 ++ drivers/staging/Makefile | 1 + {fs => drivers/staging}/ncpfs/Kconfig | 0 {fs => drivers/staging}/ncpfs/Makefile | 0 drivers/staging/ncpfs/TODO | 4 {fs => drivers/staging}/ncpfs/dir.c| 0 {fs => drivers/staging}/ncpfs/file.c | 0 {fs => drivers/staging}/ncpfs/getopt.c | 0 {fs => drivers/staging}/ncpfs/getopt.h | 0 {fs => drivers/staging}/ncpfs/inode.c | 0 {fs => drivers/staging}/ncpfs/ioctl.c | 0 {fs => drivers/staging}/ncpfs/mmap.c | 0 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_i.h | 0 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncplib_kernel.h | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0 {fs => drivers/staging}/ncpfs/sock.c | 0 {fs => drivers/staging}/ncpfs/symlink.c| 0 fs/Kconfig | 1 - fs/Makefile| 1 - 24 files changed, 8 insertions(+), 3 deletions(-) rename {fs => drivers/staging}/ncpfs/Kconfig (100%) rename {fs => drivers/staging}/ncpfs/Makefile (100%) create mode 100644 drivers/staging/ncpfs/TODO rename {fs => drivers/staging}/ncpfs/dir.c (100%) rename {fs => drivers/staging}/ncpfs/file.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.c (100%) rename {fs => drivers/staging}/ncpfs/getopt.h (100%) rename {fs => drivers/staging}/ncpfs/inode.c (100%) rename {fs => drivers/staging}/ncpfs/ioctl.c (100%) rename {fs => drivers/staging}/ncpfs/mmap.c (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%) rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%) rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%) rename {fs => 
drivers/staging}/ncpfs/ncplib_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%) rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%) rename {fs => drivers/staging}/ncpfs/sock.c (100%) rename {fs => drivers/staging}/ncpfs/symlink.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 54c29ebcec55..5ed623ad5717 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9276,7 +9276,7 @@ F:drivers/net/ethernet/natsemi/natsemi.c NCP FILESYSTEM M: Petr Vandrovec S: Odd Fixes -F: fs/ncpfs/ +F: drivers/staging/ncpfs/ NCR 5380 SCSI DRIVERS M: Finn Thain diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 3744640bed5a..e95ab683331e 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -28,6 +28,8 @@ source "drivers/staging/irda/net/Kconfig" source "drivers/staging/ipx/Kconfig" +source "drivers/staging/ncpfs/Kconfig" + source "drivers/staging/wlan-ng/Kconfig" source "drivers/staging/comedi/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index f4c0506470ff..af8cd6a3a1f6 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -4,6 +4,7 @@ obj-y += media/ obj-y += typec/ obj-$(CONFIG_IPX) += ipx/ +obj-$(CONFIG_NCP_FS) += ncpfs/ obj-$(CONFIG_IRDA) += irda/net/ obj-$(CONFIG_IRDA) += irda/drivers/ obj-$(CONFIG_PRISM2_USB) += wlan-ng/ diff --git a/fs/ncpfs/Kconfig b/drivers/staging/ncpfs/Kconfig similarity index 100% rename from fs/ncpfs/Kconfig rename to drivers/staging/ncpfs/Kconfig diff --git a/fs/ncpfs/Makefile b/drivers/staging/ncpfs/Makefile similarity index 100% rename from fs/ncpfs/Makefile rename to drivers/staging/ncpfs/Makefile diff --git a/drivers/staging/ncpfs/TODO b/drivers/staging/ncpfs/TODO new file mode 100644 index ..9b6d38b7e248 --- /dev/null +++ b/drivers/staging/ncpfs/TODO @@ -0,0 +1,4 @@ +The ncpfs code will be removed soon from the kernel tree as it is old and +obsolete and broken. + +Don't worry about fixing up anything here, it's not needed. 
diff --git a/fs/ncpfs/dir.c b/drivers/staging/ncpfs/dir.c similarity index 100% rename from fs/ncpfs/dir.c rename to drivers/staging/ncpfs/dir.c diff --git a/fs/ncpfs/file.c b/drivers/staging/ncpfs/file.c similarity index 100% rename from fs/ncpfs/file.c rename to drivers/staging/ncpfs/file.c diff --git a/fs/ncpfs/getopt.c b/drive
Re: [PATCH net-next v2 0/2] retire IPX and NCPFS
On Wed, 15 Nov 2017 09:58:33 +0900 (KST) David Miller wrote: > From: Stephen Hemminger > Date: Tue, 14 Nov 2017 08:37:13 -0800 > > > These are both old decrepit protocols that need to be sent > > to pasture. > > These need to go to gregkh and his staging/ tree, not net-next. Ok, I just wanted to get acceptance from netdev for the move. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
RE: [PATCH 2/2] vmbus: make hv_get_ringbuffer_availbytes local
This patch requires a patch that is still going through net-next. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH 1/2] vmbus: unregister device_obj->channels_kset
On Tue, 28 Nov 2017 16:56:05 +0100 Greg KH wrote: > On Tue, Nov 14, 2017 at 06:53:32AM -0700, k...@exchange.microsoft.com wrote: > > From: Dexuan Cui > > > > Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info") > > > > Without the patch, a device can't be thoroughly destroyed, because > > vmbus_device_register() -> kset_create_and_add() still holds a reference > > to the hv_device's device.kobj. > > > > Signed-off-by: Dexuan Cui > > Cc: Stephen Hemminger > > Cc: sta...@vger.kernel.org > > Why is this marked for stable when the patch it "fixes" is in 4.15-rc1? It doesn't need to go to stable. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH RFC 2/6] x86/hyper-v: add a function to read both TSC and TSC page value simultaneously
On Fri, 1 Dec 2017 14:13:17 +0100 Vitaly Kuznetsov wrote: > + > +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page > *tsc_pg, > +u64 *cur_tsc) > +{ > + *cur_tsc = rdtsc(); > + > + return cur_tsc; Why both return the value and set it by reference? Looks like an ugly API. ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 0/6] hv_netvsc: minor optimizations
This is a set of local optimizations to the Hyper-V networking driver. A vmbus patch is also included in this set, because it depends on the netvsc change that removes the last use of that function. Stephen Hemminger (6): hv_netvsc: drop unused macros hv_netvsc: don't need local xmit_more hv_netvsc: replace divide with mask when computing padding hv_netvsc: use reciprocal divide to speed up percent calculation hv_netvsc: optimize initialization of RNDIS header vmbus: make hv_get_ringbuffer_availbytes local drivers/hv/ring_buffer.c | 23 drivers/net/hyperv/hyperv_net.h | 32 ++--- drivers/net/hyperv/netvsc.c | 26 ++ drivers/net/hyperv/netvsc_drv.c | 74 ++- drivers/net/hyperv/rndis_filter.c | 4 +-- include/linux/hyperv.h| 22 6 files changed, 73 insertions(+), 108 deletions(-) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 3/6] hv_netvsc: replace divide with mask when computing padding
Packet alignment is always a power of 2, so the modulus can be replaced with a faster bitwise AND operation. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index ee31faa67cad..4b931f017a25 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -712,11 +712,12 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, int i; u32 msg_size = 0; u32 padding = 0; - u32 remain = packet->total_data_buflen % net_device->pkt_align; u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : packet->page_buf_cnt; + u32 remain; /* Add padding */ + remain = packet->total_data_buflen & (net_device->pkt_align - 1); if (skb->xmit_more && remain && !packet->cp_partial) { padding = net_device->pkt_align - remain; rndis_msg->msg_len += padding; -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 1/6] hv_netvsc: drop unused macros
Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 26 -- 1 file changed, 26 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 88ddfb92122b..7226230561de 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1425,32 +1425,6 @@ struct rndis_message { (sizeof(msg) + (sizeof(struct rndis_message) - \ sizeof(union rndis_message_container))) -/* get pointer to info buffer with message pointer */ -#define MESSAGE_TO_INFO_BUFFER(msg)\ - (((unsigned char *)(msg)) + msg->info_buf_offset) - -/* get pointer to status buffer with message pointer */ -#define MESSAGE_TO_STATUS_BUFFER(msg) \ - (((unsigned char *)(msg)) + msg->status_buf_offset) - -/* get pointer to OOBD buffer with message pointer */ -#define MESSAGE_TO_OOBD_BUFFER(msg)\ - (((unsigned char *)(msg)) + msg->oob_data_offset) - -/* get pointer to data buffer with message pointer */ -#define MESSAGE_TO_DATA_BUFFER(msg)\ - (((unsigned char *)(msg)) + msg->per_pkt_info_offset) - -/* get pointer to contained message from NDIS_MESSAGE pointer */ -#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg)\ - ((void *) &rndis_msg->msg) - -/* get pointer to contained message from NDIS_MESSAGE pointer */ -#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg)\ - ((void *) rndis_msg) - - - #define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \ sizeof(union rndis_message_container)) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 4/6] hv_netvsc: use reciprocal divide to speed up percent calculation
Every packet sent checks the available ring space. The calculation can be sped up by using reciprocal divide which is multiplication. Since ring_size can only be configured by module parameter, so it doesn't have to be passed around everywhere. Also it should be unsigned since it is number of pages. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 6 +++--- drivers/net/hyperv/netvsc.c | 20 +++- drivers/net/hyperv/netvsc_drv.c | 17 + drivers/net/hyperv/rndis_filter.c | 4 ++-- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 7226230561de..3d940c67ea94 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -146,7 +146,6 @@ struct hv_netvsc_packet { struct netvsc_device_info { unsigned char mac_adr[ETH_ALEN]; - int ring_size; u32 num_chn; u32 send_sections; u32 recv_sections; @@ -188,6 +187,9 @@ struct rndis_message; struct netvsc_device; struct net_device_context; +extern u32 netvsc_ring_bytes; +extern struct reciprocal_value netvsc_ring_reciprocal; + struct netvsc_device *netvsc_device_add(struct hv_device *device, const struct netvsc_device_info *info); int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx); @@ -804,8 +806,6 @@ struct netvsc_device { struct rndis_device *extension; - int ring_size; - u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4b931f017a25..e4bcd202a56a 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -588,14 +589,11 @@ void netvsc_device_remove(struct hv_device *device) * Get the percentage of available bytes to write in the ring. * The return value is in range from 0 to 100. 
*/ -static inline u32 hv_ringbuf_avail_percent( - struct hv_ring_buffer_info *ring_info) +static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info *ring_info) { - u32 avail_read, avail_write; + u32 avail_write = hv_get_bytes_to_write(ring_info); - hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write); - - return avail_write * 100 / ring_info->ring_datasize; + return reciprocal_divide(avail_write * 100, netvsc_ring_reciprocal); } static inline void netvsc_free_send_slot(struct netvsc_device *net_device, @@ -1249,7 +1247,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, const struct netvsc_device_info *device_info) { int i, ret = 0; - int ring_size = device_info->ring_size; struct netvsc_device *net_device; struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); @@ -1261,8 +1258,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) net_device_ctx->tx_table[i] = 0; - net_device->ring_size = ring_size; - /* Because the device uses NAPI, all the interrupt batching and * control is done via Net softirq, not the channel handling */ @@ -1289,10 +1284,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netvsc_poll, NAPI_POLL_WEIGHT); /* Open the channel */ - ret = vmbus_open(device->channel, ring_size * PAGE_SIZE, -ring_size * PAGE_SIZE, NULL, 0, -netvsc_channel_cb, -net_device->chan_table); + ret = vmbus_open(device->channel, netvsc_ring_bytes, +netvsc_ring_bytes, NULL, 0, +netvsc_channel_cb, net_device->chan_table); if (ret != 0) { netif_napi_del(&net_device->chan_table[0].napi); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5129647d420c..ba690e1737ab 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -54,9 +55,11 @@ #define LINKCHANGE_INT (2 * HZ) 
#define VF_TAKEOVER_INT (HZ / 10) -static int ring_size = 128; -module_param(ring_size, int, S_IRUGO); +static unsigned int ring_size __ro_after_init = 128; +module_param(ring_size, uint, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); +unsigned int netvsc_ring_bytes __ro_after_init; +struct reciprocal_value netvsc_ring_reciprocal __ro_after_init; static const u32 default_msg = NETIF_MSG_DRV | NETIF_M
[PATCH net-next 6/6] vmbus: make hv_get_ringbuffer_availbytes local
The last use of hv_get_ringbuffer_availbytes in drivers is now gone. Only used by the debug info routine so make it static. Also, add READ_ONCE() to avoid any possible issues with potentially volatile index values. Signed-off-by: Stephen Hemminger --- drivers/hv/ring_buffer.c | 23 +++ include/linux/hyperv.h | 22 -- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 12eb8caa4263..50e071444a5c 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -140,6 +140,29 @@ static u32 hv_copyto_ringbuffer( return start_write_offset; } +/* + * + * hv_get_ringbuffer_availbytes() + * + * Get number of bytes available to read and to write to + * for the specified ring buffer + */ +static void +hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, +u32 *read, u32 *write) +{ + u32 read_loc, write_loc, dsize; + + /* Capture the read/write indices before they changed */ + read_loc = READ_ONCE(rbi->ring_buffer->read_index); + write_loc = READ_ONCE(rbi->ring_buffer->write_index); + dsize = rbi->ring_datasize; + + *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : + read_loc - write_loc; + *read = dsize - *write; +} + /* Get various debug metrics for the specified ring buffer. 
*/ void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, struct hv_ring_buffer_debug_info *debug_info) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f3e97c5f94c9..5f8bd0cebddf 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -127,28 +127,6 @@ struct hv_ring_buffer_info { u32 priv_read_index; }; -/* - * - * hv_get_ringbuffer_availbytes() - * - * Get number of bytes available to read and to write to - * for the specified ring buffer - */ -static inline void -hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, -u32 *read, u32 *write) -{ - u32 read_loc, write_loc, dsize; - - /* Capture the read/write indices before they changed */ - read_loc = rbi->ring_buffer->read_index; - write_loc = rbi->ring_buffer->write_index; - dsize = rbi->ring_datasize; - - *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) : - read_loc - write_loc; - *read = dsize - *write; -} static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi) { -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 2/6] hv_netvsc: don't need local xmit_more
Since skb is always non-NULL in the copy portion of netvsc_send, the local variable is not needed. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index bfc79698b8f4..ee31faa67cad 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -848,7 +848,6 @@ int netvsc_send(struct net_device_context *ndev_ctx, struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL; struct sk_buff *msd_skb = NULL; bool try_batch; - bool xmit_more = (skb != NULL) ? skb->xmit_more : false; /* If device is rescinded, return error and packet will get dropped. */ if (unlikely(!net_device || net_device->destroy)) @@ -922,7 +921,7 @@ int netvsc_send(struct net_device_context *ndev_ctx, if (msdp->skb) dev_consume_skb_any(msdp->skb); - if (xmit_more && !packet->cp_partial) { + if (skb->xmit_more && !packet->cp_partial) { msdp->skb = skb; msdp->pkt = packet; msdp->count++; -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 5/6] hv_netvsc: optimize initialization of RNDIS header
The memset of the whole maximum possible RNDIS header is unnecessary. For the main part of the header use a structure assignment. No need to memset the whole per packet info. Instead rely on caller to set what it wants. Also get rid of cast to void and signed/unsigned conversion. Now return pointer to per packet data (rather than the header) which simplifies use by code setting up the packet data. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc_drv.c | 57 +++-- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index ba690e1737ab..dc70de674ca9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -177,17 +177,15 @@ static int netvsc_close(struct net_device *net) return ret; } -static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, - int pkt_type) +static inline void *init_ppi_data(struct rndis_message *msg, + u32 ppi_size, u32 pkt_type) { - struct rndis_packet *rndis_pkt; + struct rndis_packet *rndis_pkt = &msg->msg.pkt; struct rndis_per_packet_info *ppi; - rndis_pkt = &msg->msg.pkt; rndis_pkt->data_offset += ppi_size; - - ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt + - rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len); + ppi = (void *)rndis_pkt + rndis_pkt->per_pkt_info_offset + + rndis_pkt->per_pkt_info_len; ppi->size = ppi_size; ppi->type = pkt_type; @@ -195,7 +193,7 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size, rndis_pkt->per_pkt_info_len += ppi_size; - return ppi; + return ppi + 1; } /* Azure hosts don't support non-TCP port numbers in hashing for fragmented @@ -472,10 +470,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) int ret; unsigned int num_data_pgs; struct rndis_message *rndis_msg; - struct rndis_packet *rndis_pkt; struct net_device *vf_netdev; u32 rndis_msg_size; - struct rndis_per_packet_info *ppi; u32 hash; struct hv_page_buffer 
pb[MAX_PAGE_BUFFER_COUNT]; @@ -530,34 +526,36 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) rndis_msg = (struct rndis_message *)skb->head; - memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); - /* Add the rndis header */ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; - rndis_pkt = &rndis_msg->msg.pkt; - rndis_pkt->data_offset = sizeof(struct rndis_packet); - rndis_pkt->data_len = packet->total_data_buflen; - rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet); + + rndis_msg->msg.pkt = (struct rndis_packet) { + .data_offset = sizeof(struct rndis_packet), + .data_len = packet->total_data_buflen, + .per_pkt_info_offset = sizeof(struct rndis_packet), + }; rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet); hash = skb_get_hash_raw(skb); if (hash != 0 && net->real_num_tx_queues > 1) { + u32 *hash_info; + rndis_msg_size += NDIS_HASH_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, - NBL_HASH_VALUE); - *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; + hash_info = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE, + NBL_HASH_VALUE); + *hash_info = hash; } if (skb_vlan_tag_present(skb)) { struct ndis_pkt_8021q_info *vlan; rndis_msg_size += NDIS_VLAN_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, - IEEE_8021Q_INFO); + vlan = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE, +IEEE_8021Q_INFO); - vlan = (void *)ppi + ppi->ppi_offset; + vlan->value = 0; vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK; vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; @@ -567,11 +565,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, - TCP_LARGESEND_PKTINFO); - - lso_info = (void *)ppi + ppi->ppi_offset; + lso_info = init_ppi_data(
[PATCH net 2/3] hv_netvsc: Limit the receive buffer size for legacy hosts
From: Haiyang Zhang Legacy hosts only allow 15 MB receive buffer, and we know the NVSP version only after negotiation. So, we put the limit in netvsc_init_buf(). Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size") Signed-off-by: Haiyang Zhang Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e4bcd202a56a..e5d16a8cf0d6 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -268,6 +268,11 @@ static int netvsc_init_buf(struct hv_device *device, buf_size = device_info->recv_sections * device_info->recv_section_size; buf_size = roundup(buf_size, PAGE_SIZE); + /* Legacy hosts only allow smaller receive buffer */ + if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + buf_size = min_t(unsigned int, buf_size, +NETVSC_RECEIVE_BUFFER_SIZE_LEGACY); + net_device->recv_buf = vzalloc(buf_size); if (!net_device->recv_buf) { netdev_err(ndev, -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net 3/3] hv_netvsc: Fix the default receive buffer size
From: Haiyang Zhang The intended size is 16 MB, and the default slot size is 1728. So, NETVSC_DEFAULT_RX should be 16*1024*1024 / 1728 = 9709. Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size") Signed-off-by: Haiyang Zhang Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index dc70de674ca9..edfcde5d3621 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -50,7 +50,7 @@ #define NETVSC_MIN_TX_SECTIONS 10 #define NETVSC_DEFAULT_TX 192 /* ~1M */ #define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ -#define NETVSC_DEFAULT_RX 10485 /* Max ~16M */ +#define NETVSC_DEFAULT_RX 9709/* ~16M */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net 0/3] netvsc fix buffer size issues
The changes to allow setting buffer size can cause issues on older versions of Windows Server which have smaller limits. And the actual maximum value for WS2016 is 31M not 16M. This is a resend of patchset that didn't make it to netdev correctly. Haiyang Zhang (3): hv_netvsc: Correct the max receive buffer size hv_netvsc: Limit the receive buffer size for legacy hosts hv_netvsc: Fix the default receive buffer size drivers/net/hyperv/hyperv_net.h | 6 -- drivers/net/hyperv/netvsc.c | 5 + drivers/net/hyperv/netvsc_drv.c | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net 1/3] hv_netvsc: Correct the max receive buffer size
From: Haiyang Zhang It should be 31 MB on recent host versions. Signed-off-by: Haiyang Zhang Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 3d940c67ea94..373455f216ce 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -637,9 +637,11 @@ struct nvsp_message { #define NETVSC_MTU 65535 #define NETVSC_MTU_MIN ETH_MIN_MTU -#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ -#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ +/* Max buffer sizes allowed by a host */ +#define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */ +#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024 * 1024 * 15) /* 15MB */ #define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15) /* 15MB */ + #define NETVSC_INVALID_INDEX -1 #define NETVSC_SEND_SECTION_SIZE 6144 -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
Re: [PATCH] hyperv: make HYPERV a menuconfig to ease disabling it all
On Sat, 9 Dec 2017 16:21:51 +0100 Vincent Legoll wrote: > No need to get into the submenu to disable all HYPERV-related > config entries. > > This makes it easier to disable all HYPERV config options > without entering the submenu. It will also enable one > to see that en/dis-abled state from the outside menu. > > This is only intended to change menuconfig UI, not change > the config dependencies. > > Signed-off-by: Vincent Legoll > --- > drivers/hv/Kconfig | 7 +-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig > index 50b89ea0e60f..5804081d936d 100644 > --- a/drivers/hv/Kconfig > +++ b/drivers/hv/Kconfig > @@ -1,4 +1,7 @@ > -menu "Microsoft Hyper-V guest support" > +menuconfig HYPERV_MENU > + bool "Microsoft Hyper-V guest support" > + > +if HYPERV_MENU > > config HYPERV > tristate "Microsoft Hyper-V client drivers" > @@ -23,4 +26,4 @@ config HYPERV_BALLOON > help > Select this option to enable Hyper-V Balloon driver. > > -endmenu > +endif # HYPERV_MENU Will this break existing configs? ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 0/2] hv_netvsc: Fix default and limit of recv buffer
The default for receive buffer descriptors is not correct; it should match the default receive buffer size. The upper limit of the receive buffer size is also too low. In addition, for hosts running older versions of Windows Server, a different, lower limit check is necessary; otherwise the buffer request will be rejected by the host, resulting in the vNIC not coming up. This patch set corrects these problems. Haiyang Zhang (2): hv_netvsc: Fix the receive buffer size limit hv_netvsc: Fix the TX/RX buffer default sizes drivers/net/hyperv/hyperv_net.h | 19 --- drivers/net/hyperv/netvsc.c | 5 + drivers/net/hyperv/netvsc_drv.c | 4 3 files changed, 21 insertions(+), 7 deletions(-) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 1/2] hv_netvsc: Fix the receive buffer size limit
From: Haiyang Zhang The max should be 31 MB on host with NVSP version > 2. On legacy hosts (NVSP version <=2) only 15 MB receive buffer is allowed, otherwise the buffer request will be rejected by the host, resulting vNIC not coming up. The NVSP version is only available after negotiation. So, we add the limit checking for legacy hosts in netvsc_init_buf(). Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size") Signed-off-by: Haiyang Zhang Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 6 -- drivers/net/hyperv/netvsc.c | 5 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 3d940c67ea94..373455f216ce 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -637,9 +637,11 @@ struct nvsp_message { #define NETVSC_MTU 65535 #define NETVSC_MTU_MIN ETH_MIN_MTU -#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ -#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ +/* Max buffer sizes allowed by a host */ +#define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */ +#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024 * 1024 * 15) /* 15MB */ #define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15) /* 15MB */ + #define NETVSC_INVALID_INDEX -1 #define NETVSC_SEND_SECTION_SIZE 6144 diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e4bcd202a56a..e5d16a8cf0d6 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -268,6 +268,11 @@ static int netvsc_init_buf(struct hv_device *device, buf_size = device_info->recv_sections * device_info->recv_section_size; buf_size = roundup(buf_size, PAGE_SIZE); + /* Legacy hosts only allow smaller receive buffer */ + if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + buf_size = min_t(unsigned int, buf_size, +NETVSC_RECEIVE_BUFFER_SIZE_LEGACY); + net_device->recv_buf = vzalloc(buf_size); if (!net_device->recv_buf) { 
netdev_err(ndev, -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 2/2] hv_netvsc: Fix the TX/RX buffer default sizes
From: Haiyang Zhang The values were not computed correctly. There are no significant visible impact, though. The intended size of RX buffer is 16 MB, and the default slot size is 1728. So, NETVSC_DEFAULT_RX should be 16*1024*1024 / 1728 = 9709. The intended size of TX buffer is 1 MB, and the slot size is 6144. So, NETVSC_DEFAULT_TX should be 1024*1024 / 6144 = 170. The patch puts the formula directly into the macro, and moves them to hyperv_net.h, together with related macros. Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size") Signed-off-by: Haiyang Zhang Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 13 - drivers/net/hyperv/netvsc_drv.c | 4 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 373455f216ce..845ddc7bba46 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -640,13 +640,24 @@ struct nvsp_message { /* Max buffer sizes allowed by a host */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024 * 1024 * 15) /* 15MB */ -#define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15) /* 15MB */ +#define NETVSC_RECEIVE_BUFFER_DEFAULT (1024 * 1024 * 16) + +#define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15) /* 15MB */ +#define NETVSC_SEND_BUFFER_DEFAULT (1024 * 1024) #define NETVSC_INVALID_INDEX -1 #define NETVSC_SEND_SECTION_SIZE 6144 #define NETVSC_RECV_SECTION_SIZE 1728 +/* Default size of TX buf: 1MB, RX buf: 16MB */ +#define NETVSC_MIN_TX_SECTIONS 10 +#define NETVSC_DEFAULT_TX (NETVSC_SEND_BUFFER_DEFAULT \ +/ NETVSC_SEND_SECTION_SIZE) +#define NETVSC_MIN_RX_SECTIONS 10 +#define NETVSC_DEFAULT_RX (NETVSC_RECEIVE_BUFFER_DEFAULT \ +/ NETVSC_RECV_SECTION_SIZE) + #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_SEND_BUFFER_ID 0 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index dc70de674ca9..b6a434ac64d3 100644 --- 
a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -47,10 +47,6 @@ #include "hyperv_net.h" #define RING_SIZE_MIN 64 -#define NETVSC_MIN_TX_SECTIONS 10 -#define NETVSC_DEFAULT_TX 192 /* ~1M */ -#define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ -#define NETVSC_DEFAULT_RX 10485 /* Max ~16M */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 0/6] hv_netvsc: minor changes
This includes minor cleanup of code in the send and receive paths and also a new statistic to check for allocation failures. This also eliminates some of the extra RCU when not needed. There is a theoretical bug where buffered data could be blocked for longer than necessary if the ring buffer got full. This has not been seen in the wild; it was found by inspection. The reference count between net device and internal RNDIS is not needed. Stephen Hemminger (6): hv_netvsc: copy_to_send buf can be void hv_netvsc: track memory allocation failures in ethtool stats hv_netvsc: simplify function args in receive status path hv_netvsc: pass netvsc_device to receive callback hv_netvsc: remove open_cnt reference count hv_netvsc: empty current transmit aggregation if flow blocked drivers/net/hyperv/hyperv_net.h | 9 drivers/net/hyperv/netvsc.c | 44 --- drivers/net/hyperv/netvsc_drv.c | 33 +++-- drivers/net/hyperv/rndis_filter.c | 29 +++--- 4 files changed, 47 insertions(+), 68 deletions(-) -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 1/6] hv_netvsc: copy_to_send buf can be void
Since only caller does not care about return value. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/netvsc.c | 22 -- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index e4bcd202a56a..9407907c4988 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -696,19 +696,18 @@ static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) return NETVSC_INVALID_INDEX; } -static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, - unsigned int section_index, - u32 pend_size, - struct hv_netvsc_packet *packet, - struct rndis_message *rndis_msg, - struct hv_page_buffer *pb, - struct sk_buff *skb) +static void netvsc_copy_to_send_buf(struct netvsc_device *net_device, + unsigned int section_index, + u32 pend_size, + struct hv_netvsc_packet *packet, + struct rndis_message *rndis_msg, + struct hv_page_buffer *pb, + struct sk_buff *skb) { char *start = net_device->send_buf; char *dest = start + (section_index * net_device->send_section_size) + pend_size; int i; - u32 msg_size = 0; u32 padding = 0; u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt : packet->page_buf_cnt; @@ -728,16 +727,11 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device, u32 len = pb[i].len; memcpy(dest, (src + offset), len); - msg_size += len; dest += len; } - if (padding) { + if (padding) memset(dest, 0, padding); - msg_size += padding; - } - - return msg_size; } static inline int netvsc_send_pkt( -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 5/6] hv_netvsc: remove open_cnt reference count
There is only ever a single instance of network device object referencing the internal rndis object. Therefore the open_cnt atomic is not necessary. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 2 -- drivers/net/hyperv/netvsc.c | 2 +- drivers/net/hyperv/rndis_filter.c | 10 +++--- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index eb01943b23c3..8ebe72bf89ff 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -810,8 +810,6 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ - atomic_t open_cnt; - struct netvsc_channel chan_table[VRSS_CHANNEL_MAX]; struct rcu_head rcu; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d8165407bcda..6dd97f232f87 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -73,7 +73,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; - atomic_set(&net_device->open_cnt, 0); + net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 025110a19d4a..035976949177 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1362,9 +1362,6 @@ int rndis_filter_open(struct netvsc_device *nvdev) if (!nvdev) return -EINVAL; - if (atomic_inc_return(&nvdev->open_cnt) != 1) - return 0; - return rndis_filter_open_device(nvdev->extension); } @@ -1373,13 +1370,12 @@ int rndis_filter_close(struct netvsc_device *nvdev) if (!nvdev) return -EINVAL; - if (atomic_dec_return(&nvdev->open_cnt) != 0) - return 0; - return rndis_filter_close_device(nvdev->extension); } bool rndis_filter_opened(const struct netvsc_device *nvdev) { - return atomic_read(&nvdev->open_cnt) > 0; + 
const struct rndis_device *dev = nvdev->extension; + + return dev->state == RNDIS_DEV_DATAINITIALIZED; } -- 2.11.0 ___ devel mailing list de...@linuxdriverproject.org http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel
[PATCH net-next 3/6] hv_netvsc: simplify function args in receive status path
The caller (netvsc_receive) already has the net device pointer, and should just pass that to functions rather than the hyperv device. This eliminates several impossible error paths in the process. Signed-off-by: Stephen Hemminger --- drivers/net/hyperv/hyperv_net.h | 3 +-- drivers/net/hyperv/netvsc.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 12 ++-- drivers/net/hyperv/rndis_filter.c | 9 +++-- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 42bbde1cbe45..6463b7f5aa00 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -199,7 +199,7 @@ int netvsc_send(struct net_device_context *ndc, struct rndis_message *rndis_msg, struct hv_page_buffer *page_buffer, struct sk_buff *skb); -void netvsc_linkstatus_callback(struct hv_device *device_obj, +void netvsc_linkstatus_callback(struct net_device *net, struct rndis_message *resp); int netvsc_recv_callback(struct net_device *net, struct vmbus_channel *channel, @@ -222,7 +222,6 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, const u8 *key); int rndis_filter_receive(struct net_device *ndev, struct netvsc_device *net_dev, -struct hv_device *dev, struct vmbus_channel *channel, void *data, u32 buflen); diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 9407907c4988..d8165407bcda 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1077,7 +1077,7 @@ static int netvsc_receive(struct net_device *ndev, u32 buflen = vmxferpage_packet->ranges[i].byte_count; /* Pass it to the upper layer */ - status = rndis_filter_receive(ndev, net_device, device, + status = rndis_filter_receive(ndev, net_device, channel, data, buflen); } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d16b68974d80..6f12f81fd8aa 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -656,22 +656,14 @@ static int 
netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) /* * netvsc_linkstatus_callback - Link up/down notification */ -void netvsc_linkstatus_callback(struct hv_device *device_obj, +void netvsc_linkstatus_callback(struct net_device *net, struct rndis_message *resp) { struct rndis_indicate_status *indicate = &resp->msg.indicate_status; - struct net_device *net; - struct net_device_context *ndev_ctx; + struct net_device_context *ndev_ctx = netdev_priv(net); struct netvsc_reconfig *event; unsigned long flags; - net = hv_get_drvdata(device_obj); - - if (!net) - return; - - ndev_ctx = netdev_priv(net); - /* Update the physical link speed when changing to another vSwitch */ if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 673492063307..901838b2bcc9 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -134,11 +134,9 @@ static void put_rndis_request(struct rndis_device *dev, kfree(req); } -static void dump_rndis_message(struct hv_device *hv_dev, +static void dump_rndis_message(struct net_device *netdev, const struct rndis_message *rndis_msg) { - struct net_device *netdev = hv_get_drvdata(hv_dev); - switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " @@ -397,7 +395,6 @@ static int rndis_filter_receive_data(struct net_device *ndev, int rndis_filter_receive(struct net_device *ndev, struct netvsc_device *net_dev, -struct hv_device *dev, struct vmbus_channel *channel, void *data, u32 buflen) { @@ -419,7 +416,7 @@ int rndis_filter_receive(struct net_device *ndev, } if (netif_msg_rx_status(net_device_ctx)) - dump_rndis_message(dev, rndis_msg); + dump_rndis_message(ndev, rndis_msg); switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: @@ -434,7 +431,7 @@ int rndis_filter_receive(struct net_device *ndev, case RNDIS_MSG_INDICATE: /* notification msgs */ - 
netvsc_linkstatus_callback(dev, rndis_msg); + netvsc_linkstatus_callback(ndev, rndis_msg); break; default: