[PATCH 2/6] vmbus: keep pointer to ring buffer page

2018-09-13 Thread Stephen Hemminger
Avoid going from struct page to virtual address (and back) by just
keeping a pointer to the allocated pages instead of the virtual address.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 20 +---
 drivers/uio/uio_hv_generic.c |  5 +++--
 include/linux/hyperv.h   |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 33e6db02dbab..56ec0d96d876 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
unsigned long flags;
int ret, err = 0;
struct page *page;
+   unsigned int order;
 
if (send_ringbuffer_size % PAGE_SIZE ||
recv_ringbuffer_size % PAGE_SIZE)
return -EINVAL;
 
+   order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+
spin_lock_irqsave(&newchannel->lock, flags);
if (newchannel->state == CHANNEL_OPEN_STATE) {
newchannel->state = CHANNEL_OPENING_STATE;
@@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
 
/* Allocate the ring buffer */
page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
-   GFP_KERNEL|__GFP_ZERO,
-   get_order(send_ringbuffer_size +
-   recv_ringbuffer_size));
+   GFP_KERNEL|__GFP_ZERO, order);
 
if (!page)
-   page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
-  get_order(send_ringbuffer_size +
-recv_ringbuffer_size));
+   page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
if (!page) {
err = -ENOMEM;
goto error_set_chnstate;
}
 
-   newchannel->ringbuffer_pages = page_address(page);
+   newchannel->ringbuffer_page = page;
newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
   recv_ringbuffer_size) >> PAGE_SHIFT;
 
@@ -239,8 +238,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
 error_free_pages:
hv_ringbuffer_cleanup(&newchannel->outbound);
hv_ringbuffer_cleanup(&newchannel->inbound);
-   __free_pages(page,
-get_order(send_ringbuffer_size + recv_ringbuffer_size));
+   __free_pages(page, order);
 error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
@@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel 
*channel)
hv_ringbuffer_cleanup(&channel->outbound);
hv_ringbuffer_cleanup(&channel->inbound);
 
-   free_pages((unsigned long)channel->ringbuffer_pages,
-   get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
+   __free_pages(channel->ringbuffer_page,
+get_order(channel->ringbuffer_pagecount << PAGE_SHIFT));
 
 out:
return ret;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index a08860260f55..ba67a5267557 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct 
kobject *kobj,
= container_of(kobj, struct vmbus_channel, kobj);
struct hv_device *dev = channel->primary_channel->device_obj;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
+   void *ring_buffer = page_address(channel->ringbuffer_page);
 
dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
q_idx, vma_pages(vma), vma->vm_pgoff);
 
-   return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages),
+   return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
   channel->ringbuffer_pagecount << PAGE_SHIFT);
 }
 
@@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev,
/* mem resources */
pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
pdata->info.mem[TXRX_RING_MAP].addr
-   = (uintptr_t)dev->channel->ringbuffer_pages;
+   = (uintptr_t)page_address(dev->channel->ringbuffer_page);
pdata->info.mem[TXRX_RING_MAP].size
= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6c4575c7f46b..a6c32d2d090b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -739,7 +739,7 @@ struct vmbus_channel {
u32 ringbuffer_gpadlhandle;
 
/* Allocated memory for ring buffer */
-   void *ringbuffer_pages;
+   struc

[PATCH 1/6] vmbus: pass channel to hv_process_channel_removal

2018-09-13 Thread Stephen Hemminger
Rather than passing relid and then looking up the channel,
pass the channel directly, since the caller already knows it.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c  |  3 +--
 drivers/hv/channel_mgmt.c | 17 +
 drivers/hv/vmbus_drv.c|  3 +--
 include/linux/hyperv.h|  2 +-
 4 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 741857d80da1..33e6db02dbab 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel)
wait_for_completion(&cur_channel->rescind_event);
mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(cur_channel);
-   hv_process_channel_removal(
-  cur_channel->offermsg.child_relid);
+   hv_process_channel_removal(cur_channel);
} else {
mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(cur_channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 0f0e091c117c..b7c48ebdf6a1 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid)
trace_vmbus_release_relid(&msg, ret);
 }
 
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
 {
+   struct vmbus_channel *primary_channel;
unsigned long flags;
-   struct vmbus_channel *primary_channel, *channel;
 
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
-   /*
-* Make sure channel is valid as we may have raced.
-*/
-   channel = relid2channel(relid);
-   if (!channel)
-   return;
-
BUG_ON(!channel->rescind);
+
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid)
cpumask_clear_cpu(channel->target_cpu,
  &primary_channel->alloced_cpus_in_node);
 
-   vmbus_release_relid(relid);
+   vmbus_release_relid(channel->offermsg.child_relid);
 
free_channel(channel);
 }
@@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct 
vmbus_channel_message_header *hdr)
 * The channel is currently not open;
 * it is safe for us to cleanup the channel.
 */
-   hv_process_channel_removal(rescind->child_relid);
+   hv_process_channel_removal(channel);
} else {
complete(&channel->rescind_event);
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e6d8fdac6d8b..007ee8e5986a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device)
struct vmbus_channel *channel = hv_dev->channel;
 
mutex_lock(&vmbus_connection.channel_mutex);
-   hv_process_channel_removal(channel->offermsg.child_relid);
+   hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
-
 }
 
 /* The one and only one */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2c3798bcb01c..6c4575c7f46b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr 
*icmsghdrp, u8 *buf,
const int *srv_version, int srv_vercnt,
int *nego_fw_version, int *nego_srv_version);
 
-void hv_process_channel_removal(u32 relid);
+void hv_process_channel_removal(struct vmbus_channel *channel);
 
 void vmbus_setevent(struct vmbus_channel *channel);
 /*
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 5/6] hv_uio_generic: map ringbuffer phys addr

2018-09-13 Thread Stephen Hemminger
The ring buffer is contiguous IOVA and is mapped via its physical
address for the sysfs file. Use the same method for the UIO mapping.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index ba67a5267557..53f5610c6065 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -224,10 +224,10 @@ hv_uio_probe(struct hv_device *dev,
/* mem resources */
pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
pdata->info.mem[TXRX_RING_MAP].addr
-   = (uintptr_t)page_address(dev->channel->ringbuffer_page);
+   = 
(uintptr_t)virt_to_phys(page_address(dev->channel->ringbuffer_page));
pdata->info.mem[TXRX_RING_MAP].size
= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
-   pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+   pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA;
 
pdata->info.mem[INT_PAGE_MAP].name = "int_page";
pdata->info.mem[INT_PAGE_MAP].addr
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 4/6] uio: introduce UIO_MEM_IOVA

2018-09-13 Thread Stephen Hemminger
Introduce the concept of mapping physical memory locations that
are normal memory. The new type UIO_MEM_IOVA is similar to the
existing UIO_MEM_PHYS, but the backing memory is not marked as uncached.

Also, indent related switch to the currently used style.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio.c  | 24 +---
 include/linux/uio_driver.h |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 0ffb324aa038..e601bd3fbae1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
return -EINVAL;
 
vma->vm_ops = &uio_physical_vm_ops;
-   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+   if (idev->info->mem[mi].memtype == UIO_MEM_PHYS)
+   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
/*
 * We cannot use the vm_iomap_memory() helper here,
@@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct 
vm_area_struct *vma)
}
 
switch (idev->info->mem[mi].memtype) {
-   case UIO_MEM_PHYS:
-   ret = uio_mmap_physical(vma);
-   break;
-   case UIO_MEM_LOGICAL:
-   case UIO_MEM_VIRTUAL:
-   ret = uio_mmap_logical(vma);
-   break;
-   default:
-   ret = -EINVAL;
+   case UIO_MEM_IOVA:
+   case UIO_MEM_PHYS:
+   ret = uio_mmap_physical(vma);
+   break;
+   case UIO_MEM_LOGICAL:
+   case UIO_MEM_VIRTUAL:
+   ret = uio_mmap_logical(vma);
+   break;
+   default:
+   ret = -EINVAL;
}
 
-out:
+ out:
mutex_unlock(&idev->info_lock);
return ret;
 }
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6f8b68cd460f..a3cd7cb67a69 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info);
 #define UIO_MEM_PHYS   1
 #define UIO_MEM_LOGICAL2
 #define UIO_MEM_VIRTUAL 3
+#define UIO_MEM_IOVA   4
 
 /* defines for uio_port->porttype */
 #define UIO_PORT_NONE  0
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 6/6] uio_hv_generic: defer opening vmbus until first use

2018-09-13 Thread Stephen Hemminger
This fixes two design flaws in uio_hv_generic.

Since hv_uio_probe is called from vmbus_probe with a lock held,
it can potentially sleep in an atomic section because
vmbus_open will wait for a response from the host.

The uio_hv_generic driver could not handle applications
exiting and restarting because the vmbus channel was
persistent.  Change the semantics so that the buffers are
allocated on probe, but not attached to the host until the
device is opened.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 102 ---
 1 file changed, 72 insertions(+), 30 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 53f5610c6065..9bd837accdb5 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -55,6 +55,7 @@ enum hv_uio_map {
 struct hv_uio_private_data {
struct uio_info info;
struct hv_device *device;
+   atomic_t refcnt;
 
void*recv_buf;
u32 recv_gpadl;
@@ -128,12 +129,10 @@ static int hv_uio_ring_mmap(struct file *filp, struct 
kobject *kobj,
 {
struct vmbus_channel *channel
= container_of(kobj, struct vmbus_channel, kobj);
-   struct hv_device *dev = channel->primary_channel->device_obj;
-   u16 q_idx = channel->offermsg.offer.sub_channel_index;
void *ring_buffer = page_address(channel->ringbuffer_page);
 
-   dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
-   q_idx, vma_pages(vma), vma->vm_pgoff);
+   if (channel->state != CHANNEL_OPENED_STATE)
+   return -ENODEV;
 
return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
   channel->ringbuffer_pagecount << PAGE_SHIFT);
@@ -176,57 +175,101 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
}
 }
 
+/* free the reserved buffers for send and receive */
 static void
 hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
-   if (pdata->send_gpadl)
+   if (pdata->send_gpadl) {
vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl);
-   vfree(pdata->send_buf);
+   pdata->send_gpadl = 0;
+   vfree(pdata->send_buf);
+   }
 
-   if (pdata->recv_gpadl)
+   if (pdata->recv_gpadl) {
vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl);
-   vfree(pdata->recv_buf);
+   pdata->recv_gpadl = 0;
+   vfree(pdata->recv_buf);
+   }
+}
+
+/* VMBus primary channel is opened on first use */
+static int
+hv_uio_open(struct uio_info *info, struct inode *inode)
+{
+   struct hv_uio_private_data *pdata
+   = container_of(info, struct hv_uio_private_data, info);
+   struct hv_device *dev = pdata->device;
+   int ret;
+
+   if (atomic_inc_return(&pdata->refcnt) != 1)
+   return 0;
+
+   ret = vmbus_connect_ring(dev->channel,
+hv_uio_channel_cb, dev->channel);
+
+   if (ret == 0)
+   dev->channel->inbound.ring_buffer->interrupt_mask = 1;
+
+   return ret;
+}
+
+/* VMBus primary channel is closed on last close */
+static int
+hv_uio_release(struct uio_info *info, struct inode *inode)
+{
+   struct hv_uio_private_data *pdata
+   = container_of(info, struct hv_uio_private_data, info);
+   struct hv_device *dev = pdata->device;
+   int ret = 0;
+
+   if (atomic_dec_and_test(&pdata->refcnt))
+   ret = vmbus_disconnect_ring(dev->channel);
+
+   return ret;
 }
 
 static int
 hv_uio_probe(struct hv_device *dev,
 const struct hv_vmbus_device_id *dev_id)
 {
+   struct vmbus_channel *channel = dev->channel;
struct hv_uio_private_data *pdata;
+   void *ring_buffer;
int ret;
 
+   /* Communicating with host has to be via shared memory not hypercall */
+   if (!channel->offermsg.monitor_allocated) {
+   dev_err(&dev->device, "vmbus channel requires hypercall\n");
+   return -ENOTSUPP;
+   }
+
pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
 
-   ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE,
-HV_RING_SIZE * PAGE_SIZE, NULL, 0,
-hv_uio_channel_cb, dev->channel);
+   ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE,
+  HV_RING_SIZE * PAGE_SIZE);
if (ret)
goto fail;
 
-   /* Communicating with host has to be via shared memory not hypercall */
-   if (!dev->channel->offermsg.monitor_allocated) {
-   dev_err(&dev->device, "vmbus channel requires hypercall\n");
-   ret = -ENOTSUPP;
-   goto fail_c

[PATCH 0/6] fix Hyper-V uio restart

2018-09-13 Thread Stephen Hemminger
This set of patches fixes the problem where DPDK applications
using the uio_hv_generic driver cannot be successfully restarted.

Getting this working required a small change to uio to allow
mapping memory without marking it uncached, and a refactoring of
how the ring buffer is set up in the vmbus code.

It could be backported as a fix to 4.19, but that is not
an LTS release, so it is probably not worth it.

Stephen Hemminger (6):
  vmbus: pass channel to hv_process_channel_removal
  vmbus: keep pointer to ring buffer page
  vmbus: split ring buffer allocation from open
  uio: introduce UIO_MEM_IOVA
  hv_uio_generic: map ringbuffer phys addr
  uio_hv_generic: defer opening vmbus until first use

 drivers/hv/channel.c | 276 ---
 drivers/hv/channel_mgmt.c|  17 +--
 drivers/hv/ring_buffer.c |   1 +
 drivers/hv/vmbus_drv.c   |   3 +-
 drivers/uio/uio.c|  24 +--
 drivers/uio/uio_hv_generic.c | 107 ++
 include/linux/hyperv.h   |  13 +-
 include/linux/uio_driver.h   |   1 +
 8 files changed, 262 insertions(+), 180 deletions(-)

-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 3/6] vmbus: split ring buffer allocation from open

2018-09-13 Thread Stephen Hemminger
The UIO driver needs the ring buffer to be persistent (reused)
across open/close. Split the allocation and setup of the ring buffer
out of vmbus_open. For normal usage via vmbus_open/vmbus_close there
are no changes; this only impacts uio_hv_generic, which needs to keep
the ring buffer memory and reuse it when the application restarts.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 267 ++-
 drivers/hv/ring_buffer.c |   1 +
 include/linux/hyperv.h   |   9 ++
 3 files changed, 162 insertions(+), 115 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56ec0d96d876..ddadb7efd1cc 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel)
 }
 EXPORT_SYMBOL_GPL(vmbus_setevent);
 
-/*
- * vmbus_open - Open the specified channel.
- */
-int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
-u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
-void (*onchannelcallback)(void *context), void *context)
+/* vmbus_free_ring - drop mapping of ring buffer */
+void vmbus_free_ring(struct vmbus_channel *channel)
 {
-   struct vmbus_channel_open_channel *open_msg;
-   struct vmbus_channel_msginfo *open_info = NULL;
-   unsigned long flags;
-   int ret, err = 0;
-   struct page *page;
-   unsigned int order;
+   hv_ringbuffer_cleanup(&channel->outbound);
+   hv_ringbuffer_cleanup(&channel->inbound);
 
-   if (send_ringbuffer_size % PAGE_SIZE ||
-   recv_ringbuffer_size % PAGE_SIZE)
-   return -EINVAL;
+   if (channel->ringbuffer_page) {
+   __free_pages(channel->ringbuffer_page,
+get_order(channel->ringbuffer_pagecount
+  << PAGE_SHIFT));
+   channel->ringbuffer_page = NULL;
+   }
+}
+EXPORT_SYMBOL_GPL(vmbus_free_ring);
 
-   order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+/* vmbus_alloc_ring - allocate and map pages for ring buffer */
+int vmbus_alloc_ring(struct vmbus_channel *newchannel,
+u32 send_size, u32 recv_size)
+{
+   struct page *page;
+   int order;
 
-   spin_lock_irqsave(&newchannel->lock, flags);
-   if (newchannel->state == CHANNEL_OPEN_STATE) {
-   newchannel->state = CHANNEL_OPENING_STATE;
-   } else {
-   spin_unlock_irqrestore(&newchannel->lock, flags);
+   if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
return -EINVAL;
-   }
-   spin_unlock_irqrestore(&newchannel->lock, flags);
-
-   newchannel->onchannel_callback = onchannelcallback;
-   newchannel->channel_callback_context = context;
 
/* Allocate the ring buffer */
+   order = get_order(send_size + recv_size);
page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
GFP_KERNEL|__GFP_ZERO, order);
 
if (!page)
page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
-   if (!page) {
-   err = -ENOMEM;
-   goto error_set_chnstate;
-   }
+   if (!page)
+   return -ENOMEM;
 
newchannel->ringbuffer_page = page;
-   newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
-  recv_ringbuffer_size) >> PAGE_SHIFT;
+   newchannel->ringbuffer_pagecount = (send_size + recv_size) >> 
PAGE_SHIFT;
+   newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
 
-   ret = hv_ringbuffer_init(&newchannel->outbound, page,
-send_ringbuffer_size >> PAGE_SHIFT);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
 
-   if (ret != 0) {
-   err = ret;
-   goto error_free_pages;
-   }
+static int __vmbus_open(struct vmbus_channel *newchannel,
+  void *userdata, u32 userdatalen,
+  void (*onchannelcallback)(void *context), void *context)
+{
+   struct vmbus_channel_open_channel *open_msg;
+   struct vmbus_channel_msginfo *open_info = NULL;
+   struct page *page = newchannel->ringbuffer_page;
+   u32 send_pages, recv_pages;
+   unsigned long flags;
+   int err;
 
-   ret = hv_ringbuffer_init(&newchannel->inbound,
-&page[send_ringbuffer_size >> PAGE_SHIFT],
-recv_ringbuffer_size >> PAGE_SHIFT);
-   if (ret != 0) {
-   err = ret;
-   goto error_free_pages;
+   if (userdatalen > MAX_USER_DEFINED_BYTES)
+   return -EINVAL;
+
+   send_pages = newchannel->ringbuffer_send_offset;
+   recv_pages = newchannel

[PATCH 0/6] fix Hyper-V uio restart

2018-09-14 Thread Stephen Hemminger
This set of patches fixes the problem where DPDK applications
using the uio_hv_generic driver cannot be successfully restarted.

Getting this working required a small change to uio to allow
mapping memory without marking it uncached, and a refactoring of
how the ring buffer is set up in the vmbus code.

It could be backported as a fix to 4.19, but that is not
an LTS release, so it is probably not worth it.

v2 - add refcount unwind in hv_uio_generic open in case of error

Stephen Hemminger (6):
  vmbus: pass channel to hv_process_channel_removal
  vmbus: keep pointer to ring buffer page
  vmbus: split ring buffer allocation from open
  uio: introduce UIO_MEM_IOVA
  hv_uio_generic: map ringbuffer phys addr
  uio_hv_generic: defer opening vmbus until first use

 drivers/hv/channel.c | 276 ---
 drivers/hv/channel_mgmt.c|  17 +--
 drivers/hv/ring_buffer.c |   1 +
 drivers/hv/vmbus_drv.c   |   3 +-
 drivers/uio/uio.c|  24 +--
 drivers/uio/uio_hv_generic.c | 109 ++
 include/linux/hyperv.h   |  13 +-
 include/linux/uio_driver.h   |   1 +
 8 files changed, 264 insertions(+), 180 deletions(-)

-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 3/6] vmbus: split ring buffer allocation from open

2018-09-14 Thread Stephen Hemminger
The UIO driver needs the ring buffer to be persistent (reused)
across open/close. Split the allocation and setup of the ring buffer
out of vmbus_open. For normal usage via vmbus_open/vmbus_close there
are no changes; this only impacts uio_hv_generic, which needs to keep
the ring buffer memory and reuse it when the application restarts.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 267 ++-
 drivers/hv/ring_buffer.c |   1 +
 include/linux/hyperv.h   |   9 ++
 3 files changed, 162 insertions(+), 115 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56ec0d96d876..ddadb7efd1cc 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel)
 }
 EXPORT_SYMBOL_GPL(vmbus_setevent);
 
-/*
- * vmbus_open - Open the specified channel.
- */
-int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
-u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
-void (*onchannelcallback)(void *context), void *context)
+/* vmbus_free_ring - drop mapping of ring buffer */
+void vmbus_free_ring(struct vmbus_channel *channel)
 {
-   struct vmbus_channel_open_channel *open_msg;
-   struct vmbus_channel_msginfo *open_info = NULL;
-   unsigned long flags;
-   int ret, err = 0;
-   struct page *page;
-   unsigned int order;
+   hv_ringbuffer_cleanup(&channel->outbound);
+   hv_ringbuffer_cleanup(&channel->inbound);
 
-   if (send_ringbuffer_size % PAGE_SIZE ||
-   recv_ringbuffer_size % PAGE_SIZE)
-   return -EINVAL;
+   if (channel->ringbuffer_page) {
+   __free_pages(channel->ringbuffer_page,
+get_order(channel->ringbuffer_pagecount
+  << PAGE_SHIFT));
+   channel->ringbuffer_page = NULL;
+   }
+}
+EXPORT_SYMBOL_GPL(vmbus_free_ring);
 
-   order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+/* vmbus_alloc_ring - allocate and map pages for ring buffer */
+int vmbus_alloc_ring(struct vmbus_channel *newchannel,
+u32 send_size, u32 recv_size)
+{
+   struct page *page;
+   int order;
 
-   spin_lock_irqsave(&newchannel->lock, flags);
-   if (newchannel->state == CHANNEL_OPEN_STATE) {
-   newchannel->state = CHANNEL_OPENING_STATE;
-   } else {
-   spin_unlock_irqrestore(&newchannel->lock, flags);
+   if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
return -EINVAL;
-   }
-   spin_unlock_irqrestore(&newchannel->lock, flags);
-
-   newchannel->onchannel_callback = onchannelcallback;
-   newchannel->channel_callback_context = context;
 
/* Allocate the ring buffer */
+   order = get_order(send_size + recv_size);
page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
GFP_KERNEL|__GFP_ZERO, order);
 
if (!page)
page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
-   if (!page) {
-   err = -ENOMEM;
-   goto error_set_chnstate;
-   }
+   if (!page)
+   return -ENOMEM;
 
newchannel->ringbuffer_page = page;
-   newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
-  recv_ringbuffer_size) >> PAGE_SHIFT;
+   newchannel->ringbuffer_pagecount = (send_size + recv_size) >> 
PAGE_SHIFT;
+   newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
 
-   ret = hv_ringbuffer_init(&newchannel->outbound, page,
-send_ringbuffer_size >> PAGE_SHIFT);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
 
-   if (ret != 0) {
-   err = ret;
-   goto error_free_pages;
-   }
+static int __vmbus_open(struct vmbus_channel *newchannel,
+  void *userdata, u32 userdatalen,
+  void (*onchannelcallback)(void *context), void *context)
+{
+   struct vmbus_channel_open_channel *open_msg;
+   struct vmbus_channel_msginfo *open_info = NULL;
+   struct page *page = newchannel->ringbuffer_page;
+   u32 send_pages, recv_pages;
+   unsigned long flags;
+   int err;
 
-   ret = hv_ringbuffer_init(&newchannel->inbound,
-&page[send_ringbuffer_size >> PAGE_SHIFT],
-recv_ringbuffer_size >> PAGE_SHIFT);
-   if (ret != 0) {
-   err = ret;
-   goto error_free_pages;
+   if (userdatalen > MAX_USER_DEFINED_BYTES)
+   return -EINVAL;
+
+   send_pages = newchannel->ringbuffer_send_offset;
+   recv_pages = newchannel

[PATCH 5/6] hv_uio_generic: map ringbuffer phys addr

2018-09-14 Thread Stephen Hemminger
The ring buffer is contiguous IOVA and is mapped via its physical
address for the sysfs file. Use the same method for the UIO mapping.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index ba67a5267557..53f5610c6065 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -224,10 +224,10 @@ hv_uio_probe(struct hv_device *dev,
/* mem resources */
pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
pdata->info.mem[TXRX_RING_MAP].addr
-   = (uintptr_t)page_address(dev->channel->ringbuffer_page);
+   = 
(uintptr_t)virt_to_phys(page_address(dev->channel->ringbuffer_page));
pdata->info.mem[TXRX_RING_MAP].size
= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
-   pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+   pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA;
 
pdata->info.mem[INT_PAGE_MAP].name = "int_page";
pdata->info.mem[INT_PAGE_MAP].addr
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 2/6] vmbus: keep pointer to ring buffer page

2018-09-14 Thread Stephen Hemminger
Avoid going from struct page to virtual address (and back) by just
keeping a pointer to the allocated pages instead of the virtual address.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 20 +---
 drivers/uio/uio_hv_generic.c |  5 +++--
 include/linux/hyperv.h   |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 33e6db02dbab..56ec0d96d876 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
unsigned long flags;
int ret, err = 0;
struct page *page;
+   unsigned int order;
 
if (send_ringbuffer_size % PAGE_SIZE ||
recv_ringbuffer_size % PAGE_SIZE)
return -EINVAL;
 
+   order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+
spin_lock_irqsave(&newchannel->lock, flags);
if (newchannel->state == CHANNEL_OPEN_STATE) {
newchannel->state = CHANNEL_OPENING_STATE;
@@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
 
/* Allocate the ring buffer */
page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
-   GFP_KERNEL|__GFP_ZERO,
-   get_order(send_ringbuffer_size +
-   recv_ringbuffer_size));
+   GFP_KERNEL|__GFP_ZERO, order);
 
if (!page)
-   page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
-  get_order(send_ringbuffer_size +
-recv_ringbuffer_size));
+   page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
if (!page) {
err = -ENOMEM;
goto error_set_chnstate;
}
 
-   newchannel->ringbuffer_pages = page_address(page);
+   newchannel->ringbuffer_page = page;
newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
   recv_ringbuffer_size) >> PAGE_SHIFT;
 
@@ -239,8 +238,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
 error_free_pages:
hv_ringbuffer_cleanup(&newchannel->outbound);
hv_ringbuffer_cleanup(&newchannel->inbound);
-   __free_pages(page,
-get_order(send_ringbuffer_size + recv_ringbuffer_size));
+   __free_pages(page, order);
 error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
@@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel 
*channel)
hv_ringbuffer_cleanup(&channel->outbound);
hv_ringbuffer_cleanup(&channel->inbound);
 
-   free_pages((unsigned long)channel->ringbuffer_pages,
-   get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
+   __free_pages(channel->ringbuffer_page,
+get_order(channel->ringbuffer_pagecount << PAGE_SHIFT));
 
 out:
return ret;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index a08860260f55..ba67a5267557 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct 
kobject *kobj,
= container_of(kobj, struct vmbus_channel, kobj);
struct hv_device *dev = channel->primary_channel->device_obj;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
+   void *ring_buffer = page_address(channel->ringbuffer_page);
 
dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
q_idx, vma_pages(vma), vma->vm_pgoff);
 
-   return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages),
+   return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
   channel->ringbuffer_pagecount << PAGE_SHIFT);
 }
 
@@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev,
/* mem resources */
pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
pdata->info.mem[TXRX_RING_MAP].addr
-   = (uintptr_t)dev->channel->ringbuffer_pages;
+   = (uintptr_t)page_address(dev->channel->ringbuffer_page);
pdata->info.mem[TXRX_RING_MAP].size
= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6c4575c7f46b..a6c32d2d090b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -739,7 +739,7 @@ struct vmbus_channel {
u32 ringbuffer_gpadlhandle;
 
/* Allocated memory for ring buffer */
-   void *ringbuffer_pages;
+   struc

[PATCH 1/6] vmbus: pass channel to hv_process_channel_removal

2018-09-14 Thread Stephen Hemminger
Rather than passing relid and then looking up the channel,
pass the channel directly, since the caller already knows it.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c  |  3 +--
 drivers/hv/channel_mgmt.c | 17 +
 drivers/hv/vmbus_drv.c|  3 +--
 include/linux/hyperv.h|  2 +-
 4 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 741857d80da1..33e6db02dbab 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel)
wait_for_completion(&cur_channel->rescind_event);
mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(cur_channel);
-   hv_process_channel_removal(
-  cur_channel->offermsg.child_relid);
+   hv_process_channel_removal(cur_channel);
} else {
mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(cur_channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 0f0e091c117c..b7c48ebdf6a1 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid)
trace_vmbus_release_relid(&msg, ret);
 }
 
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
 {
+   struct vmbus_channel *primary_channel;
unsigned long flags;
-   struct vmbus_channel *primary_channel, *channel;
 
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
-   /*
-* Make sure channel is valid as we may have raced.
-*/
-   channel = relid2channel(relid);
-   if (!channel)
-   return;
-
BUG_ON(!channel->rescind);
+
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid)
cpumask_clear_cpu(channel->target_cpu,
  &primary_channel->alloced_cpus_in_node);
 
-   vmbus_release_relid(relid);
+   vmbus_release_relid(channel->offermsg.child_relid);
 
free_channel(channel);
 }
@@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct 
vmbus_channel_message_header *hdr)
 * The channel is currently not open;
 * it is safe for us to cleanup the channel.
 */
-   hv_process_channel_removal(rescind->child_relid);
+   hv_process_channel_removal(channel);
} else {
complete(&channel->rescind_event);
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e6d8fdac6d8b..007ee8e5986a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device)
struct vmbus_channel *channel = hv_dev->channel;
 
mutex_lock(&vmbus_connection.channel_mutex);
-   hv_process_channel_removal(channel->offermsg.child_relid);
+   hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
-
 }
 
 /* The one and only one */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2c3798bcb01c..6c4575c7f46b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr 
*icmsghdrp, u8 *buf,
const int *srv_version, int srv_vercnt,
int *nego_fw_version, int *nego_srv_version);
 
-void hv_process_channel_removal(u32 relid);
+void hv_process_channel_removal(struct vmbus_channel *channel);
 
 void vmbus_setevent(struct vmbus_channel *channel);
 /*
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH 6/6] uio_hv_generic: defer opening vmbus until first use

2018-09-14 Thread Stephen Hemminger
This fixes two design flaws in hv_uio_generic.

Since hv_uio_probe is called from vmbus_probe with lock held
it potentially can cause sleep in an atomic section because
vmbus_open will wait for response from host.

The hv_uio_generic driver could not handle applications
exiting and restarting because the vmbus channel was
persistent.  Change the semantics so that the buffers are
allocated on probe, but not attached to host until
device is opened.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 104 +--
 1 file changed, 74 insertions(+), 30 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 53f5610c6065..f2ec981d66cb 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -55,6 +55,7 @@ enum hv_uio_map {
 struct hv_uio_private_data {
struct uio_info info;
struct hv_device *device;
+   atomic_t refcnt;
 
void*recv_buf;
u32 recv_gpadl;
@@ -128,12 +129,10 @@ static int hv_uio_ring_mmap(struct file *filp, struct 
kobject *kobj,
 {
struct vmbus_channel *channel
= container_of(kobj, struct vmbus_channel, kobj);
-   struct hv_device *dev = channel->primary_channel->device_obj;
-   u16 q_idx = channel->offermsg.offer.sub_channel_index;
void *ring_buffer = page_address(channel->ringbuffer_page);
 
-   dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
-   q_idx, vma_pages(vma), vma->vm_pgoff);
+   if (channel->state != CHANNEL_OPENED_STATE)
+   return -ENODEV;
 
return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
   channel->ringbuffer_pagecount << PAGE_SHIFT);
@@ -176,57 +175,103 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
}
 }
 
+/* free the reserved buffers for send and receive */
 static void
 hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
-   if (pdata->send_gpadl)
+   if (pdata->send_gpadl) {
vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl);
-   vfree(pdata->send_buf);
+   pdata->send_gpadl = 0;
+   vfree(pdata->send_buf);
+   }
 
-   if (pdata->recv_gpadl)
+   if (pdata->recv_gpadl) {
vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl);
-   vfree(pdata->recv_buf);
+   pdata->recv_gpadl = 0;
+   vfree(pdata->recv_buf);
+   }
+}
+
+/* VMBus primary channel is opened on first use */
+static int
+hv_uio_open(struct uio_info *info, struct inode *inode)
+{
+   struct hv_uio_private_data *pdata
+   = container_of(info, struct hv_uio_private_data, info);
+   struct hv_device *dev = pdata->device;
+   int ret;
+
+   if (atomic_inc_return(&pdata->refcnt) != 1)
+   return 0;
+
+   ret = vmbus_connect_ring(dev->channel,
+hv_uio_channel_cb, dev->channel);
+
+   if (ret == 0)
+   dev->channel->inbound.ring_buffer->interrupt_mask = 1;
+   else
+   atomic_dec(&pdata->refcount);
+
+   return ret;
+}
+
+/* VMBus primary channel is closed on last close */
+static int
+hv_uio_release(struct uio_info *info, struct inode *inode)
+{
+   struct hv_uio_private_data *pdata
+   = container_of(info, struct hv_uio_private_data, info);
+   struct hv_device *dev = pdata->device;
+   int ret = 0;
+
+   if (atomic_dec_and_test(&pdata->refcnt))
+   ret = vmbus_disconnect_ring(dev->channel);
+
+   return ret;
 }
 
 static int
 hv_uio_probe(struct hv_device *dev,
 const struct hv_vmbus_device_id *dev_id)
 {
+   struct vmbus_channel *channel = dev->channel;
struct hv_uio_private_data *pdata;
+   void *ring_buffer;
int ret;
 
+   /* Communicating with host has to be via shared memory not hypercall */
+   if (!channel->offermsg.monitor_allocated) {
+   dev_err(&dev->device, "vmbus channel requires hypercall\n");
+   return -ENOTSUPP;
+   }
+
pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
 
-   ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE,
-HV_RING_SIZE * PAGE_SIZE, NULL, 0,
-hv_uio_channel_cb, dev->channel);
+   ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE,
+  HV_RING_SIZE * PAGE_SIZE);
if (ret)
goto fail;
 
-   /* Communicating with host has to be via shared memory not hypercall */
-   if (!dev->channel->offermsg.monitor_allocated) {
-   dev_err(&dev->device, "vmbus channel requires hypercall\n

[PATCH 4/6] uio: introduce UIO_MEM_IOVA

2018-09-14 Thread Stephen Hemminger
Introduce the concept of mapping physical memory locations that
are normal memory. The new type UIO_MEM_IOVA is similar to the
existing UIO_MEM_PHYS, but the backing memory is not marked as uncached.

Also, indent related switch to the currently used style.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio.c  | 24 +---
 include/linux/uio_driver.h |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 0ffb324aa038..e601bd3fbae1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
return -EINVAL;
 
vma->vm_ops = &uio_physical_vm_ops;
-   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+   if (idev->info->mem[mi].memtype == UIO_MEM_PHYS)
+   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
/*
 * We cannot use the vm_iomap_memory() helper here,
@@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct 
vm_area_struct *vma)
}
 
switch (idev->info->mem[mi].memtype) {
-   case UIO_MEM_PHYS:
-   ret = uio_mmap_physical(vma);
-   break;
-   case UIO_MEM_LOGICAL:
-   case UIO_MEM_VIRTUAL:
-   ret = uio_mmap_logical(vma);
-   break;
-   default:
-   ret = -EINVAL;
+   case UIO_MEM_IOVA:
+   case UIO_MEM_PHYS:
+   ret = uio_mmap_physical(vma);
+   break;
+   case UIO_MEM_LOGICAL:
+   case UIO_MEM_VIRTUAL:
+   ret = uio_mmap_logical(vma);
+   break;
+   default:
+   ret = -EINVAL;
}
 
-out:
+ out:
mutex_unlock(&idev->info_lock);
return ret;
 }
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6f8b68cd460f..a3cd7cb67a69 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info);
 #define UIO_MEM_PHYS   1
 #define UIO_MEM_LOGICAL2
 #define UIO_MEM_VIRTUAL 3
+#define UIO_MEM_IOVA   4
 
 /* defines for uio_port->porttype */
 #define UIO_PORT_NONE  0
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3 4/6] uio: introduce UIO_MEM_IOVA

2018-09-14 Thread Stephen Hemminger
Introduce the concept of mapping physical memory locations that
are normal memory. The new type UIO_MEM_IOVA is similar to the
existing UIO_MEM_PHYS, but the backing memory is not marked as uncached.

Also, indent related switch to the currently used style.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio.c  | 24 +---
 include/linux/uio_driver.h |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 0ffb324aa038..e601bd3fbae1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
return -EINVAL;
 
vma->vm_ops = &uio_physical_vm_ops;
-   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+   if (idev->info->mem[mi].memtype == UIO_MEM_PHYS)
+   vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
/*
 * We cannot use the vm_iomap_memory() helper here,
@@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct 
vm_area_struct *vma)
}
 
switch (idev->info->mem[mi].memtype) {
-   case UIO_MEM_PHYS:
-   ret = uio_mmap_physical(vma);
-   break;
-   case UIO_MEM_LOGICAL:
-   case UIO_MEM_VIRTUAL:
-   ret = uio_mmap_logical(vma);
-   break;
-   default:
-   ret = -EINVAL;
+   case UIO_MEM_IOVA:
+   case UIO_MEM_PHYS:
+   ret = uio_mmap_physical(vma);
+   break;
+   case UIO_MEM_LOGICAL:
+   case UIO_MEM_VIRTUAL:
+   ret = uio_mmap_logical(vma);
+   break;
+   default:
+   ret = -EINVAL;
}
 
-out:
+ out:
mutex_unlock(&idev->info_lock);
return ret;
 }
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6f8b68cd460f..a3cd7cb67a69 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info);
 #define UIO_MEM_PHYS   1
 #define UIO_MEM_LOGICAL2
 #define UIO_MEM_VIRTUAL 3
+#define UIO_MEM_IOVA   4
 
 /* defines for uio_port->porttype */
 #define UIO_PORT_NONE  0
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3 5/6] hv_uio_generic: map ringbuffer phys addr

2018-09-14 Thread Stephen Hemminger
The ring buffer is contiguous IOVA and is mapped via phys addr
for sysfs file. Use same method for the UIO mapping.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index ba67a5267557..53f5610c6065 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -224,10 +224,10 @@ hv_uio_probe(struct hv_device *dev,
/* mem resources */
pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
pdata->info.mem[TXRX_RING_MAP].addr
-   = (uintptr_t)page_address(dev->channel->ringbuffer_page);
+   = 
(uintptr_t)virt_to_phys(page_address(dev->channel->ringbuffer_page));
pdata->info.mem[TXRX_RING_MAP].size
= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
-   pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
+   pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_IOVA;
 
pdata->info.mem[INT_PAGE_MAP].name = "int_page";
pdata->info.mem[INT_PAGE_MAP].addr
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3 6/6] uio_hv_generic: defer opening vmbus until first use

2018-09-14 Thread Stephen Hemminger
This fixes two design flaws in hv_uio_generic.

Since hv_uio_probe is called from vmbus_probe with lock held
it potentially can cause sleep in an atomic section because
vmbus_open will wait for response from host.

The hv_uio_generic driver could not handle applications
exiting and restarting because the vmbus channel was
persistent.  Change the semantics so that the buffers are
allocated on probe, but not attached to host until
device is opened.

Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 104 +--
 1 file changed, 74 insertions(+), 30 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 53f5610c6065..c2493d011225 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -55,6 +55,7 @@ enum hv_uio_map {
 struct hv_uio_private_data {
struct uio_info info;
struct hv_device *device;
+   atomic_t refcnt;
 
void*recv_buf;
u32 recv_gpadl;
@@ -128,12 +129,10 @@ static int hv_uio_ring_mmap(struct file *filp, struct 
kobject *kobj,
 {
struct vmbus_channel *channel
= container_of(kobj, struct vmbus_channel, kobj);
-   struct hv_device *dev = channel->primary_channel->device_obj;
-   u16 q_idx = channel->offermsg.offer.sub_channel_index;
void *ring_buffer = page_address(channel->ringbuffer_page);
 
-   dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
-   q_idx, vma_pages(vma), vma->vm_pgoff);
+   if (channel->state != CHANNEL_OPENED_STATE)
+   return -ENODEV;
 
return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
   channel->ringbuffer_pagecount << PAGE_SHIFT);
@@ -176,57 +175,103 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
}
 }
 
+/* free the reserved buffers for send and receive */
 static void
 hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
-   if (pdata->send_gpadl)
+   if (pdata->send_gpadl) {
vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl);
-   vfree(pdata->send_buf);
+   pdata->send_gpadl = 0;
+   vfree(pdata->send_buf);
+   }
 
-   if (pdata->recv_gpadl)
+   if (pdata->recv_gpadl) {
vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl);
-   vfree(pdata->recv_buf);
+   pdata->recv_gpadl = 0;
+   vfree(pdata->recv_buf);
+   }
+}
+
+/* VMBus primary channel is opened on first use */
+static int
+hv_uio_open(struct uio_info *info, struct inode *inode)
+{
+   struct hv_uio_private_data *pdata
+   = container_of(info, struct hv_uio_private_data, info);
+   struct hv_device *dev = pdata->device;
+   int ret;
+
+   if (atomic_inc_return(&pdata->refcnt) != 1)
+   return 0;
+
+   ret = vmbus_connect_ring(dev->channel,
+hv_uio_channel_cb, dev->channel);
+
+   if (ret == 0)
+   dev->channel->inbound.ring_buffer->interrupt_mask = 1;
+   else
+   atomic_dec(&pdata->refcnt);
+
+   return ret;
+}
+
+/* VMBus primary channel is closed on last close */
+static int
+hv_uio_release(struct uio_info *info, struct inode *inode)
+{
+   struct hv_uio_private_data *pdata
+   = container_of(info, struct hv_uio_private_data, info);
+   struct hv_device *dev = pdata->device;
+   int ret = 0;
+
+   if (atomic_dec_and_test(&pdata->refcnt))
+   ret = vmbus_disconnect_ring(dev->channel);
+
+   return ret;
 }
 
 static int
 hv_uio_probe(struct hv_device *dev,
 const struct hv_vmbus_device_id *dev_id)
 {
+   struct vmbus_channel *channel = dev->channel;
struct hv_uio_private_data *pdata;
+   void *ring_buffer;
int ret;
 
+   /* Communicating with host has to be via shared memory not hypercall */
+   if (!channel->offermsg.monitor_allocated) {
+   dev_err(&dev->device, "vmbus channel requires hypercall\n");
+   return -ENOTSUPP;
+   }
+
pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
 
-   ret = vmbus_open(dev->channel, HV_RING_SIZE * PAGE_SIZE,
-HV_RING_SIZE * PAGE_SIZE, NULL, 0,
-hv_uio_channel_cb, dev->channel);
+   ret = vmbus_alloc_ring(channel, HV_RING_SIZE * PAGE_SIZE,
+  HV_RING_SIZE * PAGE_SIZE);
if (ret)
goto fail;
 
-   /* Communicating with host has to be via shared memory not hypercall */
-   if (!dev->channel->offermsg.monitor_allocated) {
-   dev_err(&dev->device, "vmbus channel requires hypercall\n");

[PATCH v3 2/6] vmbus: keep pointer to ring buffer page

2018-09-14 Thread Stephen Hemminger
Avoid going from struct page to virt address (and back) by just
keeping pointer to the allocated pages instead of virt address.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 20 +---
 drivers/uio/uio_hv_generic.c |  5 +++--
 include/linux/hyperv.h   |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 33e6db02dbab..56ec0d96d876 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
unsigned long flags;
int ret, err = 0;
struct page *page;
+   unsigned int order;
 
if (send_ringbuffer_size % PAGE_SIZE ||
recv_ringbuffer_size % PAGE_SIZE)
return -EINVAL;
 
+   order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+
spin_lock_irqsave(&newchannel->lock, flags);
if (newchannel->state == CHANNEL_OPEN_STATE) {
newchannel->state = CHANNEL_OPENING_STATE;
@@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
 
/* Allocate the ring buffer */
page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
-   GFP_KERNEL|__GFP_ZERO,
-   get_order(send_ringbuffer_size +
-   recv_ringbuffer_size));
+   GFP_KERNEL|__GFP_ZERO, order);
 
if (!page)
-   page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
-  get_order(send_ringbuffer_size +
-recv_ringbuffer_size));
+   page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
if (!page) {
err = -ENOMEM;
goto error_set_chnstate;
}
 
-   newchannel->ringbuffer_pages = page_address(page);
+   newchannel->ringbuffer_page = page;
newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
   recv_ringbuffer_size) >> PAGE_SHIFT;
 
@@ -239,8 +238,7 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 
send_ringbuffer_size,
 error_free_pages:
hv_ringbuffer_cleanup(&newchannel->outbound);
hv_ringbuffer_cleanup(&newchannel->inbound);
-   __free_pages(page,
-get_order(send_ringbuffer_size + recv_ringbuffer_size));
+   __free_pages(page, order);
 error_set_chnstate:
newchannel->state = CHANNEL_OPEN_STATE;
return err;
@@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel 
*channel)
hv_ringbuffer_cleanup(&channel->outbound);
hv_ringbuffer_cleanup(&channel->inbound);
 
-   free_pages((unsigned long)channel->ringbuffer_pages,
-   get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
+   __free_pages(channel->ringbuffer_page,
+get_order(channel->ringbuffer_pagecount << PAGE_SHIFT));
 
 out:
return ret;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index a08860260f55..ba67a5267557 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct 
kobject *kobj,
= container_of(kobj, struct vmbus_channel, kobj);
struct hv_device *dev = channel->primary_channel->device_obj;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
+   void *ring_buffer = page_address(channel->ringbuffer_page);
 
dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
q_idx, vma_pages(vma), vma->vm_pgoff);
 
-   return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages),
+   return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
   channel->ringbuffer_pagecount << PAGE_SHIFT);
 }
 
@@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev,
/* mem resources */
pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
pdata->info.mem[TXRX_RING_MAP].addr
-   = (uintptr_t)dev->channel->ringbuffer_pages;
+   = (uintptr_t)page_address(dev->channel->ringbuffer_page);
pdata->info.mem[TXRX_RING_MAP].size
= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6c4575c7f46b..a6c32d2d090b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -739,7 +739,7 @@ struct vmbus_channel {
u32 ringbuffer_gpadlhandle;
 
/* Allocated memory for ring buffer */
-   void *ringbuffer_pages;
+   struc

[PATCH v3 1/6] vmbus: pass channel to hv_process_channel_removal

2018-09-14 Thread Stephen Hemminger
Rather than passing relid and then looking up the channel,
pass the channel directly, since the caller already knows it.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c  |  3 +--
 drivers/hv/channel_mgmt.c | 17 +
 drivers/hv/vmbus_drv.c|  3 +--
 include/linux/hyperv.h|  2 +-
 4 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 741857d80da1..33e6db02dbab 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel)
wait_for_completion(&cur_channel->rescind_event);
mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(cur_channel);
-   hv_process_channel_removal(
-  cur_channel->offermsg.child_relid);
+   hv_process_channel_removal(cur_channel);
} else {
mutex_lock(&vmbus_connection.channel_mutex);
vmbus_close_internal(cur_channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 0f0e091c117c..b7c48ebdf6a1 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid)
trace_vmbus_release_relid(&msg, ret);
 }
 
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
 {
+   struct vmbus_channel *primary_channel;
unsigned long flags;
-   struct vmbus_channel *primary_channel, *channel;
 
BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
-   /*
-* Make sure channel is valid as we may have raced.
-*/
-   channel = relid2channel(relid);
-   if (!channel)
-   return;
-
BUG_ON(!channel->rescind);
+
if (channel->target_cpu != get_cpu()) {
put_cpu();
smp_call_function_single(channel->target_cpu,
@@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid)
cpumask_clear_cpu(channel->target_cpu,
  &primary_channel->alloced_cpus_in_node);
 
-   vmbus_release_relid(relid);
+   vmbus_release_relid(channel->offermsg.child_relid);
 
free_channel(channel);
 }
@@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct 
vmbus_channel_message_header *hdr)
 * The channel is currently not open;
 * it is safe for us to cleanup the channel.
 */
-   hv_process_channel_removal(rescind->child_relid);
+   hv_process_channel_removal(channel);
} else {
complete(&channel->rescind_event);
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e6d8fdac6d8b..007ee8e5986a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device)
struct vmbus_channel *channel = hv_dev->channel;
 
mutex_lock(&vmbus_connection.channel_mutex);
-   hv_process_channel_removal(channel->offermsg.child_relid);
+   hv_process_channel_removal(channel);
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
-
 }
 
 /* The one and only one */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2c3798bcb01c..6c4575c7f46b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr 
*icmsghdrp, u8 *buf,
const int *srv_version, int srv_vercnt,
int *nego_fw_version, int *nego_srv_version);
 
-void hv_process_channel_removal(u32 relid);
+void hv_process_channel_removal(struct vmbus_channel *channel);
 
 void vmbus_setevent(struct vmbus_channel *channel);
 /*
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3 0/6] fix hv_uio_generic open/close

2018-09-14 Thread Stephen Hemminger
This set of patches fixes the problem where DPDK applications
using hv_uio_generic driver can not be successfully restarted.

In order to get this working it required a small change to uio
to allow for mapping without no-cache, and a refactoring of how
the ring buffer is set up in the vmbus code.

It could be backported as a fix, to 4.19 but that is not
an LTS so probably not worth it.

v3 - fix typo (sent wrong version for v2)

v2 - fix refcount when hv_uio_open fails


Stephen Hemminger (6):
  vmbus: pass channel to hv_process_channel_removal
  vmbus: keep pointer to ring buffer page
  vmbus: split ring buffer allocation from open
  uio: introduce UIO_MEM_IOVA
  hv_uio_generic: map ringbuffer phys addr
  uio_hv_generic: defer opening vmbus until first use

 drivers/hv/channel.c | 276 ---
 drivers/hv/channel_mgmt.c|  17 +--
 drivers/hv/ring_buffer.c |   1 +
 drivers/hv/vmbus_drv.c   |   3 +-
 drivers/uio/uio.c|  24 +--
 drivers/uio/uio_hv_generic.c | 109 ++
 include/linux/hyperv.h   |  13 +-
 include/linux/uio_driver.h   |   1 +
 8 files changed, 264 insertions(+), 180 deletions(-)

-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v3 3/6] vmbus: split ring buffer allocation from open

2018-09-14 Thread Stephen Hemminger
The UIO driver needs the ring buffer to be persistent (reused)
across open/close. Split the allocation and setup of ring buffer
out of vmbus_open. For normal usage vmbus_open/vmbus_close there
are no changes; only impacts uio_hv_generic which needs to keep
ring buffer memory and reuse when application restarts.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 267 ++-
 drivers/hv/ring_buffer.c |   1 +
 include/linux/hyperv.h   |   9 ++
 3 files changed, 162 insertions(+), 115 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56ec0d96d876..ddadb7efd1cc 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel)
 }
 EXPORT_SYMBOL_GPL(vmbus_setevent);
 
-/*
- * vmbus_open - Open the specified channel.
- */
-int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
-u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
-void (*onchannelcallback)(void *context), void *context)
+/* vmbus_free_ring - drop mapping of ring buffer */
+void vmbus_free_ring(struct vmbus_channel *channel)
 {
-   struct vmbus_channel_open_channel *open_msg;
-   struct vmbus_channel_msginfo *open_info = NULL;
-   unsigned long flags;
-   int ret, err = 0;
-   struct page *page;
-   unsigned int order;
+   hv_ringbuffer_cleanup(&channel->outbound);
+   hv_ringbuffer_cleanup(&channel->inbound);
 
-   if (send_ringbuffer_size % PAGE_SIZE ||
-   recv_ringbuffer_size % PAGE_SIZE)
-   return -EINVAL;
+   if (channel->ringbuffer_page) {
+   __free_pages(channel->ringbuffer_page,
+get_order(channel->ringbuffer_pagecount
+  << PAGE_SHIFT));
+   channel->ringbuffer_page = NULL;
+   }
+}
+EXPORT_SYMBOL_GPL(vmbus_free_ring);
 
-   order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+/* vmbus_alloc_ring - allocate and map pages for ring buffer */
+int vmbus_alloc_ring(struct vmbus_channel *newchannel,
+u32 send_size, u32 recv_size)
+{
+   struct page *page;
+   int order;
 
-   spin_lock_irqsave(&newchannel->lock, flags);
-   if (newchannel->state == CHANNEL_OPEN_STATE) {
-   newchannel->state = CHANNEL_OPENING_STATE;
-   } else {
-   spin_unlock_irqrestore(&newchannel->lock, flags);
+   if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
return -EINVAL;
-   }
-   spin_unlock_irqrestore(&newchannel->lock, flags);
-
-   newchannel->onchannel_callback = onchannelcallback;
-   newchannel->channel_callback_context = context;
 
/* Allocate the ring buffer */
+   order = get_order(send_size + recv_size);
page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
GFP_KERNEL|__GFP_ZERO, order);
 
if (!page)
page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
-   if (!page) {
-   err = -ENOMEM;
-   goto error_set_chnstate;
-   }
+   if (!page)
+   return -ENOMEM;
 
newchannel->ringbuffer_page = page;
-   newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
-  recv_ringbuffer_size) >> PAGE_SHIFT;
+   newchannel->ringbuffer_pagecount = (send_size + recv_size) >> 
PAGE_SHIFT;
+   newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
 
-   ret = hv_ringbuffer_init(&newchannel->outbound, page,
-send_ringbuffer_size >> PAGE_SHIFT);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
 
-   if (ret != 0) {
-   err = ret;
-   goto error_free_pages;
-   }
+static int __vmbus_open(struct vmbus_channel *newchannel,
+  void *userdata, u32 userdatalen,
+  void (*onchannelcallback)(void *context), void *context)
+{
+   struct vmbus_channel_open_channel *open_msg;
+   struct vmbus_channel_msginfo *open_info = NULL;
+   struct page *page = newchannel->ringbuffer_page;
+   u32 send_pages, recv_pages;
+   unsigned long flags;
+   int err;
 
-   ret = hv_ringbuffer_init(&newchannel->inbound,
-&page[send_ringbuffer_size >> PAGE_SHIFT],
-recv_ringbuffer_size >> PAGE_SHIFT);
-   if (ret != 0) {
-   err = ret;
-   goto error_free_pages;
+   if (userdatalen > MAX_USER_DEFINED_BYTES)
+   return -EINVAL;
+
+   send_pages = newchannel->ringbuffer_send_offset;
+   recv_pages = newchannel

[PATCH v2 0/2] hv_netvsc: associate VF and PV device by serial number

2018-09-14 Thread Stephen Hemminger
The Hyper-V implementation of the PCI controller has the concept of a 32-bit serial number
(not to be confused with PCI-E serial number).  This value is sent in the 
protocol
from the host to indicate SR-IOV VF device is attached to a synthetic NIC.

Using the serial number (instead of MAC address) to associate the two devices
avoids lots of potential problems when there are duplicate MAC addresses from
tunnels or layered devices.

The patch set is broken into two parts, one is for the PCI controller
and the other is for the netvsc device. Normally, these go through different
trees but sending them together here for better review. The PCI changes
were submitted previously, but the main review comment was "why do you
need this?". This is why.

v2 - slot name can be shorter.
 remove locking when creating pci_slots; see comment for explanation

Stephen Hemminger (2):
  PCI: hv: support reporting serial number as slot information
  hv_netvsc: pair VF based on serial number

 drivers/net/hyperv/netvsc.c |  3 ++
 drivers/net/hyperv/netvsc_drv.c | 58 -
 drivers/pci/controller/pci-hyperv.c | 37 ++
 3 files changed, 73 insertions(+), 25 deletions(-)

-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 2/2] hv_netvsc: pair VF based on serial number

2018-09-14 Thread Stephen Hemminger
Matching network device based on MAC address is problematic
since a non VF network device can be created with a duplicate MAC
address causing confusion and problems.  The VMBus API does provide
a serial number that is a better matching method.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc.c |  3 ++
 drivers/net/hyperv/netvsc_drv.c | 58 +++--
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 31c3d77b4733..fe01e141c8f8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1203,6 +1203,9 @@ static void netvsc_send_vf(struct net_device *ndev,
 
net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+   netdev_info(ndev, "VF slot %u %s\n",
+   net_device_ctx->vf_serial,
+   net_device_ctx->vf_alloc ? "added" : "removed");
 }
 
 static  void netvsc_receive_inband(struct net_device *ndev,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1121a1ec407c..9dedc1463e88 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1894,20 +1894,6 @@ static void netvsc_link_change(struct work_struct *w)
rtnl_unlock();
 }
 
-static struct net_device *get_netvsc_bymac(const u8 *mac)
-{
-   struct net_device_context *ndev_ctx;
-
-   list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
-   struct net_device *dev = hv_get_drvdata(ndev_ctx->device_ctx);
-
-   if (ether_addr_equal(mac, dev->perm_addr))
-   return dev;
-   }
-
-   return NULL;
-}
-
 static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
 {
struct net_device_context *net_device_ctx;
@@ -2036,26 +2022,48 @@ static void netvsc_vf_setup(struct work_struct *w)
rtnl_unlock();
 }
 
+/* Find netvsc by VMBus serial number.
+ * The PCI hyperv controller records the serial number as the slot.
+ */
+static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev)
+{
+   struct device *parent = vf_netdev->dev.parent;
+   struct net_device_context *ndev_ctx;
+   struct pci_dev *pdev;
+
+   if (!parent || !dev_is_pci(parent))
+   return NULL; /* not a PCI device */
+
+   pdev = to_pci_dev(parent);
+   if (!pdev->slot) {
+   netdev_notice(vf_netdev, "no PCI slot information\n");
+   return NULL;
+   }
+
+   list_for_each_entry(ndev_ctx, &netvsc_dev_list, list) {
+   if (!ndev_ctx->vf_alloc)
+   continue;
+
+   if (ndev_ctx->vf_serial == pdev->slot->number)
+   return hv_get_drvdata(ndev_ctx->device_ctx);
+   }
+
+   netdev_notice(vf_netdev,
+ "no netdev found for slot %u\n", pdev->slot->number);
+   return NULL;
+}
+
 static int netvsc_register_vf(struct net_device *vf_netdev)
 {
-   struct net_device *ndev;
struct net_device_context *net_device_ctx;
-   struct device *pdev = vf_netdev->dev.parent;
struct netvsc_device *netvsc_dev;
+   struct net_device *ndev;
int ret;
 
if (vf_netdev->addr_len != ETH_ALEN)
return NOTIFY_DONE;
 
-   if (!pdev || !dev_is_pci(pdev) || dev_is_pf(pdev))
-   return NOTIFY_DONE;
-
-   /*
-* We will use the MAC address to locate the synthetic interface to
-* associate with the VF interface. If we don't find a matching
-* synthetic interface, move on.
-*/
-   ndev = get_netvsc_bymac(vf_netdev->perm_addr);
+   ndev = get_netvsc_byslot(vf_netdev);
if (!ndev)
return NOTIFY_DONE;
 
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH v2 1/2] PCI: hv: support reporting serial number as slot information

2018-09-14 Thread Stephen Hemminger
The Hyper-V host API for PCI provides a unique "serial number" which
can be used as basis for sysfs PCI slot table. This can be useful
for cases where userspace wants to find the PCI device based on
serial number.

When an SR-IOV NIC is added, the host sends an attach message
with serial number. The kernel doesn't use the serial number, but
it is useful when doing the same thing in a userspace driver such
as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
way to find the matching PCI device.

There may be some cases where the serial number is not unique, such
as when using GPUs, but the PCI slot infrastructure will handle
that.

This has a side effect which may also be useful. The common udev
network device naming policy uses the slot information (rather
than PCI address).

Signed-off-by: Stephen Hemminger 
---
 drivers/pci/controller/pci-hyperv.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c 
b/drivers/pci/controller/pci-hyperv.c
index c00f82cc54aa..ee80e79db21a 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -89,6 +89,9 @@ static enum pci_protocol_version_t pci_protocol_version;
 
 #define STATUS_REVISION_MISMATCH 0xC059
 
+/* space for 32bit serial number as string */
+#define SLOT_NAME_SIZE 11
+
 /*
  * Message Types
  */
@@ -494,6 +497,7 @@ struct hv_pci_dev {
struct list_head list_entry;
refcount_t refs;
enum hv_pcichild_state state;
+   struct pci_slot *pci_slot;
struct pci_function_description desc;
bool reported_missing;
struct hv_pcibus_device *hbus;
@@ -1457,6 +1461,34 @@ static void prepopulate_bars(struct hv_pcibus_device 
*hbus)
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 }
 
+/*
+ * Assign entries in sysfs pci slot directory.
+ *
+ * Note that this function does not need to lock the children list
+ * because it is called from pci_devices_present_work which
+ * is serialized with hv_eject_device_work because they are on the
+ * same ordered workqueue. Therefore hbus->children list will not change
+ * even when pci_create_slot sleeps.
+ */
+static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
+{
+   struct hv_pci_dev *hpdev;
+   char name[SLOT_NAME_SIZE];
+   int slot_nr;
+
+   list_for_each_entry(hpdev, &hbus->children, list_entry) {
+   if (hpdev->pci_slot)
+   continue;
+
+   slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
+   snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
+   hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
+ name, NULL);
+   if (!hpdev->pci_slot)
+   pr_warn("pci_create slot %s failed\n", name);
+   }
+}
+
 /**
  * create_root_hv_pci_bus() - Expose a new root PCI bus
  * @hbus:  Root PCI bus, as understood by this driver
@@ -1480,6 +1512,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device 
*hbus)
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
pci_bus_assign_resources(hbus->pci_bus);
+   hv_pci_assign_slots(hbus);
pci_bus_add_devices(hbus->pci_bus);
pci_unlock_rescan_remove();
hbus->state = hv_pcibus_installed;
@@ -1742,6 +1775,7 @@ static void pci_devices_present_work(struct work_struct 
*work)
 */
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
+   hv_pci_assign_slots(hbus);
pci_unlock_rescan_remove();
break;
 
@@ -1858,6 +1892,9 @@ static void hv_eject_device_work(struct work_struct *work)
list_del(&hpdev->list_entry);
spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);
 
+   if (hpdev->pci_slot)
+   pci_destroy_slot(hpdev->pci_slot);
+
memset(&ctxt, 0, sizeof(ctxt));
ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
-- 
2.18.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next 17/22] hv_netvsc: fix return type of ndo_start_xmit function

2018-09-20 Thread Stephen Hemminger
On Thu, 20 Sep 2018 20:33:01 +0800
YueHaibing  wrote:

> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, so make sure the implementation in
> this driver returns a 'netdev_tx_t' value, and change the function
> return type to netdev_tx_t.
> 
> Found by coccinelle.
> 
> Signed-off-by: YueHaibing 
> ---
>  drivers/net/hyperv/netvsc_drv.c | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index 3af6d8d..056c472 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -511,7 +511,8 @@ static int netvsc_vf_xmit(struct net_device *net, struct 
> net_device *vf_netdev,
>   return rc;
>  }
>  
> -static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
> +static netdev_tx_t
> +netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
>  {
>   struct net_device_context *net_device_ctx = netdev_priv(net);
>   struct hv_netvsc_packet *packet = NULL;
> @@ -528,8 +529,11 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct 
> net_device *net)
>*/
>   vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
>   if (vf_netdev && netif_running(vf_netdev) &&
> - !netpoll_tx_running(net))
> - return netvsc_vf_xmit(net, vf_netdev, skb);
> + !netpoll_tx_running(net)) {
> + ret = netvsc_vf_xmit(net, vf_netdev, skb);
> + if (ret)
> + return NETDEV_TX_BUSY;
> + }

Sorry, the new code is wrong. It will fall through if ret == 0 (NETDEV_TX_OK)
Please review and test your patches.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 0/2] hv_netvsc: associate VF and PV device by serial number

2018-09-20 Thread Stephen Hemminger
On Thu, 20 Sep 2018 15:18:20 +0100
Lorenzo Pieralisi  wrote:

> On Fri, Sep 14, 2018 at 12:54:55PM -0700, Stephen Hemminger wrote:
> > The Hyper-V implementation of PCI controller has concept of 32 bit serial 
> > number
> > (not to be confused with PCI-E serial number).  This value is sent in the 
> > protocol
> > from the host to indicate SR-IOV VF device is attached to a synthetic NIC.
> > 
> > Using the serial number (instead of MAC address) to associate the two 
> > devices
> > avoids lots of potential problems when there are duplicate MAC addresses 
> > from
> > tunnels or layered devices.
> > 
> > The patch set is broken into two parts, one is for the PCI controller
> > and the other is for the netvsc device. Normally, these go through different
> > trees but sending them together here for better review. The PCI changes
> > were submitted previously, but the main review comment was "why do you
> > need this?". This is why.  
> 
> The question was more whether we should convert this serial number into
> a PCI slot number (that has user space visibility and that is what you are
> after) to improve the current matching, I do not question why you need
> it, just for the records.

The name "slot" is heavily overloaded in this context.
There are:
  - the windows slot number, which comes from Hyper-V
  - the pci address slot, which pci-hyperv sets from the windows slot
  - the pci slot api value, which for normal devices comes from ACPI;
    this patch gets it from the serial number


The netvsc driver needed to be able to find a PCI device based on the serial
number. The serial number was not visible in any current PCI-hyperv controller
values.  The windows slot (wslot) is not the same as the serial number.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch

2018-09-20 Thread Stephen Hemminger
On Thu, 20 Sep 2018 17:06:59 +
Haiyang Zhang  wrote:

> +static inline void rsc_add_data
> + (struct netvsc_channel *nvchan,
> +  const struct ndis_pkt_8021q_info *vlan,
> +  const struct ndis_tcp_ip_checksum_info *csum_info,
> +  void *data, u32 len)
> +{

Could this be changed to look more like a function and skip the inline.
The compiler will end up inlining it anyway.

static void rsc_add_data(struct netvsc_channel *nvchan,

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch

2018-09-20 Thread Stephen Hemminger
On Thu, 20 Sep 2018 20:56:46 +
Haiyang Zhang  wrote:

> > -Original Message-
> > From: Stephen Hemminger 
> > Sent: Thursday, September 20, 2018 4:48 PM
> > To: Haiyang Zhang 
> > Cc: Haiyang Zhang ; da...@davemloft.net;
> > net...@vger.kernel.org; o...@aepfle.de; linux-ker...@vger.kernel.org;
> > de...@linuxdriverproject.org; vkuznets 
> > Subject: Re: [PATCH net-next, 1/3] hv_netvsc: Add support for LRO/RSC in the
> > vSwitch
> > 
> > On Thu, 20 Sep 2018 17:06:59 +
> > Haiyang Zhang  wrote:
> >   
> > > +static inline void rsc_add_data
> > > + (struct netvsc_channel *nvchan,
> > > +  const struct ndis_pkt_8021q_info *vlan,
> > > +  const struct ndis_tcp_ip_checksum_info *csum_info,
> > > +  void *data, u32 len)
> > > +{  
> > 
> > Could this be changed to look more like a function and skip the inline.
> > The compiler will end up inlining it anyway.
> > 
> > static void rsc_add_data(struct netvsc_channel *nvchan,  
> 
> How about this?
> static inline
> void rsc_add_data(struct netvsc_channel *nvchan,
> 

Sure that matches other code in that file
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch

2018-09-21 Thread Stephen Hemminger
On Fri, 21 Sep 2018 18:20:35 +
Haiyang Zhang  wrote:

Overall, this looks good. Some minor suggestions.

> +struct nvsc_rsc {
> + const struct ndis_pkt_8021q_info *vlan;
> + const struct ndis_tcp_ip_checksum_info *csum_info;
> + u8 is_last; /* last RNDIS msg in a vmtransfer_page */
> + u32 cnt; /* #fragments in an RSC packet */
> + u32 pktlen; /* Full packet length */
> + void *data[NVSP_RSC_MAX];
> + u32 len[NVSP_RSC_MAX];
> +};
> +

This new state structure is state on a per-channel basis.
Do you really need this to be persistent across packets?

Could this be on stack or do you need it to handle split packets
arriving in different polls? Or is the stack space a problem?

Also, maybe data and length could be in one structure since they
are related.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch

2018-09-21 Thread Stephen Hemminger
On Fri, 21 Sep 2018 18:51:54 +
Haiyang Zhang  wrote:

> > -Original Message-
> > From: Stephen Hemminger 
> > Sent: Friday, September 21, 2018 2:37 PM
> > To: Haiyang Zhang 
> > Cc: Haiyang Zhang ; da...@davemloft.net;
> > net...@vger.kernel.org; o...@aepfle.de; linux-ker...@vger.kernel.org;
> > de...@linuxdriverproject.org; vkuznets 
> > Subject: Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in
> > the vSwitch
> > 
> > On Fri, 21 Sep 2018 18:20:35 +
> > Haiyang Zhang  wrote:
> > 
> > Overall, this looks good. Some minor suggestions.
> >   
> > > +struct nvsc_rsc {
> > > + const struct ndis_pkt_8021q_info *vlan;
> > > + const struct ndis_tcp_ip_checksum_info *csum_info;
> > > + u8 is_last; /* last RNDIS msg in a vmtransfer_page */
> > > + u32 cnt; /* #fragments in an RSC packet */
> > > + u32 pktlen; /* Full packet length */
> > > + void *data[NVSP_RSC_MAX];
> > > + u32 len[NVSP_RSC_MAX];
> > > +};
> > > +  
> > 
> > This new state structure is state on a per-channel basis.
> > Do you really need this to be persistent across packets?
> > 
> > Could this be on stack or do you need it to handle split packets arriving in
> > different polls? Or is the stack space a problem?
> > 
> > Also, maybe data and length could be in one structure since they are 
> > related.  
> 
> The stack space is a problem. NVSP_RSC_MAX is 562, which is defined by host.
> It will be too large for limited stack space. 
> 
> struct nvsc_rsc includes the data, len, cnt, chksum, vlan for one RSC packet. 
> They
> are all related to construction of one SKB and its meta data. So I put them in
> one structure.
> 
> Thanks,
> - Haiyang
> 

That makes sense. How big is sizeof(struct net_device) + netdev_priv now?
Need to make sure it doesn't become an order 2 (ie keep it less than 4K).
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next] hv_netvsc: fix return type of ndo_start_xmit function

2018-09-26 Thread Stephen Hemminger
On Wed, 26 Sep 2018 18:25:10 +0800
YueHaibing  wrote:

> The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> which is a typedef for an enum type, so make sure the implementation in
> this driver returns a 'netdev_tx_t' value, and change the function
> return type to netdev_tx_t.
> 
> As suggested by Haiyang Zhang, if netvsc_vf_xmit
> fails, we are not sure whether the error will go away after retrying, and
> returning NETDEV_TX_BUSY may cause infinite retries from the upper layer,
> so just return NETDEV_TX_OK there.
> 
> Found by coccinelle.
> 
> Signed-off-by: YueHaibing 
> ---
>  drivers/net/hyperv/netvsc_drv.c | 9 ++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
> index ec69974..a1d44b4 100644
> --- a/drivers/net/hyperv/netvsc_drv.c
> +++ b/drivers/net/hyperv/netvsc_drv.c
> @@ -511,7 +511,8 @@ static int netvsc_vf_xmit(struct net_device *net, struct 
> net_device *vf_netdev,
>   return rc;
>  }
>  
> -static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
> +static netdev_tx_t
> +netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
>  {
>   struct net_device_context *net_device_ctx = netdev_priv(net);
>   struct hv_netvsc_packet *packet = NULL;
> @@ -528,8 +529,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct 
> net_device *net)
>*/
>   vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
>   if (vf_netdev && netif_running(vf_netdev) &&
> - !netpoll_tx_running(net))
> - return netvsc_vf_xmit(net, vf_netdev, skb);
> + !netpoll_tx_running(net)) {
> + netvsc_vf_xmit(net, vf_netdev, skb);
> + return NETDEV_TX_OK;
> + }
>  
>   /* We will atmost need two pages to describe the rndis
>* header. We can only transmit MAX_PAGE_BUFFER_COUNT number

Your patch loses the possible return values of netvsc_vf_xmit.
A suggested better fix would be to make netvsc_vf_xmit return netdev_tx_t type.
And this means the return value of dev_queue_xmit needs to be netdev_tx_t.

Please don't merge this as is.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] hv_netvsc: Make sure out channel is fully opened on send

2018-09-27 Thread Stephen Hemminger
On Thu, 27 Sep 2018 10:57:05 +0200
Mohammed Gamal  wrote:

> On Wed, 2018-09-26 at 17:13 +, Haiyang Zhang wrote:
> > > -Original Message-
> > > From: Mohammed Gamal 
> > > Sent: Wednesday, September 26, 2018 12:34 PM
> > > To: Stephen Hemminger ; netdev@vger.kernel.
> > > org
> > > Cc: KY Srinivasan ; Haiyang Zhang
> > > ; vkuznets ;
> > > ot...@redhat.com; cavery ; linux-
> > > ker...@vger.kernel.org; de...@linuxdriverproject.org; Mohammed
> > > Gamal
> > > 
> > > Subject: [PATCH] hv_netvsc: Make sure out channel is fully opened
> > > on send
> > > 
> > > During high network traffic changes to network interface parameters
> > > such as
> > > number of channels or MTU can cause a kernel panic with a NULL
> > > pointer
> > > dereference. This is due to netvsc_device_remove() being called and
> > > deallocating the channel ring buffers, which can then be accessed
> > > by
> > > netvsc_send_pkt() before they're allocated on calling
> > > netvsc_device_add()
> > > 
> > > The patch fixes this problem by checking the channel state and
> > > returning
> > > ENODEV if not yet opened. We also move the call to
> > > hv_ringbuf_avail_percent()
> > > which may access the uninitialized ring buffer.
> > > 
> > > Signed-off-by: Mohammed Gamal 
> > > ---
> > >  drivers/net/hyperv/netvsc.c | 7 ++-
> > >  1 file changed, 6 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/net/hyperv/netvsc.c
> > > b/drivers/net/hyperv/netvsc.c index
> > > fe01e14..75f1b31 100644
> > > --- a/drivers/net/hyperv/netvsc.c
> > > +++ b/drivers/net/hyperv/netvsc.c
> > > @@ -825,7 +825,12 @@ static inline int netvsc_send_pkt(
> > >   struct netdev_queue *txq = netdev_get_tx_queue(ndev,
> > > packet->q_idx);
> > >   u64 req_id;
> > >   int ret;
> > > - u32 ring_avail =
> > > hv_get_avail_to_write_percent(&out_channel-  
> > > > outbound);  
> > > 
> > > + u32 ring_avail;
> > > +
> > > + if (out_channel->state != CHANNEL_OPENED_STATE)
> > > + return -ENODEV;
> > > +
> > > + ring_avail = hv_get_avail_to_write_percent(&out_channel-  
> > > >outbound);  
> > 
> > When you reproducing the NULL ptr panic, does your kernel include the
> > following patch?
> > hv_netvsc: common detach logic
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/c
> > ommit/?id=7b2ee50c0cd513a176a26a71f2989facdd75bfea
> >   
> Yes it is included. And the commit did reduce the occurrence of this
> race condition, but it still nevertheless occurs albeit rarely.
> 
> > We call netif_tx_disable(ndev) and netif_device_detach(ndev) before
> > doing the changes 
> > on MTU or #channels. So there should be no call to start_xmit() when
> > channel is not ready.
> > 
> > If you see the check for CHANNEL_OPENED_STATE is still necessary on
> > upstream kernel (including 
> > the patch " common detach logic "), we should debug further on the
> > code and find out the 
> > root cause.
> > 
> > Thanks,
> > - Haiyang
> >   
> ___
> devel mailing list
> de...@linuxdriverproject.org
> http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

Is there some workload, that can be used to reproduce this?
The stress test from Vitaly with changing parameters while running network 
traffic
passes now.

Can you reproduce this with the upstream current kernel?

Adding the check in start xmit is still racy, and won't cure the problem.

Another solution would be to add a grace period in the netvsc detach logic.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next, v2] hv_netvsc: fix vf serial matching with pci slot info

2018-10-12 Thread Stephen Hemminger
On Fri, 12 Oct 2018 20:55:15 +
Haiyang Zhang  wrote:

Thanks for fixing this.

  
> + if (kstrtou32(kobject_name(&pdev->slot->kobj), 10, &serial)) {
> + netdev_notice(vf_netdev, "Invalid vf serial:%s\n",
> +   pdev->slot->kobj.name);
> + return NULL;
> + }

Shouldn't this use kobject_name() in the message as well?

Looking at the pci.h code there is already an API to get name from
slot (it uses kobject_name()). So please use that one.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next, v3] hv_netvsc: fix vf serial matching with pci slot info

2018-10-15 Thread Stephen Hemminger
On Mon, 15 Oct 2018 19:06:15 +
Haiyang Zhang  wrote:

> From: Haiyang Zhang 
> 
> The VF device's serial number is saved as a string in PCI slot's
> kobj name, not the slot->number. This patch corrects the netvsc
> driver, so the VF device can be successfully paired with synthetic
> NIC.
> 
> Fixes: 00d7ddba1143 ("hv_netvsc: pair VF based on serial number")
> Reported-by: Vitaly Kuznetsov 
> Signed-off-by: Haiyang Zhang 

Reviewed-by: Stephen Hemminger 

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 3/4] vmbus: add per-channel sysfs info

2018-10-18 Thread Stephen Hemminger
On Thu, 18 Oct 2018 17:19:53 +0200
Olaf Hering  wrote:

> Am Sun, 17 Sep 2017 20:54:18 -0700
> schrieb k...@exchange.microsoft.com:
> 
> > This extends existing vmbus related sysfs structure to provide per-channel
> > state information. This is useful when diagnosing issues with multiple
> > queues in networking and storage.  
> 
> > +++ b/drivers/hv/vmbus_drv.c
> > +static ssize_t write_avail_show(const struct vmbus_channel *channel, char 
> > *buf)
> > +{
> > +   const struct hv_ring_buffer_info *rbi = &channel->outbound;
> > +
> > +   return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
> > +}
> > +VMBUS_CHAN_ATTR_RO(write_avail);  
> 
> This is upstream since a year.
> 
> But I wonder how this can work if vmbus_device_register is called,
> and then something reads the populated sysfs files before vmbus_open returns.
> Nothing protects rbi->ring_buffer in this case, which remains NULL
> until vmbus_open populates it.
> 
> A simple reproduce, with a modular kernel, is to boot with init=/bin/bash
> head /sys/bus/vmbus/devices/*/channels/*/*
> 
> Olaf


Good catch, actually the problem goes across all of the ring buffer sysfs files
so it existed long before that.

The channel ring buffer could be missing.

I am less worried about the open from init case, and more worried about issues
when channels are closed (as happens when changing number of channels on a net 
device).

As Al has pointed out for years, sysfs is riddled with dangling reference 
issues.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH V2 3/4] vmbus: add per-channel sysfs info

2018-10-18 Thread Stephen Hemminger
On Thu, 18 Oct 2018 15:32:35 +
Michael Kelley  wrote:

> From Olaf Hering  Sent: Thursday, October 18, 2018 8:20 AM
> >  
> > > This extends existing vmbus related sysfs structure to provide per-channel
> > > state information. This is useful when diagnosing issues with multiple
> > > queues in networking and storage.  
> >   
> > > +++ b/drivers/hv/vmbus_drv.c
> > > +static ssize_t write_avail_show(const struct vmbus_channel *channel, 
> > > char *buf)
> > > +{
> > > + const struct hv_ring_buffer_info *rbi = &channel->outbound;
> > > +
> > > + return sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
> > > +}
> > > +VMBUS_CHAN_ATTR_RO(write_avail);  
> > 
> > This is upstream since a year.
> > 
> > But I wonder how this can work if vmbus_device_register is called,
> > and then something reads the populated sysfs files before vmbus_open 
> > returns.
> > Nothing protects rbi->ring_buffer in this case, which remains NULL
> > until vmbus_open populates it.
> > 
> > A simple reproduce, with a modular kernel, is to boot with init=/bin/bash
> > head /sys/bus/vmbus/devices/*/channels/*/*
> >   
> 
> There are multiple race conditions with this and other VMbus sysfs 
> information.
> There's a race on the close path as well.  I've got an action on my list to 
> get it
> cleaned up.
> 
> Michael
> 

There is also a bunch of issues with code like:

static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
   char *buf)
{
struct hv_device *hv_dev = device_to_hv_device(dev);

if (!hv_dev->channel)
return -ENODEV;
return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}

Which should be using ACCESS_ONCE on hv_dev->channel or doing proper RCU.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] hyperv: replace mutex_is_locked with lockdep

2018-12-07 Thread Stephen Hemminger
lockdep_assert_held is better at checking for locking requirements
since it doesn't get confused if someone else is holding the mutex.

Inspired by changes in network drivers by Lance Roy.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel_mgmt.c | 2 +-
 drivers/hv/connection.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6277597d3d58..abdaf8ac0002 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -385,7 +385,7 @@ void hv_process_channel_removal(struct vmbus_channel 
*channel)
struct vmbus_channel *primary_channel;
unsigned long flags;
 
-   BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+   lockdep_assert_held(&vmbus_connection.channel_mutex);
BUG_ON(!channel->rescind);
 
if (channel->target_cpu != get_cpu()) {
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index f4d08c8ac7f8..0adaec0db85a 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -307,7 +307,7 @@ struct vmbus_channel *relid2channel(u32 relid)
struct list_head *cur, *tmp;
struct vmbus_channel *cur_sc;
 
-   BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+   lockdep_assert_held(&vmbus_connection.channel_mutex);
 
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (channel->offermsg.child_relid == relid) {
-- 
2.19.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] vmbus: fix subchannel removal

2018-12-07 Thread Stephen Hemminger
The changes to split ring allocation from open/close, broke
the cleanup of subchannels. This resulted in problems using
uio on network devices because the subchannel was left behind
when the network device was unbound.

The cause was in the disconnect logic which used list splice
to move the subchannel list into a local variable. This won't
work because the subchannel list is needed later during the
process of the rescind messages (relid2channel).

The fix is to just leave the subchannel list in place
which is what the original code did. The list is cleaned
up later when the host rescind is processed.

Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open")
Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index fe00b12e4417..bea4c9850247 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -701,20 +701,12 @@ static int vmbus_close_internal(struct vmbus_channel 
*channel)
 int vmbus_disconnect_ring(struct vmbus_channel *channel)
 {
struct vmbus_channel *cur_channel, *tmp;
-   unsigned long flags;
-   LIST_HEAD(list);
int ret;
 
if (channel->primary_channel != NULL)
return -EINVAL;
 
-   /* Snapshot the list of subchannels */
-   spin_lock_irqsave(&channel->lock, flags);
-   list_splice_init(&channel->sc_list, &list);
-   channel->num_sc = 0;
-   spin_unlock_irqrestore(&channel->lock, flags);
-
-   list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) {
+   list_for_each_entry_safe(cur_channel, tmp, &channel->sc_list, sc_list) {
if (cur_channel->rescind)
wait_for_completion(&cur_channel->rescind_event);
 
-- 
2.19.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] uio_hv_generic: set callbacks on open

2018-12-10 Thread Stephen Hemminger
This fixes the problem where a uio application was unable to
use multiple queues on restart. The root cause is that the callbacks
are cleared on disconnect. Change to setting up the callbacks
every time in open.

Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use")
Reported-by: Mohammed Gamal 
Signed-off-by: Stephen Hemminger 
---
 drivers/uio/uio_hv_generic.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index c2493d011225..3c5169eb23f5 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -204,9 +204,11 @@ hv_uio_open(struct uio_info *info, struct inode *inode)
if (atomic_inc_return(&pdata->refcnt) != 1)
return 0;
 
+   vmbus_set_chn_rescind_callback(dev->channel, hv_uio_rescind);
+   vmbus_set_sc_create_callback(dev->channel, hv_uio_new_channel);
+
ret = vmbus_connect_ring(dev->channel,
 hv_uio_channel_cb, dev->channel);
-
if (ret == 0)
dev->channel->inbound.ring_buffer->interrupt_mask = 1;
else
@@ -334,9 +336,6 @@ hv_uio_probe(struct hv_device *dev,
goto fail_close;
}
 
-   vmbus_set_chn_rescind_callback(channel, hv_uio_rescind);
-   vmbus_set_sc_create_callback(channel, hv_uio_new_channel);
-
ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr);
if (ret)
dev_notice(&dev->device,
-- 
2.19.2

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels

2018-12-17 Thread Stephen Hemminger
On Thu, 13 Dec 2018 16:35:43 +
Dexuan Cui  wrote:

> Before 98f4c651762c, we returned zeros for unopened channels.
> With 98f4c651762c, we started to return random on-stack values.
> 
> We'd better return -EINVAL instead.
> 
> Fixes: 98f4c651762c ("hv: move ringbuffer bus attributes to dev_groups")
> Cc: sta...@vger.kernel.org
> Cc: K. Y. Srinivasan 
> Cc: Haiyang Zhang 
> Cc: Stephen Hemminger 
> Signed-off-by: Dexuan Cui 

The concept looks fine, but maybe it would be simpler to move it into
hv_ringbuffer_get_debuginfo and have it return an error code.

Since so much of the code is repeated, I would probably make a
macro which generates the code as well.

Something like this:

>From c6bbdbcde933c85098f7b3e71650a8479d52810c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger 
Date: Mon, 17 Dec 2018 09:13:24 -0800
Subject: [PATCH] hv: vmbus: check for ring in debug info

---
 drivers/hv/ring_buffer.c | 31 +-
 drivers/hv/vmbus_drv.c   | 71 ++--
 include/linux/hyperv.h   |  5 +--
 3 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 64d0c85d5161..1f1a55e07733 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -164,26 +164,25 @@ hv_get_ringbuffer_availbytes(const struct 
hv_ring_buffer_info *rbi,
 }
 
 /* Get various debug metrics for the specified ring buffer. */
-void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
-struct hv_ring_buffer_debug_info *debug_info)
+int hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
+   struct hv_ring_buffer_debug_info *debug_info)
 {
u32 bytes_avail_towrite;
u32 bytes_avail_toread;
 
-   if (ring_info->ring_buffer) {
-   hv_get_ringbuffer_availbytes(ring_info,
-   &bytes_avail_toread,
-   &bytes_avail_towrite);
-
-   debug_info->bytes_avail_toread = bytes_avail_toread;
-   debug_info->bytes_avail_towrite = bytes_avail_towrite;
-   debug_info->current_read_index =
-   ring_info->ring_buffer->read_index;
-   debug_info->current_write_index =
-   ring_info->ring_buffer->write_index;
-   debug_info->current_interrupt_mask =
-   ring_info->ring_buffer->interrupt_mask;
-   }
+   if (!ring_info->ring_buffer)
+   return -EINVAL;
+
+   hv_get_ringbuffer_availbytes(ring_info,
+&bytes_avail_toread,
+&bytes_avail_towrite);
+   debug_info->bytes_avail_toread = bytes_avail_toread;
+   debug_info->bytes_avail_towrite = bytes_avail_towrite;
+   debug_info->current_read_index = ring_info->ring_buffer->read_index;
+   debug_info->current_write_index = ring_info->ring_buffer->write_index;
+   debug_info->current_interrupt_mask
+   = ring_info->ring_buffer->interrupt_mask;
+   return 0;
 }
 EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 283d184280af..403fee01572c 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -313,10 +313,16 @@ static ssize_t out_intr_mask_show(struct device *dev,
 {
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+   int ret;
 
if (!hv_dev->channel)
return -ENODEV;
-   hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+   ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+   if (ret < 0)
+   return ret;
+
return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(out_intr_mask);
@@ -326,10 +332,15 @@ static ssize_t out_read_index_show(struct device *dev,
 {
struct hv_device *hv_dev = device_to_hv_device(dev);
struct hv_ring_buffer_debug_info outbound;
+   int ret;
 
if (!hv_dev->channel)
return -ENODEV;
-   hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
+
+   ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
+ &outbound);
+   if (ret < 0)
+   return ret;
return sprintf(buf, "%d\n", outbound.current_read_index);
 }
 static DEVICE_ATTR_RO(out_read_index);
@@ -340,10 +351,15 @@ static ssize_t out_write_index_show(struct device *dev,
 {
struct hv_device *hv_dev = devi

Re: [PATCH] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels

2018-12-17 Thread Stephen Hemminger
On Mon, 17 Dec 2018 18:00:29 +
Dexuan Cui  wrote:

> > From: Stephen Hemminger 
> > On Thu, 13 Dec 2018 16:35:43 +
> > Dexuan Cui  wrote:
> >   
> > > Before 98f4c651762c, we returned zeros for unopened channels.
> > > With 98f4c651762c, we started to return random on-stack values.
> > >
> > > We'd better return -EINVAL instead.  
> > 
> > The concept looks fine, but maybe it would be simpler to move it into
> > hv_ringbuffer_get_debuginfo and have it return an error code.
> > 
> > Since so much of the code is repeated, I would probably make a
> > macro which generates the code as well.
> > 
> > Something like this:  
> 
> Thanks, Stephen! Now the patch has been in char-misc's char-misc-linus
> branch, so IMO we may as well leave it as is (considering the code here is
> unlikely to be frequently changed), and we have a smaller patch this way. :-)
> 
> But, yes, I agree with you that generally we should make a common
> function to avoid duplicate code.
> 
> Thanks,
> -- Dexuan

The old code was risky because it would silently return stack garbage.
Having an error check in get_debuginfo would eliminate that.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] Drivers: hv: vmbus: Return -EINVAL for the sys files for unopened channels

2018-12-17 Thread Stephen Hemminger
On Mon, 17 Dec 2018 18:44:12 +
Dexuan Cui  wrote:

> > From: devel  On Behalf Of
> > Dexuan Cui
> > Sent: Monday, December 17, 2018 10:31 AM  
> > > From: Stephen Hemminger 
> > >
> > > The old code was risky because it would silently return stack garbage.
> > > Having an error check in get_debuginfo would eliminate that.  
> > 
> > OK, then let me make another patch based on the latest char-misc-linus.
> > 
> > -- Dexuan  
> 
> Hi Stephen, your patch can apply cleanly. Let me rebase your patch to
> char-misc-linus, do a test, and then post it with your Signed-off-by and 
> mine: 
> I assume you're Ok with this. Please let me know in case it's not. :-)
> 
> Thanks,
> -- Dexuan

Sure.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v3] Drivers: hv: vmbus: Expose counters for interrupts and full conditions

2019-01-17 Thread Stephen Hemminger



> +static ssize_t channel_intr_in_full_show(const struct vmbus_channel
> *channel,
> +  char *buf)
> +{
> + return sprintf(buf, "%llu\n", channel->intr_in_full);
> +}


intr_in_full is u64, which is not the same as unsigned long long.
To be correct you need a cast here.

> > diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> > index dcb6977afce9..7e5239123276 100644
> > --- a/include/linux/hyperv.h
> > +++ b/include/linux/hyperv.h
> > @@ -751,6 +751,27 @@ struct vmbus_channel {
> > u64 interrupts; /* Host to Guest interrupts */
> > u64 sig_events; /* Guest to Host events */
> > 
> > +   /* Interrupt counts for 2 types of Guest to Host interrupts */
> > +   u64 intr_in_full;   /* in ring buffer, full to not full */
> > +   u64 intr_out_empty; /* out ring buffer, empty to not empty */
> > +
> > +   /*
> > +* The total number of write operations that encountered a full
> > +* outbound ring buffer.
> > +*/
> > +   u64 out_full_total;
> > +   /*
> > +* The number of write operations that were the first to encounter a
> > +* full outbound ring buffer.
> > +*/
> > +   u64 out_full_first;

Adding more fields changes cache layout which can cause
additional cache miss in the hot path.  

> > +   /*
> > +* Indicates that a full outbound ring buffer was encountered. The flag
> > +* is set to true when a full outbound ring buffer is encountered and
> > +* set to false when a write to the outbound ring buffer is completed.
> > +*/
> > +   bool out_full_flag;

Discussion on the kernel mailing list recommends against putting bool
in structures since that pads to full sizeof(int).  Could this be
part of a bitfield?

> > /* Channel callback's invoked in softirq context */
> > struct tasklet_struct callback_event;
> > void (*onchannel_callback)(void *context);
> > @@ -936,6 +957,23 @@ static inline void *get_per_channel_state(struct
> > vmbus_channel *c)
> >  static inline void set_channel_pending_send_size(struct vmbus_channel *c,
> >  u32 size)
> >  {
> > +   unsigned long flags;
> > +
> > +   spin_lock_irqsave(&c->outbound.ring_lock, flags);
> > +
> > +   if (size) {
> > +   ++c->out_full_total;
> > +
> > +   if (!c->out_full_flag) {
> > +   ++c->out_full_first;
> > +   c->out_full_flag = true;
> > +   }
> > +   } else {
> > +   c->out_full_flag = false;
> > +   }
> > +
> > +   spin_unlock_irqrestore(&c->outbound.ring_lock, flags);

If this is called often, the additional locking will impact performance.

> > c->outbound.ring_buffer->pending_send_sz = size;
> >  }
> > 

Could I propose another alternative?

It might be more useful to count the guest to host interaction events
rather than the ring buffer.

For example the number of calls to:
vmbus_set_event which means host exit call
vmbus_setevent fastpath using sync_set_bit
calls to ringbuffer_write that returned -EAGAIN

These would require less locking, reuse existing code paths
and not require additional state.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] PCI: hv: Add hv_pci_remove_slots() when we unload the driver

2019-02-07 Thread Stephen Hemminger
On Thu, 7 Feb 2019 20:36:32 +
Dexuan Cui  wrote:

> When we unload pci-hyperv, the host doesn't send us a PCI_EJECT message.
> In this case we also need to make sure the sysfs pci slot directory
> is removed, otherwise "cat /sys/bus/pci/slots/2/address" will trigger
> "BUG: unable to handle kernel paging request". And, if we unload/reload
> the driver several times, we'll have multiple pci slot directories in
> /sys/bus/pci/slots/ like this:
> 
> root@localhost:~# ls -rtl  /sys/bus/pci/slots/
> total 0
> drwxr-xr-x 2 root root 0 Feb  7 10:49 2
> drwxr-xr-x 2 root root 0 Feb  7 10:49 2-1
> drwxr-xr-x 2 root root 0 Feb  7 10:51 2-2
> 
> The patch adds the missing code, and in hv_eject_device_work() it also
> moves pci_destroy_slot() to an earlier place where we hold the pci lock.
> 
> Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot 
> information")
> Signed-off-by: Dexuan Cui 
> Cc: sta...@vger.kernel.org
> Cc: Stephen Hemminger 

Acked-by: Stephen Hemminger 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 1/2] Drivers: hv: vmbus: Change server monitor_pages index to 0

2019-02-08 Thread Stephen Hemminger
On Fri, 8 Feb 2019 04:58:52 -0500
Kimberly Brown  wrote:

> Change the monitor_pages index in server_monitor_pending_show() to '0'.
> '0' is the correct monitor_pages index for the server. A comment for the
> monitor_pages field in the vmbus_connection struct definition indicates
> that the 1st page is for parent->child notifications. In addition, the
> server_monitor_latency_show() and server_monitor_conn_id_show()
> functions use monitor_pages index '0'.
> 
> Signed-off-by: Kimberly Brown 
> ---
>  drivers/hv/vmbus_drv.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 403fee01572c..f2a79f5129d7 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -234,7 +234,7 @@ static ssize_t server_monitor_pending_show(struct device 
> *dev,
>   return -ENODEV;
>   return sprintf(buf, "%d\n",
>  channel_pending(hv_dev->channel,
> -vmbus_connection.monitor_pages[1]));
> +vmbus_connection.monitor_pages[0]));
>  }
>  static DEVICE_ATTR_RO(server_monitor_pending);


Looks good.

I wonder if it ever gets used though since it returned incorrect data...

Acked-by: Stephen Hemminger 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 2/2] Drivers: hv: vmbus: Display nothing in sysfs if monitor_allocated not set

2019-02-08 Thread Stephen Hemminger
On Fri, 8 Feb 2019 05:01:12 -0500
Kimberly Brown  wrote:

You are right, the current behavior is broken.
It would be good to add a description of under what conditions
monitor is not used. Is this some part of a project emulating
Hyper-V?


> +
> + if (!hv_dev->channel->offermsg.monitor_allocated)
> + return sprintf(buf, "\n");

If monitor is not used, why not return an error instead of empty
data. Any program (or user) would have to handle that already.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 2/2] Drivers: hv: vmbus: Display nothing in sysfs if monitor_allocated not set

2019-02-11 Thread Stephen Hemminger
On Mon, 11 Feb 2019 02:01:18 -0500
Kimberly Brown  wrote:

> On Fri, Feb 08, 2019 at 02:32:09PM -0800, Stephen Hemminger wrote:
> > On Fri, 8 Feb 2019 05:01:12 -0500
> > Kimberly Brown  wrote:
> > 
> > You are right, the current behavior is broken.
> > It would be good to add a description of under what conditions
> > monitor is not used. Is this some part of a project emulating
> > Hyper-V?
> >   
> 
> I'm not sure which conditions determine whether the monitor mechanism is
> used. I've searched the Hypervisor TLFS, and I couldn't find any
> information. If you have any suggestions for where I can find this
> information, please let me know.

The monitor page stuff pre-dates my involvement with Hyper-V. KY might know.
But based on comments it looks like it was added to avoid hypercalls
for each message. It probably showed up in Windows Server 2012 timeframe.

To test you might want to dig up Windows Server 2008.
 
> No, I'm not working on a project emulating Hyper-V.

OK, I had heard that KVM project was doing something with QEMU.

> >   
> > > +
> > > + if (!hv_dev->channel->offermsg.monitor_allocated)
> > > + return sprintf(buf, "\n");  
> > 
> > If monitor is not used, why not return an error instead of empty
> > data. Any program (or user) would have to handle that already.  
> 
> I think that returning an error instead is fine. I'll make this change
> in the next version of the patch.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 2/2] Drivers: hv: vmbus: Display nothing in sysfs if monitor_allocated not set

2019-02-14 Thread Stephen Hemminger
On Thu, 14 Feb 2019 01:11:03 -0500
Kimberly Brown  wrote:

> On Mon, Feb 11, 2019 at 10:02:47AM -0800, Stephen Hemminger wrote:
> > On Mon, 11 Feb 2019 02:01:18 -0500
> > Kimberly Brown  wrote:
> >   
> > > On Fri, Feb 08, 2019 at 02:32:09PM -0800, Stephen Hemminger wrote:  
> > > > On Fri, 8 Feb 2019 05:01:12 -0500
> > > > Kimberly Brown  wrote:
> > > > 
> > > > You are right, the current behavior is broken.
> > > > It would be good to add a description of under what conditions
> > > > monitor is not used. Is this some part of a project emulating
> > > > Hyper-V?
> > > > 
> > > 
> > > I'm not sure which conditions determine whether the monitor mechanism is
> > > used. I've searched the Hypervisor TLFS, and I couldn't find any
> > > information. If you have any suggestions for where I can find this
> > > information, please let me know.  
> > 
> > The monitor page stuff pre-dates my involvement with Hyper-V. KY might know.
> > But based on comments it looks like it was added to avoid hypercalls
> > for each message. It probably showed up in Windows Server 2012 timeframe.
> > 
> > To test you might want to dig up Windows Server 2008.
> >
> 
> It looks like the monitor mechanism has always been used. It's present in the
> earliest commit that I can find: 3e7ee4902fe6 ("add the Hyper-V virtual bus")
> from 2009.
> 
> I propose that the following sentences be added to the sysfs documentation for
> the affected attributes:
> 
> "The monitor page mechanism is used for performance critical channels 
> (storage,
> network, etc.). Channels that do not use the monitor page mechanism will 
> return
> EINVAL."
> 
> I think that this provides sufficient information for a user to understand why
> opening an affected file can return EINVAL. What do you think?

Thanks for following up. I agree with you EINVAL works as a solution.
My understanding is that there are two ways a channel can work. The first one is
for the guest to send a hypercall to the host to indicate when data is 
available.
The other is for the guest to indicate by setting a bit in shared memory with 
host.

The shared memory approach reduces host/guest overhead and allows for more 
opportunities
for batching in the ring. The host checks the shared memory on a polling 
interval
defined in the latency field.

The hypercall method does not use the monitor page. It has lower latency (no 
polling).
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH V3 1/10] X86/Hyper-V: Add parameter offset for hyperv_fill_flush_guest_mapping_list()

2019-02-22 Thread Stephen Hemminger
int hyperv_fill_flush_guest_mapping_list(
struct hv_guest_mapping_flush_list *flush,
-   u64 start_gfn, u64 pages)
+   int offset, u64 start_gfn, u64 pages)
 {
u64 cur = start_gfn;
u64 additional_pages;
-   int gpa_n = 0;
+   int gpa_n = offset;
 
do {
/*

Do you mean to support negative offsets here? Maybe unsigned would be better?
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 0/3] pci-hyperv: fix memory leak and add pci_destroy_slot()

2019-03-05 Thread Stephen Hemminger
On Mon, 4 Mar 2019 21:34:47 +
Dexuan Cui  wrote:

> Patch #1 fixes a memory leak caused by incorrectly-maintained hpdev->refs.
> 
> Patch #2 and #3 make sure the "slot" is removed in all the scenarios.
> Without them, in the quick hot-add/hot-remove test, systemd-dev may easily
> crash when trying to access a dangling sys file in /sys/bus/pci/slots/:
> "BUG: unable to handle kernel paging request".
> 
> BTW, Patch #2 was posted on Feb 7, 2019, and this is the v2: the change
> to hv_eject_device_work() in v1 is removed, as the change is only needed
> when we hot-remove the device and remove the pci-hyperv driver at the 
> same time. It looks like more work is required to make this scenario work
> correctly, and since removing the driver is not really a "usual" usage,
> we can address this scenario in the future.
> 
> Please review the patchset.
> 
> Dexuan Cui (3):
>   PCI: hv: Fix a memory leak in hv_eject_device_work()
>   PCI: hv: Add hv_pci_remove_slots() when we unload the driver
>   PCI: hv: Add pci_destroy_slot() in pci_devices_present_work(), if
> necessary
> 
>  drivers/pci/controller/pci-hyperv.c | 23 +++
>  1 file changed, 23 insertions(+)


Thanks for fixing this.

Reviewed-by: Stephen Hemminger 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] isdn: hysdn: Fix error spaces around '*'

2019-08-02 Thread Stephen Hemminger
On Fri,  2 Aug 2019 19:56:02 +
Jose Carlos Cazarin Filho  wrote:

> Fix checkpatch error:
> CHECK: spaces preferred around that '*' (ctx:WxV)
> +extern hysdn_card *card_root;/* pointer to first card */
> 
> Signed-off-by: Jose Carlos Cazarin Filho 


Read the TODO, these drivers are scheduled for removal, so changes
are not helpful at this time.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] isdn: hysdn: fix code style error from checkpatch

2019-08-02 Thread Stephen Hemminger
On Fri,  2 Aug 2019 19:50:17 +
Ricardo Bruno Lopes da Silva  wrote:

> Fix error below from checkpatch.
> 
> WARNING: Block comments use * on subsequent lines
> +/***
> +
> 
> Signed-off-by: Ricardo Bruno Lopes da Silva 

Read the TODO, these drivers are scheduled for removal, so changes
are not helpful at this time.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net] netvsc: increase default receive buffer size

2017-09-14 Thread Stephen Hemminger
The default receive buffer size was reduced by recent change
to a value which was appropriate for 10G and Windows Server 2016.
But the value is too small for full performance with 40G on Azure.
Increase the default back to maximum supported by host.

Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size")
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c538a4f15f3b..d4902ee5f260 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -49,7 +49,7 @@
 #define NETVSC_MIN_TX_SECTIONS 10
 #define NETVSC_DEFAULT_TX  192 /* ~1M */
 #define NETVSC_MIN_RX_SECTIONS 10  /* ~64K */
-#define NETVSC_DEFAULT_RX  2048/* ~4M */
+#define NETVSC_DEFAULT_RX  10485   /* Max ~16M */
 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net] netvsc: increase default receive buffer size

2017-09-14 Thread Stephen Hemminger
On Thu, 14 Sep 2017 10:02:03 -0700 (PDT)
David Miller  wrote:

> From: Stephen Hemminger 
> Date: Thu, 14 Sep 2017 09:31:07 -0700
> 
> > The default receive buffer size was reduced by recent change
> > to a value which was appropriate for 10G and Windows Server 2016.
> > But the value is too small for full performance with 40G on Azure.
> > Increase the default back to maximum supported by host.
> > 
> > Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size")
> > Signed-off-by: Stephen Hemminger   
> 
> What other side effects are there to making this buffer so large?
> 
> Just curious...

It increases latency and exercises bufferbloat avoidance on TCP.
The problem was the smaller buffer caused regressions in UDP
benchmarks on 40G Azure. One could argue that this is not a reasonable
benchmark but people run it. Apparently, Windows already went
through the same thing and uses an even bigger buffer.

Longer term there will be more internal discussion with different
teams about what the receive latency and buffering needs to be.
Also, the issue goes away when doing accelerated networking (SR-IOV)
is more widely used.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 02/16] hyper-v: trace vmbus_on_message()

2017-10-03 Thread Stephen Hemminger
On Thu, 21 Sep 2017 10:17:18 +0200
Vitaly Kuznetsov  wrote:

> Steven Rostedt  writes:
> 
> > On Wed, 20 Sep 2017 19:21:53 +0200
> > Vitaly Kuznetsov  wrote:
> >  
> >> diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
> >> index 9a29ef55477d..72911dfc9682 100644
> >> --- a/drivers/hv/hv_trace.h
> >> +++ b/drivers/hv/hv_trace.h
> >> @@ -14,6 +14,14 @@ TRACE_EVENT(vmbus_on_msg_dpc,
> >>TP_printk("message %u received", __entry->msgtype)
> >>);
> >>  
> >> +TRACE_EVENT(vmbus_on_message,
> >> +  TP_PROTO(const struct vmbus_channel_message_header *hdr),
> >> +  TP_ARGS(hdr),
> >> +  TP_STRUCT__entry(__field(unsigned int, msgtype)),
> >> +  TP_fast_assign(__entry->msgtype = hdr->msgtype),
> >> +  TP_printk("processing message %u", __entry->msgtype)
> >> +  );  
> >
> > Whenever you have two trace events with everything the same but the
> > TP_printk(), you can save a little space by using DEFINE_EVENT_PRINT()
> >
> > DECLARE_EVENT_CLASS(vmbus_hdr_msg,
> > TP_PROTO(const struct vmbus_channel_message_header *hdr),
> > TP_ARGS(hdr),
> > TP_STRUCT__entry(__field(unsigned int, msgtype),
> > TP_fast_assign(__entry->msg = hdr->msgtype;),
> > TP_printk("msgtype=%d", __entry->msgtype)
> > );
> >
> > DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_msg_dpc,
> > TP_PROTO(const struct vmbus_channel_message_header *hdr),
> > TP_ARGS(hdr),
> > TP_printk("message %u received", __entry->msgtype));
> >
> > DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_message,
> > TP_PROTO(const struct vmbus_channel_message_header *hdr),
> > TP_ARGS(hdr),
> > TP_printk("processing message %u", __entry->msgtype));
> >
> > This will use the same functions required to save and record the
> > message but will use a different function to output it to the trace.  
> 
> Oh, thanks! This seems to be useful for
> vmbus_on_msg_dpc/vmbus_on_message only as all the rest needs to parse
> different structures. Will use it in v2.
> 

I just used this patch. Since the function name is already in the trace message,
there is no need to have different prints for each one.

>From ff85967810c216eb01d181789af4f56bd00dc9b9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger 
Date: Tue, 3 Oct 2017 09:24:11 -0700
Subject: [PATCH 3/4] hyperv: fix warnings in trace print

This gets rid of the build warnings from unused printf format.
And uses common class for print.
---
 drivers/hv/hv_trace.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
index be7762955650..4755c4640e39 100644
--- a/drivers/hv/hv_trace.h
+++ b/drivers/hv/hv_trace.h
@@ -11,18 +11,18 @@ DECLARE_EVENT_CLASS(vmbus_hdr_msg,
TP_ARGS(hdr),
TP_STRUCT__entry(__field(unsigned int, msgtype)),
TP_fast_assign(__entry->msgtype = hdr->msgtype;),
-   TP_printk("msgtype=%d", __entry->msgtype)
+   TP_printk("msgtype=%u", __entry->msgtype)
 );
 
-DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_msg_dpc,
+DEFINE_EVENT(vmbus_hdr_msg, vmbus_on_msg_dpc,
TP_PROTO(const struct vmbus_channel_message_header *hdr),
-   TP_ARGS(hdr),
-   TP_printk("message %u received", __entry->msgtype));
+   TP_ARGS(hdr)
+);
 
-DEFINE_EVENT_PRINT(vmbus_hdr_msg, vmbus_on_message,
+DEFINE_EVENT(vmbus_hdr_msg, vmbus_on_message,
 TP_PROTO(const struct vmbus_channel_message_header *hdr),
-TP_ARGS(hdr),
-TP_printk("processing message %u", __entry->msgtype));
+TP_ARGS(hdr)
+);
 
 TRACE_EVENT(vmbus_onoffer,
TP_PROTO(const struct vmbus_channel_offer_channel *offer),
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v2 16/16] hyper-v: trace vmbus_send_tl_connect_request()

2017-10-03 Thread Stephen Hemminger
I added an additional set of trace points for when channel gets notified or 
signals host.

diff -urNp linux-msft/drivers/hv/channel.c msft-4.14-rc3/drivers/hv/channel.c
--- linux-msft/drivers/hv/channel.c 2017-10-03 10:06:54.893209237 -0700
+++ msft-4.14-rc3/drivers/hv/channel.c  2017-10-03 10:07:35.501665114 -0700
@@ -55,6 +55,8 @@ void vmbus_setevent(struct vmbus_channel
 {
struct hv_monitor_page *monitorpage;
 
+   trace_vmbus_setevent(channel);
+
/*
 * For channels marked as in "low latency" mode
 * bypass the monitor page mechanism.
diff -urNp linux-msft/drivers/hv/connection.c 
msft-4.14-rc3/drivers/hv/connection.c
--- linux-msft/drivers/hv/connection.c  2017-10-03 10:06:54.893209237 -0700
+++ msft-4.14-rc3/drivers/hv/connection.c   2017-10-03 10:07:35.501665114 
-0700
@@ -322,6 +322,8 @@ void vmbus_on_event(unsigned long data)
struct vmbus_channel *channel = (void *) data;
unsigned long time_limit = jiffies + 2;
 
+   trace_vmbus_on_event(channel);
+   
do {
void (*callback_fn)(void *);
 
diff -urNp linux-msft/drivers/hv/hv_trace.h msft-4.14-rc3/drivers/hv/hv_trace.h
--- linux-msft/drivers/hv/hv_trace.h2017-10-03 10:08:06.514014019 -0700
+++ msft-4.14-rc3/drivers/hv/hv_trace.h 2017-10-03 10:07:35.505665159 -0700
@@ -294,6 +294,29 @@ TRACE_EVENT(vmbus_send_tl_connect_reques
)
);
 
+DECLARE_EVENT_CLASS(vmbus_channel,
+   TP_PROTO(const struct vmbus_channel *channel),
+   TP_ARGS(channel),
+   TP_STRUCT__entry(__field(u32, relid)),
+   TP_fast_assign(__entry->relid = channel->offermsg.child_relid),
+   TP_printk("relid 0x%x", __entry->relid)
+);
+
+DEFINE_EVENT(vmbus_channel, vmbus_chan_sched,
+   TP_PROTO(const struct vmbus_channel *channel),
+   TP_ARGS(channel)
+);
+
+DEFINE_EVENT(vmbus_channel, vmbus_setevent,
+   TP_PROTO(const struct vmbus_channel *channel),
+   TP_ARGS(channel)
+);
+
+DEFINE_EVENT(vmbus_channel, vmbus_on_event,
+   TP_PROTO(const struct vmbus_channel *channel),
+   TP_ARGS(channel)
+);
+
 #undef TRACE_INCLUDE_PATH
 #define TRACE_INCLUDE_PATH .
 #undef TRACE_INCLUDE_FILE
diff -urNp linux-msft/drivers/hv/vmbus_drv.c 
msft-4.14-rc3/drivers/hv/vmbus_drv.c
--- linux-msft/drivers/hv/vmbus_drv.c   2017-10-03 10:06:54.897209282 -0700
+++ msft-4.14-rc3/drivers/hv/vmbus_drv.c2017-10-03 10:07:35.505665159 
-0700
@@ -948,6 +948,7 @@ static void vmbus_chan_sched(struct hv_p
continue;
 
++channel->interrupts_in;
+   trace_vmbus_chan_sched(channel);
 
switch (channel->callback_mode) {
case HV_CALL_ISR:
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH] vmbus: initialize reserved fields in messages

2017-10-04 Thread Stephen Hemminger
Make sure and initialize reserved fields in messages to host,
rather than passing stack junk.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/channel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index efd5db743319..9f48f454bde0 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -745,6 +745,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel 
*channel,
desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */
desc.length8 = (u16)(packetlen_aligned >> 3);
desc.transactionid = requestid;
+   desc.reserved = 0;
desc.rangecount = pagecount;
 
for (i = 0; i < pagecount; i++) {
@@ -788,6 +789,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */
desc->length8 = (u16)(packetlen_aligned >> 3);
desc->transactionid = requestid;
+   desc->reserved = 0;
desc->rangecount = 1;
 
bufferlist[0].iov_base = desc;
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH v3 00/17] Hyper-V: add tracing to VMBus module and trace messages/events

2017-10-05 Thread Stephen Hemminger
On Thu,  5 Oct 2017 16:50:27 +0200
Vitaly Kuznetsov  wrote:

> Changes since v2:
> - Use DEFINE_EVENT in PATCH1/2 to avoid compile warnings [Stephen Hemminger]
> - Add PATCH17 tracing channel events [Stephen Hemminger]
> 
> Messages between guest and host are used in Hyper-V as control flow. To
> simplify debugging various issues which are often hard to reproduce add
> tracepoints to all message senders and handlers. This is not a performance
> critical path and tracing overhead should be negligible.
> 
> The example usage and output is:
> 
> Enable all tracing events:
> # echo 1 > /sys/kernel/debug/tracing/events/hyperv/enable 
> 
> Do something which causes messages to be sent between host and guest, e.g.
> hot remove a VMBus device.
> 
> Check events:
> # cat /sys/kernel/debug/tracing/trace 
> 
> # tracer: nop
> #
> #  _-=> irqs-off
> # / _=> need-resched
> #| / _---=> hardirq/softirq
> #|| / _--=> preempt-depth
> #||| / delay
> #   TASK-PID   CPU#  TIMESTAMP  FUNCTION
> #  | |   |      | |
>   -0 [011] ..s.   122.981583: vmbus_on_msg_dpc: msgtype=1
> kworker/11:7-1506  [011]    122.981597: vmbus_on_message: msgtype=1
> kworker/11:7-1506  [011]    122.981598: vmbus_onoffer: child_relid 
> 0x10, monitorid 0x2, is_dedicated 1, connection_id 0x10010, if_type 
> f8615163-df3e-46c5-913f-f2d2f965ed0e, if_instance 
> 6676e078-e4b3-44da-8a7d-12eafb577d31, chn_flags 0x0, mmio_megabytes 0, 
> sub_channel_index 0
> kworker/11:7-1506  [011]    122.982130: vmbus_establish_gpadl_header: 
> sending child_relid 0x10, gpadl 0xe1e34, range_buflen 2056 rangecount 1, ret 0
> kworker/11:7-1506  [011]    122.982133: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982136: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982137: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982139: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982141: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982142: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982144: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982146: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
> kworker/11:7-1506  [011]    122.982148: vmbus_establish_gpadl_body: 
> sending msgnumber 0, gpadl 0xe1e34, ret 0
>   -0 [011] ..s.   122.982336: vmbus_on_msg_dpc: msgtype=10
>   -0 [011] ..s.   122.982337: vmbus_ongpadl_created: 
> child_relid 0x10, gpadl 0xe1e34, creation_status 0
> kworker/11:7-1506  [011]    122.982351: vmbus_open: sending 
> child_relid 0x10, openid 16, gpadlhandle 0xe1e34, target_vp 0xb, offset 0x80, 
> ret 0
>  kworker/3:1-214   [003]    123.015007: vmbus_setevent: relid 0x5
>   -0 [011] ..s.   123.029467: vmbus_on_msg_dpc: msgtype=6
>   -0 [011] ..s.   123.029470: vmbus_onopen_result: 
> child_relid 0x10, openid 16, status 0
> kworker/11:7-1506  [011]    123.029492: vmbus_setevent: relid 0x10
>   -0 [011] d.h.   123.029533: vmbus_chan_sched: relid 0x10
> kworker/11:7-1506  [011]    123.029539: vmbus_setevent: relid 0x10
> 
> CHANNELMSG_UNLOAD/CHANNELMSG_UNLOAD_RESPONSE are not traced as these are
> mostly used on crash.
> 
> Vitaly Kuznetsov (17):
>   hyper-v: trace vmbus_on_msg_dpc()
>   hyper-v: trace vmbus_on_message()
>   hyper-v: trace vmbus_onoffer()
>   hyper-v: trace vmbus_onoffer_rescind()
>   hyper-v: trace vmbus_onopen_result()
>   hyper-v: trace vmbus_ongpadl_created()
>   hyper-v: trace vmbus_ongpadl_torndown()
>   hyper-v: trace vmbus_onversion_response()
>   hyper-v: trace vmbus_request_offers()
>   hyper-v: trace vmbus_open()
>   hyper-v: trace vmbus_close_internal()
>   hyper-v: trace vmbus_establish_gpadl()
>   hyper-v: trace vmbus_teardown_gpadl()
>   hyper-v: trace vmbus_negotiate_version()
>   hyper-v: trace vmbus_release_relid()
>   hyper-v: trace vmbus_send_tl_connect_request()
>   hyper-v: trace channel events
> 
>  drivers/hv/Makefile   |   4 +-
>  drivers/hv/channel.c  |  21 ++-
>  drivers/hv/channel_m

Re: [PATCH 2/2] uio: Prefer MSI(X) interrupts in PCI drivers

2017-10-06 Thread Stephen Hemminger
On Fri, 6 Oct 2017 13:50:44 +
"Stahl, Manuel"  wrote:

> MSI(X) interrupts are not shared between devices. So when available
> those should be preferred over legacy interrupts.
> 
> Signed-off-by: Manuel Stahl 
> ---
>  drivers/uio/uio_pci_dmem_genirq.c | 27 ---
>  drivers/uio/uio_pci_generic.c | 24 ++--
>  2 files changed, 38 insertions(+), 13 deletions(-)

The last time I tried to do MSI-X with pci-generic it got rejected
by the maintainer.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 2/2] uio: Prefer MSI(X) interrupts in PCI drivers

2017-10-24 Thread Stephen Hemminger
On Fri, 20 Oct 2017 14:50:44 +0200
"gre...@linuxfoundation.org"  wrote:

> On Fri, Oct 06, 2017 at 07:57:00AM -0700, Stephen Hemminger wrote:
> > On Fri, 6 Oct 2017 13:50:44 +
> > "Stahl, Manuel"  wrote:
> >   
> > > MSI(X) interrupts are not shared between devices. So when available
> > > those should be preferred over legacy interrupts.
> > > 
> > > Signed-off-by: Manuel Stahl 
> > > ---
> > >  drivers/uio/uio_pci_dmem_genirq.c | 27 ---
> > >  drivers/uio/uio_pci_generic.c | 24 ++--
> > >  2 files changed, 38 insertions(+), 13 deletions(-)  
> > 
> > The last time I tried to do MSI-X with pci-generic it got rejected
> > by the maintainer.  
> 
> Hm, yeah, this would break users today that do not have msi-x, right?
> 
> Not good, Manuel, how well did you test this?
> 
> thanks,
> 
> greg k-h

Look at https://patchwork.kernel.org/patch/7303021/

The objection was more that UIO developers did not like that
UIO was (already) being used for DMA without IOMMU, and MSI-x has DMA because
of vector table.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next 1/4] hv_netvsc: netvsc_teardown_gpadl() split

2017-10-31 Thread Stephen Hemminger
On Tue, 31 Oct 2017 14:42:01 +0100
Vitaly Kuznetsov  wrote:

> It was found that in some cases host refuses to teardown GPADL for send/
> receive buffers (probably when some work with these buffers is scheduled or
> ongoing). Change the teardown logic to be:
> 1) Send NVSP_MSG1_TYPE_REVOKE_* messages
> 2) Close the channel
> 3) Teardown GPADLs.
> This seems to work reliably.
> 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  drivers/net/hyperv/netvsc.c | 69 
> +++--
>  1 file changed, 36 insertions(+), 33 deletions(-)
> 
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 5bb6a20072dd..bfc79698b8f4 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -100,12 +100,11 @@ static void free_netvsc_device_rcu(struct netvsc_device 
> *nvdev)
>   call_rcu(&nvdev->rcu, free_netvsc_device);
>  }
>  
> -static void netvsc_destroy_buf(struct hv_device *device)
> +static void netvsc_revoke_buf(struct hv_device *device,
> +   struct netvsc_device *net_device)
>  {
>   struct nvsp_message *revoke_packet;
>   struct net_device *ndev = hv_get_drvdata(device);
> - struct net_device_context *ndc = netdev_priv(ndev);
> - struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
>   int ret;
>  
>   /*
> @@ -148,28 +147,6 @@ static void netvsc_destroy_buf(struct hv_device *device)
>   net_device->recv_section_cnt = 0;
>   }
>  
> - /* Teardown the gpadl on the vsp end */
> - if (net_device->recv_buf_gpadl_handle) {
> - ret = vmbus_teardown_gpadl(device->channel,
> -net_device->recv_buf_gpadl_handle);
> -
> - /* If we failed here, we might as well return and have a leak
> -  * rather than continue and a bugchk
> -  */
> - if (ret != 0) {
> - netdev_err(ndev,
> -"unable to teardown receive buffer's 
> gpadl\n");
> - return;
> - }
> - net_device->recv_buf_gpadl_handle = 0;
> - }
> -
> - if (net_device->recv_buf) {
> - /* Free up the receive buffer */
> - vfree(net_device->recv_buf);
> - net_device->recv_buf = NULL;
> - }
> -
>   /* Deal with the send buffer we may have setup.
>* If we got a  send section size, it means we received a
>* NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
> @@ -210,7 +187,35 @@ static void netvsc_destroy_buf(struct hv_device *device)
>   }
>   net_device->send_section_cnt = 0;
>   }
> - /* Teardown the gpadl on the vsp end */
> +}
> +
> +static void netvsc_teardown_gpadl(struct hv_device *device,
> +   struct netvsc_device *net_device)
> +{
> + struct net_device *ndev = hv_get_drvdata(device);
> + int ret;
> +
> + if (net_device->recv_buf_gpadl_handle) {
> + ret = vmbus_teardown_gpadl(device->channel,
> +net_device->recv_buf_gpadl_handle);
> +
> + /* If we failed here, we might as well return and have a leak
> +  * rather than continue and a bugchk
> +  */
> + if (ret != 0) {
> + netdev_err(ndev,
> +"unable to teardown receive buffer's 
> gpadl\n");
> + return;
> + }
> + net_device->recv_buf_gpadl_handle = 0;
> + }
> +
> + if (net_device->recv_buf) {
> + /* Free up the receive buffer */
> + vfree(net_device->recv_buf);
> + net_device->recv_buf = NULL;
> + }
> +
>   if (net_device->send_buf_gpadl_handle) {
>   ret = vmbus_teardown_gpadl(device->channel,
>  net_device->send_buf_gpadl_handle);
> @@ -420,7 +425,8 @@ static int netvsc_init_buf(struct hv_device *device,
>   goto exit;
>  
>  cleanup:
> - netvsc_destroy_buf(device);
> + netvsc_revoke_buf(device, net_device);
> + netvsc_teardown_gpadl(device, net_device);
>  
>  exit:
>   return ret;
> @@ -539,11 +545,6 @@ static int netvsc_connect_vsp(struct hv_device *device,
>   return ret;
>  }
>  
> -static void netvsc_disconnect_vsp(struct hv_device *device)
> -{
> - netvsc_destroy_buf(device);
> -}
> -
>  /*
>   * netvsc_device_remove - Callback when the root bus device is removed
>   */
> @@ -557,7 +558,7 @@ void netvsc_device_remove(struct hv_device *device)
>  
>   cancel_work_sync(&net_device->subchan_work);
>  
> - netvsc_disconnect_vsp(device);
> + netvsc_revoke_buf(device, net_device);
>  
>   RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
>  
> @@ -570,6 +571,8 @@ void netvsc_device_remove(struct hv_device *device)
>   /* Now, we can close the channel safely */
>   vmbus_close(device->channel

Re: [PATCH net-next 2/4] hv_netvsc: protect nvdev->extension with RCU

2017-10-31 Thread Stephen Hemminger
On Tue, 31 Oct 2017 14:42:02 +0100
Vitaly Kuznetsov  wrote:

> @@ -2002,7 +2002,9 @@ static int netvsc_probe(struct hv_device *dev,
>   device_info.recv_sections = NETVSC_DEFAULT_RX;
>   device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE;
>  
> + rtnl_lock();
>   nvdev = rndis_filter_device_add(dev, &device_info);
> + rtnl_unlock();

rtnl is not necessary here. probe can not be bothered by other changes.

> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -402,20 +402,27 @@ int rndis_filter_receive(struct net_device *ndev,
>void *data, u32 buflen)
>  {
>   struct net_device_context *net_device_ctx = netdev_priv(ndev);
> - struct rndis_device *rndis_dev = net_dev->extension;
> + struct rndis_device *rndis_dev;
>   struct rndis_message *rndis_msg = data;
> + int ret = 0;
> +
> + rcu_read_lock_bh();
> +
> + rndis_dev = rcu_dereference_bh(net_dev->extension);

filter_receive is already called only from NAPI, with the RCU lock held and soft
irqs disabled. This is not necessary.

> - net_dev->extension = NULL;
> + rcu_assign_pointer(net_dev->extension, NULL);
> +
> + synchronize_rcu();

rcu_assign_pointer with NULL is never a good idea.
And synchronize_rcu is slow. Since net_device is already protected
by RCU (for deletion) it should not be necessary.


Thank you for trying to address these races. But it should be
done carefully not by just slapping RCU everywhere.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net-next 3/4] hv_netvsc: reset net_device_ctx->nvdev with rcu_assign_pointer()

2017-10-31 Thread Stephen Hemminger
On Tue, 31 Oct 2017 07:09:58 -0700
Eric Dumazet  wrote:

> On Tue, 2017-10-31 at 14:42 +0100, Vitaly Kuznetsov wrote:
> > RCU_INIT_POINTER() is not suitable here as it doesn't give us ordering
> > guarantees (see the comment in rcupdate.h). This is also not a hotpath.
> > 
> > Signed-off-by: Vitaly Kuznetsov 
> > ---
> >  drivers/net/hyperv/netvsc.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> > index bfc79698b8f4..12efb3e34775 100644
> > --- a/drivers/net/hyperv/netvsc.c
> > +++ b/drivers/net/hyperv/netvsc.c
> > @@ -560,7 +560,7 @@ void netvsc_device_remove(struct hv_device *device)
> >  
> > netvsc_revoke_buf(device, net_device);
> >  
> > -   RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
> > +   rcu_assign_pointer(net_device_ctx->nvdev, NULL);  
> 
> I see no point for this patch.
> 
> Setting a NULL pointer needs no barrier at all.

Agreed with Eric.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 10/17] hyper-v: trace vmbus_open()

2017-10-31 Thread Stephen Hemminger
On Tue, 31 Oct 2017 13:48:00 +0100
Greg KH  wrote:

> On Mon, Oct 30, 2017 at 10:31:34AM -0400, Steven Rostedt wrote:
> > On Mon, 30 Oct 2017 11:32:20 +0100
> > Greg KH  wrote:
> >   
> > > On Mon, Oct 30, 2017 at 11:07:01AM +0100, Vitaly Kuznetsov wrote:  
> > > > Greg KH  writes:
> > > > 
> > > > > On Mon, Oct 30, 2017 at 09:16:19AM +0100, Vitaly Kuznetsov wrote:
> > > > >> Greg KH  writes:
> > > > >> 
> > > > >> > On Sun, Oct 29, 2017 at 12:21:09PM -0700, 
> > > > >> > k...@exchange.microsoft.com wrote:
> > > > >> >> From: Vitaly Kuznetsov 
> > > > >> >> 
> > > > >> >> Add tracepoint to CHANNELMSG_OPENCHANNEL sender.
> > > > >> >> 
> > > > >> >> Signed-off-by: Vitaly Kuznetsov 
> > > > >> >> Signed-off-by: K. Y. Srinivasan 
> > > > >> >> ---
> > > > >> >>  drivers/hv/channel.c  |  2 ++
> > > > >> >>  drivers/hv/hv_trace.h | 27 +++
> > > > >> >>  2 files changed, 29 insertions(+)
> > > > >> >> 
> > > > >> >> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
> > > > >> >> index a406beb10dd0..739b3fe1e0fb 100644
> > > > >> >> --- a/drivers/hv/channel.c
> > > > >> >> +++ b/drivers/hv/channel.c
> > > > >> >> @@ -185,6 +185,8 @@ int vmbus_open(struct vmbus_channel 
> > > > >> >> *newchannel, u32 send_ringbuffer_size,
> > > > >> >>   ret = vmbus_post_msg(open_msg,
> > > > >> >>sizeof(struct vmbus_channel_open_channel), 
> > > > >> >> true);
> > > > >> >>  
> > > > >> >> + trace_vmbus_open(open_msg, ret);
> > > > >> >
> > > > >> > Why add tracepoints for things that ftrace can handle 
> > > > >> > automatically?
> > > > >> 
> > > > >> This series adds pretty prints for structures printing what is needed
> > > > >> and in the right format significantly simplifying debugging. And it
> > > > >> wouldn't make sense to add tracepoints to *some* messages-related
> > > > >> functions and skip others where parsing is more trivial.
> > > > >
> > > > > Tracepoints add memory usage and take up real space.  If you don't 
> > > > > need
> > > > > them for something, as there are other ways to already get the
> > > > > information needed, why add new ones that you now need to drag around
> > > > > for all time?
> > > > >
> > > > 
> > > > Are you opposed to the series as a whole (AKA 'no tracepoints in
> > > > drivers') or only to some tracepoints we add here?
> > > 
> > > I'm opposed to adding tracepoints for things that are not needed as you
> > > can get the same info already today without the tracepoint.  
> > 
> > I looked at this specific tracepoint, and I don't see how to get the
> > information from the current tracing infrastructure. Maybe an eBPF
> > program attached to a kprobe here might work. But the tracepoint data
> > looks like this:
> > 
> > +   TP_STRUCT__entry(
> > +   __field(u32, child_relid)
> > +   __field(u32, openid)
> > +   __field(u32, gpadlhandle)
> > +   __field(u32, target_vp)
> > +   __field(u32, offset)
> > +   __field(int, ret)
> > +   ),
> > +   TP_fast_assign(
> > +   __entry->child_relid = msg->child_relid;
> > +   __entry->openid = msg->openid;
> > +   __entry->gpadlhandle = msg->ringbuffer_gpadlhandle;
> > +   __entry->target_vp = msg->target_vp;
> > +   __entry->offset = msg->downstream_ringbuffer_pageoffset;
> > +   __entry->ret = ret;
> > +   ),
> > 
> > I don't see how that information can be extracted easily without a
> > tracepoint here. Am I missing something?  
> 
> Wasn't one of the outcomes of the conference last week the fact that for
> ftrace + ebpf we could get access to the structures of the function
> parameters?  Or that work would soon be showing up?
> 
> It just feels "wrong" to add a tracepoint for a function call, like it
> is a duplication of work/functionality we already have.
> 
> thanks,
> 
> greg k-h

Just to add some context. VMBus open/close etc are not in critical path.
The reason that tracing of these makes sense is that there have been bugs
in the past with teardown and restart of channels, and having some information
would help. Not sure if the detailed internals of the parameters matter that
much since it has been clear what the parameters were by context.

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[RFC] hv_netvsc: safer orderly shutdown

2017-11-09 Thread Stephen Hemminger

Several types of control operations require that the underlying RNDIS
infrastructure be restarted. This patch changes the ordering of the
shutdown to avoid race conditions.
Stop all transmits before doing RNDIS halt. This involves stopping the
network device transmit queues, then waiting for all outstanding
sends before informing host to halt.

Also, check for successful restart of the device after the
change is done.

For review, not tested on Hyper-V yet.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc_drv.c   | 40 ++-
 drivers/net/hyperv/rndis_filter.c | 23 +++---
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index da216ca4f2b2..3afa082e093d 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -855,8 +855,10 @@ static int netvsc_set_channels(struct net_device *net,
 
orig = nvdev->num_chn;
was_opened = rndis_filter_opened(nvdev);
-   if (was_opened)
+   if (was_opened) {
+   netif_tx_disable(net);
rndis_filter_close(nvdev);
+   }
 
memset(&device_info, 0, sizeof(device_info));
device_info.num_chn = count;
@@ -881,8 +883,13 @@ static int netvsc_set_channels(struct net_device *net,
}
}
 
-   if (was_opened)
-   rndis_filter_open(nvdev);
+   if (was_opened) {
+   ret = rndis_filter_open(nvdev);
+   if (ret)
+   netdev_err(net, "reopening device failed: %d\n", ret);
+   else
+   netif_tx_start_all_queues(net);
+   }
 
/* We may have missed link change notifications */
net_device_ctx->last_reconfig = 0;
@@ -971,8 +978,10 @@ static int netvsc_change_mtu(struct net_device *ndev, int 
mtu)
 
netif_device_detach(ndev);
was_opened = rndis_filter_opened(nvdev);
-   if (was_opened)
+   if (was_opened) {
+   netif_tx_disable(net);
rndis_filter_close(nvdev);
+   }
 
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
@@ -1004,8 +1013,13 @@ static int netvsc_change_mtu(struct net_device *ndev, 
int mtu)
}
}
 
-   if (was_opened)
-   rndis_filter_open(nvdev);
+   if (was_opened) {
+   ret = rndis_filter_open(nvdev);
+   if (ret)
+   netdev_err(net, "reopening device failed: %d\n", ret);
+   else
+   netif_tx_start_all_queues(net);
+   }
 
netif_device_attach(ndev);
 
@@ -1547,8 +1561,10 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 
netif_device_detach(ndev);
was_opened = rndis_filter_opened(nvdev);
-   if (was_opened)
+   if (was_opened) {
+   netif_tx_disable(net);
rndis_filter_close(nvdev);
+   }
 
rndis_filter_device_remove(hdev, nvdev);
 
@@ -1566,8 +1582,14 @@ static int netvsc_set_ringparam(struct net_device *ndev,
}
}
 
-   if (was_opened)
-   rndis_filter_open(nvdev);
+   if (was_opened) {
+   ret = rndis_filter_open(nvdev);
+   if (ret)
+   netdev_err(net, "reopening device failed: %d\n", ret);
+   else
+   netif_tx_start_all_queues(net);
+   }
+
netif_device_attach(ndev);
 
/* We may have missed link change notifications */
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c
index 0648eebda829..164f5ffe9c50 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -948,11 +948,20 @@ static void rndis_filter_halt_device(struct rndis_device 
*dev)
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 
+   /* tell bottom half that device is being closed */
+   nvdev->destroy = true;
+
+   /* Force flag to be ordered before waiting */
+   wmb();
+
+   /* Wait for all send completions */
+   wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
+
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
if (!request)
-   goto cleanup;
+   return;
 
/* Setup the rndis set */
halt = &request->request_msg.msg.halt_req;
@@ -963,17 +972,7 @@ static void rndis_filter_halt_device(struct rndis_device 
*dev)
 
dev->state = RNDIS_DEV_UNINITIALIZED;
 
-cleanup:
-   nvdev->destroy = true;
-
-   /* Force flag to be ordered before

Re: [PATCH] drivers: hv: balloon: remove extraneous assignment to region_start

2017-11-09 Thread Stephen Hemminger
On Wed, 18 Oct 2017 12:52:12 +0100
Colin King  wrote:

> From: Colin Ian King 
> 
> The variable region_start is assigned twice, the first value is
> never read and redundant, so can be removed.  We can clean up the
> code further by assigning rg_start directly rather than using the
> temporary variable region_start which can then be removed. Cleans
> up the clang warning:
> 
> drivers/hv/hv_balloon.c:976:3: warning: Value stored to 'region_start'
> is never read
> 
> Signed-off-by: Colin Ian King 

LGTM

Acked-by: Stephen Hemminger 


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 0/2] retire IPX and Netware file system

2017-11-12 Thread Stephen Hemminger
Netware has been dead for years. Time to deprecate IPX and the
Novell file system.

Stephen Hemminger (2):
  ipx: move Novell IPX protocol support into staging
  ncpfs: move net/ncpfs to drivers/staging/ncpfs

 MAINTAINERS| 5 ++---
 drivers/staging/Kconfig| 4 
 drivers/staging/Makefile   | 2 ++
 {net => drivers/staging}/ipx/Kconfig   | 0
 {net => drivers/staging}/ipx/Makefile  | 0
 {net => drivers/staging}/ipx/af_ipx.c  | 0
 {net => drivers/staging}/ipx/ipx_proc.c| 0
 {net => drivers/staging}/ipx/ipx_route.c   | 0
 {net => drivers/staging}/ipx/pe2.c | 0
 {net => drivers/staging}/ipx/sysctl_net_ipx.c  | 0
 {fs => drivers/staging}/ncpfs/Kconfig  | 0
 {fs => drivers/staging}/ncpfs/Makefile | 0
 {fs => drivers/staging}/ncpfs/dir.c| 0
 {fs => drivers/staging}/ncpfs/file.c   | 0
 {fs => drivers/staging}/ncpfs/getopt.c | 0
 {fs => drivers/staging}/ncpfs/getopt.h | 0
 {fs => drivers/staging}/ncpfs/inode.c  | 0
 {fs => drivers/staging}/ncpfs/ioctl.c  | 0
 {fs => drivers/staging}/ncpfs/mmap.c   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_i.h   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.c  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.h  | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0
 {fs => drivers/staging}/ncpfs/sock.c   | 0
 {fs => drivers/staging}/ncpfs/symlink.c| 0
 fs/Kconfig | 1 -
 fs/Makefile| 1 -
 net/Kconfig| 1 -
 net/Makefile   | 1 -
 32 files changed, 8 insertions(+), 7 deletions(-)
 rename {net => drivers/staging}/ipx/Kconfig (100%)
 rename {net => drivers/staging}/ipx/Makefile (100%)
 rename {net => drivers/staging}/ipx/af_ipx.c (100%)
 rename {net => drivers/staging}/ipx/ipx_proc.c (100%)
 rename {net => drivers/staging}/ipx/ipx_route.c (100%)
 rename {net => drivers/staging}/ipx/pe2.c (100%)
 rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%)
 rename {fs => drivers/staging}/ncpfs/Kconfig (100%)
 rename {fs => drivers/staging}/ncpfs/Makefile (100%)
 rename {fs => drivers/staging}/ncpfs/dir.c (100%)
 rename {fs => drivers/staging}/ncpfs/file.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.h (100%)
 rename {fs => drivers/staging}/ncpfs/inode.c (100%)
 rename {fs => drivers/staging}/ncpfs/ioctl.c (100%)
 rename {fs => drivers/staging}/ncpfs/mmap.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/sock.c (100%)
 rename {fs => drivers/staging}/ncpfs/symlink.c (100%)

-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 2/2] ncpfs: move net/ncpfs to drivers/staging/ncpfs

2017-11-12 Thread Stephen Hemminger
The Netware Core Protocol is a file system that talks to
Netware clients over IPX. Since IPX has been dead for many years
move the file system into staging for eventual interment.

Signed-off-by: Stephen Hemminger 
---
 MAINTAINERS| 2 +-
 drivers/staging/Kconfig| 2 ++
 drivers/staging/Makefile   | 1 +
 {fs => drivers/staging}/ncpfs/Kconfig  | 0
 {fs => drivers/staging}/ncpfs/Makefile | 0
 {fs => drivers/staging}/ncpfs/dir.c| 0
 {fs => drivers/staging}/ncpfs/file.c   | 0
 {fs => drivers/staging}/ncpfs/getopt.c | 0
 {fs => drivers/staging}/ncpfs/getopt.h | 0
 {fs => drivers/staging}/ncpfs/inode.c  | 0
 {fs => drivers/staging}/ncpfs/ioctl.c  | 0
 {fs => drivers/staging}/ncpfs/mmap.c   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_i.h   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.c  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.h  | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0
 {fs => drivers/staging}/ncpfs/sock.c   | 0
 {fs => drivers/staging}/ncpfs/symlink.c| 0
 fs/Kconfig | 1 -
 fs/Makefile| 1 -
 23 files changed, 4 insertions(+), 3 deletions(-)
 rename {fs => drivers/staging}/ncpfs/Kconfig (100%)
 rename {fs => drivers/staging}/ncpfs/Makefile (100%)
 rename {fs => drivers/staging}/ncpfs/dir.c (100%)
 rename {fs => drivers/staging}/ncpfs/file.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.h (100%)
 rename {fs => drivers/staging}/ncpfs/inode.c (100%)
 rename {fs => drivers/staging}/ncpfs/ioctl.c (100%)
 rename {fs => drivers/staging}/ncpfs/mmap.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/sock.c (100%)
 rename {fs => drivers/staging}/ncpfs/symlink.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 54c29ebcec55..5ed623ad5717 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9276,7 +9276,7 @@ F:drivers/net/ethernet/natsemi/natsemi.c
 NCP FILESYSTEM
 M: Petr Vandrovec 
 S: Odd Fixes
-F: fs/ncpfs/
+F: drivers/staging/ncpfs/
 
 NCR 5380 SCSI DRIVERS
 M: Finn Thain 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 3744640bed5a..e95ab683331e 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -28,6 +28,8 @@ source "drivers/staging/irda/net/Kconfig"
 
 source "drivers/staging/ipx/Kconfig"
 
+source "drivers/staging/ncpfs/Kconfig"
+
 source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index f4c0506470ff..af8cd6a3a1f6 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -4,6 +4,7 @@
 obj-y  += media/
 obj-y  += typec/
 obj-$(CONFIG_IPX)  += ipx/
+obj-$(CONFIG_NCP_FS)   += ncpfs/
 obj-$(CONFIG_IRDA) += irda/net/
 obj-$(CONFIG_IRDA) += irda/drivers/
 obj-$(CONFIG_PRISM2_USB)   += wlan-ng/
diff --git a/fs/ncpfs/Kconfig b/drivers/staging/ncpfs/Kconfig
similarity index 100%
rename from fs/ncpfs/Kconfig
rename to drivers/staging/ncpfs/Kconfig
diff --git a/fs/ncpfs/Makefile b/drivers/staging/ncpfs/Makefile
similarity index 100%
rename from fs/ncpfs/Makefile
rename to drivers/staging/ncpfs/Makefile
diff --git a/fs/ncpfs/dir.c b/drivers/staging/ncpfs/dir.c
similarity index 100%
rename from fs/ncpfs/dir.c
rename to drivers/staging/ncpfs/dir.c
diff --git a/fs/ncpfs/file.c b/drivers/staging/ncpfs/file.c
similarity index 100%
rename from fs/ncpfs/file.c
rename to drivers/staging/ncpfs/file.c
diff --git a/fs/ncpfs/getopt.c b/drivers/staging/ncpfs/getopt.c
similarity index 100%
rename from fs/ncpfs/getopt.c
rename to drivers/staging/ncpfs/getopt.c
diff --git a/fs/ncpfs/getopt.h b/drivers/staging/ncpfs/getopt.h
similarity index 100%
rename from fs/ncpfs/getopt.h
rename to drivers/staging/ncpfs/getopt.h
diff --git a/fs/ncpfs/inode.c b/drivers/staging/ncpfs/inode.c
similarity index 100%
rename from fs/ncpfs/inode.c
rename to drivers/staging/ncpfs/inode.c
diff --git a/fs/ncpfs/ioctl.c b/drivers/staging/ncpfs/ioctl.c
similarity in

[PATCH net-next 1/2] ipx: move Novell IPX protocol support into staging

2017-11-12 Thread Stephen Hemminger
The Netware IPX protocol is very old and no one should still be using
it. It is time to move it into staging for a while and eventually
decommission it.

Note: net/ipx.h has to be kept around because bonding is still
using it to decode IPX header for LAG.

Signed-off-by: Stephen Hemminger 
---
 MAINTAINERS   | 3 +--
 drivers/staging/Kconfig   | 2 ++
 drivers/staging/Makefile  | 1 +
 {net => drivers/staging}/ipx/Kconfig  | 0
 {net => drivers/staging}/ipx/Makefile | 0
 {net => drivers/staging}/ipx/af_ipx.c | 0
 {net => drivers/staging}/ipx/ipx_proc.c   | 0
 {net => drivers/staging}/ipx/ipx_route.c  | 0
 {net => drivers/staging}/ipx/pe2.c| 0
 {net => drivers/staging}/ipx/sysctl_net_ipx.c | 0
 net/Kconfig   | 1 -
 net/Makefile  | 1 -
 12 files changed, 4 insertions(+), 4 deletions(-)
 rename {net => drivers/staging}/ipx/Kconfig (100%)
 rename {net => drivers/staging}/ipx/Makefile (100%)
 rename {net => drivers/staging}/ipx/af_ipx.c (100%)
 rename {net => drivers/staging}/ipx/ipx_proc.c (100%)
 rename {net => drivers/staging}/ipx/ipx_route.c (100%)
 rename {net => drivers/staging}/ipx/pe2.c (100%)
 rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 29aa89a1837b..54c29ebcec55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7206,9 +7206,8 @@ F:drivers/tty/ipwireless/
 IPX NETWORK LAYER
 L: net...@vger.kernel.org
 S: Odd fixes
-F: include/net/ipx.h
 F: include/uapi/linux/ipx.h
-F: net/ipx/
+F: drivers/staging/ipx/
 
 IRDA SUBSYSTEM
 M: Samuel Ortiz 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..3744640bed5a 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -26,6 +26,8 @@ if STAGING
 
 source "drivers/staging/irda/net/Kconfig"
 
+source "drivers/staging/ipx/Kconfig"
+
 source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 6e536020029a..f4c0506470ff 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -3,6 +3,7 @@
 
 obj-y  += media/
 obj-y  += typec/
+obj-$(CONFIG_IPX)  += ipx/
 obj-$(CONFIG_IRDA) += irda/net/
 obj-$(CONFIG_IRDA) += irda/drivers/
 obj-$(CONFIG_PRISM2_USB)   += wlan-ng/
diff --git a/net/ipx/Kconfig b/drivers/staging/ipx/Kconfig
similarity index 100%
rename from net/ipx/Kconfig
rename to drivers/staging/ipx/Kconfig
diff --git a/net/ipx/Makefile b/drivers/staging/ipx/Makefile
similarity index 100%
rename from net/ipx/Makefile
rename to drivers/staging/ipx/Makefile
diff --git a/net/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
similarity index 100%
rename from net/ipx/af_ipx.c
rename to drivers/staging/ipx/af_ipx.c
diff --git a/net/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c
similarity index 100%
rename from net/ipx/ipx_proc.c
rename to drivers/staging/ipx/ipx_proc.c
diff --git a/net/ipx/ipx_route.c b/drivers/staging/ipx/ipx_route.c
similarity index 100%
rename from net/ipx/ipx_route.c
rename to drivers/staging/ipx/ipx_route.c
diff --git a/net/ipx/pe2.c b/drivers/staging/ipx/pe2.c
similarity index 100%
rename from net/ipx/pe2.c
rename to drivers/staging/ipx/pe2.c
diff --git a/net/ipx/sysctl_net_ipx.c b/drivers/staging/ipx/sysctl_net_ipx.c
similarity index 100%
rename from net/ipx/sysctl_net_ipx.c
rename to drivers/staging/ipx/sysctl_net_ipx.c
diff --git a/net/Kconfig b/net/Kconfig
index 9dba2715919d..ff71ba0f6c27 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -212,7 +212,6 @@ source "net/dsa/Kconfig"
 source "net/8021q/Kconfig"
 source "net/decnet/Kconfig"
 source "net/llc/Kconfig"
-source "net/ipx/Kconfig"
 source "drivers/net/appletalk/Kconfig"
 source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 14fede520840..a6147c61b174 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_PACKET)  += packet/
 obj-$(CONFIG_NET_KEY)  += key/
 obj-$(CONFIG_BRIDGE)   += bridge/
 obj-$(CONFIG_NET_DSA)  += dsa/
-obj-$(CONFIG_IPX)  += ipx/
 obj-$(CONFIG_ATALK)+= appletalk/
 obj-$(CONFIG_X25)  += x25/
 obj-$(CONFIG_LAPB) += lapb/
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next] decnet: move to staging

2017-11-12 Thread Stephen Hemminger
Support for Decnet has been orphaned for many years.
In the interest of reducing the potential bug surface and pre-holiday
cleaning, move the decnet protocol into staging for eventual removal.

Signed-off-by: Stephen Hemminger 
---
 MAINTAINERS  | 2 +-
 drivers/staging/Kconfig  | 5 +
 drivers/staging/Makefile | 1 +
 {net => drivers/staging}/decnet/Kconfig  | 0
 {net => drivers/staging}/decnet/Makefile | 0
 {net => drivers/staging}/decnet/README   | 0
 {net => drivers/staging}/decnet/TODO | 0
 {net => drivers/staging}/decnet/af_decnet.c  | 0
 {net => drivers/staging}/decnet/dn_dev.c | 0
 {net => drivers/staging}/decnet/dn_fib.c | 0
 {net => drivers/staging}/decnet/dn_neigh.c   | 0
 {net => drivers/staging}/decnet/dn_nsp_in.c  | 0
 {net => drivers/staging}/decnet/dn_nsp_out.c | 0
 {net => drivers/staging}/decnet/dn_route.c   | 0
 {net => drivers/staging}/decnet/dn_rules.c   | 0
 {net => drivers/staging}/decnet/dn_table.c   | 0
 {net => drivers/staging}/decnet/dn_timer.c   | 0
 {net => drivers/staging}/decnet/netfilter/Kconfig| 0
 {net => drivers/staging}/decnet/netfilter/Makefile   | 0
 {net => drivers/staging}/decnet/netfilter/dn_rtmsg.c | 0
 {net => drivers/staging}/decnet/sysctl_net_decnet.c  | 0
 net/Kconfig  | 2 --
 net/Makefile | 1 -
 23 files changed, 7 insertions(+), 4 deletions(-)
 rename {net => drivers/staging}/decnet/Kconfig (100%)
 rename {net => drivers/staging}/decnet/Makefile (100%)
 rename {net => drivers/staging}/decnet/README (100%)
 rename {net => drivers/staging}/decnet/TODO (100%)
 rename {net => drivers/staging}/decnet/af_decnet.c (100%)
 rename {net => drivers/staging}/decnet/dn_dev.c (100%)
 rename {net => drivers/staging}/decnet/dn_fib.c (100%)
 rename {net => drivers/staging}/decnet/dn_neigh.c (100%)
 rename {net => drivers/staging}/decnet/dn_nsp_in.c (100%)
 rename {net => drivers/staging}/decnet/dn_nsp_out.c (100%)
 rename {net => drivers/staging}/decnet/dn_route.c (100%)
 rename {net => drivers/staging}/decnet/dn_rules.c (100%)
 rename {net => drivers/staging}/decnet/dn_table.c (100%)
 rename {net => drivers/staging}/decnet/dn_timer.c (100%)
 rename {net => drivers/staging}/decnet/netfilter/Kconfig (100%)
 rename {net => drivers/staging}/decnet/netfilter/Makefile (100%)
 rename {net => drivers/staging}/decnet/netfilter/dn_rtmsg.c (100%)
 rename {net => drivers/staging}/decnet/sysctl_net_decnet.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 29aa89a1837b..66e2d302d9eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3968,7 +3968,7 @@ W:http://linux-decnet.sourceforge.net
 L: linux-decnet-u...@lists.sourceforge.net
 S: Orphan
 F: Documentation/networking/decnet.txt
-F: net/decnet/
+F: drivers/staging/decnet/
 
 DECSTATION PLATFORM SUPPORT
 M: "Maciej W. Rozycki" 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..e30af73c3797 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -30,6 +30,11 @@ source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
 
+if NETFILTER
+source "drivers/staging/decnet/netfilter/Kconfig"
+endif
+source "drivers/staging/decnet/Kconfig"
+
 source "drivers/staging/olpc_dcon/Kconfig"
 
 source "drivers/staging/rtl8192u/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 6e536020029a..89655cc80a91 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_IRDA)  += irda/net/
 obj-$(CONFIG_IRDA) += irda/drivers/
 obj-$(CONFIG_PRISM2_USB)   += wlan-ng/
 obj-$(CONFIG_COMEDI)   += comedi/
+obj-$(CONFIG_DECNET)   += decnet/
 obj-$(CONFIG_FB_OLPC_DCON) += olpc_dcon/
 obj-$(CONFIG_RTL8192U) += rtl8192u/
 obj-$(CONFIG_RTL8192E) += rtl8192e/
diff --git a/net/decnet/Kconfig b/drivers/staging/decnet/Kconfig
similarity index 100%
rename from net/decnet/Kconfig
rename to drivers/staging/decnet/Kconfig
diff --git a/net/decnet/Makefile b/drivers/staging/decnet/Makefile
similarity index 100%
rename from net/decnet/Makefile
rename to drivers/staging/decnet/Makefile
diff --git a/net/decnet/README b/drivers/staging/decnet/README
similarity index 100%
rename from net/decnet/README
rename to drivers/staging/decnet/README
diff --git a/net/decnet/TODO b/drivers/staging/decnet/TODO
similarity index 100%
rename from net/decnet/TODO
rename to drivers/staging/decnet/TODO
diff --git a/net/decnet/af_decnet.c b/drivers/staging/decnet/af_decnet.c
similar

Re: [PATCH] vmbus: unregister device_obj->channels_kset

2017-11-13 Thread Stephen Hemminger
On Mon, 13 Nov 2017 01:53:33 +
Dexuan Cui  wrote:

> Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info")
> 
> Without the patch, a device can't be thoroughly destroyed, because
> vmbus_device_register() -> kset_create_and_add() still holds a reference
> to the hv_device's device.kobj.
> 
> Signed-off-by: Dexuan Cui 
> Cc: Stephen Hemminger 
> Cc: K. Y. Srinivasan 
> ---

Good catch

Signed-off-by: Stephen Hemminger 
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next v2] net: move decnet to staging

2017-11-13 Thread Stephen Hemminger
Support for Decnet has been orphaned for some time.
In the interest of reducing the potential bug surface and pre-holiday
cleaning, move the decnet protocol into staging for eventual removal.

Signed-off-by: Stephen Hemminger 
---
Note: the original submission was an hour or so before net-next closed.
Not sure if you want to wait on this until after 4.15-rc1

v2 - update TODO and move include/net/dn.h to staging as well

 MAINTAINERS|  2 +-
 drivers/staging/Kconfig|  5 +++
 drivers/staging/Makefile   |  1 +
 {net => drivers/staging}/decnet/Kconfig|  0
 {net => drivers/staging}/decnet/Makefile   |  3 +-
 {net => drivers/staging}/decnet/README |  0
 drivers/staging/decnet/TODO|  4 +++
 {net => drivers/staging}/decnet/af_decnet.c|  0
 {net => drivers/staging}/decnet/dn_dev.c   |  0
 {net => drivers/staging}/decnet/dn_fib.c   |  0
 {net => drivers/staging}/decnet/dn_neigh.c |  0
 {net => drivers/staging}/decnet/dn_nsp_in.c|  0
 {net => drivers/staging}/decnet/dn_nsp_out.c   |  0
 {net => drivers/staging}/decnet/dn_route.c |  0
 {net => drivers/staging}/decnet/dn_rules.c |  0
 {net => drivers/staging}/decnet/dn_table.c |  0
 {net => drivers/staging}/decnet/dn_timer.c |  0
 .../staging/decnet/include}/net/dn.h   |  0
 {net => drivers/staging}/decnet/netfilter/Kconfig  |  0
 {net => drivers/staging}/decnet/netfilter/Makefile |  3 +-
 .../staging}/decnet/netfilter/dn_rtmsg.c   |  0
 .../staging}/decnet/sysctl_net_decnet.c|  0
 net/Kconfig|  2 --
 net/Makefile   |  1 -
 net/decnet/TODO| 41 --
 25 files changed, 15 insertions(+), 47 deletions(-)
 rename {net => drivers/staging}/decnet/Kconfig (100%)
 rename {net => drivers/staging}/decnet/Makefile (84%)
 rename {net => drivers/staging}/decnet/README (100%)
 create mode 100644 drivers/staging/decnet/TODO
 rename {net => drivers/staging}/decnet/af_decnet.c (100%)
 rename {net => drivers/staging}/decnet/dn_dev.c (100%)
 rename {net => drivers/staging}/decnet/dn_fib.c (100%)
 rename {net => drivers/staging}/decnet/dn_neigh.c (100%)
 rename {net => drivers/staging}/decnet/dn_nsp_in.c (100%)
 rename {net => drivers/staging}/decnet/dn_nsp_out.c (100%)
 rename {net => drivers/staging}/decnet/dn_route.c (100%)
 rename {net => drivers/staging}/decnet/dn_rules.c (100%)
 rename {net => drivers/staging}/decnet/dn_table.c (100%)
 rename {net => drivers/staging}/decnet/dn_timer.c (100%)
 rename {include => drivers/staging/decnet/include}/net/dn.h (100%)
 rename {net => drivers/staging}/decnet/netfilter/Kconfig (100%)
 rename {net => drivers/staging}/decnet/netfilter/Makefile (62%)
 rename {net => drivers/staging}/decnet/netfilter/dn_rtmsg.c (100%)
 rename {net => drivers/staging}/decnet/sysctl_net_decnet.c (100%)
 delete mode 100644 net/decnet/TODO

diff --git a/MAINTAINERS b/MAINTAINERS
index 29aa89a1837b..66e2d302d9eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3968,7 +3968,7 @@ W:http://linux-decnet.sourceforge.net
 L: linux-decnet-u...@lists.sourceforge.net
 S: Orphan
 F: Documentation/networking/decnet.txt
-F: net/decnet/
+F: drivers/staging/decnet/
 
 DECSTATION PLATFORM SUPPORT
 M: "Maciej W. Rozycki" 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..e30af73c3797 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -30,6 +30,11 @@ source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
 
+if NETFILTER
+source "drivers/staging/decnet/netfilter/Kconfig"
+endif
+source "drivers/staging/decnet/Kconfig"
+
 source "drivers/staging/olpc_dcon/Kconfig"
 
 source "drivers/staging/rtl8192u/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 6e536020029a..89655cc80a91 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_IRDA)  += irda/net/
 obj-$(CONFIG_IRDA) += irda/drivers/
 obj-$(CONFIG_PRISM2_USB)   += wlan-ng/
 obj-$(CONFIG_COMEDI)   += comedi/
+obj-$(CONFIG_DECNET)   += decnet/
 obj-$(CONFIG_FB_OLPC_DCON) += olpc_dcon/
 obj-$(CONFIG_RTL8192U) += rtl8192u/
 obj-$(CONFIG_RTL8192E) += rtl8192e/
diff --git a/net/decnet/Kconfig b/drivers/staging/decnet/Kconfig
similarity index 100%
rename from net/decnet/Kconfig
rename to drivers/staging/decnet/Kconfig
diff --git a/net/decnet/Makefile b/drivers/staging/decnet/Makefile
similarity index 84%
rename from net/decnet/Makefile
rename to drivers/staging/de

Re: [RFC] hv_netvsc: safer orderly shutdown

2017-11-13 Thread Stephen Hemminger
On Mon, 13 Nov 2017 11:57:47 +0100
Vitaly Kuznetsov  wrote:

> Stephen Hemminger  writes:
> 
> >
> > The NAPI disable is already handled by rndis close.  
> 
> Sorry, but I'm probably missing something: I can only see
> netif_napi_del() call in netvsc_device_remove() but this happens much
> later. And I don see us doing napi_disable() anywhere on the path.
> But I'm probably missing something.
> 

You need to keep NAPI running to handle transmit completions.
Disabling the Tx and Rx filter should keep spurious activity
away until the halt is done.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH net] hv_netvsc: preserve hw_features on mtu/channels/ringparam changes

2017-11-14 Thread Stephen Hemminger
On Tue, 14 Nov 2017 16:22:05 +0100
Vitaly Kuznetsov  wrote:

Yes, this looks like a real issue.



> + /* Query hardware capabilities if we're called from netvsc_probe() */
> + if (!net->hw_features) {
> + ret = rndis_netdev_set_hwcaps(net_device, rndis_device);
> + if (ret != 0)
> + goto err_dev_remv;
> + }
> +

Rather than conditional behavior in rndis_filter_device_add, it would be cleaner
to make the call to get hardware capabilities there.

Please respin and make the query of host a separate function.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next v2 0/2] retire IPX and NCPFS

2017-11-14 Thread Stephen Hemminger
These are both old decrepit protocols that need to be sent
to pasture.

Stephen Hemminger (2):
  ipx: move Novell IPX protocol support into staging
  ncpfs: move net/ncpfs to drivers/staging/ncpfs

 MAINTAINERS| 5 ++---
 drivers/staging/Kconfig| 4 
 drivers/staging/Makefile   | 2 ++
 {net => drivers/staging}/ipx/Kconfig   | 0
 {net => drivers/staging}/ipx/Makefile  | 0
 drivers/staging/ipx/TODO   | 4 
 {net => drivers/staging}/ipx/af_ipx.c  | 0
 {net => drivers/staging}/ipx/ipx_proc.c| 0
 {net => drivers/staging}/ipx/ipx_route.c   | 0
 {net => drivers/staging}/ipx/pe2.c | 0
 {net => drivers/staging}/ipx/sysctl_net_ipx.c  | 0
 {fs => drivers/staging}/ncpfs/Kconfig  | 0
 {fs => drivers/staging}/ncpfs/Makefile | 0
 drivers/staging/ncpfs/TODO | 4 
 {fs => drivers/staging}/ncpfs/dir.c| 0
 {fs => drivers/staging}/ncpfs/file.c   | 0
 {fs => drivers/staging}/ncpfs/getopt.c | 0
 {fs => drivers/staging}/ncpfs/getopt.h | 0
 {fs => drivers/staging}/ncpfs/inode.c  | 0
 {fs => drivers/staging}/ncpfs/ioctl.c  | 0
 {fs => drivers/staging}/ncpfs/mmap.c   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_i.h   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.c  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.h  | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0
 {fs => drivers/staging}/ncpfs/sock.c   | 0
 {fs => drivers/staging}/ncpfs/symlink.c| 0
 fs/Kconfig | 1 -
 fs/Makefile| 1 -
 net/Kconfig| 1 -
 net/Makefile   | 1 -
 34 files changed, 16 insertions(+), 7 deletions(-)
 rename {net => drivers/staging}/ipx/Kconfig (100%)
 rename {net => drivers/staging}/ipx/Makefile (100%)
 create mode 100644 drivers/staging/ipx/TODO
 rename {net => drivers/staging}/ipx/af_ipx.c (100%)
 rename {net => drivers/staging}/ipx/ipx_proc.c (100%)
 rename {net => drivers/staging}/ipx/ipx_route.c (100%)
 rename {net => drivers/staging}/ipx/pe2.c (100%)
 rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%)
 rename {fs => drivers/staging}/ncpfs/Kconfig (100%)
 rename {fs => drivers/staging}/ncpfs/Makefile (100%)
 create mode 100644 drivers/staging/ncpfs/TODO
 rename {fs => drivers/staging}/ncpfs/dir.c (100%)
 rename {fs => drivers/staging}/ncpfs/file.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.h (100%)
 rename {fs => drivers/staging}/ncpfs/inode.c (100%)
 rename {fs => drivers/staging}/ncpfs/ioctl.c (100%)
 rename {fs => drivers/staging}/ncpfs/mmap.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/sock.c (100%)
 rename {fs => drivers/staging}/ncpfs/symlink.c (100%)

-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next v2 1/2] ipx: move Novell IPX protocol support into staging

2017-11-14 Thread Stephen Hemminger
The Netware IPX protocol is very old and no one should still be using
it. It is time to move it into staging for a while and eventually
decommission it.

Signed-off-by: Stephen Hemminger 
---
v2 add a TODO (don't touch me) file

 MAINTAINERS   | 3 +--
 drivers/staging/Kconfig   | 2 ++
 drivers/staging/Makefile  | 1 +
 {net => drivers/staging}/ipx/Kconfig  | 0
 {net => drivers/staging}/ipx/Makefile | 0
 drivers/staging/ipx/TODO  | 4 
 {net => drivers/staging}/ipx/af_ipx.c | 0
 {net => drivers/staging}/ipx/ipx_proc.c   | 0
 {net => drivers/staging}/ipx/ipx_route.c  | 0
 {net => drivers/staging}/ipx/pe2.c| 0
 {net => drivers/staging}/ipx/sysctl_net_ipx.c | 0
 net/Kconfig   | 1 -
 net/Makefile  | 1 -
 13 files changed, 8 insertions(+), 4 deletions(-)
 rename {net => drivers/staging}/ipx/Kconfig (100%)
 rename {net => drivers/staging}/ipx/Makefile (100%)
 create mode 100644 drivers/staging/ipx/TODO
 rename {net => drivers/staging}/ipx/af_ipx.c (100%)
 rename {net => drivers/staging}/ipx/ipx_proc.c (100%)
 rename {net => drivers/staging}/ipx/ipx_route.c (100%)
 rename {net => drivers/staging}/ipx/pe2.c (100%)
 rename {net => drivers/staging}/ipx/sysctl_net_ipx.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 29aa89a1837b..54c29ebcec55 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7206,9 +7206,8 @@ F:drivers/tty/ipwireless/
 IPX NETWORK LAYER
 L: net...@vger.kernel.org
 S: Odd fixes
-F: include/net/ipx.h
 F: include/uapi/linux/ipx.h
-F: net/ipx/
+F: drivers/staging/ipx/
 
 IRDA SUBSYSTEM
 M: Samuel Ortiz 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..3744640bed5a 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -26,6 +26,8 @@ if STAGING
 
 source "drivers/staging/irda/net/Kconfig"
 
+source "drivers/staging/ipx/Kconfig"
+
 source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 6e536020029a..f4c0506470ff 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -3,6 +3,7 @@
 
 obj-y  += media/
 obj-y  += typec/
+obj-$(CONFIG_IPX)  += ipx/
 obj-$(CONFIG_IRDA) += irda/net/
 obj-$(CONFIG_IRDA) += irda/drivers/
 obj-$(CONFIG_PRISM2_USB)   += wlan-ng/
diff --git a/net/ipx/Kconfig b/drivers/staging/ipx/Kconfig
similarity index 100%
rename from net/ipx/Kconfig
rename to drivers/staging/ipx/Kconfig
diff --git a/net/ipx/Makefile b/drivers/staging/ipx/Makefile
similarity index 100%
rename from net/ipx/Makefile
rename to drivers/staging/ipx/Makefile
diff --git a/drivers/staging/ipx/TODO b/drivers/staging/ipx/TODO
new file mode 100644
index ..80db5d968264
--- /dev/null
+++ b/drivers/staging/ipx/TODO
@@ -0,0 +1,4 @@
+The ipx code will be removed soon from the kernel tree as it is old and
+obsolete and broken.
+
+Don't worry about fixing up anything here, it's not needed.
diff --git a/net/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
similarity index 100%
rename from net/ipx/af_ipx.c
rename to drivers/staging/ipx/af_ipx.c
diff --git a/net/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c
similarity index 100%
rename from net/ipx/ipx_proc.c
rename to drivers/staging/ipx/ipx_proc.c
diff --git a/net/ipx/ipx_route.c b/drivers/staging/ipx/ipx_route.c
similarity index 100%
rename from net/ipx/ipx_route.c
rename to drivers/staging/ipx/ipx_route.c
diff --git a/net/ipx/pe2.c b/drivers/staging/ipx/pe2.c
similarity index 100%
rename from net/ipx/pe2.c
rename to drivers/staging/ipx/pe2.c
diff --git a/net/ipx/sysctl_net_ipx.c b/drivers/staging/ipx/sysctl_net_ipx.c
similarity index 100%
rename from net/ipx/sysctl_net_ipx.c
rename to drivers/staging/ipx/sysctl_net_ipx.c
diff --git a/net/Kconfig b/net/Kconfig
index 9dba2715919d..ff71ba0f6c27 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -212,7 +212,6 @@ source "net/dsa/Kconfig"
 source "net/8021q/Kconfig"
 source "net/decnet/Kconfig"
 source "net/llc/Kconfig"
-source "net/ipx/Kconfig"
 source "drivers/net/appletalk/Kconfig"
 source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 14fede520840..a6147c61b174 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -24,7 +24,6 @@ obj-$(CONFIG_PACKET)  += packet/
 obj-$(CONFIG_NET_KEY)  += key/
 obj-$(CONFIG_BRIDGE)   += bridge/
 obj-$(CONFIG_NET_DSA)  += dsa/
-obj-$(CONFIG_IPX)  += ipx/
 obj-$(CONFIG_ATALK)+= appletalk/
 obj-$(CONFIG_X25)  += x25/
 obj-$(CONFIG_LAPB)

[PATCH net-next v2 2/2] ncpfs: move net/ncpfs to drivers/staging/ncpfs

2017-11-14 Thread Stephen Hemminger
The Netware Core Protocol is a file system that talks to
Netware clients over IPX. Since IPX has been dead for many years,
move the file system into staging for eventual interment.

Signed-off-by: Stephen Hemminger 
---
v2 add a TODO (don't touch me) file

 MAINTAINERS| 2 +-
 drivers/staging/Kconfig| 2 ++
 drivers/staging/Makefile   | 1 +
 {fs => drivers/staging}/ncpfs/Kconfig  | 0
 {fs => drivers/staging}/ncpfs/Makefile | 0
 drivers/staging/ncpfs/TODO | 4 
 {fs => drivers/staging}/ncpfs/dir.c| 0
 {fs => drivers/staging}/ncpfs/file.c   | 0
 {fs => drivers/staging}/ncpfs/getopt.c | 0
 {fs => drivers/staging}/ncpfs/getopt.h | 0
 {fs => drivers/staging}/ncpfs/inode.c  | 0
 {fs => drivers/staging}/ncpfs/ioctl.c  | 0
 {fs => drivers/staging}/ncpfs/mmap.c   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs.h | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_i.h   | 0
 {fs => drivers/staging}/ncpfs/ncp_fs_sb.h  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.c  | 0
 {fs => drivers/staging}/ncpfs/ncplib_kernel.h  | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.c | 0
 {fs => drivers/staging}/ncpfs/ncpsign_kernel.h | 0
 {fs => drivers/staging}/ncpfs/sock.c   | 0
 {fs => drivers/staging}/ncpfs/symlink.c| 0
 fs/Kconfig | 1 -
 fs/Makefile| 1 -
 24 files changed, 8 insertions(+), 3 deletions(-)
 rename {fs => drivers/staging}/ncpfs/Kconfig (100%)
 rename {fs => drivers/staging}/ncpfs/Makefile (100%)
 create mode 100644 drivers/staging/ncpfs/TODO
 rename {fs => drivers/staging}/ncpfs/dir.c (100%)
 rename {fs => drivers/staging}/ncpfs/file.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.c (100%)
 rename {fs => drivers/staging}/ncpfs/getopt.h (100%)
 rename {fs => drivers/staging}/ncpfs/inode.c (100%)
 rename {fs => drivers/staging}/ncpfs/ioctl.c (100%)
 rename {fs => drivers/staging}/ncpfs/mmap.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_i.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncp_fs_sb.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncplib_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.c (100%)
 rename {fs => drivers/staging}/ncpfs/ncpsign_kernel.h (100%)
 rename {fs => drivers/staging}/ncpfs/sock.c (100%)
 rename {fs => drivers/staging}/ncpfs/symlink.c (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 54c29ebcec55..5ed623ad5717 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9276,7 +9276,7 @@ F:drivers/net/ethernet/natsemi/natsemi.c
 NCP FILESYSTEM
 M: Petr Vandrovec 
 S: Odd Fixes
-F: fs/ncpfs/
+F: drivers/staging/ncpfs/
 
 NCR 5380 SCSI DRIVERS
 M: Finn Thain 
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 3744640bed5a..e95ab683331e 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -28,6 +28,8 @@ source "drivers/staging/irda/net/Kconfig"
 
 source "drivers/staging/ipx/Kconfig"
 
+source "drivers/staging/ncpfs/Kconfig"
+
 source "drivers/staging/wlan-ng/Kconfig"
 
 source "drivers/staging/comedi/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index f4c0506470ff..af8cd6a3a1f6 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -4,6 +4,7 @@
 obj-y  += media/
 obj-y  += typec/
 obj-$(CONFIG_IPX)  += ipx/
+obj-$(CONFIG_NCP_FS)   += ncpfs/
 obj-$(CONFIG_IRDA) += irda/net/
 obj-$(CONFIG_IRDA) += irda/drivers/
 obj-$(CONFIG_PRISM2_USB)   += wlan-ng/
diff --git a/fs/ncpfs/Kconfig b/drivers/staging/ncpfs/Kconfig
similarity index 100%
rename from fs/ncpfs/Kconfig
rename to drivers/staging/ncpfs/Kconfig
diff --git a/fs/ncpfs/Makefile b/drivers/staging/ncpfs/Makefile
similarity index 100%
rename from fs/ncpfs/Makefile
rename to drivers/staging/ncpfs/Makefile
diff --git a/drivers/staging/ncpfs/TODO b/drivers/staging/ncpfs/TODO
new file mode 100644
index ..9b6d38b7e248
--- /dev/null
+++ b/drivers/staging/ncpfs/TODO
@@ -0,0 +1,4 @@
+The ncpfs code will be removed soon from the kernel tree as it is old and
+obsolete and broken.
+
+Don't worry about fixing up anything here, it's not needed.
diff --git a/fs/ncpfs/dir.c b/drivers/staging/ncpfs/dir.c
similarity index 100%
rename from fs/ncpfs/dir.c
rename to drivers/staging/ncpfs/dir.c
diff --git a/fs/ncpfs/file.c b/drivers/staging/ncpfs/file.c
similarity index 100%
rename from fs/ncpfs/file.c
rename to drivers/staging/ncpfs/file.c
diff --git a/fs/ncpfs/getopt.c b/drive

Re: [PATCH net-next v2 0/2] retire IPX and NCPFS

2017-11-15 Thread Stephen Hemminger
On Wed, 15 Nov 2017 09:58:33 +0900 (KST)
David Miller  wrote:

> From: Stephen Hemminger 
> Date: Tue, 14 Nov 2017 08:37:13 -0800
> 
> > These are both old decrepit protocols that need to be sent
> > to pasture.  
> 
> These need to go to gregkh and his staging/ tree, not net-next.

Ok, just wanted to get acceptance from netdev for moving them.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


RE: [PATCH 2/2] vmbus: make hv_get_ringbuffer_availbytes local

2017-11-28 Thread Stephen Hemminger
This patch requires a patch that is still going through net-next.


___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH 1/2] vmbus: unregister device_obj->channels_kset

2017-11-28 Thread Stephen Hemminger
On Tue, 28 Nov 2017 16:56:05 +0100
Greg KH  wrote:

> On Tue, Nov 14, 2017 at 06:53:32AM -0700, k...@exchange.microsoft.com wrote:
> > From: Dexuan Cui 
> > 
> > Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info")
> > 
> > Without the patch, a device can't be thoroughly destroyed, because
> > vmbus_device_register() -> kset_create_and_add() still holds a reference
> > to the hv_device's device.kobj.
> > 
> > Signed-off-by: Dexuan Cui 
> > Cc: Stephen Hemminger 
> > Cc: sta...@vger.kernel.org  
> 
> Why is this marked for stable when the patch it "fixes" is in 4.15-rc1?

It doesn't need to go to stable.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH RFC 2/6] x86/hyper-v: add a function to read both TSC and TSC page value simultaneously

2017-12-01 Thread Stephen Hemminger
On Fri,  1 Dec 2017 14:13:17 +0100
Vitaly Kuznetsov  wrote:

> +
> +static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page 
> *tsc_pg,
> +u64 *cur_tsc)
> +{
> + *cur_tsc = rdtsc();
> +
> + return cur_tsc;

Why both return the value and set it by reference? Looks like an ugly API.
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 0/6] hv_netvsc: minor optimizations

2017-12-01 Thread Stephen Hemminger
This is a set of local optimizations to the Hyper-V networking driver.
A vmbus patch is also included in this set, because it depends on the
netvsc change: netvsc was the last user of that function.

Stephen Hemminger (6):
  hv_netvsc: drop unused macros
  hv_netvsc: don't need local xmit_more
  hv_netvsc: replace divide with mask when computing padding
  hv_netvsc: use reciprocal divide to speed up percent calculation
  hv_netvsc: optimize initialization of RNDIS header
  vmbus: make hv_get_ringbuffer_availbytes local

 drivers/hv/ring_buffer.c  | 23 
 drivers/net/hyperv/hyperv_net.h   | 32 ++---
 drivers/net/hyperv/netvsc.c   | 26 ++
 drivers/net/hyperv/netvsc_drv.c   | 74 ++-
 drivers/net/hyperv/rndis_filter.c |  4 +--
 include/linux/hyperv.h| 22 
 6 files changed, 73 insertions(+), 108 deletions(-)

-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 3/6] hv_netvsc: replace divide with mask when computing padding

2017-12-01 Thread Stephen Hemminger
Packet alignment is always a power of 2, therefore the modulus can
be replaced with a faster AND operation.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index ee31faa67cad..4b931f017a25 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -712,11 +712,12 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device 
*net_device,
int i;
u32 msg_size = 0;
u32 padding = 0;
-   u32 remain = packet->total_data_buflen % net_device->pkt_align;
u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
packet->page_buf_cnt;
+   u32 remain;
 
/* Add padding */
+   remain = packet->total_data_buflen & (net_device->pkt_align - 1);
if (skb->xmit_more && remain && !packet->cp_partial) {
padding = net_device->pkt_align - remain;
rndis_msg->msg_len += padding;
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 1/6] hv_netvsc: drop unused macros

2017-12-01 Thread Stephen Hemminger
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h | 26 --
 1 file changed, 26 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 88ddfb92122b..7226230561de 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -1425,32 +1425,6 @@ struct rndis_message {
(sizeof(msg) + (sizeof(struct rndis_message) -  \
 sizeof(union rndis_message_container)))
 
-/* get pointer to info buffer with message pointer */
-#define MESSAGE_TO_INFO_BUFFER(msg)\
-   (((unsigned char *)(msg)) + msg->info_buf_offset)
-
-/* get pointer to status buffer with message pointer */
-#define MESSAGE_TO_STATUS_BUFFER(msg)  \
-   (((unsigned char *)(msg)) + msg->status_buf_offset)
-
-/* get pointer to OOBD buffer with message pointer */
-#define MESSAGE_TO_OOBD_BUFFER(msg)\
-   (((unsigned char *)(msg)) + msg->oob_data_offset)
-
-/* get pointer to data buffer with message pointer */
-#define MESSAGE_TO_DATA_BUFFER(msg)\
-   (((unsigned char *)(msg)) + msg->per_pkt_info_offset)
-
-/* get pointer to contained message from NDIS_MESSAGE pointer */
-#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_msg)\
-   ((void *) &rndis_msg->msg)
-
-/* get pointer to contained message from NDIS_MESSAGE pointer */
-#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_msg)\
-   ((void *) rndis_msg)
-
-
-
 #define RNDIS_HEADER_SIZE  (sizeof(struct rndis_message) - \
 sizeof(union rndis_message_container))
 
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 4/6] hv_netvsc: use reciprocal divide to speed up percent calculation

2017-12-01 Thread Stephen Hemminger
Every packet sent checks the available ring space. The calculation
can be sped up by using reciprocal divide which is multiplication.

Since ring_size can only be configured by module parameter, it doesn't
have to be passed around everywhere. Also, it should be unsigned
since it is a number of pages.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   |  6 +++---
 drivers/net/hyperv/netvsc.c   | 20 +++-
 drivers/net/hyperv/netvsc_drv.c   | 17 +
 drivers/net/hyperv/rndis_filter.c |  4 ++--
 4 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 7226230561de..3d940c67ea94 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -146,7 +146,6 @@ struct hv_netvsc_packet {
 
 struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
-   int  ring_size;
u32  num_chn;
u32  send_sections;
u32  recv_sections;
@@ -188,6 +187,9 @@ struct rndis_message;
 struct netvsc_device;
 struct net_device_context;
 
+extern u32 netvsc_ring_bytes;
+extern struct reciprocal_value netvsc_ring_reciprocal;
+
 struct netvsc_device *netvsc_device_add(struct hv_device *device,
const struct netvsc_device_info *info);
 int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
@@ -804,8 +806,6 @@ struct netvsc_device {
 
struct rndis_device *extension;
 
-   int ring_size;
-
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
 
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 4b931f017a25..e4bcd202a56a 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -588,14 +589,11 @@ void netvsc_device_remove(struct hv_device *device)
  * Get the percentage of available bytes to write in the ring.
  * The return value is in range from 0 to 100.
  */
-static inline u32 hv_ringbuf_avail_percent(
-   struct hv_ring_buffer_info *ring_info)
+static u32 hv_ringbuf_avail_percent(const struct hv_ring_buffer_info 
*ring_info)
 {
-   u32 avail_read, avail_write;
+   u32 avail_write = hv_get_bytes_to_write(ring_info);
 
-   hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);
-
-   return avail_write * 100 / ring_info->ring_datasize;
+   return reciprocal_divide(avail_write  * 100, netvsc_ring_reciprocal);
 }
 
 static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
@@ -1249,7 +1247,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device 
*device,
const struct netvsc_device_info *device_info)
 {
int i, ret = 0;
-   int ring_size = device_info->ring_size;
struct netvsc_device *net_device;
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
@@ -1261,8 +1258,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device 
*device,
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
net_device_ctx->tx_table[i] = 0;
 
-   net_device->ring_size = ring_size;
-
/* Because the device uses NAPI, all the interrupt batching and
 * control is done via Net softirq, not the channel handling
 */
@@ -1289,10 +1284,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device 
*device,
   netvsc_poll, NAPI_POLL_WEIGHT);
 
/* Open the channel */
-   ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
-ring_size * PAGE_SIZE, NULL, 0,
-netvsc_channel_cb,
-net_device->chan_table);
+   ret = vmbus_open(device->channel, netvsc_ring_bytes,
+netvsc_ring_bytes,  NULL, 0,
+netvsc_channel_cb, net_device->chan_table);
 
if (ret != 0) {
netif_napi_del(&net_device->chan_table[0].napi);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5129647d420c..ba690e1737ab 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -54,9 +55,11 @@
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
 
-static int ring_size = 128;
-module_param(ring_size, int, S_IRUGO);
+static unsigned int ring_size __ro_after_init = 128;
+module_param(ring_size, uint, S_IRUGO);
 MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
+unsigned int netvsc_ring_bytes __ro_after_init;
+struct reciprocal_value netvsc_ring_reciprocal __ro_after_init;
 
 static const u32 default_msg = NETIF_MSG_DRV | NETIF_M

[PATCH net-next 6/6] vmbus: make hv_get_ringbuffer_availbytes local

2017-12-01 Thread Stephen Hemminger
The last use of hv_get_ringbuffer_availbytes in drivers is now
gone. Only used by the debug info routine so make it static. Also, add
READ_ONCE() to avoid any possible issues with potentially volatile
index values.

Signed-off-by: Stephen Hemminger 
---
 drivers/hv/ring_buffer.c | 23 +++
 include/linux/hyperv.h   | 22 --
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 12eb8caa4263..50e071444a5c 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -140,6 +140,29 @@ static u32 hv_copyto_ringbuffer(
return start_write_offset;
 }
 
+/*
+ *
+ * hv_get_ringbuffer_availbytes()
+ *
+ * Get number of bytes available to read and to write to
+ * for the specified ring buffer
+ */
+static void
+hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
+u32 *read, u32 *write)
+{
+   u32 read_loc, write_loc, dsize;
+
+   /* Capture the read/write indices before they changed */
+   read_loc = READ_ONCE(rbi->ring_buffer->read_index);
+   write_loc = READ_ONCE(rbi->ring_buffer->write_index);
+   dsize = rbi->ring_datasize;
+
+   *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
+   read_loc - write_loc;
+   *read = dsize - *write;
+}
+
 /* Get various debug metrics for the specified ring buffer. */
 void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info,
 struct hv_ring_buffer_debug_info *debug_info)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index f3e97c5f94c9..5f8bd0cebddf 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -127,28 +127,6 @@ struct hv_ring_buffer_info {
u32 priv_read_index;
 };
 
-/*
- *
- * hv_get_ringbuffer_availbytes()
- *
- * Get number of bytes available to read and to write to
- * for the specified ring buffer
- */
-static inline void
-hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
-u32 *read, u32 *write)
-{
-   u32 read_loc, write_loc, dsize;
-
-   /* Capture the read/write indices before they changed */
-   read_loc = rbi->ring_buffer->read_index;
-   write_loc = rbi->ring_buffer->write_index;
-   dsize = rbi->ring_datasize;
-
-   *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
-   read_loc - write_loc;
-   *read = dsize - *write;
-}
 
 static inline u32 hv_get_bytes_to_read(const struct hv_ring_buffer_info *rbi)
 {
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 2/6] hv_netvsc: don't need local xmit_more

2017-12-01 Thread Stephen Hemminger
Since skb is always non-NULL in the copy portion of netvsc_send,
the local xmit_more variable is not needed.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index bfc79698b8f4..ee31faa67cad 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -848,7 +848,6 @@ int netvsc_send(struct net_device_context *ndev_ctx,
struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
struct sk_buff *msd_skb = NULL;
bool try_batch;
-   bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
 
/* If device is rescinded, return error and packet will get dropped. */
if (unlikely(!net_device || net_device->destroy))
@@ -922,7 +921,7 @@ int netvsc_send(struct net_device_context *ndev_ctx,
if (msdp->skb)
dev_consume_skb_any(msdp->skb);
 
-   if (xmit_more && !packet->cp_partial) {
+   if (skb->xmit_more && !packet->cp_partial) {
msdp->skb = skb;
msdp->pkt = packet;
msdp->count++;
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 5/6] hv_netvsc: optimize initialization of RNDIS header

2017-12-01 Thread Stephen Hemminger
The memset of the whole maximum possible RNDIS header is unnecessary.
For the main part of the header use a structure assignment.

No need to memset the whole per packet info. Instead rely on caller to
set what it wants. Also get rid of cast to void and signed/unsigned
conversion. Now return pointer to per packet data (rather than the
header) which simplifies use by code setting up the packet data.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc_drv.c | 57 +++--
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index ba690e1737ab..dc70de674ca9 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -177,17 +177,15 @@ static int netvsc_close(struct net_device *net)
return ret;
 }
 
-static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
-  int pkt_type)
+static inline void *init_ppi_data(struct rndis_message *msg,
+ u32 ppi_size, u32 pkt_type)
 {
-   struct rndis_packet *rndis_pkt;
+   struct rndis_packet *rndis_pkt = &msg->msg.pkt;
struct rndis_per_packet_info *ppi;
 
-   rndis_pkt = &msg->msg.pkt;
rndis_pkt->data_offset += ppi_size;
-
-   ppi = (struct rndis_per_packet_info *)((void *)rndis_pkt +
-   rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_len);
+   ppi = (void *)rndis_pkt + rndis_pkt->per_pkt_info_offset
+   + rndis_pkt->per_pkt_info_len;
 
ppi->size = ppi_size;
ppi->type = pkt_type;
@@ -195,7 +193,7 @@ static void *init_ppi_data(struct rndis_message *msg, u32 
ppi_size,
 
rndis_pkt->per_pkt_info_len += ppi_size;
 
-   return ppi;
+   return ppi + 1;
 }
 
 /* Azure hosts don't support non-TCP port numbers in hashing for fragmented
@@ -472,10 +470,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct 
net_device *net)
int ret;
unsigned int num_data_pgs;
struct rndis_message *rndis_msg;
-   struct rndis_packet *rndis_pkt;
struct net_device *vf_netdev;
u32 rndis_msg_size;
-   struct rndis_per_packet_info *ppi;
u32 hash;
struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
 
@@ -530,34 +526,36 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct 
net_device *net)
 
rndis_msg = (struct rndis_message *)skb->head;
 
-   memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
-
/* Add the rndis header */
rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
rndis_msg->msg_len = packet->total_data_buflen;
-   rndis_pkt = &rndis_msg->msg.pkt;
-   rndis_pkt->data_offset = sizeof(struct rndis_packet);
-   rndis_pkt->data_len = packet->total_data_buflen;
-   rndis_pkt->per_pkt_info_offset = sizeof(struct rndis_packet);
+
+   rndis_msg->msg.pkt = (struct rndis_packet) {
+   .data_offset = sizeof(struct rndis_packet),
+   .data_len = packet->total_data_buflen,
+   .per_pkt_info_offset = sizeof(struct rndis_packet),
+   };
 
rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
 
hash = skb_get_hash_raw(skb);
if (hash != 0 && net->real_num_tx_queues > 1) {
+   u32 *hash_info;
+
rndis_msg_size += NDIS_HASH_PPI_SIZE;
-   ppi = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
-   NBL_HASH_VALUE);
-   *(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
+   hash_info = init_ppi_data(rndis_msg, NDIS_HASH_PPI_SIZE,
+ NBL_HASH_VALUE);
+   *hash_info = hash;
}
 
if (skb_vlan_tag_present(skb)) {
struct ndis_pkt_8021q_info *vlan;
 
rndis_msg_size += NDIS_VLAN_PPI_SIZE;
-   ppi = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
-   IEEE_8021Q_INFO);
+   vlan = init_ppi_data(rndis_msg, NDIS_VLAN_PPI_SIZE,
+IEEE_8021Q_INFO);
 
-   vlan = (void *)ppi + ppi->ppi_offset;
+   vlan->value = 0;
vlan->vlanid = skb->vlan_tci & VLAN_VID_MASK;
vlan->pri = (skb->vlan_tci & VLAN_PRIO_MASK) >>
VLAN_PRIO_SHIFT;
@@ -567,11 +565,10 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct 
net_device *net)
struct ndis_tcp_lso_info *lso_info;
 
rndis_msg_size += NDIS_LSO_PPI_SIZE;
-   ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
-   TCP_LARGESEND_PKTINFO);
-
-   lso_info = (void *)ppi + ppi->ppi_offset;
+   lso_info = init_ppi_data(

[PATCH net 2/3] hv_netvsc: Limit the receive buffer size for legacy hosts

2017-12-07 Thread Stephen Hemminger
From: Haiyang Zhang 

Legacy hosts only allow a 15 MB receive buffer, and we know the
NVSP version only after negotiation. So, we put the limit in
netvsc_init_buf().

Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size")
Signed-off-by: Haiyang Zhang 
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index e4bcd202a56a..e5d16a8cf0d6 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -268,6 +268,11 @@ static int netvsc_init_buf(struct hv_device *device,
buf_size = device_info->recv_sections * device_info->recv_section_size;
buf_size = roundup(buf_size, PAGE_SIZE);
 
+   /* Legacy hosts only allow smaller receive buffer */
+   if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+   buf_size = min_t(unsigned int, buf_size,
+NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
+
net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
netdev_err(ndev,
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net 3/3] hv_netvsc: Fix the default receive buffer size

2017-12-07 Thread Stephen Hemminger
From: Haiyang Zhang 

The intended size is 16 MB, and the default slot size is 1728.
So, NETVSC_DEFAULT_RX should be 16*1024*1024 / 1728 = 9709.

Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size")
Signed-off-by: Haiyang Zhang 
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dc70de674ca9..edfcde5d3621 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -50,7 +50,7 @@
 #define NETVSC_MIN_TX_SECTIONS 10
 #define NETVSC_DEFAULT_TX  192 /* ~1M */
 #define NETVSC_MIN_RX_SECTIONS 10  /* ~64K */
-#define NETVSC_DEFAULT_RX  10485   /* Max ~16M */
+#define NETVSC_DEFAULT_RX  9709/* ~16M */
 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net 0/3] netvsc fix buffer size issues

2017-12-07 Thread Stephen Hemminger
The changes to allow setting buffer size can cause issues
on older versions of Windows Server which have smaller limits.
And the actual maximum value for WS2016 is 31M not 16M.

This is a resend of patchset that didn't make it to
netdev correctly.

Haiyang Zhang (3):
  hv_netvsc: Correct the max receive buffer size
  hv_netvsc: Limit the receive buffer size for legacy hosts
  hv_netvsc: Fix the default receive buffer size

 drivers/net/hyperv/hyperv_net.h | 6 --
 drivers/net/hyperv/netvsc.c | 5 +
 drivers/net/hyperv/netvsc_drv.c | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net 1/3] hv_netvsc: Correct the max receive buffer size

2017-12-07 Thread Stephen Hemminger
From: Haiyang Zhang 

It should be 31 MB on recent host versions.

Signed-off-by: Haiyang Zhang 
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 3d940c67ea94..373455f216ce 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -637,9 +637,11 @@ struct nvsp_message {
 #define NETVSC_MTU 65535
 #define NETVSC_MTU_MIN ETH_MIN_MTU
 
-#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16)  /* 16MB */
-#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY  (1024*1024*15)  /* 15MB */
+/* Max buffer sizes allowed by a host */
+#define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY  (1024 * 1024 * 15) /* 15MB */
 #define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15)   /* 
15MB */
+
 #define NETVSC_INVALID_INDEX   -1
 
 #define NETVSC_SEND_SECTION_SIZE   6144
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


Re: [PATCH] hyperv: make HYPERV a menuconfig to ease disabling it all

2017-12-09 Thread Stephen Hemminger
On Sat,  9 Dec 2017 16:21:51 +0100
Vincent Legoll  wrote:

> No need to get into the submenu to disable all HYPERV-related
> config entries.
> 
> This makes it easier to disable all HYPERV config options
> without entering the submenu. It will also enable one
> to see that en/dis-abled state from the outside menu.
> 
> This is only intended to change menuconfig UI, not change
> the config dependencies.
> 
> Signed-off-by: Vincent Legoll 
> ---
>  drivers/hv/Kconfig | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
> index 50b89ea0e60f..5804081d936d 100644
> --- a/drivers/hv/Kconfig
> +++ b/drivers/hv/Kconfig
> @@ -1,4 +1,7 @@
> -menu "Microsoft Hyper-V guest support"
> +menuconfig HYPERV_MENU
> + bool "Microsoft Hyper-V guest support"
> +
> +if HYPERV_MENU
>  
>  config HYPERV
>   tristate "Microsoft Hyper-V client drivers"
> @@ -23,4 +26,4 @@ config HYPERV_BALLOON
>   help
> Select this option to enable Hyper-V Balloon driver.
>  
> -endmenu
> +endif # HYPERV_MENU

Will this break existing configs?
___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 0/2] hv_netvsc: Fix default and limit of recv buffer

2017-12-11 Thread Stephen Hemminger
The default for receive buffer descriptors is not correct; it should
match the default receive buffer size. The upper limit of the receive
buffer size is also too low.  Also, for older versions of Windows Server
hosts, a different, lower limit check is necessary; otherwise the buffer
request will be rejected by the host, resulting in the vNIC not coming up.

This patch set corrects these problems.

Haiyang Zhang (2):
  hv_netvsc: Fix the receive buffer size limit
  hv_netvsc: Fix the TX/RX buffer default sizes

 drivers/net/hyperv/hyperv_net.h | 19 ---
 drivers/net/hyperv/netvsc.c |  5 +
 drivers/net/hyperv/netvsc_drv.c |  4 
 3 files changed, 21 insertions(+), 7 deletions(-)

-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 1/2] hv_netvsc: Fix the receive buffer size limit

2017-12-11 Thread Stephen Hemminger
From: Haiyang Zhang 

The max should be 31 MB on host with NVSP version > 2.

On legacy hosts (NVSP version <= 2) only a 15 MB receive buffer is allowed;
otherwise the buffer request will be rejected by the host, resulting in
the vNIC not coming up.

The NVSP version is only available after negotiation. So, we add the
limit checking for legacy hosts in netvsc_init_buf().

Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size")
Signed-off-by: Haiyang Zhang 
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h | 6 --
 drivers/net/hyperv/netvsc.c | 5 +
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 3d940c67ea94..373455f216ce 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -637,9 +637,11 @@ struct nvsp_message {
 #define NETVSC_MTU 65535
 #define NETVSC_MTU_MIN ETH_MIN_MTU
 
-#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16)  /* 16MB */
-#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY  (1024*1024*15)  /* 15MB */
+/* Max buffer sizes allowed by a host */
+#define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */
+#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY  (1024 * 1024 * 15) /* 15MB */
 #define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15)   /* 
15MB */
+
 #define NETVSC_INVALID_INDEX   -1
 
 #define NETVSC_SEND_SECTION_SIZE   6144
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index e4bcd202a56a..e5d16a8cf0d6 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -268,6 +268,11 @@ static int netvsc_init_buf(struct hv_device *device,
buf_size = device_info->recv_sections * device_info->recv_section_size;
buf_size = roundup(buf_size, PAGE_SIZE);
 
+   /* Legacy hosts only allow smaller receive buffer */
+   if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+   buf_size = min_t(unsigned int, buf_size,
+NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
+
net_device->recv_buf = vzalloc(buf_size);
if (!net_device->recv_buf) {
netdev_err(ndev,
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 2/2] hv_netvsc: Fix the TX/RX buffer default sizes

2017-12-11 Thread Stephen Hemminger
From: Haiyang Zhang 

The values were not computed correctly. There is no significant
visible impact, though.

The intended size of RX buffer is 16 MB, and the default slot size is 1728.
So, NETVSC_DEFAULT_RX should be 16*1024*1024 / 1728 = 9709.

The intended size of TX buffer is 1 MB, and the slot size is 6144.
So, NETVSC_DEFAULT_TX should be 1024*1024 / 6144 = 170.

The patch puts the formula directly into the macro, and moves them to
hyperv_net.h, together with related macros.

Fixes: 5023a6db73196 ("netvsc: increase default receive buffer size")
Signed-off-by: Haiyang Zhang 
Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h | 13 -
 drivers/net/hyperv/netvsc_drv.c |  4 
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 373455f216ce..845ddc7bba46 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -640,13 +640,24 @@ struct nvsp_message {
 /* Max buffer sizes allowed by a host */
 #define NETVSC_RECEIVE_BUFFER_SIZE (1024 * 1024 * 31) /* 31MB */
 #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY  (1024 * 1024 * 15) /* 15MB */
-#define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15)   /* 
15MB */
+#define NETVSC_RECEIVE_BUFFER_DEFAULT  (1024 * 1024 * 16)
+
+#define NETVSC_SEND_BUFFER_SIZE(1024 * 1024 * 15)  /* 
15MB */
+#define NETVSC_SEND_BUFFER_DEFAULT (1024 * 1024)
 
 #define NETVSC_INVALID_INDEX   -1
 
 #define NETVSC_SEND_SECTION_SIZE   6144
 #define NETVSC_RECV_SECTION_SIZE   1728
 
+/* Default size of TX buf: 1MB, RX buf: 16MB */
+#define NETVSC_MIN_TX_SECTIONS 10
+#define NETVSC_DEFAULT_TX  (NETVSC_SEND_BUFFER_DEFAULT \
+/ NETVSC_SEND_SECTION_SIZE)
+#define NETVSC_MIN_RX_SECTIONS 10
+#define NETVSC_DEFAULT_RX  (NETVSC_RECEIVE_BUFFER_DEFAULT \
+/ NETVSC_RECV_SECTION_SIZE)
+
 #define NETVSC_RECEIVE_BUFFER_ID   0xcafe
 #define NETVSC_SEND_BUFFER_ID  0
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dc70de674ca9..b6a434ac64d3 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -47,10 +47,6 @@
 #include "hyperv_net.h"
 
 #define RING_SIZE_MIN  64
-#define NETVSC_MIN_TX_SECTIONS 10
-#define NETVSC_DEFAULT_TX  192 /* ~1M */
-#define NETVSC_MIN_RX_SECTIONS 10  /* ~64K */
-#define NETVSC_DEFAULT_RX  10485   /* Max ~16M */
 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 0/6] hv_netvsc: minor changes

2017-12-12 Thread Stephen Hemminger
This includes minor cleanup of code in send and receive path and
also a new statistic to check for allocation failures. This also
eliminates some of the extra RCU when not needed.

There is a theoretical bug where buffered data could be blocked
for longer than necessary if the ring buffer got full. This
has not been seen in the wild; it was found by inspection.

The reference count between net device and internal RNDIS
is not needed.

Stephen Hemminger (6):
  hv_netvsc: copy_to_send buf can be void
  hv_netvsc: track memory allocation failures in ethtool stats
  hv_netvsc: simplify function args in receive status path
  hv_netvsc: pass netvsc_device to receive callback
  hv_netvsc: remove open_cnt reference count
  hv_netvsc: empty current transmit aggregation if flow blocked

 drivers/net/hyperv/hyperv_net.h   |  9 
 drivers/net/hyperv/netvsc.c   | 44 ---
 drivers/net/hyperv/netvsc_drv.c   | 33 +++--
 drivers/net/hyperv/rndis_filter.c | 29 +++---
 4 files changed, 47 insertions(+), 68 deletions(-)

-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 1/6] hv_netvsc: copy_to_send buf can be void

2017-12-12 Thread Stephen Hemminger
The only caller does not care about the return value, so make it void.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/netvsc.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index e4bcd202a56a..9407907c4988 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -696,19 +696,18 @@ static u32 netvsc_get_next_send_section(struct 
netvsc_device *net_device)
return NETVSC_INVALID_INDEX;
 }
 
-static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
-  unsigned int section_index,
-  u32 pend_size,
-  struct hv_netvsc_packet *packet,
-  struct rndis_message *rndis_msg,
-  struct hv_page_buffer *pb,
-  struct sk_buff *skb)
+static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
+   unsigned int section_index,
+   u32 pend_size,
+   struct hv_netvsc_packet *packet,
+   struct rndis_message *rndis_msg,
+   struct hv_page_buffer *pb,
+   struct sk_buff *skb)
 {
char *start = net_device->send_buf;
char *dest = start + (section_index * net_device->send_section_size)
 + pend_size;
int i;
-   u32 msg_size = 0;
u32 padding = 0;
u32 page_count = packet->cp_partial ? packet->rmsg_pgcnt :
packet->page_buf_cnt;
@@ -728,16 +727,11 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device 
*net_device,
u32 len = pb[i].len;
 
memcpy(dest, (src + offset), len);
-   msg_size += len;
dest += len;
}
 
-   if (padding) {
+   if (padding)
memset(dest, 0, padding);
-   msg_size += padding;
-   }
-
-   return msg_size;
 }
 
 static inline int netvsc_send_pkt(
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 5/6] hv_netvsc: remove open_cnt reference count

2017-12-12 Thread Stephen Hemminger
There is only ever a single instance of network device object
referencing the internal rndis object. Therefore the open_cnt atomic
is not necessary.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   |  2 --
 drivers/net/hyperv/netvsc.c   |  2 +-
 drivers/net/hyperv/rndis_filter.c | 10 +++---
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index eb01943b23c3..8ebe72bf89ff 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -810,8 +810,6 @@ struct netvsc_device {
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-   atomic_t open_cnt;
-
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
 
struct rcu_head rcu;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d8165407bcda..6dd97f232f87 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -73,7 +73,7 @@ static struct netvsc_device *alloc_net_device(void)
 
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
-   atomic_set(&net_device->open_cnt, 0);
+
net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
 
diff --git a/drivers/net/hyperv/rndis_filter.c 
b/drivers/net/hyperv/rndis_filter.c
index 025110a19d4a..035976949177 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1362,9 +1362,6 @@ int rndis_filter_open(struct netvsc_device *nvdev)
if (!nvdev)
return -EINVAL;
 
-   if (atomic_inc_return(&nvdev->open_cnt) != 1)
-   return 0;
-
return rndis_filter_open_device(nvdev->extension);
 }
 
@@ -1373,13 +1370,12 @@ int rndis_filter_close(struct netvsc_device *nvdev)
if (!nvdev)
return -EINVAL;
 
-   if (atomic_dec_return(&nvdev->open_cnt) != 0)
-   return 0;
-
return rndis_filter_close_device(nvdev->extension);
 }
 
 bool rndis_filter_opened(const struct netvsc_device *nvdev)
 {
-   return atomic_read(&nvdev->open_cnt) > 0;
+   const struct rndis_device *dev = nvdev->extension;
+
+   return dev->state == RNDIS_DEV_DATAINITIALIZED;
 }
-- 
2.11.0

___
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel


[PATCH net-next 3/6] hv_netvsc: simplify function args in receive status path

2017-12-12 Thread Stephen Hemminger
The caller (netvsc_receive) already has the net device pointer,
and should just pass that to functions rather than the hyperv device.
This eliminates several impossible error paths in the process.

Signed-off-by: Stephen Hemminger 
---
 drivers/net/hyperv/hyperv_net.h   |  3 +--
 drivers/net/hyperv/netvsc.c   |  2 +-
 drivers/net/hyperv/netvsc_drv.c   | 12 ++--
 drivers/net/hyperv/rndis_filter.c |  9 +++--
 4 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 42bbde1cbe45..6463b7f5aa00 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -199,7 +199,7 @@ int netvsc_send(struct net_device_context *ndc,
struct rndis_message *rndis_msg,
struct hv_page_buffer *page_buffer,
struct sk_buff *skb);
-void netvsc_linkstatus_callback(struct hv_device *device_obj,
+void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp);
 int netvsc_recv_callback(struct net_device *net,
 struct vmbus_channel *channel,
@@ -222,7 +222,6 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
   const u8 *key);
 int rndis_filter_receive(struct net_device *ndev,
 struct netvsc_device *net_dev,
-struct hv_device *dev,
 struct vmbus_channel *channel,
 void *data, u32 buflen);
 
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9407907c4988..d8165407bcda 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1077,7 +1077,7 @@ static int netvsc_receive(struct net_device *ndev,
u32 buflen = vmxferpage_packet->ranges[i].byte_count;
 
/* Pass it to the upper layer */
-   status = rndis_filter_receive(ndev, net_device, device,
+   status = rndis_filter_receive(ndev, net_device,
  channel, data, buflen);
}
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d16b68974d80..6f12f81fd8aa 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -656,22 +656,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
 /*
  * netvsc_linkstatus_callback - Link up/down notification
  */
-void netvsc_linkstatus_callback(struct hv_device *device_obj,
+void netvsc_linkstatus_callback(struct net_device *net,
struct rndis_message *resp)
 {
struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
-   struct net_device *net;
-   struct net_device_context *ndev_ctx;
+   struct net_device_context *ndev_ctx = netdev_priv(net);
struct netvsc_reconfig *event;
unsigned long flags;
 
-   net = hv_get_drvdata(device_obj);
-
-   if (!net)
-   return;
-
-   ndev_ctx = netdev_priv(net);
-
/* Update the physical link speed when changing to another vSwitch */
if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
u32 speed;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 673492063307..901838b2bcc9 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -134,11 +134,9 @@ static void put_rndis_request(struct rndis_device *dev,
kfree(req);
 }
 
-static void dump_rndis_message(struct hv_device *hv_dev,
+static void dump_rndis_message(struct net_device *netdev,
   const struct rndis_message *rndis_msg)
 {
-   struct net_device *netdev = hv_get_drvdata(hv_dev);
-
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, "
@@ -397,7 +395,6 @@ static int rndis_filter_receive_data(struct net_device *ndev,
 
 int rndis_filter_receive(struct net_device *ndev,
 struct netvsc_device *net_dev,
-struct hv_device *dev,
 struct vmbus_channel *channel,
 void *data, u32 buflen)
 {
@@ -419,7 +416,7 @@ int rndis_filter_receive(struct net_device *ndev,
}
 
if (netif_msg_rx_status(net_device_ctx))
-   dump_rndis_message(dev, rndis_msg);
+   dump_rndis_message(ndev, rndis_msg);
 
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
@@ -434,7 +431,7 @@ int rndis_filter_receive(struct net_device *ndev,
 
case RNDIS_MSG_INDICATE:
/* notification msgs */
-   netvsc_linkstatus_callback(dev, rndis_msg);
+   netvsc_linkstatus_callback(ndev, rndis_msg);
break;
default:
  

  1   2   3   4   5   6   >