[PATCH net 2/2] virtio_net: fix missing lock protection on control_buf access

2024-05-28 Thread Heng Qi
Refactored the handling of control_buf to be within the cvq_lock
critical section, mitigating race conditions between reading device
responses and new command submissions.

Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
Signed-off-by: Heng Qi 
---
 drivers/net/virtio_net.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6b0512a628e0..3d8407d9e3d2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
 {
struct scatterlist *sgs[5], hdr, stat;
u32 out_num = 0, tmp, in_num = 0;
+   bool ret;
int err;
 
/* Caller should know better */
@@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
}
 
 unlock:
+   ret = vi->ctrl->status == VIRTIO_NET_OK;
mutex_unlock(&vi->cvq_lock);
-   return vi->ctrl->status == VIRTIO_NET_OK;
+   return ret;
 }
 
 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
-- 
2.32.0.3.g01195cf9f




[PATCH net 0/2] virtio_net: fix race on control_buf

2024-05-28 Thread Heng Qi
Patch 1 is a simple rename that frees up 'ret' for patch 2.
Patch 2 fixes a race between reading the device response and a
new command submission.

Heng Qi (2):
  virtio_net: rename ret to err
  virtio_net: fix missing lock protection on control_buf access

 drivers/net/virtio_net.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

-- 
2.32.0.3.g01195cf9f




[PATCH net 1/2] virtio_net: rename ret to err

2024-05-28 Thread Heng Qi
The integer variable 'ret', denoting the return code, is mismatched with
the boolean return type of virtnet_send_command_reply(); hence, it is
renamed to 'err'.

The usage of 'ret' is deferred to the next patch.

Signed-off-by: Heng Qi 
---
 drivers/net/virtio_net.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4a802c0ea2cb..6b0512a628e0 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2686,7 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
 {
struct scatterlist *sgs[5], hdr, stat;
u32 out_num = 0, tmp, in_num = 0;
-   int ret;
+   int err;
 
/* Caller should know better */
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
@@ -2710,10 +2710,10 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
sgs[out_num + in_num++] = in;
 
BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
-   ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
-   if (ret < 0) {
+   err = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
+   if (err < 0) {
dev_warn(&vi->vdev->dev,
-"Failed to add sgs for command vq: %d\n.", ret);
+"Failed to add sgs for command vq: %d\n.", err);
mutex_unlock(&vi->cvq_lock);
return false;
}
-- 
2.32.0.3.g01195cf9f




[PATCH net 1/2] virtio_net: rename ret to err

2024-05-28 Thread Hariprasad Kelam



> -----Original Message-----
> From: Heng Qi 
> Sent: Tuesday, May 28, 2024 1:22 PM
> To: netdev@vger.kernel.org; virtualizat...@lists.linux.dev
> Cc: Michael S. Tsirkin ; Jason Wang
> ; Xuan Zhuo ;
> Eugenio Pérez ; David S. Miller
> ; Eric Dumazet ; Jakub
> Kicinski ; Paolo Abeni ; Jiri Pirko
> ; Daniel Jurgens 
> Subject: [EXTERNAL] [PATCH net 1/2] virtio_net: rename ret to err
> 
> The integer variable 'ret', denoting the return code, is mismatched with the
> boolean return type of virtnet_send_command_reply(); hence, it is renamed
> to 'err'.
> 
> The usage of 'ret' is deferred to the next patch.
> 
> Signed-off-by: Heng Qi 
> ---
>  drivers/net/virtio_net.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 4a802c0ea2cb..6b0512a628e0 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2686,7 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
>  {
>   struct scatterlist *sgs[5], hdr, stat;
>   u32 out_num = 0, tmp, in_num = 0;
> - int ret;
> + int err;
> 
>   /* Caller should know better */
>   BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
> @@ -2710,10 +2710,10 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
>   sgs[out_num + in_num++] = in;
> 
>   BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
> - ret = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
> - if (ret < 0) {
> + err = virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC);
> + if (err < 0) {
>   dev_warn(&vi->vdev->dev,
> -  "Failed to add sgs for command vq: %d\n.", ret);
> +  "Failed to add sgs for command vq: %d\n.", err);
>   mutex_unlock(&vi->cvq_lock);
>   return false;
>   }
> --
> 2.32.0.3.g01195cf9f
> 
Reviewed-by: Hariprasad Kelam 



[PATCH net 2/2] virtio_net: fix missing lock protection on control_buf access

2024-05-28 Thread Hariprasad Kelam



> Refactored the handling of control_buf to be within the cvq_lock critical
> section, mitigating race conditions between reading device responses and
> new command submissions.
> 
> Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
> Signed-off-by: Heng Qi 
> ---
>  drivers/net/virtio_net.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 6b0512a628e0..3d8407d9e3d2 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
>  {
>   struct scatterlist *sgs[5], hdr, stat;
>   u32 out_num = 0, tmp, in_num = 0;
> + bool ret;
>   int err;
> 
>   /* Caller should know better */
> @@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
>   }
> 
>  unlock:
> + ret = vi->ctrl->status == VIRTIO_NET_OK;
>   mutex_unlock(&vi->cvq_lock);
> - return vi->ctrl->status == VIRTIO_NET_OK;
> + return ret;
>  }
> 
>  static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> --
> 2.32.0.3.g01195cf9f
> 
Reviewed-by: Hariprasad Kelam 



Re: [PATCH net v2 2/2] Revert "virtio_net: Add a lock for per queue RX coalesce"

2024-05-28 Thread Paolo Abeni
On Tue, 2024-05-28 at 11:06 +0800, Heng Qi wrote:
> On Mon, 27 May 2024 12:42:43 +0200, Paolo Abeni  wrote:
> > On Thu, 2024-05-23 at 15:46 +0800, Heng Qi wrote:
> > > This reverts commit 4d4ac2ececd3c42a08dd32a6e3a4aaf25f7efe44.
> > > 
> > > When the following snippet is run, lockdep will report a deadlock[1].
> > > 
> > >   /* Acquire all queues dim_locks */
> > >   for (i = 0; i < vi->max_queue_pairs; i++)
> > >   mutex_lock(&vi->rq[i].dim_lock);
> > > 
> > > There's no deadlock here because the vq locks are always taken
> > > in the same order, but lockdep can not figure it out, and we
> > > can not make each lock a separate class because there can be more
> > > than MAX_LOCKDEP_SUBCLASSES of vqs.
> > > 
> > > However, dropping the lock is harmless:
> > >   1. If dim is enabled, modifications made by dim worker to coalescing
> > >  params may cause the user's query results to be dirty data.
> > 
> > It looks like the above can confuse the user-space/admin?
> 
> Maybe, but we don't seem to guarantee this --
> the global query interface (.get_coalesce) cannot 
> guarantee correct results when the DIM and .get_per_queue_coalesce are 
> present:
> 
> 1. DIM has been around for a long time (it will modify the per-queue 
> parameters),
>but many nics only have interfaces for querying global parameters.
> 2. Some nics provide the .get_per_queue_coalesce interface, it is not
>synchronized with DIM.
> 
> So I think this is acceptable.

Yes, the above sounds acceptable to me.

> > Have you considered instead re-factoring
> > virtnet_send_rx_notf_coal_cmds() to avoid acquiring all the mutex in
> > sequence?
> 
> Perhaps it is a way to not traverse and update the parameters of each queue
> in the global settings interface.

I'm wondering if something as dumb as the following would suffice? Not
even compile-tested.
---
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4a802c0ea2cb..d844f4c89152 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4267,27 +4267,27 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
   ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
return -EINVAL;
 
-   /* Acquire all queues dim_locks */
-   for (i = 0; i < vi->max_queue_pairs; i++)
-   mutex_lock(&vi->rq[i].dim_lock);
-
if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
vi->rx_dim_enabled = true;
-   for (i = 0; i < vi->max_queue_pairs; i++)
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].dim_enabled = true;
-   goto unlock;
+   mutex_unlock(&vi->rq[i].dim_lock);
+   }
+   return 0;
}
 
coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
-   if (!coal_rx) {
-   ret = -ENOMEM;
-   goto unlock;
-   }
+   if (!coal_rx)
+   return -ENOMEM;
 
if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
vi->rx_dim_enabled = false;
-   for (i = 0; i < vi->max_queue_pairs; i++)
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].dim_enabled = false;
+   mutex_unlock(&vi->rq[i].dim_lock);
+   }
}
 
/* Since the per-queue coalescing params can be set,
@@ -4300,21 +4300,17 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
 
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
- &sgs_rx)) {
-   ret = -EINVAL;
-   goto unlock;
-   }
+ &sgs_rx))
+   return -EINVAL;
 
vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
-   }
-unlock:
-   for (i = vi->max_queue_pairs - 1; i >= 0; i--)
mutex_unlock(&vi->rq[i].dim_lock);
-
+   }
return ret;
 }
---

Otherwise I think you need to add {READ,WRITE}_ONCE annotations while
touching the dim fields to avoid data races.
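For readers who haven't met these annotations: a minimal sketch of what
the lockless alternative could look like. Field names follow
drivers/net/virtio_net.c, but this is illustrative only, not a proposed
patch.

	/* Writer side (ethtool path), without taking rq->dim_lock.
	 * WRITE_ONCE() keeps the compiler from tearing or reordering
	 * the store. */
	WRITE_ONCE(vi->rq[i].dim_enabled, rx_ctrl_dim_on);

	/* Reader side (dim path). READ_ONCE() pairs with the marked
	 * store above, so the read cannot be torn or silently re-loaded. */
	if (!READ_ONCE(rq->dim_enabled))
		return;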

Thanks,

Paolo








Re: [PATCH net v2 2/2] Revert "virtio_net: Add a lock for per queue RX coalesce"

2024-05-28 Thread Heng Qi



On 2024/5/28 6:04 PM, Paolo Abeni wrote:

On Tue, 2024-05-28 at 11:06 +0800, Heng Qi wrote:

On Mon, 27 May 2024 12:42:43 +0200, Paolo Abeni  wrote:

On Thu, 2024-05-23 at 15:46 +0800, Heng Qi wrote:

This reverts commit 4d4ac2ececd3c42a08dd32a6e3a4aaf25f7efe44.

When the following snippet is run, lockdep will report a deadlock[1].

   /* Acquire all queues dim_locks */
   for (i = 0; i < vi->max_queue_pairs; i++)
   mutex_lock(&vi->rq[i].dim_lock);

There's no deadlock here because the vq locks are always taken
in the same order, but lockdep can not figure it out, and we
can not make each lock a separate class because there can be more
than MAX_LOCKDEP_SUBCLASSES of vqs.

However, dropping the lock is harmless:
   1. If dim is enabled, modifications made by dim worker to coalescing
  params may cause the user's query results to be dirty data.

It looks like the above can confuse the user-space/admin?

Maybe, but we don't seem to guarantee this --
the global query interface (.get_coalesce) cannot
guarantee correct results when the DIM and .get_per_queue_coalesce are present:

1. DIM has been around for a long time (it will modify the per-queue 
parameters),
but many nics only have interfaces for querying global parameters.
2. Some nics provide the .get_per_queue_coalesce interface, it is not
synchronized with DIM.

So I think this is acceptable.

Yes, the above sounds acceptable to me.


Have you considered instead re-factoring
virtnet_send_rx_notf_coal_cmds() to avoid acquiring all the mutex in
sequence?

Perhaps it is a way to not traverse and update the parameters of each queue
in the global settings interface.

I'm wondering if something as dumb as the following would suffice? Not
even compile-tested.


This alleviates the problem, and I would like to repost this fix.

Thanks.

---
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4a802c0ea2cb..d844f4c89152 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4267,27 +4267,27 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
   ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
	return -EINVAL;
 
-   /* Acquire all queues dim_locks */
-   for (i = 0; i < vi->max_queue_pairs; i++)
-   mutex_lock(&vi->rq[i].dim_lock);
-
	if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
	vi->rx_dim_enabled = true;
-   for (i = 0; i < vi->max_queue_pairs; i++)
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
	vi->rq[i].dim_enabled = true;
-   goto unlock;
+   mutex_unlock(&vi->rq[i].dim_lock);
+   }
+   return 0;
	}
 
	coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
-   if (!coal_rx) {
-   ret = -ENOMEM;
-   goto unlock;
-   }
+   if (!coal_rx)
+   return -ENOMEM;
 
	if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
	vi->rx_dim_enabled = false;
-   for (i = 0; i < vi->max_queue_pairs; i++)
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
	vi->rq[i].dim_enabled = false;
+   mutex_unlock(&vi->rq[i].dim_lock);
+   }
	}
 
	/* Since the per-queue coalescing params can be set,
@@ -4300,21 +4300,17 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
 
	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
- &sgs_rx)) {
-   ret = -EINVAL;
-   goto unlock;
-   }
+ &sgs_rx))
+   return -EINVAL;
 
	vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
	vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
	for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
	vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
	vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
-   }
-unlock:
-   for (i = vi->max_queue_pairs - 1; i >= 0; i--)
	mutex_unlock(&vi->rq[i].dim_lock);
-
+   }
	return ret;
 }
---

Otherwise I think you need to add {READ,WRITE}_ONCE annotations while
touching the dim fields to avoid data races.

Thanks,

Paolo








[PATCH net v3 0/2] virtio_net: fix lock warning and unrecoverable state

2024-05-28 Thread Heng Qi
Patch 1 describes and fixes an issue where dim cannot return to
normal state in certain scenarios.

Patch 2 attempts to resolve lockdep's complaint about holding many
nested locks.

Changelog
===
v2->v3:
  Patch(2/2): Refactor the code instead of reverting the patch.

v1->v2:
  Patch(2/2): rephrase the commit log.

Heng Qi (2):
  virtio_net: fix possible dim status unrecoverable
  virtio_net: fix a spurious deadlock issue

 drivers/net/virtio_net.c | 38 +-
 1 file changed, 17 insertions(+), 21 deletions(-)

-- 
2.32.0.3.g01195cf9f




[PATCH net v3 1/2] virtio_net: fix possible dim status unrecoverable

2024-05-28 Thread Heng Qi
If the dim worker is scheduled but no longer needs to issue
commands, dim may not be able to return to the working state later.

For example, the following single queue scenario:
  1. The dim worker of rxq0 is scheduled, and the dim status is
 changed to DIM_APPLY_NEW_PROFILE;
  2. dim is disabled or parameters have not been modified;
  3. virtnet_rx_dim_work exits directly;

Then, even if net_dim is invoked again, it cannot work because the
state is not restored to DIM_START_MEASURE.
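
For context, a rough paraphrase of the net_dim() dispatch in
lib/dim/net_dim.c (hedged sketch, not verbatim kernel code): samples only
advance the algorithm from DIM_START_MEASURE or DIM_MEASURE_IN_PROGRESS,
so a state left at DIM_APPLY_NEW_PROFILE silently drops all later samples.

	switch (dim->state) {
	case DIM_MEASURE_IN_PROGRESS:
		/* compare stats with the previous window; if a new profile
		 * is chosen, set DIM_APPLY_NEW_PROFILE, schedule the worker
		 * and break, else fall through */
	case DIM_START_MEASURE:
		/* open a new measurement window */
		dim->state = DIM_MEASURE_IN_PROGRESS;
		break;
	case DIM_APPLY_NEW_PROFILE:
		/* worker has not finished: the sample is ignored */
		break;
	}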

Fixes: 6208799553a8 ("virtio-net: support rx netdim")
Signed-off-by: Heng Qi 
Reviewed-by: Jiri Pirko 
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4a802c0ea2cb..4f828a9e5889 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4417,9 +4417,9 @@ static void virtnet_rx_dim_work(struct work_struct *work)
if (err)
pr_debug("%s: Failed to send dim parameters on rxq%d\n",
 dev->name, qnum);
-   dim->state = DIM_START_MEASURE;
}
 out:
+   dim->state = DIM_START_MEASURE;
mutex_unlock(&rq->dim_lock);
 }
 
-- 
2.32.0.3.g01195cf9f




[PATCH net v3 2/2] virtio_net: fix a spurious deadlock issue

2024-05-28 Thread Heng Qi
When the following snippet is run, lockdep will report a deadlock[1].

  /* Acquire all queues dim_locks */
  for (i = 0; i < vi->max_queue_pairs; i++)
  mutex_lock(&vi->rq[i].dim_lock);

There's no deadlock here because the vq locks are always taken
in the same order, but lockdep cannot figure that out. So refactor
the code to alleviate the problem.

[1]

WARNING: possible recursive locking detected
6.9.0-rc7+ #319 Not tainted

ethtool/962 is trying to acquire lock:

but task is already holding lock:

other info that might help us debug this:
Possible unsafe locking scenario:

  CPU0
  
 lock(&vi->rq[i].dim_lock);
 lock(&vi->rq[i].dim_lock);

*** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by ethtool/962:
 #0: 82dbaab0 (cb_lock){}-{3:3}, at: genl_rcv+0x19/0x40
 #1: 82dad0a8 (rtnl_mutex){+.+.}-{3:3}, at:
ethnl_default_set_doit+0xbe/0x1e0

stack backtrace:
CPU: 6 PID: 962 Comm: ethtool Not tainted 6.9.0-rc7+ #319
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
   rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Call Trace:
 
 dump_stack_lvl+0x79/0xb0
 check_deadlock+0x130/0x220
 __lock_acquire+0x861/0x990
 lock_acquire.part.0+0x72/0x1d0
 ? lock_acquire+0xf8/0x130
 __mutex_lock+0x71/0xd50
 virtnet_set_coalesce+0x151/0x190
 __ethnl_set_coalesce.isra.0+0x3f8/0x4d0
 ethnl_set_coalesce+0x34/0x90
 ethnl_default_set_doit+0xdd/0x1e0
 genl_family_rcv_msg_doit+0xdc/0x130
 genl_family_rcv_msg+0x154/0x230
 ? __pfx_ethnl_default_set_doit+0x10/0x10
 genl_rcv_msg+0x4b/0xa0
 ? __pfx_genl_rcv_msg+0x10/0x10
 netlink_rcv_skb+0x5a/0x110
 genl_rcv+0x28/0x40
 netlink_unicast+0x1af/0x280
 netlink_sendmsg+0x20e/0x460
 __sys_sendto+0x1fe/0x210
 ? find_held_lock+0x2b/0x80
 ? do_user_addr_fault+0x3a2/0x8a0
 ? __lock_release+0x5e/0x160
 ? do_user_addr_fault+0x3a2/0x8a0
 ? lock_release+0x72/0x140
 ? do_user_addr_fault+0x3a7/0x8a0
 __x64_sys_sendto+0x29/0x30
 do_syscall_64+0x78/0x180
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: 4d4ac2ececd3 ("virtio_net: Add a lock for per queue RX coalesce")
Signed-off-by: Heng Qi 
---
 drivers/net/virtio_net.c | 36 
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4f828a9e5889..ecb5203d0372 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4257,7 +4257,6 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
	struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
	bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
	struct scatterlist sgs_rx;
-   int ret = 0;
	int i;
 
	if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
@@ -4267,27 +4266,27 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
   ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
return -EINVAL;
 
-   /* Acquire all queues dim_locks */
-   for (i = 0; i < vi->max_queue_pairs; i++)
-   mutex_lock(&vi->rq[i].dim_lock);
-
if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
vi->rx_dim_enabled = true;
-   for (i = 0; i < vi->max_queue_pairs; i++)
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].dim_enabled = true;
-   goto unlock;
+   mutex_unlock(&vi->rq[i].dim_lock);
+   }
+   return 0;
}
 
coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
-   if (!coal_rx) {
-   ret = -ENOMEM;
-   goto unlock;
-   }
+   if (!coal_rx)
+   return -ENOMEM;
 
if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
vi->rx_dim_enabled = false;
-   for (i = 0; i < vi->max_queue_pairs; i++)
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   mutex_lock(&vi->rq[i].dim_lock);
vi->rq[i].dim_enabled = false;
+   mutex_unlock(&vi->rq[i].dim_lock);
+   }
}
 
/* Since the per-queue coalescing params can be set,
@@ -4300,22 +4299,19 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
 
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
- &sgs_rx)) {
-   ret = -EINVAL;
-   goto unlock;
-   }
+ &sgs_rx))
+   return -EINVAL;
 
vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
  

Re: How to implement message forwarding from one CID to another in vhost driver

2024-05-28 Thread Stefano Garzarella

On Mon, May 27, 2024 at 09:54:17AM GMT, Alexander Graf wrote:


On 27.05.24 09:08, Alexander Graf wrote:

Hey Stefano,

On 23.05.24 10:45, Stefano Garzarella wrote:

On Tue, May 21, 2024 at 08:50:22AM GMT, Alexander Graf wrote:

Howdy,

On 20.05.24 14:44, Dorjoy Chowdhury wrote:

Hey Stefano,

Thanks for the reply.


On Mon, May 20, 2024, 2:55 PM Stefano Garzarella 
 wrote:

Hi Dorjoy,

On Sat, May 18, 2024 at 04:17:38PM GMT, Dorjoy Chowdhury wrote:

Hi,

Hope you are doing well. I am working on adding AWS Nitro Enclave[1]
emulation support in QEMU. Alexander Graf is mentoring me 
on this work. A v1
patch series has already been posted to the qemu-devel 
mailing list[2].


AWS nitro enclaves is an Amazon EC2[3] feature that allows 
creating isolated
execution environments, called enclaves, from Amazon EC2 
instances, which are
used for processing highly sensitive data. Enclaves have 
no persistent storage
and no external networking. The enclave VMs are based on 
Firecracker microvm
and have a vhost-vsock device for communication with the 
parent EC2 instance
that spawned it and a Nitro Secure Module (NSM) device for 
cryptographic
attestation. The parent instance VM always has CID 3 while 
the enclave VM gets
a dynamic CID. The enclave VMs can communicate with the 
parent instance over
various ports to CID 3, for example, the init process 
inside an enclave sends a
heartbeat to port 9000 upon boot, expecting a heartbeat 
reply, letting the

parent instance know that the enclave VM has successfully booted.

The plan is to eventually make the nitro enclave emulation 
in QEMU standalone

i.e., without needing to run another VM with CID 3 with proper vsock
If you don't have to launch another VM, maybe we can avoid 
vhost-vsock
and emulate virtio-vsock in user-space, having complete 
control over the

behavior.

So we could use this opportunity to implement virtio-vsock 
in QEMU [4]

or use vhost-user-vsock [5] and customize it somehow.
(Note: vhost-user-vsock already supports sibling 
communication, so maybe

with a few modifications it fits your case perfectly)

[4] https://gitlab.com/qemu-project/qemu/-/issues/2095
[5] https://github.com/rust-vmm/vhost-device/tree/main/vhost-device-vsock



Thanks for letting me know. Right now I don't have a complete picture
but I will look into them. Thank you.



communication support. For this to work, one approach 
could be to teach the

vhost driver in kernel to forward CID 3 messages to another CID N

So in this case both CID 3 and N would be assigned to the same QEMU
process?



CID N is assigned to the enclave VM. CID 3 was supposed to be the
parent VM that spawns the enclave VM (this is how it is in AWS, where
an EC2 instance VM spawns the enclave VM from inside it and that
parent EC2 instance always has CID 3). But in the QEMU case as we
don't want a parent VM (we want to run enclave VMs standalone) we
would need to forward the CID 3 messages to host CID. I don't know if
it means CID 3 and CID N is assigned to the same QEMU process. Sorry.



There are 2 use cases here:

1) Enclave wants to treat host as parent (default). In this 
scenario,

the "parent instance" that shows up as CID 3 in the Enclave doesn't
really exist. Instead, when the Enclave attempts to talk to CID 3, it
should really land on CID 0 (hypervisor). When the hypervisor tries to
connect to the Enclave on port X, it should look as if it originates
from CID 3, not CID 0.

2) Multiple parent VMs. Think of an actual cloud hosting scenario.
Here, we have multiple "parent instances". Each of them thinks it's
CID 3. Each can spawn an Enclave that talks to CID 3 and reach the
parent. For this case, I think implementing all of virtio-vsock in
user space is the best path forward. But in theory, you could also
swizzle CIDs to make random "real" CIDs appear as CID 3.



Thank you for clarifying the use cases!

Also for case 1, vhost-vsock doesn't support CID 0, so in my opinion
it's easier to go into user-space with vhost-user-vsock or the built-in
device.
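
For reference, the well-known CIDs from the uapi header; note that the
"parent instance is always CID 3" rule discussed above is an AWS Nitro
convention, not part of the vsock ABI:

	/* <linux/vm_sockets.h> */
	#define VMADDR_CID_HYPERVISOR 0  /* the hypervisor itself */
	#define VMADDR_CID_LOCAL      1  /* same-host (loopback) */
	#define VMADDR_CID_HOST       2  /* processes on the host */
	/* guest CIDs start at 3 */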



Sorry, I believe I meant CID 2. Effectively for case 1, when a 
process on the hypervisor listens on port 1234, it should be visible 
as 3:1234 from the VM and when the hypervisor process connects to 
:1234, it should look as if that connection came from CID 3.



Now that I'm thinking about my message again: What if we just introduce 
a sysfs/sysctl file for vsock that indicates the "host CID" (default: 
2)? Users that want vhost-vsock to behave as if the host is CID 3 can 
just write 3 to it.


I don't know if I understand the final use case well, so I'll try to 
summarize it:


what you would like is the ability to send/receive messages between the 
host and a guest as if the host were a sibling VM, i.e. as if it had a CID 
!= 2 (in your case 3). The important point is to use AF_VSOCK in the host 
application, not a unix socket as with Firecracker.


Is this correct?

I thought you were using firecracker for this scenario, so it seemed to 
make sense to e

Re: How to implement message forwarding from one CID to another in vhost driver

2024-05-28 Thread Paolo Bonzini

On 5/27/24 09:54, Alexander Graf wrote:


On 27.05.24 09:08, Alexander Graf wrote:

Hey Stefano,

On 23.05.24 10:45, Stefano Garzarella wrote:

On Tue, May 21, 2024 at 08:50:22AM GMT, Alexander Graf wrote:

Howdy,

On 20.05.24 14:44, Dorjoy Chowdhury wrote:

Hey Stefano,

Thanks for the reply.


On Mon, May 20, 2024, 2:55 PM Stefano Garzarella 
 wrote:

Hi Dorjoy,

On Sat, May 18, 2024 at 04:17:38PM GMT, Dorjoy Chowdhury wrote:

Hi,

Hope you are doing well. I am working on adding AWS Nitro Enclave[1]
emulation support in QEMU. Alexander Graf is mentoring me on this 
work. A v1
patch series has already been posted to the qemu-devel mailing 
list[2].


AWS nitro enclaves is an Amazon EC2[3] feature that allows 
creating isolated
execution environments, called enclaves, from Amazon EC2 
instances, which are
used for processing highly sensitive data. Enclaves have no 
persistent storage
and no external networking. The enclave VMs are based on 
Firecracker microvm
and have a vhost-vsock device for communication with the parent 
EC2 instance
that spawned it and a Nitro Secure Module (NSM) device for 
cryptographic
attestation. The parent instance VM always has CID 3 while the 
enclave VM gets
a dynamic CID. The enclave VMs can communicate with the parent 
instance over
various ports to CID 3, for example, the init process inside an 
enclave sends a
heartbeat to port 9000 upon boot, expecting a heartbeat reply, 
letting the

parent instance know that the enclave VM has successfully booted.

The plan is to eventually make the nitro enclave emulation in 
QEMU standalone

i.e., without needing to run another VM with CID 3 with proper vsock
If you don't have to launch another VM, maybe we can avoid 
vhost-vsock
and emulate virtio-vsock in user-space, having complete control 
over the

behavior.

So we could use this opportunity to implement virtio-vsock in QEMU 
[4]

or use vhost-user-vsock [5] and customize it somehow.
(Note: vhost-user-vsock already supports sibling communication, so 
maybe

with a few modifications it fits your case perfectly)

[4] https://gitlab.com/qemu-project/qemu/-/issues/2095
[5] 
https://github.com/rust-vmm/vhost-device/tree/main/vhost-device-vsock



Thanks for letting me know. Right now I don't have a complete picture
but I will look into them. Thank you.



communication support. For this to work, one approach could be to 
teach the

vhost driver in kernel to forward CID 3 messages to another CID N

So in this case both CID 3 and N would be assigned to the same QEMU
process?



CID N is assigned to the enclave VM. CID 3 was supposed to be the
parent VM that spawns the enclave VM (this is how it is in AWS, where
an EC2 instance VM spawns the enclave VM from inside it and that
parent EC2 instance always has CID 3). But in the QEMU case as we
don't want a parent VM (we want to run enclave VMs standalone) we
would need to forward the CID 3 messages to host CID. I don't know if
it means CID 3 and CID N is assigned to the same QEMU process. Sorry.



There are 2 use cases here:

1) Enclave wants to treat host as parent (default). In this scenario,
the "parent instance" that shows up as CID 3 in the Enclave doesn't
really exist. Instead, when the Enclave attempts to talk to CID 3, it
should really land on CID 0 (hypervisor). When the hypervisor tries to
connect to the Enclave on port X, it should look as if it originates
from CID 3, not CID 0.

2) Multiple parent VMs. Think of an actual cloud hosting scenario.
Here, we have multiple "parent instances". Each of them thinks it's
CID 3. Each can spawn an Enclave that talks to CID 3 and reach the
parent. For this case, I think implementing all of virtio-vsock in
user space is the best path forward. But in theory, you could also
swizzle CIDs to make random "real" CIDs appear as CID 3.



Thank you for clarifying the use cases!

Also for case 1, vhost-vsock doesn't support CID 0, so in my opinion
it's easier to go into user-space with vhost-user-vsock or the built-in
device.



Sorry, I believe I meant CID 2. Effectively for case 1, when a process 
on the hypervisor listens on port 1234, it should be visible as 3:1234 
from the VM and when the hypervisor process connects to :1234, 
it should look as if that connection came from CID 3.



Now that I'm thinking about my message again: What if we just introduce 
a sysfs/sysctl file for vsock that indicates the "host CID" (default: 
2)? Users that want vhost-vsock to behave as if the host is CID 3 can 
just write 3 to it.


It means we'd need to change all references to VMADDR_CID_HOST to 
instead refer to a global variable that indicates the new "host CID". 
It'd need some more careful massaging to not break number namespace 
assumptions (<= CID_HOST no longer works), but the idea should fly.


Forwarding one or more ports of a given CID to CID 2 (the host) should 
be doable with a dummy vhost client that listens to CID 3, connects to 
CID 2 and send data back and forth.  Not hard enough to justify ch

Re: How to implement message forwarding from one CID to another in vhost driver

2024-05-28 Thread Stefano Garzarella

On Tue, May 28, 2024 at 05:19:34PM GMT, Paolo Bonzini wrote:

On 5/27/24 09:54, Alexander Graf wrote:


On 27.05.24 09:08, Alexander Graf wrote:

Hey Stefano,

On 23.05.24 10:45, Stefano Garzarella wrote:

On Tue, May 21, 2024 at 08:50:22AM GMT, Alexander Graf wrote:

Howdy,

On 20.05.24 14:44, Dorjoy Chowdhury wrote:

Hey Stefano,

Thanks for the reply.


On Mon, May 20, 2024, 2:55 PM Stefano Garzarella 
 wrote:

Hi Dorjoy,

On Sat, May 18, 2024 at 04:17:38PM GMT, Dorjoy Chowdhury wrote:

Hi,

Hope you are doing well. I am working on adding AWS Nitro Enclave[1]
emulation support in QEMU. Alexander Graf is mentoring 
me on this work. A v1
patch series has already been posted to the qemu-devel 
mailing list[2].


AWS nitro enclaves is an Amazon EC2[3] feature that 
allows creating isolated
execution environments, called enclaves, from Amazon EC2 
instances, which are
used for processing highly sensitive data. Enclaves have 
no persistent storage
and no external networking. The enclave VMs are based on 
Firecracker microvm
and have a vhost-vsock device for communication with the 
parent EC2 instance
that spawned it and a Nitro Secure Module (NSM) device 
for cryptographic
attestation. The parent instance VM always has CID 3 
while the enclave VM gets
a dynamic CID. The enclave VMs can communicate with the 
parent instance over
various ports to CID 3, for example, the init process 
inside an enclave sends a
heartbeat to port 9000 upon boot, expecting a heartbeat 
reply, letting the

parent instance know that the enclave VM has successfully booted.

The plan is to eventually make the nitro enclave 
emulation in QEMU standalone

i.e., without needing to run another VM with CID 3 with proper vsock
If you don't have to launch another VM, maybe we can avoid 
vhost-vsock
and emulate virtio-vsock in user-space, having complete 
control over the

behavior.

So we could use this opportunity to implement virtio-vsock 
in QEMU [4]

or use vhost-user-vsock [5] and customize it somehow.
(Note: vhost-user-vsock already supports sibling 
communication, so maybe

with a few modifications it fits your case perfectly)

[4] https://gitlab.com/qemu-project/qemu/-/issues/2095
[5] https://github.com/rust-vmm/vhost-device/tree/main/vhost-device-vsock



Thanks for letting me know. Right now I don't have a complete picture
but I will look into them. Thank you.



communication support. For this to work, one approach 
could be to teach the

vhost driver in kernel to forward CID 3 messages to another CID N

So in this case both CID 3 and N would be assigned to the same QEMU
process?



CID N is assigned to the enclave VM. CID 3 was supposed to be the
parent VM that spawns the enclave VM (this is how it is in AWS, where
an EC2 instance VM spawns the enclave VM from inside it and that
parent EC2 instance always has CID 3). But in the QEMU case as we
don't want a parent VM (we want to run enclave VMs standalone) we
would need to forward the CID 3 messages to host CID. I don't know if
it means CID 3 and CID N is assigned to the same QEMU process. Sorry.



There are 2 use cases here:

1) Enclave wants to treat host as parent (default). In this scenario,
the "parent instance" that shows up as CID 3 in the Enclave doesn't
really exist. Instead, when the Enclave attempts to talk to CID 3, it
should really land on CID 0 (hypervisor). When the hypervisor tries to
connect to the Enclave on port X, it should look as if it originates
from CID 3, not CID 0.

2) Multiple parent VMs. Think of an actual cloud hosting scenario.
Here, we have multiple "parent instances". Each of them thinks it's
CID 3. Each can spawn an Enclave that talks to CID 3 and reach the
parent. For this case, I think implementing all of virtio-vsock in
user space is the best path forward. But in theory, you could also
swizzle CIDs to make random "real" CIDs appear as CID 3.



Thank you for clarifying the use cases!

Also for case 1, vhost-vsock doesn't support CID 0, so in my opinion
it's easier to go into user-space with vhost-user-vsock or the built-in
device.



Sorry, I believe I meant CID 2. Effectively for case 1, when a 
process on the hypervisor listens on port 1234, it should be 
visible as 3:1234 from the VM and when the hypervisor process 
connects to :1234, it should look as if that connection 
came from CID 3.



Now that I'm thinking about my message again: What if we just 
introduce a sysfs/sysctl file for vsock that indicates the "host 
CID" (default: 2)? Users that want vhost-vsock to behave as if the 
host is CID 3 can just write 3 to it.


It means we'd need to change all references to VMADDR_CID_HOST to 
instead refer to a global variable that indicates the new "host 
CID". It'd need some more careful massaging to not break number 
namespace assumptions (<= CID_HOST no longer works), but the idea 
should fly.


Forwarding one or more ports of a given CID to CID 2 (the host) should 
be doable with a dummy vhost client that listens to CID 3, connects to 
CID

Re: [PATCH net 2/2] virtio_net: fix missing lock protection on control_buf access

2024-05-28 Thread Michael S. Tsirkin
On Tue, May 28, 2024 at 03:52:26PM +0800, Heng Qi wrote:
> Refactored the handling of control_buf to be within the cvq_lock
> critical section, mitigating race conditions between reading device
> responses and new command submissions.
> 
> Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
> Signed-off-by: Heng Qi 


I don't get what does this change. status can change immediately
after you drop the mutex, can it not? what exactly is the
race conditions you are worried about?

> ---
>  drivers/net/virtio_net.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 6b0512a628e0..3d8407d9e3d2 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
>  {
>   struct scatterlist *sgs[5], hdr, stat;
>   u32 out_num = 0, tmp, in_num = 0;
> + bool ret;
>   int err;
>  
>   /* Caller should know better */
> @@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
>   }
>  
>  unlock:
> + ret = vi->ctrl->status == VIRTIO_NET_OK;
>   mutex_unlock(&vi->cvq_lock);
> - return vi->ctrl->status == VIRTIO_NET_OK;
> + return ret;
>  }
>  
>  static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> -- 
> 2.32.0.3.g01195cf9f




Re: How to implement message forwarding from one CID to another in vhost driver

2024-05-28 Thread Paolo Bonzini
On Tue, May 28, 2024 at 5:41 PM Stefano Garzarella  wrote:
> >I think it's either that or implementing virtio-vsock in userspace
> >(https://lore.kernel.org/qemu-devel/30baeb56-64d2-4ea3-8e53-6a5c50999...@redhat.com/,
> >search for "To connect host<->guest").
>
> For in this case AF_VSOCK can't be used in the host, right?
> So it's similar to vhost-user-vsock.

Not sure if I understand but in this case QEMU knows which CIDs are
forwarded to the host (either listen on vsock and connect to the host,
or vice versa), so there is no kernel and no VMADDR_FLAG_TO_HOST
involved.

Paolo




Re: How to implement message forwarding from one CID to another in vhost driver

2024-05-28 Thread Stefano Garzarella

On Tue, May 28, 2024 at 05:49:32PM GMT, Paolo Bonzini wrote:

On Tue, May 28, 2024 at 5:41 PM Stefano Garzarella  wrote:

>I think it's either that or implementing virtio-vsock in userspace
>(https://lore.kernel.org/qemu-devel/30baeb56-64d2-4ea3-8e53-6a5c50999...@redhat.com/,
>search for "To connect host<->guest").

For in this case AF_VSOCK can't be used in the host, right?
So it's similar to vhost-user-vsock.


Not sure if I understand but in this case QEMU knows which CIDs are
forwarded to the host (either listen on vsock and connect to the host,
or vice versa), so there is no kernel and no VMADDR_FLAG_TO_HOST
involved.



I meant that the application in the host that wants to connect to the 
guest cannot use AF_VSOCK in the host, but must use the one where QEMU 
is listening (e.g. AF_INET, AF_UNIX), right?


I think one of Alex's requirements was that the application in the host 
continue to use AF_VSOCK as in their environment.


Stefano




Re: [PATCH net 2/2] virtio_net: fix missing lock protection on control_buf access

2024-05-28 Thread Heng Qi
On Tue, 28 May 2024 11:46:28 -0400, "Michael S. Tsirkin"  
wrote:
> On Tue, May 28, 2024 at 03:52:26PM +0800, Heng Qi wrote:
> > Refactored the handling of control_buf to be within the cvq_lock
> > critical section, mitigating race conditions between reading device
> > responses and new command submissions.
> > 
> > Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
> > Signed-off-by: Heng Qi 
> 
> 
> I don't get what does this change. status can change immediately
> after you drop the mutex, can it not? what exactly is the
> race conditions you are worried about?

See the following case:

1. Command A is acknowledged and successfully executed by the device.
2. After releasing the mutex (mutex_unlock), process P1 gets preempted before
   it can read vi->ctrl->status, *which should be VIRTIO_NET_OK*.
3. A new command B (like the DIM command) is issued.
4. Post vi->ctrl->status being set to VIRTIO_NET_ERR by
   virtnet_send_command_reply(), process P2 gets preempted.
5. Process P1 resumes, reads *vi->ctrl->status as VIRTIO_NET_ERR*, and reports
   this error back for Command A. <-- Race causes incorrect results to be read.

Thanks.
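
(For illustration, the same lock/read ordering reduced to a userspace
pthread sketch; this is an analogy of the pattern, not driver code:)

	#include <pthread.h>
	#include <stdbool.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static int status;	/* plays the role of vi->ctrl->status */

	/* Buggy: the reply is read after the lock is dropped, so a newer
	 * command can overwrite it first (steps 2-5 above). */
	static bool send_cmd_buggy(int reply)
	{
		pthread_mutex_lock(&lock);
		status = reply;		/* device completes the command */
		pthread_mutex_unlock(&lock);
		/* preemption window: a new command may run here */
		return status == 0;	/* may observe the *next* reply */
	}

	/* Fixed: snapshot the reply while still holding the lock,
	 * which is what the patch does. */
	static bool send_cmd_fixed(int reply)
	{
		bool ok;

		pthread_mutex_lock(&lock);
		status = reply;
		ok = (status == 0);	/* 0 == VIRTIO_NET_OK in this analogy */
		pthread_mutex_unlock(&lock);
		return ok;
	}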

> 
> > ---
> >  drivers/net/virtio_net.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 6b0512a628e0..3d8407d9e3d2 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
> >  {
> > struct scatterlist *sgs[5], hdr, stat;
> > u32 out_num = 0, tmp, in_num = 0;
> > +   bool ret;
> > int err;
> >  
> > /* Caller should know better */
> > @@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
> > }
> >  
> >  unlock:
> > +   ret = vi->ctrl->status == VIRTIO_NET_OK;
> > mutex_unlock(&vi->cvq_lock);
> > -   return vi->ctrl->status == VIRTIO_NET_OK;
> > +   return ret;
> >  }
> >  
> >  static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > -- 
> > 2.32.0.3.g01195cf9f
> 



Re: How to implement message forwarding from one CID to another in vhost driver

2024-05-28 Thread Paolo Bonzini
On Tue, May 28, 2024 at 5:53 PM Stefano Garzarella  wrote:
>
> On Tue, May 28, 2024 at 05:49:32PM GMT, Paolo Bonzini wrote:
> >On Tue, May 28, 2024 at 5:41 PM Stefano Garzarella  
> >wrote:
> >> >I think it's either that or implementing virtio-vsock in userspace
> >> >(https://lore.kernel.org/qemu-devel/30baeb56-64d2-4ea3-8e53-6a5c50999...@redhat.com/,
> >> >search for "To connect host<->guest").
> >>
> >> For in this case AF_VSOCK can't be used in the host, right?
> >> So it's similar to vhost-user-vsock.
> >
> >Not sure if I understand but in this case QEMU knows which CIDs are
> >forwarded to the host (either listen on vsock and connect to the host,
> >or vice versa), so there is no kernel and no VMADDR_FLAG_TO_HOST
> >involved.
>
> I meant that the application in the host that wants to connect to the
> guest cannot use AF_VSOCK in the host, but must use the one where QEMU
> is listening (e.g. AF_INET, AF_UNIX), right?
>
> I think one of Alex's requirements was that the application in the host
> continue to use AF_VSOCK as in their environment.

Can the host use VMADDR_CID_LOCAL for host-to-host communication? If
so, the proposed "-object vsock-forward" syntax can connect to it and
it should work as long as the application on the host does not assume
that it is on CID 3.
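
(For reference: with the vsock_loopback transport loaded, host-to-host
communication over CID 1 does work; a minimal client sketch, hypothetical
port number, assuming a local listener is already bound:)

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/vm_sockets.h>

	int main(void)
	{
		struct sockaddr_vm sa = {
			.svm_family = AF_VSOCK,
			.svm_cid = VMADDR_CID_LOCAL,	/* CID 1: same host */
			.svm_port = 9000,		/* hypothetical port */
		};
		int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

		if (fd < 0 || connect(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
			perror("vsock connect");
			return 1;
		}
		write(fd, "ping\n", 5);
		close(fd);
		return 0;
	}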

Paolo




Re: [PATCH net 2/2] virtio_net: fix missing lock protection on control_buf access

2024-05-28 Thread Michael S. Tsirkin
On Wed, May 29, 2024 at 12:01:45AM +0800, Heng Qi wrote:
> On Tue, 28 May 2024 11:46:28 -0400, "Michael S. Tsirkin"  
> wrote:
> > On Tue, May 28, 2024 at 03:52:26PM +0800, Heng Qi wrote:
> > > Refactored the handling of control_buf to be within the cvq_lock
> > > critical section, mitigating race conditions between reading device
> > > responses and new command submissions.
> > > 
> > > Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
> > > Signed-off-by: Heng Qi 
> > 
> > 
> > I don't get what does this change. status can change immediately
> > after you drop the mutex, can it not? what exactly is the
> > race conditions you are worried about?
> 
> See the following case:
> 
> 1. Command A is acknowledged and successfully executed by the device.
> 2. After releasing the mutex (mutex_unlock), process P1 gets preempted before
>it can read vi->ctrl->status, *which should be VIRTIO_NET_OK*.
> 3. A new command B (like the DIM command) is issued.
> 4. Post vi->ctrl->status being set to VIRTIO_NET_ERR by
>virtnet_send_command_reply(), process P2 gets preempted.
> 5. Process P1 resumes, reads *vi->ctrl->status as VIRTIO_NET_ERR*, and reports
>this error back for Command A. <-- Race causes incorrect results to be 
> read.
> 
> Thanks.


Why is it important that P1 gets VIRTIO_NET_OK?
After all it is no longer the state.

> > 
> > > ---
> > >  drivers/net/virtio_net.c | 4 +++-
> > >  1 file changed, 3 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 6b0512a628e0..3d8407d9e3d2 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
> > >  {
> > >   struct scatterlist *sgs[5], hdr, stat;
> > >   u32 out_num = 0, tmp, in_num = 0;
> > > + bool ret;
> > >   int err;
> > >  
> > >   /* Caller should know better */
> > > @@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
> > >   }
> > >  
> > >  unlock:
> > > + ret = vi->ctrl->status == VIRTIO_NET_OK;
> > >   mutex_unlock(&vi->cvq_lock);
> > > - return vi->ctrl->status == VIRTIO_NET_OK;
> > > + return ret;
> > >  }
> > >  
> > >  static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > -- 
> > > 2.32.0.3.g01195cf9f
> > 




Re: [PATCH net 2/2] virtio_net: fix missing lock protection on control_buf access

2024-05-28 Thread Heng Qi
On Tue, 28 May 2024 12:45:32 -0400, "Michael S. Tsirkin"  
wrote:
> On Wed, May 29, 2024 at 12:01:45AM +0800, Heng Qi wrote:
> > On Tue, 28 May 2024 11:46:28 -0400, "Michael S. Tsirkin"  
> > wrote:
> > > On Tue, May 28, 2024 at 03:52:26PM +0800, Heng Qi wrote:
> > > > Refactored the handling of control_buf to be within the cvq_lock
> > > > critical section, mitigating race conditions between reading device
> > > > responses and new command submissions.
> > > > 
> > > > Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
> > > > Signed-off-by: Heng Qi 
> > > 
> > > 
> > > I don't get what does this change. status can change immediately
> > > after you drop the mutex, can it not? what exactly is the
> > > race conditions you are worried about?
> > 
> > See the following case:
> > 
> > 1. Command A is acknowledged and successfully executed by the device.
> > 2. After releasing the mutex (mutex_unlock), process P1 gets preempted 
> > before
> >it can read vi->ctrl->status, *which should be VIRTIO_NET_OK*.
> > 3. A new command B (like the DIM command) is issued.
> > 4. Post vi->ctrl->status being set to VIRTIO_NET_ERR by
> >virtnet_send_command_reply(), process P2 gets preempted.
> > 5. Process P1 resumes, reads *vi->ctrl->status as VIRTIO_NET_ERR*, and 
> > reports
> >this error back for Command A. <-- Race causes incorrect results to be 
> > read.
> > 
> > Thanks.
> 
> 
> Why is it important that P1 gets VIRTIO_NET_OK?
> After all it is no longer the state.

The driver needs to know whether the command actually executed successfully.

Thanks.

> 
> > > 
> > > > ---
> > > >  drivers/net/virtio_net.c | 4 +++-
> > > >  1 file changed, 3 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index 6b0512a628e0..3d8407d9e3d2 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
> > > >  {
> > > > struct scatterlist *sgs[5], hdr, stat;
> > > > u32 out_num = 0, tmp, in_num = 0;
> > > > +   bool ret;
> > > > int err;
> > > >  
> > > > /* Caller should know better */
> > > > @@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
> > > > }
> > > >  
> > > >  unlock:
> > > > +   ret = vi->ctrl->status == VIRTIO_NET_OK;
> > > > mutex_unlock(&vi->cvq_lock);
> > > > -   return vi->ctrl->status == VIRTIO_NET_OK;
> > > > +   return ret;
> > > >  }
> > > >  
> > > >  static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
> > > > -- 
> > > > 2.32.0.3.g01195cf9f
> > > 
>