[PATCH] [SCSI] sr: Fix multi-drive performance by using per-device mutexes

2013-01-01 Thread Otto Meta
The single mutex for the sr module, introduced as a BKL replacement,
globally serialises all sr ioctls, which hurts multi-drive performance.

This patch replaces sr_mutex with per-device mutexes in struct scsi_cd,
allowing concurrent ioctls on different sr devices.

Signed-off-by: Otto Meta 
---

This issue was discussed before, so here are some relevant archived messages:

sr_mutex was introduced as a BKL replacement:
"[PATCH 7/7] block: autoconvert trivial BKL users to private mutex"
https://lkml.org/lkml/2010/9/14/338

The resulting performance penalty was first mentioned here along with an
easy to reproduce demonstration, but apparently received no attention:
"scsi/sr - Impossible to write CDs/DVDs simultaneously (since BKL removal ?)"
https://lkml.org/lkml/2011/10/29/117

A while later, the problem was discussed in this thread:
"[PATCH] [SCSI] sr: fix multi-drive performance, remove BKL replacement"
https://lkml.org/lkml/2012/2/28/230

More discussion in this thread:
"Burning multiple DVDs at one time"
https://lkml.org/lkml/2012/3/15/558

A short summary of the discussion:
- The driver-global mutex serialises all sr ioctls because it's not released
  while the calling process sleeps, unlike the BKL.
- This hurts when using multiple drives, such as during burning or raw reads.
- Removing the mutex completely fixes the performance penalty but exposes
  some data to race conditions.
- Arnd Bergmann had an idea similar to mine, namely replacing sr_mutex
  with a per-device mutex inside of cdrom_device_info. James Bottomley and
  Stefan Richter agreed with that:
  https://lkml.org/lkml/2012/2/28/312

After running into the issue myself while using the CD recovery program
dvdisaster, I believe the following patch fixes the issue and should
provide the same safety as the previous sr_mutex.

Unlike Arnd's suggestion, this solution places the mutex in struct scsi_cd
and thus only affects the sr driver, not the cdrom driver.

sr.h and sr.c haven't seen any changes since at least 3.0.57 and I've
tested the patch with both 3.2.35 and 3.8-rc1.


diff -Naurp linux-3.8-rc1.orig/drivers/scsi/sr.c 
linux-3.8-rc1.fixed/drivers/scsi/sr.c
--- linux-3.8-rc1.orig/drivers/scsi/sr.c2012-12-22 02:19:00.0 
+0100
+++ linux-3.8-rc1.fixed/drivers/scsi/sr.c   2013-01-01 12:56:47.127203406 
+0100
@@ -75,7 +75,6 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM);
 CDC_CD_R|CDC_CD_RW|CDC_DVD|CDC_DVD_R|CDC_DVD_RAM|CDC_GENERIC_PACKET| \
 CDC_MRW|CDC_MRW_W|CDC_RAM)

-static DEFINE_MUTEX(sr_mutex);
 static int sr_probe(struct device *);
 static int sr_remove(struct device *);
 static int sr_done(struct scsi_cmnd *);
@@ -511,24 +510,24 @@ static int sr_block_open(struct block_de
struct scsi_cd *cd;
int ret = -ENXIO;

-   mutex_lock(&sr_mutex);
cd = scsi_cd_get(bdev->bd_disk);
if (cd) {
+   mutex_lock(&cd->lock);
ret = cdrom_open(&cd->cdi, bdev, mode);
+   mutex_unlock(&cd->lock);
if (ret)
scsi_cd_put(cd);
}
-   mutex_unlock(&sr_mutex);
return ret;
 }

 static int sr_block_release(struct gendisk *disk, fmode_t mode)
 {
struct scsi_cd *cd = scsi_cd(disk);
-   mutex_lock(&sr_mutex);
+   mutex_lock(&cd->lock);
cdrom_release(&cd->cdi, mode);
+   mutex_unlock(&cd->lock);
scsi_cd_put(cd);
-   mutex_unlock(&sr_mutex);
return 0;
 }

@@ -540,7 +539,7 @@ static int sr_block_ioctl(struct block_d
void __user *argp = (void __user *)arg;
int ret;

-   mutex_lock(&sr_mutex);
+   mutex_lock(&cd->lock);

/*
 * Send SCSI addressing ioctls directly to mid level, send other
@@ -570,7 +569,7 @@ static int sr_block_ioctl(struct block_d
ret = scsi_ioctl(sdev, cmd, argp);

 out:
-   mutex_unlock(&sr_mutex);
+   mutex_unlock(&cd->lock);
return ret;
 }

@@ -660,6 +659,8 @@ static int sr_probe(struct device *dev)
if (!disk)
goto fail_free;

+   mutex_init(&cd->lock);
+
spin_lock(&sr_index_lock);
minor = find_first_zero_bit(sr_index_bits, SR_DISKS);
if (minor == SR_DISKS) {
@@ -722,6 +723,7 @@ static int sr_probe(struct device *dev)

 fail_put:
put_disk(disk);
+   mutex_destroy(&cd->lock);
 fail_free:
kfree(cd);
 fail:
@@ -958,6 +960,8 @@ static void sr_kref_release(struct kref

put_disk(disk);

+   mutex_destroy(&cd->lock);
+
kfree(cd);
 }

diff -Naurp linux-3.8-rc1.orig/drivers/scsi/sr.h 
linux-3.8-rc1.fixed/drivers/scsi/sr.h
--- linux-3.8-rc1.orig/drivers/scsi/sr.h2012-12-22 02:19:00.0 
+0100
+++ linux-3.8-rc1.fixed/drivers/scsi/sr.h   2013-01-01 12:56:47.127203406 
+0100
@@ -19,6 +19,7 @@

 #include 
 #include 
+#include 

 #define MAX_RETRIES3
 #define SR_TIMEOUT (30 * HZ)
@@ -49,6 +50,7 @@ typedef struct scsi_cd {
bool ignore_get_event:1;/* GET_EVENT is unrelia

Re: [PATCH v2 1/5] virtio: add functions for piecewise addition of buffers

2013-01-01 Thread Rusty Russell
Paolo Bonzini  writes:
> The virtqueue_add_buf function has two limitations:
>
> 1) it requires the caller to provide all the buffers in a single call;
>
> 2) it does not support chained scatterlists: the buffers must be
> provided as an array of struct scatterlist;

Chained scatterlists are a horrible interface, but that doesn't mean we
shouldn't support them if there's a need.

I think I once even had a patch which passed two chained sgs, rather
than a combo sg and two length numbers.  It's very old, but I've pasted
it below.

Duplicating the implementation by having another interface is pretty
nasty; I think I'd prefer the chained scatterlists, if that's optimal
for you.

Cheers,
Rusty.

From: Rusty Russell 
Subject: virtio: use chained scatterlists.

Rather than handing a scatterlist[] and out and in numbers to
virtqueue_add_buf(), hand two separate ones which can be chained.

I shall refrain from ranting about what a disgusting hack chained
scatterlists are.  I'll just note that this doesn't make things
simpler (see diff).

The scatterlists we use can be too large for the stack, so we put them
in our device struct and reuse them.  But in many cases we don't want
to pay the cost of sg_init_table() as we don't know how many elements
we'll have and we'd have to initialize the entire table.

This means we have two choices: carefully reset the end markers after
we call virtqueue_add_buf(), which we do in virtio_net for the xmit
path where it's easy and we want to be optimal.  Elsewhere we
implement a helper to unset the end markers after we've filled the
array.

Signed-off-by: Rusty Russell 
---
 drivers/block/virtio_blk.c  |   37 +-
 drivers/char/hw_random/virtio-rng.c |2 -
 drivers/char/virtio_console.c   |6 +--
 drivers/net/virtio_net.c|   67 ++---
 drivers/virtio/virtio_balloon.c |6 +--
 drivers/virtio/virtio_ring.c|   71 ++--
 include/linux/virtio.h  |5 +-
 net/9p/trans_virtio.c   |   38 +--
 8 files changed, 151 insertions(+), 81 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -100,6 +100,14 @@ static void blk_done(struct virtqueue *v
spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
 }
 
+static void sg_unset_end_markers(struct scatterlist *sg, unsigned int num)
+{
+   unsigned int i;
+
+   for (i = 0; i < num; i++)
+   sg[i].page_link &= ~0x02;
+}
+
 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
   struct request *req)
 {
@@ -140,6 +148,7 @@ static bool do_req(struct request_queue 
}
}
 
+   /* We layout out scatterlist in a single array, out then in. */
sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
 
/*
@@ -151,17 +160,8 @@ static bool do_req(struct request_queue 
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
 
+   /* This marks the end of the sg list at vblk->sg[out]. */
num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
-
-   if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
-   sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 
SCSI_SENSE_BUFFERSIZE);
-   sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
-  sizeof(vbr->in_hdr));
-   }
-
-   sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
-  sizeof(vbr->status));
-
if (num) {
if (rq_data_dir(vbr->req) == WRITE) {
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@@ -172,7 +172,22 @@ static bool do_req(struct request_queue 
}
}
 
-   if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
+   if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
+   sg_set_buf(&vblk->sg[out + in++], vbr->req->sense,
+  SCSI_SENSE_BUFFERSIZE);
+   sg_set_buf(&vblk->sg[out + in++], &vbr->in_hdr,
+  sizeof(vbr->in_hdr));
+   }
+
+   sg_set_buf(&vblk->sg[out + in++], &vbr->status,
+  sizeof(vbr->status));
+
+   sg_unset_end_markers(vblk->sg, out+in);
+   sg_mark_end(&vblk->sg[out-1]);
+   sg_mark_end(&vblk->sg[out+in-1]);
+
+   if (virtqueue_add_buf(vblk->vq, vblk->sg, vblk->sg+out, vbr, GFP_ATOMIC)
+   < 0) {
mempool_free(vbr, vblk->pool);
return false;
}
diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -47,7 +47,7 @@ static void register_buffer(u8 *buf, siz
sg_init_one(&sg, buf, size);
 
/* There sh