Re: [PATCH v2 03/17] compat_ioctl: use correct compat_ptr() translation in drivers

2018-09-13 Thread Felipe Balbi
Arnd Bergmann  writes:

> A handful of drivers all have a trivial wrapper around their ioctl
> handler, but don't call the compat_ptr() conversion function at the
> moment. In practice this does not matter, since none of them are used
> on the s390 architecture and for all other architectures, compat_ptr()
> does not do anything, but using the new generic_compat_ioctl_ptrarg
> helper makes it more correct in theory, and simplifies the code.
>
> Signed-off-by: Arnd Bergmann 

Acked-by: Felipe Balbi 

-- 
balbi


signature.asc
Description: PGP signature


[PATCH RFC] scsi: ufs: Disable blk-mq for now

2018-09-13 Thread Adrian Hunter
blk-mq does not support runtime pm, so disable blk-mq support for now.

Fixes: d5038a13eca7 ("scsi: core: switch to scsi-mq by default")
Signed-off-by: Adrian Hunter 
---
 drivers/scsi/ufs/ufshcd.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 40548bae8efa..a4d36497a047 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -7975,6 +7975,13 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba 
**hba_handle)
err = -ENOMEM;
goto out_error;
}
+
+   /*
+* Do not use blk-mq at this time because blk-mq does not support
+* runtime pm.
+*/
+   host->use_blk_mq = false;
+
hba = shost_priv(host);
hba->host = host;
hba->dev = dev;
-- 
2.17.1



Re: [PATCH RFC] scsi: ufs: Disable blk-mq for now

2018-09-13 Thread Ming Lei
On Thu, Sep 13, 2018 at 02:28:48PM +0300, Adrian Hunter wrote:
> blk-mq does not support runtime pm, so disable blk-mq support for now.

So could you describe a bit what the issue you are trying to fix?

This is host level runtime PM you are trying to address, and if blk-mq
runtime isn't enabled, I guess the host won't be runtime suspended at all
because some of its descendant are always active.

So seems we need to do nothing for preventing the host controller from
entering runtime suspend.

Thanks,
Ming


[PATCH V3 01/17] blk-mq: allow to pass default queue flags for creating & initializing queue

2018-09-13 Thread Ming Lei
Prepare for converting the flag of BLK_MQ_F_NO_SCHED into per-queue
flag, since the following patches need this way for supporting per-host
admin queue.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-mq.c | 16 +---
 include/linux/blk-mq.h | 19 ---
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 85a1c1a59c72..d524efc5d1bc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2476,7 +2476,8 @@ void blk_mq_release(struct request_queue *q)
free_percpu(q->queue_ctx);
 }
 
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
+struct request_queue *__blk_mq_init_queue(struct blk_mq_tag_set *set,
+ unsigned long def_flags)
 {
struct request_queue *uninit_q, *q;
 
@@ -2484,13 +2485,13 @@ struct request_queue *blk_mq_init_queue(struct 
blk_mq_tag_set *set)
if (!uninit_q)
return ERR_PTR(-ENOMEM);
 
-   q = blk_mq_init_allocated_queue(set, uninit_q);
+   q = __blk_mq_init_allocated_queue(set, uninit_q, def_flags);
if (IS_ERR(q))
blk_cleanup_queue(uninit_q);
 
return q;
 }
-EXPORT_SYMBOL(blk_mq_init_queue);
+EXPORT_SYMBOL(__blk_mq_init_queue);
 
 static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
 {
@@ -2564,8 +2565,9 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set 
*set,
blk_mq_sysfs_register(q);
 }
 
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
- struct request_queue *q)
+struct request_queue *__blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+   struct request_queue *q,
+   unsigned long def_flags)
 {
/* mark the queue as mq asap */
q->mq_ops = set->ops;
@@ -2599,7 +2601,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct 
blk_mq_tag_set *set,
 
q->nr_queues = nr_cpu_ids;
 
-   q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
+   q->queue_flags |= def_flags;
 
if (!(set->flags & BLK_MQ_F_SG_MERGE))
queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
@@ -2649,7 +2651,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct 
blk_mq_tag_set *set,
q->mq_ops = NULL;
return ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL(blk_mq_init_allocated_queue);
+EXPORT_SYMBOL(__blk_mq_init_allocated_queue);
 
 void blk_mq_free_queue(struct request_queue *q)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1da59c16f637..7f6ecd7b35ce 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -200,9 +200,22 @@ enum {
((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
-struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
- struct request_queue *q);
+struct request_queue *__blk_mq_init_queue(struct blk_mq_tag_set *, unsigned 
long);
+struct request_queue *__blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+   struct request_queue *q,
+   unsigned long def_flags);
+
+static inline struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set 
*set)
+{
+   return __blk_mq_init_queue(set, QUEUE_FLAG_MQ_DEFAULT);
+}
+
+static inline struct request_queue *
+blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue 
*q)
+{
+   return __blk_mq_init_allocated_queue(set, q, QUEUE_FLAG_MQ_DEFAULT);
+}
+
 int blk_mq_register_dev(struct device *, struct request_queue *);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
-- 
2.9.5



[PATCH V3 05/17] SCSI: try to retrieve request_queue via 'scsi_cmnd' if possible

2018-09-13 Thread Ming Lei
Prepare for introducing the per-host admin queue.

The most important part is that the request originated from admin queue
can't be called back to the IO queue associated with scsi_device, especially,
one request may be requeued, timedout or completed via block layer helper, so
what we should do is to use 'scsi_cmnd->request->q' to retrieve the request
queue, and pass that to block layer helper, instead of
sdev->request_queue.

Fortunately most users of 'scsi_device->request_queue' aren't in the related IO
path (requeue, timeout, complete, run queue), so the audit isn't more difficult
than I thought.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/ata/libata-eh.c | 2 +-
 drivers/scsi/libsas/sas_ata.c   | 2 +-
 drivers/scsi/libsas/sas_scsi_host.c | 2 +-
 drivers/scsi/scsi_error.c   | 2 +-
 drivers/scsi/scsi_lib.c | 6 +++---
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 01306c018398..fbceea6b62a9 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -919,7 +919,7 @@ static void ata_eh_set_pending(struct ata_port *ap, int 
fastdrain)
 void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
 {
struct ata_port *ap = qc->ap;
-   struct request_queue *q = qc->scsicmd->device->request_queue;
+   struct request_queue *q = qc->scsicmd->request->q;
unsigned long flags;
 
WARN_ON(!ap->ops->error_handler);
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 64a958a99f6a..fcf46437b756 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -601,7 +601,7 @@ void sas_ata_task_abort(struct sas_task *task)
 
/* Bounce SCSI-initiated commands to the SCSI EH */
if (qc->scsicmd) {
-   struct request_queue *q = qc->scsicmd->device->request_queue;
+   struct request_queue *q = qc->scsicmd->request->q;
unsigned long flags;
 
spin_lock_irqsave(q->queue_lock, flags);
diff --git a/drivers/scsi/libsas/sas_scsi_host.c 
b/drivers/scsi/libsas/sas_scsi_host.c
index 33229348dcb6..91e192f93ae1 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -933,7 +933,7 @@ void sas_task_abort(struct sas_task *task)
if (dev_is_sata(task->dev)) {
sas_ata_task_abort(task);
} else {
-   struct request_queue *q = sc->device->request_queue;
+   struct request_queue *q = sc->request->q;
unsigned long flags;
 
spin_lock_irqsave(q->queue_lock, flags);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index b7a8fdfeb2f4..9f19c80b983c 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1360,7 +1360,7 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
int i, rtn = NEEDS_RETRY;
 
for (i = 0; rtn == NEEDS_RETRY && i < 2; i++)
-   rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, 
scmd->device->request_queue->rq_timeout, 0);
+   rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, 
scmd->request->q->rq_timeout, 0);
 
if (rtn == SUCCESS)
return 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index eb97d2dd3651..6fb8fd3ccc2c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -168,7 +168,7 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
 static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy)
 {
struct scsi_device *device = cmd->device;
-   struct request_queue *q = device->request_queue;
+   struct request_queue *q = cmd->request->q;
unsigned long flags;
 
SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd,
@@ -668,7 +668,7 @@ static bool scsi_end_request(struct request *req, 
blk_status_t error,
 {
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
struct scsi_device *sdev = cmd->device;
-   struct request_queue *q = sdev->request_queue;
+   struct request_queue *q = cmd->request->q;
 
if (blk_update_request(req, error, bytes))
return true;
@@ -1038,7 +1038,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd 
*cmd, int result,
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
int result = cmd->result;
-   struct request_queue *q = cmd->device->request_queue;
+   struct request_queue *q = cmd->request->q;
struct request *req = cmd->request;
blk_status_t blk_stat = BLK_STS_OK;
 
-- 
2.9.5



[PATCH V3 04/17] blk-mq: don't reserve tags for admin queue

2018-09-13 Thread Ming Lei
Not necessary to reserve tags for the admin queue since there usually
aren't many inflight commands in the admin queue.

This change won't starve the admin queue either, because each blocked queue
has equal priority to get one new tag when a driver tag is released,
no matter which queue it is freed from.

So that IO performance won't be affected after admin queue(shared tags
with IO queues) is introduced in the following patches.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-mq-tag.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 94e1ed667b6e..7b0390f1c764 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -30,7 +30,8 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
-   !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+   !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+   !blk_queue_admin(hctx->queue))
atomic_inc(&hctx->tags->active_queues);
 
return true;
@@ -57,7 +58,8 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return;
 
-   atomic_dec(&tags->active_queues);
+   if (!blk_queue_admin(hctx->queue))
+   atomic_dec(&tags->active_queues);
 
blk_mq_tag_wakeup_all(tags, false);
 }
@@ -82,6 +84,12 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
if (bt->sb.depth == 1)
return true;
 
+   /*
+* Needn't to deal with admin queue specially here even though we
+* don't take it account to tags->active_queues, so blk_queue_admin()
+* can be avoided to check in the fast path, also with implicit benefit
+* of avoiding too many in-flight admin requests
+*/
users = atomic_read(&hctx->tags->active_queues);
if (!users)
return true;
-- 
2.9.5



[PATCH V3 06/17] SCSI: pass 'scsi_device' instance from 'scsi_request'

2018-09-13 Thread Ming Lei
This patch prepares for introducing SCSI per-host admin queue, which
is only used for queuing admin requests, which are now submitted via
__scsi_execute().

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 2 ++
 include/scsi/scsi_request.h | 5 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6fb8fd3ccc2c..2800dfae19cd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -279,6 +279,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned 
char *cmd,
rq->cmd_len = COMMAND_SIZE(cmd[0]);
memcpy(rq->cmd, cmd, rq->cmd_len);
rq->retries = retries;
+   rq->sdev = sdev;/* only valid in submit path */
+
req->timeout = timeout;
req->cmd_flags |= flags;
req->rq_flags |= rq_flags | RQF_QUIET;
diff --git a/include/scsi/scsi_request.h b/include/scsi/scsi_request.h
index b06f28c74908..0de6901b48ab 100644
--- a/include/scsi/scsi_request.h
+++ b/include/scsi/scsi_request.h
@@ -14,7 +14,10 @@ struct scsi_request {
unsigned intsense_len;
unsigned intresid_len;  /* residual count */
int retries;
-   void*sense;
+   union {
+   void*sense;
+   struct scsi_device *sdev;
+   };
 };
 
 static inline struct scsi_request *scsi_req(struct request *rq)
-- 
2.9.5



[PATCH V3 03/17] block: rename QUEUE_FLAG_NO_SCHED as QUEUE_FLAG_ADMIN

2018-09-13 Thread Ming Lei
Now all users of QUEUE_FLAG_NO_SCHED are for the admin queue only, and no
drivers appear to need this flag for an IO queue.

So rename it as QUEUE_FLAG_ADMIN, which looks more straightforward.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-mq-debugfs.c| 2 +-
 block/blk-mq.c| 2 +-
 block/elevator.c  | 2 +-
 drivers/block/null_blk_main.c | 2 +-
 drivers/nvme/host/fc.c| 2 +-
 drivers/nvme/host/pci.c   | 2 +-
 drivers/nvme/host/rdma.c  | 2 +-
 drivers/nvme/target/loop.c| 2 +-
 include/linux/blkdev.h| 8 
 9 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 246c9afb6f5d..8df013e9f242 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -133,7 +133,7 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
QUEUE_FLAG_NAME(PREEMPT_ONLY),
-   QUEUE_FLAG_NAME(NO_SCHED),
+   QUEUE_FLAG_NAME(ADMIN),
 };
 #undef QUEUE_FLAG_NAME
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5b56ed306cd9..7868daaf6de0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2633,7 +2633,7 @@ struct request_queue 
*__blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
 
-   if (!blk_queue_no_sched(q)) {
+   if (!blk_queue_admin(q)) {
int ret;
 
ret = elevator_init_mq(q);
diff --git a/block/elevator.c b/block/elevator.c
index 8fb8754222fa..d6abba76c89e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -,7 +,7 @@ static int __elevator_change(struct request_queue *q, 
const char *name)
 
 static inline bool elv_support_iosched(struct request_queue *q)
 {
-   if (q->mq_ops && blk_queue_no_sched(q))
+   if (q->mq_ops && blk_queue_admin(q))
return false;
return true;
 }
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 5d9504e65725..9fb358007e43 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1702,7 +1702,7 @@ static int null_add_dev(struct nullb_device *dev)
 
if (dev->queue_mode == NULL_Q_MQ) {
unsigned long q_flags = g_no_sched ?
-   QUEUE_FLAG_MQ_NO_SCHED_DEFAULT : QUEUE_FLAG_MQ_DEFAULT;
+   QUEUE_FLAG_MQ_ADMIN_DEFAULT : QUEUE_FLAG_MQ_DEFAULT;
 
if (shared_tags) {
nullb->tag_set = &tag_set;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7048e1444210..a920d13c3538 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3041,7 +3041,7 @@ nvme_fc_init_ctrl(struct device *dev, struct 
nvmf_ctrl_options *opts,
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
 
ctrl->ctrl.admin_q = __blk_mq_init_queue(&ctrl->admin_tag_set,
-   QUEUE_FLAG_MQ_NO_SCHED_DEFAULT);
+   QUEUE_FLAG_MQ_ADMIN_DEFAULT);
if (IS_ERR(ctrl->ctrl.admin_q)) {
ret = PTR_ERR(ctrl->ctrl.admin_q);
goto out_free_admin_tag_set;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 73a3bd980fc9..10716a00a6b4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1499,7 +1499,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->ctrl.admin_tagset = &dev->admin_tagset;
 
dev->ctrl.admin_q = __blk_mq_init_queue(&dev->admin_tagset,
-   QUEUE_FLAG_MQ_NO_SCHED_DEFAULT);
+   QUEUE_FLAG_MQ_ADMIN_DEFAULT);
if (IS_ERR(dev->ctrl.admin_q)) {
blk_mq_free_tag_set(&dev->admin_tagset);
return -ENOMEM;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d61c057c0a71..f901b3dafac5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -770,7 +770,7 @@ static int nvme_rdma_configure_admin_queue(struct 
nvme_rdma_ctrl *ctrl,
}
 
ctrl->ctrl.admin_q = __blk_mq_init_queue(&ctrl->admin_tag_set,
-   QUEUE_FLAG_MQ_NO_SCHED_DEFAULT);
+   QUEUE_FLAG_MQ_ADMIN_DEFAULT);
if (IS_ERR(ctrl->ctrl.admin_q)) {
error = PTR_ERR(ctrl->ctrl.admin_q);
goto out_free_tagset;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index c689621c2187..8fca59e6b3c3 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -381,7 +381,7 @@ static int nvme_loop_configure_admin_queue(struct 
nvme_loop_ctrl *ctrl)
ctrl-

[PATCH V3 07/17] SCSI: prepare for introducing admin queue for legacy path

2018-09-13 Thread Ming Lei
Uses scsi_is_admin_queue() and scsi_get_scsi_dev() to retrieve
'scsi_device' for legacy path.

The same approach can be used in SCSI_MQ path too, just not very efficiently,
and will deal with that in the patch when introducing admin queue for SCSI_MQ.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 37 +
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2800dfae19cd..2f541b4fb32b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -46,6 +46,20 @@ static DEFINE_MUTEX(scsi_sense_cache_mutex);
 
 static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd);
 
+/* For admin queue, its queuedata is NULL */
+static inline bool scsi_is_admin_queue(struct request_queue *q)
+{
+   return !q->queuedata;
+}
+
+/* This helper can only be used in req prep stage */
+static inline struct scsi_device *scsi_get_scsi_dev(struct request *rq)
+{
+   if (scsi_is_admin_queue(rq->q))
+   return scsi_req(rq)->sdev;
+   return rq->q->queuedata;
+}
+
 static inline struct kmem_cache *
 scsi_select_sense_cache(bool unchecked_isa_dma)
 {
@@ -1426,10 +1440,9 @@ scsi_prep_state_check(struct scsi_device *sdev, struct 
request *req)
 }
 
 static int
-scsi_prep_return(struct request_queue *q, struct request *req, int ret)
+scsi_prep_return(struct scsi_device *sdev, struct request_queue *q,
+   struct request *req, int ret)
 {
-   struct scsi_device *sdev = q->queuedata;
-
switch (ret) {
case BLKPREP_KILL:
case BLKPREP_INVALID:
@@ -1461,7 +1474,7 @@ scsi_prep_return(struct request_queue *q, struct request 
*req, int ret)
 
 static int scsi_prep_fn(struct request_queue *q, struct request *req)
 {
-   struct scsi_device *sdev = q->queuedata;
+   struct scsi_device *sdev = scsi_get_scsi_dev(req);
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
int ret;
 
@@ -1486,7 +1499,7 @@ static int scsi_prep_fn(struct request_queue *q, struct 
request *req)
 
ret = scsi_setup_cmnd(sdev, req);
 out:
-   return scsi_prep_return(q, req, ret);
+   return scsi_prep_return(sdev, q, req, ret);
 }
 
 static void scsi_unprep_fn(struct request_queue *q, struct request *req)
@@ -1663,6 +1676,9 @@ static int scsi_lld_busy(struct request_queue *q)
if (blk_queue_dying(q))
return 0;
 
+   if (WARN_ON_ONCE(scsi_is_admin_queue(q)))
+   return 0;
+
shost = sdev->host;
 
/*
@@ -1866,7 +1882,7 @@ static void scsi_request_fn(struct request_queue *q)
__releases(q->queue_lock)
__acquires(q->queue_lock)
 {
-   struct scsi_device *sdev = q->queuedata;
+   struct scsi_device *sdev;
struct Scsi_Host *shost;
struct scsi_cmnd *cmd;
struct request *req;
@@ -1875,7 +1891,6 @@ static void scsi_request_fn(struct request_queue *q)
 * To start with, we keep looping until the queue is empty, or until
 * the host is no longer able to accept any more requests.
 */
-   shost = sdev->host;
for (;;) {
int rtn;
/*
@@ -1887,6 +1902,10 @@ static void scsi_request_fn(struct request_queue *q)
if (!req)
break;
 
+   cmd = blk_mq_rq_to_pdu(req);
+   sdev = cmd->device;
+   shost = sdev->host;
+
if (unlikely(!scsi_device_online(sdev))) {
sdev_printk(KERN_ERR, sdev,
"rejecting I/O to offline device\n");
@@ -1904,7 +1923,6 @@ static void scsi_request_fn(struct request_queue *q)
blk_start_request(req);
 
spin_unlock_irq(q->queue_lock);
-   cmd = blk_mq_rq_to_pdu(req);
if (cmd != req->special) {
printk(KERN_CRIT "impossible request in %s.\n"
 "please mail a stack trace to "
@@ -2382,6 +2400,9 @@ struct scsi_device *scsi_device_from_queue(struct 
request_queue *q)
 {
struct scsi_device *sdev = NULL;
 
+   /* admin queue won't be exposed to external users */
+   WARN_ON_ONCE(scsi_is_admin_queue(q));
+
if (q->mq_ops) {
if (q->mq_ops == &scsi_mq_ops)
sdev = q->queuedata;
-- 
2.9.5



[PATCH V3 02/17] blk-mq: convert BLK_MQ_F_NO_SCHED into per-queue flag

2018-09-13 Thread Ming Lei
We need to support admin queue for scsi host, and not like NVMe,
this support is only from logic view, and the admin queue still has
to share same tags with IO queues.

Convert BLK_MQ_F_NO_SCHED into per-queue flag so that we can support
admin queue for SCSI.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-mq-debugfs.c| 2 +-
 block/blk-mq.c| 2 +-
 block/elevator.c  | 3 +--
 drivers/block/null_blk_main.c | 7 ---
 drivers/nvme/host/fc.c| 4 ++--
 drivers/nvme/host/pci.c   | 4 ++--
 drivers/nvme/host/rdma.c  | 4 ++--
 drivers/nvme/target/loop.c| 4 ++--
 include/linux/blk-mq.h| 1 -
 include/linux/blkdev.h| 5 +
 10 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index cb1e6cf7ac48..246c9afb6f5d 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -133,6 +133,7 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
QUEUE_FLAG_NAME(PREEMPT_ONLY),
+   QUEUE_FLAG_NAME(NO_SCHED),
 };
 #undef QUEUE_FLAG_NAME
 
@@ -246,7 +247,6 @@ static const char *const hctx_flag_name[] = {
HCTX_FLAG_NAME(TAG_SHARED),
HCTX_FLAG_NAME(SG_MERGE),
HCTX_FLAG_NAME(BLOCKING),
-   HCTX_FLAG_NAME(NO_SCHED),
 };
 #undef HCTX_FLAG_NAME
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d524efc5d1bc..5b56ed306cd9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2633,7 +2633,7 @@ struct request_queue 
*__blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q);
 
-   if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
+   if (!blk_queue_no_sched(q)) {
int ret;
 
ret = elevator_init_mq(q);
diff --git a/block/elevator.c b/block/elevator.c
index 6a06b5d040e5..8fb8754222fa 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -,8 +,7 @@ static int __elevator_change(struct request_queue *q, 
const char *name)
 
 static inline bool elv_support_iosched(struct request_queue *q)
 {
-   if (q->mq_ops && q->tag_set && (q->tag_set->flags &
-   BLK_MQ_F_NO_SCHED))
+   if (q->mq_ops && blk_queue_no_sched(q))
return false;
return true;
 }
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 6127e3ff7b4b..5d9504e65725 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1617,8 +1617,6 @@ static int null_init_tag_set(struct nullb *nullb, struct 
blk_mq_tag_set *set)
set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size   = sizeof(struct nullb_cmd);
set->flags = BLK_MQ_F_SHOULD_MERGE;
-   if (g_no_sched)
-   set->flags |= BLK_MQ_F_NO_SCHED;
set->driver_data = NULL;
 
if ((nullb && nullb->dev->blocking) || g_blocking)
@@ -1703,6 +1701,9 @@ static int null_add_dev(struct nullb_device *dev)
goto out_free_nullb;
 
if (dev->queue_mode == NULL_Q_MQ) {
+   unsigned long q_flags = g_no_sched ?
+   QUEUE_FLAG_MQ_NO_SCHED_DEFAULT : QUEUE_FLAG_MQ_DEFAULT;
+
if (shared_tags) {
nullb->tag_set = &tag_set;
rv = 0;
@@ -1718,7 +1719,7 @@ static int null_add_dev(struct nullb_device *dev)
goto out_cleanup_queues;
 
nullb->tag_set->timeout = 5 * HZ;
-   nullb->q = blk_mq_init_queue(nullb->tag_set);
+   nullb->q = __blk_mq_init_queue(nullb->tag_set, q_flags);
if (IS_ERR(nullb->q)) {
rv = -ENOMEM;
goto out_cleanup_tags;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 611e70cae754..7048e1444210 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3034,14 +3034,14 @@ nvme_fc_init_ctrl(struct device *dev, struct 
nvmf_ctrl_options *opts,
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
-   ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
 
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (ret)
goto out_free_queues;
ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;
 
-   ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
+   ctrl->ctrl.admin_q = __blk_mq_init_queue(&ctrl->admin_tag_set,
+   QUEUE_FLAG_MQ_NO_SCHED_DEFAULT);
if (IS_ERR(ctrl->ctrl.admin_q)) {
ret = PTR_ERR(ctrl->ctrl.admin_q);
goto out_fre

[PATCH V3 13/17] SCSI: use the dedicated admin queue to send admin commands

2018-09-13 Thread Ming Lei
Now the per-host dedicated admin queue is ready, so use this queue to
send admin commands only.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 87a88094b1eb..1e75515cc7ba 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -279,14 +279,14 @@ int __scsi_execute(struct scsi_device *sdev, const 
unsigned char *cmd,
struct scsi_request *rq;
int ret = DRIVER_ERROR << 24;
 
-   req = blk_get_request(sdev->request_queue,
+   req = blk_get_request(sdev->host->admin_q,
data_direction == DMA_TO_DEVICE ?
REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
if (IS_ERR(req))
return ret;
rq = scsi_req(req);
 
-   if (bufflen &&  blk_rq_map_kern(sdev->request_queue, req,
+   if (bufflen &&  blk_rq_map_kern(req->q, req,
buffer, bufflen, GFP_NOIO))
goto out;
 
-- 
2.9.5



[PATCH V3 11/17] SCSI: track pending admin commands

2018-09-13 Thread Ming Lei
Firstly we have to make sure that all pending admin commands to
one same scsi_device are completed before removing the scsi_device.

Secondly scsi_internal_device_block() needs this too.

So introduce one waitqueue and atomic counter for this purpose.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c| 6 ++
 drivers/scsi/scsi_scan.c   | 1 +
 drivers/scsi/scsi_sysfs.c  | 1 +
 include/scsi/scsi_device.h | 4 
 4 files changed, 12 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 8d129b601cc5..4db08458a127 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -299,6 +299,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned 
char *cmd,
req->cmd_flags |= flags;
req->rq_flags |= rq_flags | RQF_QUIET;
 
+   atomic_inc(&sdev->nr_admin_pending);
+
/*
 * head injection *required* here otherwise quiesce won't work
 */
@@ -323,6 +325,9 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned 
char *cmd,
  out:
blk_put_request(req);
 
+   atomic_dec(&sdev->nr_admin_pending);
+   wake_up_all(&sdev->admin_wq);
+
return ret;
 }
 EXPORT_SYMBOL(__scsi_execute);
@@ -3246,6 +3251,7 @@ static int scsi_internal_device_block(struct scsi_device 
*sdev)
else
scsi_wait_for_queuecommand(sdev);
}
+   wait_event(sdev->admin_wq, !atomic_read(&sdev->nr_admin_pending));
mutex_unlock(&sdev->state_mutex);
 
return err;
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 78ca63dfba4a..b83ad0dc8890 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -243,6 +243,7 @@ static struct scsi_device *scsi_alloc_sdev(struct 
scsi_target *starget,
mutex_init(&sdev->inquiry_mutex);
INIT_WORK(&sdev->event_work, scsi_evt_thread);
INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue);
+   init_waitqueue_head(&sdev->admin_wq);
 
sdev->sdev_gendev.parent = get_device(&starget->dev);
sdev->sdev_target = starget;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 3aee9464a7bf..8bcb7ecc0c06 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1393,6 +1393,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
 
blk_cleanup_queue(sdev->request_queue);
cancel_work_sync(&sdev->requeue_work);
+   wait_event(sdev->admin_wq, !atomic_read(&sdev->nr_admin_pending));
 
if (sdev->host->hostt->slave_destroy)
sdev->host->hostt->slave_destroy(sdev);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 202f4d6a4342..f6820da1dc37 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -227,6 +227,10 @@ struct scsi_device {
struct mutexstate_mutex;
enum scsi_device_state sdev_state;
struct task_struct  *quiesced_by;
+
+   atomic_tnr_admin_pending;
+   wait_queue_head_t   admin_wq;
+
unsigned long   sdev_data[0];
 } __attribute__((aligned(sizeof(unsigned long;
 
-- 
2.9.5



[PATCH V3 14/17] SCSI: transport_spi: resume a quiesced device

2018-09-13 Thread Ming Lei
We have to preempt freeze queue in scsi_device_quiesce(),
and unfreeze in scsi_device_resume(), so call scsi_device_resume()
for the device which is quiesced by scsi_device_quiesce().

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_transport_spi.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/scsi_transport_spi.c 
b/drivers/scsi/scsi_transport_spi.c
index 40b85b752b79..e4174e8137a8 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c
@@ -1052,6 +1052,9 @@ spi_dv_device(struct scsi_device *sdev)
 
scsi_target_resume(starget);
 
+   /* undo what scsi_device_quiesce() did */
+   scsi_device_resume(sdev);
+
spi_initial_dv(starget) = 1;
 
  out_free:
-- 
2.9.5



[PATCH V3 09/17] SCSI: don't set .queuedata in scsi_mq_alloc_queue()

2018-09-13 Thread Ming Lei
.queuedata is set in scsi_alloc_sdev() for both non-mq and scsi_mq,
so it is not necessary to set it again in scsi_mq_alloc_queue().

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bc04389de560..1072b2e303d9 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2354,7 +2354,6 @@ struct request_queue *scsi_mq_alloc_queue(struct 
scsi_device *sdev)
if (IS_ERR(sdev->request_queue))
return NULL;
 
-   sdev->request_queue->queuedata = sdev;
__scsi_init_queue(sdev->host, sdev->request_queue);
blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue);
return sdev->request_queue;
-- 
2.9.5



[PATCH V3 10/17] SCSI: deal with admin queue busy

2018-09-13 Thread Ming Lei
When a request originating from the admin queue isn't queued successfully, we
deal with it just like a normal request; that is, the admin queue
will be rerun after one request in this host is completed.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c  | 61 ++--
 include/scsi/scsi_host.h |  2 ++
 2 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1072b2e303d9..8d129b601cc5 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -475,10 +475,14 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
LIST_HEAD(starved_list);
struct scsi_device *sdev;
unsigned long flags;
+   bool run_admin;
 
spin_lock_irqsave(shost->host_lock, flags);
list_splice_init(&shost->starved_list, &starved_list);
 
+   run_admin = shost->run_admin_queue;
+   shost->run_admin_queue = false;
+
while (!list_empty(&starved_list)) {
struct request_queue *slq;
 
@@ -527,6 +531,10 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
/* put any unprocessed entries back */
list_splice(&starved_list, &shost->starved_list);
spin_unlock_irqrestore(shost->host_lock, flags);
+
+   /* no need to get queue for admin_q */
+   if (run_admin)
+   scsi_kick_queue(shost->admin_q);
 }
 
 /*
@@ -534,26 +542,30 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
  *
  * Purpose:Select a proper request queue to serve next
  *
- * Arguments:  q   - last request's queue
+ * Arguments:  sdev- the last request's scsi_device
+ * q   - last request's queue, which may points to
+ * host->admin_q
  *
  * Returns: Nothing
  *
  * Notes:  The previous command was completely finished, start
  * a new one if possible.
  */
-static void scsi_run_queue(struct request_queue *q)
+static void scsi_run_queue(struct scsi_device *sdev, struct request_queue *q)
 {
-   struct scsi_device *sdev = q->queuedata;
+   struct Scsi_Host *shost = sdev->host;
 
if (scsi_target(sdev)->single_lun)
scsi_single_lun_run(sdev);
-   if (!list_empty(&sdev->host->starved_list))
-   scsi_starved_list_run(sdev->host);
 
-   if (q->mq_ops)
-   blk_mq_run_hw_queues(q, false);
-   else
-   blk_run_queue(q);
+   if (!list_empty(&shost->starved_list) || shost->run_admin_queue)
+   scsi_starved_list_run(shost);
+
+   scsi_kick_queue(q);
+
+   /* q may points to host->admin_queue */
+   if (sdev->request_queue != q)
+   scsi_kick_queue(sdev->request_queue);
 }
 
 void scsi_requeue_run_queue(struct work_struct *work)
@@ -563,7 +575,7 @@ void scsi_requeue_run_queue(struct work_struct *work)
 
sdev = container_of(work, struct scsi_device, requeue_work);
q = sdev->request_queue;
-   scsi_run_queue(q);
+   scsi_run_queue(sdev, q);
 }
 
 /*
@@ -597,7 +609,7 @@ static void scsi_requeue_command(struct request_queue *q, 
struct scsi_cmnd *cmd)
blk_requeue_request(q, req);
spin_unlock_irqrestore(q->queue_lock, flags);
 
-   scsi_run_queue(q);
+   scsi_run_queue(sdev, q);
 
put_device(&sdev->sdev_gendev);
 }
@@ -607,7 +619,7 @@ void scsi_run_host_queues(struct Scsi_Host *shost)
struct scsi_device *sdev;
 
shost_for_each_device(sdev, shost)
-   scsi_run_queue(sdev->request_queue);
+   scsi_run_queue(sdev, sdev->request_queue);
 }
 
 static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
@@ -715,8 +727,13 @@ static bool scsi_end_request(struct request *req, 
blk_status_t error,
 
__blk_mq_end_request(req, error);
 
+   /*
+* scsi_device is shared between host->admin_queue and
+* sdev->request_queue
+*/
if (scsi_target(sdev)->single_lun ||
-   !list_empty(&sdev->host->starved_list))
+   !list_empty(&sdev->host->starved_list) ||
+   sdev->host->run_admin_queue || scsi_is_admin_queue(q))
kblockd_schedule_work(&sdev->requeue_work);
else
blk_mq_run_hw_queues(q, true);
@@ -732,7 +749,7 @@ static bool scsi_end_request(struct request *req, 
blk_status_t error,
blk_finish_request(req, error);
spin_unlock_irqrestore(q->queue_lock, flags);
 
-   scsi_run_queue(q);
+   scsi_run_queue(sdev, q);
}
 
put_device(&sdev->sdev_gendev);
@@ -1544,6 +1561,12 @@ static inline int scsi_dev_queue_ready

[PATCH V3 12/17] SCSI: create admin queue for each host

2018-09-13 Thread Ming Lei
The created admin queue will be used to send internal admin commands,
so we can simplify the sync between some admin commands and IO requests,
typical examples are system suspend and runtime PM.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/hosts.c |   9 
 drivers/scsi/scsi_lib.c  | 117 +--
 drivers/scsi/scsi_priv.h |   1 +
 3 files changed, 114 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ea4b0bb0c1cd..7c1f56c85475 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -242,6 +242,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct 
device *dev,
 
shost->dma_dev = dma_dev;
 
+   if (!scsi_init_admin_queue(shost))
+   goto out_remove_tags;
+
/*
 * Increase usage count temporarily here so that calling
 * scsi_autopm_put_host() will trigger runtime idle if there is
@@ -309,6 +312,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct 
device *dev,
pm_runtime_disable(&shost->shost_gendev);
pm_runtime_set_suspended(&shost->shost_gendev);
pm_runtime_put_noidle(&shost->shost_gendev);
+   blk_cleanup_queue(shost->admin_q);
+   blk_put_queue(shost->admin_q);
+ out_remove_tags:
if (shost_use_blk_mq(shost))
scsi_mq_destroy_tags(shost);
  fail:
@@ -344,6 +350,9 @@ static void scsi_host_dev_release(struct device *dev)
kfree(dev_name(&shost->shost_dev));
}
 
+   blk_cleanup_queue(shost->admin_q);
+   blk_put_queue(shost->admin_q);
+
if (shost_use_blk_mq(shost)) {
if (shost->tag_set.tags)
scsi_mq_destroy_tags(shost);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 4db08458a127..87a88094b1eb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2099,19 +2099,22 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
blk_mq_complete_request(cmd->request);
 }
 
-static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
+static void __scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx,
+   struct scsi_device *sdev)
 {
-   struct request_queue *q = hctx->queue;
-   struct scsi_device *sdev = q->queuedata;
-
atomic_dec(&sdev->device_busy);
put_device(&sdev->sdev_gendev);
 }
 
-static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
+static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
+{
+   __scsi_mq_put_budget(hctx, hctx->queue->queuedata);
+}
+
+static bool __scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx,
+   struct scsi_device *sdev)
 {
struct request_queue *q = hctx->queue;
-   struct scsi_device *sdev = q->queuedata;
 
if (!get_device(&sdev->sdev_gendev))
goto out;
@@ -2128,12 +2131,17 @@ static bool scsi_mq_get_budget(struct blk_mq_hw_ctx 
*hctx)
return false;
 }
 
-static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
-const struct blk_mq_queue_data *bd)
+static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx)
+{
+   return __scsi_mq_get_budget(hctx, hctx->queue->queuedata);
+}
+
+static blk_status_t __scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+const struct blk_mq_queue_data *bd,
+struct scsi_device *sdev)
 {
struct request *req = bd->rq;
struct request_queue *q = req->q;
-   struct scsi_device *sdev = q->queuedata;
struct Scsi_Host *shost = sdev->host;
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
blk_status_t ret;
@@ -2181,7 +2189,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx 
*hctx,
if (scsi_target(sdev)->can_queue > 0)
atomic_dec(&scsi_target(sdev)->target_busy);
 out_put_budget:
-   scsi_mq_put_budget(hctx);
+   __scsi_mq_put_budget(hctx, sdev);
switch (ret) {
case BLK_STS_OK:
break;
@@ -2203,6 +2211,29 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx 
*hctx,
return ret;
 }
 
+static blk_status_t scsi_admin_queue_rq(struct blk_mq_hw_ctx *hctx,
+const struct blk_mq_queue_data *bd)
+{
+   struct scsi_device *sdev = scsi_req(bd->rq)->sdev;
+
+   WARN_ON_ONCE(hctx->queue == sdev->request_queue);
+
+   if (!__scsi_mq_get_budget(hctx, sdev))
+   return BLK_STS_RESOURCE;
+
+   return __scsi_queue_rq(hctx, bd, sdev);
+}
+
+static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+const struct blk_mq_queue_data *bd)
+{
+   struct scsi_device *sdev = hctx->queue->queuedata;
+
+   WARN_ON_ONCE(hctx->queue == sdev->host->admin_q);
+
+   ret

[PATCH V3 08/17] SCSI: pass scsi_device to scsi_mq_prep_fn

2018-09-13 Thread Ming Lei
This patchset will introduce a per-host admin queue, so it may not be
possible to get the 'scsi_device' via q->queuedata.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2f541b4fb32b..bc04389de560 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2020,10 +2020,9 @@ static unsigned int scsi_mq_sgl_size(struct Scsi_Host 
*shost)
sizeof(struct scatterlist);
 }
 
-static int scsi_mq_prep_fn(struct request *req)
+static int scsi_mq_prep_fn(struct scsi_device *sdev, struct request *req)
 {
struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
-   struct scsi_device *sdev = req->q->queuedata;
struct Scsi_Host *shost = sdev->host;
struct scatterlist *sg;
 
@@ -2119,7 +2118,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx 
*hctx,
goto out_dec_target_busy;
 
if (!(req->rq_flags & RQF_DONTPREP)) {
-   ret = prep_to_mq(scsi_mq_prep_fn(req));
+   ret = prep_to_mq(scsi_mq_prep_fn(sdev, req));
if (ret != BLK_STS_OK)
goto out_dec_host_busy;
req->rq_flags |= RQF_DONTPREP;
-- 
2.9.5



[PATCH V3 17/17] block: enable runtime PM for blk-mq

2018-09-13 Thread Ming Lei
Now blk-mq can borrow the runtime PM approach from the legacy path, so
simply enable it. The only differences from the legacy path are:

1) blk_mq_queue_sched_tag_busy_iter() is introduced for checking if queue
is idle, instead of maintaining one counter.

2) we have to iterate over scheduler tags for counting how many requests
entering queue because requests in hw tags don't cover these allocated
and not dispatched.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 39 ++-
 block/blk-mq-tag.c | 21 +++--
 block/blk-mq-tag.h |  2 ++
 block/blk-mq.c |  4 
 4 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 07c5243c51ec..1aac76ae8c52 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3755,11 +3755,8 @@ EXPORT_SYMBOL(blk_finish_plug);
  */
 void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
 {
-   /* Don't enable runtime PM for blk-mq until it is ready */
-   if (q->mq_ops) {
-   pm_runtime_disable(dev);
+   if (WARN_ON_ONCE(blk_queue_admin(q)))
return;
-   }
 
q->dev = dev;
q->rpm_status = RPM_ACTIVE;
@@ -3768,6 +3765,23 @@ void blk_pm_runtime_init(struct request_queue *q, struct 
device *dev)
 }
 EXPORT_SYMBOL(blk_pm_runtime_init);
 
+static void blk_mq_pm_count_req(struct blk_mq_hw_ctx *hctx,
+   struct request *rq, void *priv, bool reserved)
+{
+   unsigned long *cnt = priv;
+
+   (*cnt)++;
+}
+
+static bool blk_mq_pm_queue_busy(struct request_queue *q)
+{
+   unsigned long cnt = 0;
+
+   blk_mq_queue_sched_tag_busy_iter(q, blk_mq_pm_count_req, &cnt);
+
+   return cnt > 0;
+}
+
 /**
  * blk_pre_runtime_suspend - Pre runtime suspend check
  * @q: the queue of the device
@@ -3792,12 +3806,20 @@ EXPORT_SYMBOL(blk_pm_runtime_init);
 int blk_pre_runtime_suspend(struct request_queue *q)
 {
int ret = 0;
+   bool busy = true;
+   unsigned long last_busy;
 
if (!q->dev)
return ret;
 
+   last_busy = READ_ONCE(q->dev->power.last_busy);
+
+   if (q->mq_ops)
+   busy = blk_mq_pm_queue_busy(q);
+
spin_lock_irq(q->queue_lock);
-   if (q->nr_pending) {
+   busy = q->mq_ops ? busy : !!q->nr_pending;
+   if (busy) {
ret = -EBUSY;
pm_runtime_mark_last_busy(q->dev);
} else {
@@ -3805,6 +3827,13 @@ int blk_pre_runtime_suspend(struct request_queue *q)
}
spin_unlock_irq(q->queue_lock);
 
+   /*
+* Any new IO during this window will prevent the current suspend
+* from going on
+*/
+   if (unlikely(last_busy != READ_ONCE(q->dev->power.last_busy)))
+   ret = -EBUSY;
+
if (!ret)
blk_freeze_queue_lock(q);
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 7b0390f1c764..70e76dd035c1 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -322,8 +322,8 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
-void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
-   void *priv)
+static void __blk_mq_queue_tag_busy_iter(struct request_queue *q,
+   busy_iter_fn *fn, void *priv, bool sched_tag)
 {
struct blk_mq_hw_ctx *hctx;
int i;
@@ -344,6 +344,9 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, 
busy_iter_fn *fn,
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
 
+   if (sched_tag && hctx->sched_tags)
+   tags = hctx->sched_tags;
+
/*
 * If not software queues are currently mapped to this
 * hardware queue, there's nothing to check
@@ -358,6 +361,20 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, 
busy_iter_fn *fn,
rcu_read_unlock();
 }
 
+void blk_mq_queue_tag_busy_iter(struct request_queue *q,
+   busy_iter_fn *fn, void *priv)
+{
+
+   __blk_mq_queue_tag_busy_iter(q, fn, priv, false);
+}
+
+void blk_mq_queue_sched_tag_busy_iter(struct request_queue *q,
+   busy_iter_fn *fn, void *priv)
+{
+
+   __blk_mq_queue_tag_busy_iter(q, fn, priv, true);
+}
+
 static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
bool round_robin, int node)
 {
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 61deab0b5a5a..5513c3eeab00 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -35,6 +35,8 @@ extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 void blk_mq_queue_tag_busy

[PATCH V3 15/17] SCSI: use admin queue to implement queue QUIESCE

2018-09-13 Thread Ming Lei
All admin commands are sent via the per-host admin queue, so we can simply
freeze the IO queue to quiesce a scsi device.

Also the current SCSI stack guarantees that any request originated from
admin queue won't be called back to block layer via the associated IO queue,
and it is always dealt with by the admin queue.

So it is safe to submit admin request via admin queue when the associated IO
queue is frozen, and this way matches the PREEMPT flag perfectly.

Finally, we can remove the preempt_only approach for supporting SCSI
quiesce, then the code in block fast path is simplified a lot.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 44 ++--
 block/blk-mq-debugfs.c |  1 -
 drivers/scsi/scsi_lib.c| 29 +++--
 include/linux/blkdev.h |  6 --
 include/scsi/scsi_device.h |  1 -
 5 files changed, 5 insertions(+), 76 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 4dbc93f43b38..f51c19b381e3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -421,26 +421,6 @@ void blk_sync_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
- * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
- * @q: request queue pointer
- *
- * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
- * set and 1 if the flag was already set.
- */
-int blk_set_preempt_only(struct request_queue *q)
-{
-   return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
-}
-EXPORT_SYMBOL_GPL(blk_set_preempt_only);
-
-void blk_clear_preempt_only(struct request_queue *q)
-{
-   blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
-   wake_up_all(&q->mq_freeze_wq);
-}
-EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
-
-/**
  * __blk_run_queue_uncond - run a queue whether or not it has been stopped
  * @q: The queue to run
  *
@@ -917,27 +897,8 @@ EXPORT_SYMBOL(blk_alloc_queue);
  */
 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
-   const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
-
while (true) {
-   bool success = false;
-
-   rcu_read_lock();
-   if (percpu_ref_tryget_live(&q->q_usage_counter)) {
-   /*
-* The code that sets the PREEMPT_ONLY flag is
-* responsible for ensuring that that flag is globally
-* visible before the queue is unfrozen.
-*/
-   if (preempt || !blk_queue_preempt_only(q)) {
-   success = true;
-   } else {
-   percpu_ref_put(&q->q_usage_counter);
-   }
-   }
-   rcu_read_unlock();
-
-   if (success)
+   if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
 
if (flags & BLK_MQ_REQ_NOWAIT)
@@ -953,8 +914,7 @@ int blk_queue_enter(struct request_queue *q, 
blk_mq_req_flags_t flags)
smp_rmb();
 
wait_event(q->mq_freeze_wq,
-  (atomic_read(&q->mq_freeze_depth) == 0 &&
-   (preempt || !blk_queue_preempt_only(q))) ||
+  atomic_read(&q->mq_freeze_depth) == 0 ||
   blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 8df013e9f242..82df43ec322f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -132,7 +132,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
-   QUEUE_FLAG_NAME(PREEMPT_ONLY),
QUEUE_FLAG_NAME(ADMIN),
 };
 #undef QUEUE_FLAG_NAME
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1e75515cc7ba..85fed8d96c8a 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3180,34 +3180,12 @@ static void scsi_wait_for_queuecommand(struct 
scsi_device *sdev)
 int
 scsi_device_quiesce(struct scsi_device *sdev)
 {
-   struct request_queue *q = sdev->request_queue;
int err;
 
-   /*
-* It is allowed to call scsi_device_quiesce() multiple times from
-* the same context but concurrent scsi_device_quiesce() calls are
-* not allowed.
-*/
-   WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
-
-   blk_set_preempt_only(q);
-
-   blk_mq_freeze_queue(q);
-   /*
-* Ensure that the effect of blk_set_preempt_only() will be visible
-* for percpu_ref_tryget() callers tha

[PATCH V3 16/17] block: simplify runtime PM support

2018-09-13 Thread Ming Lei
This patch simplifies runtime PM support by the following approach:

1) resume device in blk_queue_enter() if this device is
runtime-suspended or runtime-suspending

2) freeze queue in blk_pre_runtime_suspend()

3) unfreeze queue in blk_pre_runtime_resume()

4) remove checking on RRF_PM because now we requires out-of-band PM
request to resume device

5) introduce blk_unfreeze_queue_lock() and blk_freeze_queue_lock()
so that both runtime-PM and system-PM can use them to freeze/unfreeze
queue and avoid freeze & unfreeze mismatch

Then we can remove blk_pm_allow_request(), and more importantly this way
can be applied to blk-mq path too.

Finally the IO queue associated with scsi_device is kept as runtime
resumed in __scsi_execute() when sending non-PM RQF_REQUEST, and this
way makes sure that the LUN is active for handling non-PM RQF_PREEMPT.

Cc: Alan Stern 
Cc: Christoph Hellwig 
Cc: Bart Van Assche 
Cc: Jianchao Wang 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Adrian Hunter 
Cc: "James E.J. Bottomley" 
Cc: "Martin K. Petersen" 
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Ming Lei 
---
 block/blk-core.c| 106 
 block/blk-mq.c  |  22 ++
 block/elevator.c|  25 
 drivers/scsi/scsi_lib.c |  14 +--
 include/linux/blk-mq.h  |   2 +
 include/linux/blkdev.h  |   3 ++
 6 files changed, 101 insertions(+), 71 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f51c19b381e3..07c5243c51ec 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -890,6 +890,28 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
+#ifdef CONFIG_PM
+static void blk_resume_queue(struct request_queue *q)
+{
+   int rpm_status;
+
+   if (!q->dev)
+   return;
+
+   spin_lock_irq(q->queue_lock);
+   rpm_status = q->rpm_status;
+   spin_unlock_irq(q->queue_lock);
+
+   /* PM request needs to be dealt with out of band */
+   if (rpm_status == RPM_SUSPENDED || rpm_status == RPM_SUSPENDING)
+   pm_runtime_resume(q->dev);
+}
+#else
+static void blk_resume_queue(struct request_queue *q)
+{
+}
+#endif
+
 /**
  * blk_queue_enter() - try to increase q->q_usage_counter
  * @q: request queue pointer
@@ -913,11 +935,20 @@ int blk_queue_enter(struct request_queue *q, 
blk_mq_req_flags_t flags)
 */
smp_rmb();
 
+   blk_resume_queue(q);
+
wait_event(q->mq_freeze_wq,
   atomic_read(&q->mq_freeze_depth) == 0 ||
   blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
+
+   /*
+* This allocation may be blocked via queue freezing before
+* the queue is suspended, so we have to resume queue again
+* after waking up.
+*/
+   blk_resume_queue(q);
}
 }
 
@@ -1023,6 +1054,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t 
gfp_mask, int node_id,
q->bypass_depth = 1;
queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
 
+   mutex_init(&q->freeze_lock);
init_waitqueue_head(&q->mq_freeze_wq);
 
/*
@@ -1470,6 +1502,23 @@ static struct request *__get_request(struct request_list 
*rl, unsigned int op,
return ERR_PTR(-ENOMEM);
 }
 
+#ifdef CONFIG_PM
+static void blk_pm_add_request(struct request_queue *q)
+{
+   if (q->dev)
+   q->nr_pending++;
+}
+static void blk_pm_put_request(struct request_queue *q)
+{
+   if (q->dev && !--q->nr_pending)
+   pm_runtime_mark_last_busy(q->dev);
+}
+#else
+static inline void blk_pm_put_request(struct request_queue *q) {}
+static inline void blk_pm_add_request(struct request_queue *q){}
+#endif
+
+
 /**
  * get_request - get a free request
  * @q: request_queue to allocate request from
@@ -1498,16 +1547,19 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
 
rl = blk_get_rl(q, bio);/* transferred to @rq on success */
 retry:
+   blk_pm_add_request(q);
rq = __get_request(rl, op, bio, flags, gfp);
if (!IS_ERR(rq))
return rq;
 
if (op & REQ_NOWAIT) {
+   blk_pm_put_request(q);
blk_put_rl(rl);
return ERR_PTR(-EAGAIN);
}
 
if ((flags & BLK_MQ_REQ_NOWAIT) || unlikely(blk_queue_dying(q))) {
+   blk_pm_put_request(q);
blk_put_rl(rl);
return rq;
}
@@ -1518,6 +1570,7 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
 
trace_block_sleeprq(q, bio, op);
 
+   blk_pm_put_request(q);
spin_unlock_irq(q->queue_lock);
io_schedule();
 
@@ -1686,16 +1739,6 @@ void part_round_stats(struct request_queue *q, int cpu, 
struct hd_struct *part)
 }
 EXPORT_SYMBOL_GPL(part_round_stats)

Re: [PATCH RFC] scsi: ufs: Disable blk-mq for now

2018-09-13 Thread Adrian Hunter
On 13/09/18 15:05, Ming Lei wrote:
> On Thu, Sep 13, 2018 at 02:28:48PM +0300, Adrian Hunter wrote:
>> blk-mq does not support runtime pm, so disable blk-mq support for now.
> 
> So could you describe a bit what the issue you are trying to fix?

UFS is a low-power solution, so we must be able to runtime suspend.

> 
> This is host level runtime PM you are trying to address, and if blk-mq
> runtime isn't enabled, I guess the host won't be runtime suspended at all
> because some of its descendant are always active.
> 
> So seems we need to do nothing for preventing the host controller from
> entering runtime suspend.

We don't want to prevent the host controller from runtime suspending, quite
the opposite.


Re: [PATCH RFC] scsi: ufs: Disable blk-mq for now

2018-09-13 Thread Ming Lei
On Thu, Sep 13, 2018 at 03:15:39PM +0300, Adrian Hunter wrote:
> On 13/09/18 15:05, Ming Lei wrote:
> > On Thu, Sep 13, 2018 at 02:28:48PM +0300, Adrian Hunter wrote:
> >> blk-mq does not support runtime pm, so disable blk-mq support for now.
> > 
> > So could you describe a bit what the issue you are trying to fix?
> 
> UFS is a low-power solution, so we must be able to runtime suspend.
> 
> > 
> > This is host level runtime PM you are trying to address, and if blk-mq
> > runtime isn't enabled, I guess the host won't be runtime suspended at all
> > because some of its descendant are always active.
> > 
> > So seems we need to do nothing for preventing the host controller from
> > entering runtime suspend.
> 
> We don't want to prevent the host controller from runtime suspending, quite
> the opposite.

OK, got it.

However, in a previous discussion, using a per-driver/device .use_blk_mq
switch was strongly objected to, so I am not sure whether this approach
can be accepted.

BTW, I just posted the runtime PM enablement patches[1] for blk-mq,
and I verified that it works fine and passed blktests & xfstest & my
other sanity tests, could you try it on UFS?

[1] https://marc.info/?l=linux-block&m=153684095523409&w=2

Thanks,
Ming


Re: [PATCH V3 11/17] SCSI: track pending admin commands

2018-09-13 Thread jianchao.wang
Hi Ming

On 09/13/2018 08:15 PM, Ming Lei wrote:
>  EXPORT_SYMBOL(__scsi_execute);
> @@ -3246,6 +3251,7 @@ static int scsi_internal_device_block(struct 
> scsi_device *sdev)
>   else
>   scsi_wait_for_queuecommand(sdev);
>   }
> + wait_event(sdev->admin_wq, !atomic_read(&sdev->nr_admin_pending));
>   mutex_unlock(&sdev->state_mutex);
>  
>   return err;
...
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 3aee9464a7bf..8bcb7ecc0c06 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -1393,6 +1393,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
>  
>   blk_cleanup_queue(sdev->request_queue);
>   cancel_work_sync(&sdev->requeue_work);
> + wait_event(sdev->admin_wq, !atomic_read(&sdev->nr_admin_pending))

This nr_admin_pending could drain the ongoing scsi_request_fn/scsi_queue_rq,
but I'm afraid it cannot stop new ones coming in, such as the ones that have 
passed
the sdev state checking and have not crossed the 
atomic_inc(&sdev->nr_admin_pending).



Thanks
Jianchao



Re: [PATCH] scsi: sd: Contribute to randomness when running rotational device

2018-09-13 Thread Maciej Żenczykowski
On Sun, Sep 9, 2018 at 4:52 AM, Ming Lei  wrote:
> On Thu, Sep 06, 2018 at 01:37:19PM -0700, Xuewei Zhang wrote:
>> Currently a scsi device won't contribute to kernel randomness when it
>> uses blk-mq. Since we commonly use scsi on rotational device with
>> blk-mq, it make sense to keep contributing to kernel randomness in these
>> cases. This is especially important for virtual machines.
>>
>> commit b5b6e8c8d3b4 ("scsi: virtio_scsi: fix IO hang caused by automatic
>> irq vector affinity") made all virtio-scsi device to use blk-mq, which
>> does not contribute to randomness today. So for a virtual machine only
>> having virtio-scsi disk (which is common), it will simply stop getting
>> randomness from its disks in today's implementation.
>>
>> With this patch, if the above VM has rotational virtio-scsi device, then
>> it can still benefit from the entropy generated from the disk.
>>
>> Reported-by: Xuewei Zhang 
>> Signed-off-by: Xuewei Zhang 
>> ---
>>  drivers/scsi/sd.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>> index b79b366a94f7..5e4f10d28065 100644
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -2959,6 +2959,9 @@ static void sd_read_block_characteristics(struct 
>> scsi_disk *sdkp)
>>   if (rot == 1) {
>>   blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
>>   blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
>> + } else {
>> + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
>> + blk_queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q);
>>   }
>>
>>   if (sdkp->device->type == TYPE_ZBC) {
>> --
>> 2.19.0.rc2.392.g5ba43deb5a-goog
>>
>
> Look reasonable, especially the disk randomness is added by SCSI itself.
>
> Reviewed-by: Ming Lei 
>
> Thanks,
> Ming

Also, see: https://bugzilla.redhat.com/show_bug.cgi?id=1572944
where we're having randomness starvation problems on FC28
running 4.18.5 due to lack of virtio-rng device in VM.
(VM boot takes 9+ hours or 2 haswell VMs)

I'd kindly request we get this not only into 4.19 but also stable trees.
(along with Ted's other randomization fixes)

Reviewed-by: Maciej Żenczykowski 


Re: [PATCH RFC] scsi: ufs: Disable blk-mq for now

2018-09-13 Thread Adrian Hunter
On 14/09/18 04:52, Ming Lei wrote:
> On Thu, Sep 13, 2018 at 03:15:39PM +0300, Adrian Hunter wrote:
>> On 13/09/18 15:05, Ming Lei wrote:
>>> On Thu, Sep 13, 2018 at 02:28:48PM +0300, Adrian Hunter wrote:
 blk-mq does not support runtime pm, so disable blk-mq support for now.
>>>
>>> So could you describe a bit what the issue you are trying to fix?
>>
>> UFS is a low-power solution, so we must be able to runtime suspend.
>>
>>>
>>> This is host level runtime PM you are trying to address, and if blk-mq
>>> runtime isn't enabled, I guess the host won't be runtime suspended at all
>>> because some of its descendant are always active.
>>>
>>> So seems we need to do nothing for preventing the host controller from
>>> entering runtime suspend.
>>
>> We don't want to prevent the host controller from runtime suspending, quite
>> the opposite.
> 
> OK, got it.
> 
> However, in previous discussion, it is strongly objected to use
> per-driver/device .use_blk_mq switch, so not sure if this way can
> be accepted.

It is only needed for 4.19 so far.  Otherwise just revert d5038a13eca7
("scsi: core: switch to scsi-mq by default")

> 
> BTW, I just posted the runtime PM enablement patches[1] for blk-mq,
> and I verified that it works fine and passed blktests & xfstest & my
> other sanity tests, could you try it on UFS?
> 
> [1] https://marc.info/?l=linux-block&m=153684095523409&w=2

I will give it a go.

Obviously, if those patches go in, we wouldn't need to disable blk-mq
anymore, but that isn't until 4.20 at least.


Re: [PATCH -next] scsi: pm80xx: Remove set but not used variable 'device_id'

2018-09-13 Thread Jinpu Wang
On Thu, Sep 13, 2018 at 3:44 AM YueHaibing  wrote:
>
> Fixes gcc '-Wunused-but-set-variable' warning:
>
> drivers/scsi/pm8001/pm8001_sas.c: In function 
> 'pm8001_I_T_nexus_event_handler':
> drivers/scsi/pm8001/pm8001_sas.c:1052:6: warning:
>  variable 'device_id' set but not used [-Wunused-but-set-variable]
>
> drivers/scsi/pm8001/pm8001_sas.c: In function 'pm8001_abort_task':
> drivers/scsi/pm8001/pm8001_sas.c:1191:6: warning:
>  variable 'device_id' set but not used [-Wunused-but-set-variable]
>
> Signed-off-by: YueHaibing 
Acked-by: Jack Wang 
Thanks!
> ---
>  drivers/scsi/pm8001/pm8001_sas.c | 4 
>  1 file changed, 4 deletions(-)
>
> diff --git a/drivers/scsi/pm8001/pm8001_sas.c 
> b/drivers/scsi/pm8001/pm8001_sas.c
> index b1e7d26..84092e4 100644
> --- a/drivers/scsi/pm8001/pm8001_sas.c
> +++ b/drivers/scsi/pm8001/pm8001_sas.c
> @@ -1049,13 +1049,11 @@ int pm8001_I_T_nexus_event_handler(struct 
> domain_device *dev)
> struct pm8001_device *pm8001_dev;
> struct pm8001_hba_info *pm8001_ha;
> struct sas_phy *phy;
> -   u32 device_id = 0;
>
> if (!dev || !dev->lldd_dev)
> return -1;
>
> pm8001_dev = dev->lldd_dev;
> -   device_id = pm8001_dev->device_id;
> pm8001_ha = pm8001_find_ha_by_dev(dev);
>
> PM8001_EH_DBG(pm8001_ha,
> @@ -1188,7 +1186,6 @@ int pm8001_abort_task(struct sas_task *task)
>  {
> unsigned long flags;
> u32 tag;
> -   u32 device_id;
> struct domain_device *dev ;
> struct pm8001_hba_info *pm8001_ha;
> struct scsi_lun lun;
> @@ -1202,7 +1199,6 @@ int pm8001_abort_task(struct sas_task *task)
> dev = task->dev;
> pm8001_dev = dev->lldd_dev;
> pm8001_ha = pm8001_find_ha_by_dev(dev);
> -   device_id = pm8001_dev->device_id;
> phy_id = pm8001_dev->attached_phy;
> rc = pm8001_find_tag(task, &tag);
> if (rc == 0) {
>


-- 
Jack Wang
Linux Kernel Developer

ProfitBricks GmbH
Greifswalder Str. 207
D - 10405 Berlin

Tel:   +49 30 577 008  042
Fax:  +49 30 577 008 299
Email:jinpu.w...@profitbricks.com
URL:  https://www.profitbricks.de

Sitz der Gesellschaft: Berlin
Registergericht: Amtsgericht Charlottenburg, HRB 125506 B
Geschäftsführer: Achim Weiss, Matthias Steinberg, Christoph Steffens


[PATCH v9 0/7] Support for Qualcomm UFS QMP PHY on SDM845

2018-09-13 Thread Can Guo
This patch series adds support for UFS QMP PHY on SDM845 and the
compatible string for it. This patch series depends on the current
proposed QMP V3 USB3 UNI PHY support for sdm845 driver [1], on
the DT bindings for the QMP V3 USB3 PHYs based driver [2], and also
rebased on updated pipe_clk initialization sequence [3]. This series
can only be merged after the dependent patches are merged.
[1] 
http://lists-archives.com/linux-kernel/29071659-dt-bindings-phy-qcom-qmp-update-bindings-for-sdm845.html
[2] 
http://lists-archives.com/linux-kernel/29071660-phy-qcom-qmp-add-qmp-v3-usb3-uni-phy-support-for-sdm845.html
[3] https://patchwork.kernel.org/patch/10376551/

Changes since v8:
- Add one new change to support ufs core reset.
- Incorporated review comments from Evan, Vivek.

Changes since v7:
- Add one new change to update UFS PHY power on sequence
- Incorporated review comments from Evan, Vivek and Manu.

Changes since v6:
- Add one new change to clean up some structs and field
- Updates the PHY power control sequence.
- Incorporated review comments from Vivek and Manu.

Changes since v5:
- Updates the PHY power control sequence.
- Updates UFS PHY power on condition check.

Changes since v4:
- Adds 'ref_aux' clock back to SDM845 UFS PHY clock list.
- Power on PHY before serdes configuration starts.
- Updates the UFS PHY initialization sequence.
- Updates a few UFS PHY registers.
- Incorporated review comments from Vivek and Manu.

Changes since v3:
- Incorporated review comments from Vivek and Rob.

Changes since v2:
- Incorporated review comments from Vivek and Rob.
- Remove "ref_aux" from sdm845 ufs phy clock list structure.

Changes since v1:
- Incorporated review comments from Vivek and Manu.
- Update the commit title of patch 2.

Can Guo (6):
  phy: Update PHY power control sequence
  phy: General struct and field cleanup
  phy: Add QMP phy based UFS phy support for sdm845
  scsi: ufs: Power on phy after it is initialized
  dt-bindings: phy-qcom-qmp: Add UFS phy compatible string for sdm845
  dt-bindings: ufshcd-pltfrm: Add core reset string

Dov Levenglick (1):
  scsi: ufs: Add core reset support

 .../devicetree/bindings/phy/qcom-qmp-phy.txt   |   4 +-
 .../devicetree/bindings/ufs/ufshcd-pltfrm.txt  |   7 +
 drivers/phy/qualcomm/phy-qcom-qmp.c| 216 +++--
 drivers/phy/qualcomm/phy-qcom-qmp.h|  15 ++
 drivers/scsi/ufs/ufs-qcom.c|  34 +++-
 drivers/scsi/ufs/ufs-qcom.h|   1 +
 drivers/scsi/ufs/ufshcd-pltfrm.c   |  22 +++
 drivers/scsi/ufs/ufshcd.c  |  13 ++
 drivers/scsi/ufs/ufshcd.h  |  12 ++
 9 files changed, 303 insertions(+), 21 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v9 0/7] Support for Qualcomm UFS QMP PHY on SDM845

2018-09-13 Thread Can Guo
This patch series adds support for UFS QMP PHY on SDM845 and the
compatible string for it. This patch series depends on the current
proposed QMP V3 USB3 UNI PHY support for sdm845 driver [1], on
the DT bindings for the QMP V3 USB3 PHYs based driver [2], and also
rebased on updated pipe_clk initialization sequence [3]. This series
can only be merged after the dependent patches are merged.
[1] 
http://lists-archives.com/linux-kernel/29071659-dt-bindings-phy-qcom-qmp-update-bindings-for-sdm845.html
[2] 
http://lists-archives.com/linux-kernel/29071660-phy-qcom-qmp-add-qmp-v3-usb3-uni-phy-support-for-sdm845.html
[3] https://patchwork.kernel.org/patch/10376551/

Changes since v8:
- Add one new change to support ufs core reset.
- Incorporated review comments from Evan, Vivek.

Changes since v7:
- Add one new change to update UFS PHY power on sequence
- Incorporated review comments from Evan, Vivek and Manu.

Changes since v6:
- Add one new change to clean up some structs and field
- Updates the PHY power control sequence.
- Incorporated review comments from Vivek and Manu.

Changes since v5:
- Updates the PHY power control sequence.
- Updates UFS PHY power on condition check.

Changes since v4:
- Adds 'ref_aux' clock back to SDM845 UFS PHY clock list.
- Power on PHY before serdes configuration starts.
- Updates the UFS PHY initialization sequence.
- Updates a few UFS PHY registers.
- Incorporated review comments from Vivek and Manu.

Changes since v3:
- Incorporated review comments from Vivek and Rob.

Changes since v2:
- Incorporated review comments from Vivek and Rob.
- Remove "ref_aux" from sdm845 ufs phy clock list structure.

Changes since v1:
- Incorporated review comments from Vivek and Manu.
- Update the commit title of patch 2.

Can Guo (6):
  phy: Update PHY power control sequence
  phy: General struct and field cleanup
  phy: Add QMP phy based UFS phy support for sdm845
  scsi: ufs: Power on phy after it is initialized
  dt-bindings: phy-qcom-qmp: Add UFS phy compatible string for sdm845
  dt-bindings: ufshcd-pltfrm: Add core reset string

Dov Levenglick (1):
  scsi: ufs: Add core reset support

 .../devicetree/bindings/phy/qcom-qmp-phy.txt   |   4 +-
 .../devicetree/bindings/ufs/ufshcd-pltfrm.txt  |   7 +
 drivers/phy/qualcomm/phy-qcom-qmp.c| 216 +++--
 drivers/phy/qualcomm/phy-qcom-qmp.h|  15 ++
 drivers/scsi/ufs/ufs-qcom.c|  34 +++-
 drivers/scsi/ufs/ufs-qcom.h|   1 +
 drivers/scsi/ufs/ufshcd-pltfrm.c   |  22 +++
 drivers/scsi/ufs/ufshcd.c  |  13 ++
 drivers/scsi/ufs/ufshcd.h  |  12 ++
 9 files changed, 303 insertions(+), 21 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [Patch v1 0/7] mpt3sas: Hot-Plug Surprise removal support on IOC.

2018-09-13 Thread Lukas Wunner
On Wed, Sep 12, 2018 at 03:31:07PM +0530, Sreekanth Reddy wrote:
> On Wed, Sep 5, 2018 at 1:08 PM, Lukas Wunner  wrote:
> > On Wed, Sep 05, 2018 at 11:45:45AM +0530, Sreekanth Reddy wrote:
> > > I have one more instance where still we need this poll kthread, i.e
> > > during the device probe time we have some commands which has more
> > > timeout value (e.g. 300 seconds), so if user has unplugged this device
> > > just after sending this more time-out valued command then driver has
> > > to wait until this time-out value expires. i.e. this device is still
> > > visible in lspci output until this 300 seconds timeout value expires
> > > even though device is unplugged. if we have a poll kthread (which will
> > > poll for every one second) then driver can early detect the unplugged
> > > state and can terminate the outstanding commands and hence probe
> > > operation can be completed quickly.
> >
> > The only instances I can see in your driver where it waits for 300 s
> > is in _base_diag_reset(), which does an msleep(256) in a loop for up
> > to 300 s, and scsih_scan_finished(), which is called in a loop with an
> > msleep(10) by do_scsi_scan_host().
> >
> > Any harm in simply checking for removal of the device in those loops
> > and bailing out if so?  Instead of the poll kthread to achieve the same?
> 
> we can do for this port enable request but still we have other
> requests where we don't have this type of loops and used
> wait_for_completion_timeout () API where we can't bailout the request
> in-between and we have to wait until the timeout value expires. For
> these types of request terminating it though watchdog thread will be
> simple.

When the HBA is hot-removed, its driver's ->remove callback is invoked.
You could just check at the beginning of the ->remove callback whether
the device is no longer present, and if it isn't, complete() any
completions that may be pending.

I think that would obviate the need for a watchdog.

Thanks,

Lukas


Edit your photos 11

2018-09-13 Thread Jimmy

Hi,

We are waiting for your photos.
Please send photos to hansre...@outlook.com for further editing.

Our team is ready to edit your photos. Do you have photos for editing?
We are team who can do following work for you.
Cutting out your photos, or retouching if needed.

It is for products photos or portrait photos, catalog photos.

Please send photo editing work to hansre...@outlook.com

Thanks,
Jimmy Button
Email: hansre...@outlook.com



[PATCH] scsi: csiostor: remove automatic irq affinity assignment

2018-09-13 Thread Varun Prakash
If number of interrupt vectors are more than num_online_cpus()
then pci_alloc_irq_vectors_affinity() assigns cpumask based
on num_possible_cpus() to the remaining vectors because of
this interrupts are not generating for these vectors.

This patch fixes this issue by using pci_alloc_irq_vectors()
instead of pci_alloc_irq_vectors_affinity().

Signed-off-by: Varun Prakash 
---
 drivers/scsi/csiostor/csio_isr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
index 7c88147..8b92c59 100644
--- a/drivers/scsi/csiostor/csio_isr.c
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -480,7 +480,6 @@ csio_enable_msix(struct csio_hw *hw)
int i, j, k, n, min, cnt;
int extra = CSIO_EXTRA_VECS;
struct csio_scsi_cpu_info *info;
-   struct irq_affinity desc = { .pre_vectors = 2 };
 
min = hw->num_pports + extra;
cnt = hw->num_sqsets + extra;
@@ -491,8 +490,7 @@ csio_enable_msix(struct csio_hw *hw)
 
csio_dbg(hw, "FW supp #niq:%d, trying %d msix's\n", hw->cfg_niq, cnt);
 
-   cnt = pci_alloc_irq_vectors_affinity(hw->pdev, min, cnt,
-   PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc);
+   cnt = pci_alloc_irq_vectors(hw->pdev, min, cnt, PCI_IRQ_MSIX);
if (cnt < 0)
return cnt;
 
-- 
2.0.2



[PATCH] scsi : cxgb4i: add DCB support for iSCSI connections

2018-09-13 Thread Varun Prakash
Add IEEE and CEE DCBX support for iSCSI connections.

Signed-off-by: Rohit Maheshwari 
Signed-off-by: Varun Prakash 
---
 drivers/scsi/cxgbi/cxgb4i/cxgb4i.c | 154 -
 drivers/scsi/cxgbi/libcxgbi.h  |   3 +
 2 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c 
b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 211da1d..064ef57 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -35,6 +35,11 @@ static unsigned int dbg_level;
 
 #include "../libcxgbi.h"
 
+#ifdef CONFIG_CHELSIO_T4_DCB
+#include 
+#include "cxgb4_dcb.h"
+#endif
+
 #defineDRV_MODULE_NAME "cxgb4i"
 #define DRV_MODULE_DESC"Chelsio T4-T6 iSCSI Driver"
 #defineDRV_MODULE_VERSION  "0.9.5-ko"
@@ -155,6 +160,15 @@ static struct iscsi_transport cxgb4i_iscsi_transport = {
.session_recovery_timedout = iscsi_session_recovery_timedout,
 };
 
+#ifdef CONFIG_CHELSIO_T4_DCB
+static int
+cxgb4_dcb_change_notify(struct notifier_block *, unsigned long, void *);
+
+static struct notifier_block cxgb4_dcb_change = {
+   .notifier_call = cxgb4_dcb_change_notify,
+};
+#endif
+
 static struct scsi_transport_template *cxgb4i_stt;
 
 /*
@@ -574,6 +588,9 @@ static inline int tx_flowc_wr_credits(int *nparamsp, int 
*flowclenp)
int nparams, flowclen16, flowclen;
 
nparams = FLOWC_WR_NPARAMS_MIN;
+#ifdef CONFIG_CHELSIO_T4_DCB
+   nparams++;
+#endif
flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
flowclen16 = DIV_ROUND_UP(flowclen, 16);
flowclen = flowclen16 * 16;
@@ -595,6 +612,9 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *csk)
struct fw_flowc_wr *flowc;
int nparams, flowclen16, flowclen;
 
+#ifdef CONFIG_CHELSIO_T4_DCB
+   u16 vlan = ((struct l2t_entry *)csk->l2t)->vlan;
+#endif
flowclen16 = tx_flowc_wr_credits(&nparams, &flowclen);
skb = alloc_wr(flowclen, 0, GFP_ATOMIC);
flowc = (struct fw_flowc_wr *)skb->head;
@@ -622,6 +642,17 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *csk)
flowc->mnemval[8].val = 0;
flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
flowc->mnemval[8].val = 16384;
+#ifdef CONFIG_CHELSIO_T4_DCB
+   flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_DCBPRIO;
+   if (vlan == CPL_L2T_VLAN_NONE) {
+   pr_warn_ratelimited("csk %u without VLAN Tag on DCB Link\n",
+   csk->tid);
+   flowc->mnemval[9].val = cpu_to_be32(0);
+   } else {
+   flowc->mnemval[9].val = cpu_to_be32((vlan & VLAN_PRIO_MASK) >>
+   VLAN_PRIO_SHIFT);
+   }
+#endif
 
set_wr_txq(skb, CPL_PRIORITY_DATA, csk->port_id);
 
@@ -1600,6 +1631,46 @@ static void release_offload_resources(struct cxgbi_sock 
*csk)
csk->dst = NULL;
 }
 
+#ifdef CONFIG_CHELSIO_T4_DCB
+static inline u8 get_iscsi_dcb_state(struct net_device *ndev)
+{
+   return ndev->dcbnl_ops->getstate(ndev);
+}
+
+static int select_priority(int pri_mask)
+{
+   if (!pri_mask)
+   return 0;
+   return (ffs(pri_mask) - 1);
+}
+
+static u8 get_iscsi_dcb_priority(struct net_device *ndev)
+{
+   int rv;
+   u8 caps;
+
+   struct dcb_app iscsi_dcb_app = {
+   .protocol = 3260
+   };
+
+   rv = (int)ndev->dcbnl_ops->getcap(ndev, DCB_CAP_ATTR_DCBX, &caps);
+   if (rv)
+   return 0;
+
+   if (caps & DCB_CAP_DCBX_VER_IEEE) {
+   iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_ANY;
+   rv = dcb_ieee_getapp_mask(ndev, &iscsi_dcb_app);
+   } else if (caps & DCB_CAP_DCBX_VER_CEE) {
+   iscsi_dcb_app.selector = DCB_APP_IDTYPE_PORTNUM;
+   rv = dcb_getapp(ndev, &iscsi_dcb_app);
+   }
+
+   log_debug(1 << CXGBI_DBG_ISCSI,
+ "iSCSI priority is set to %u\n", select_priority(rv));
+   return select_priority(rv);
+}
+#endif
+
 static int init_act_open(struct cxgbi_sock *csk)
 {
struct cxgbi_device *cdev = csk->cdev;
@@ -1613,7 +1684,9 @@ static int init_act_open(struct cxgbi_sock *csk)
unsigned int size, size6;
unsigned int linkspeed;
unsigned int rcv_winf, snd_winf;
-
+#ifdef CONFIG_CHELSIO_T4_DCB
+   u8 priority = 0;
+#endif
log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK,
"csk 0x%p,%u,0x%lx,%u.\n",
csk, csk->state, csk->flags, csk->tid);
@@ -1647,7 +1720,15 @@ static int init_act_open(struct cxgbi_sock *csk)
cxgbi_sock_set_flag(csk, CTPF_HAS_ATID);
cxgbi_sock_get(csk);
 
+#ifdef CONFIG_CHELSIO_T4_DCB
+   if (get_iscsi_dcb_state(ndev))
+   priority = get_iscsi_dcb_priority(ndev);
+
+   csk->dcb_priority = priority;
+   csk->l2t = cxgb4_l2t_get(lldi->l2t, n, ndev, priority);
+#else
csk->l2t = cxgb4_l2t_get(lldi->l2t, n, ndev, 0);

Re: [PATCH] lpfc: Synchronize access to remoteport via rport

2018-09-13 Thread James Smart

On 9/12/2018 4:28 PM, James Smart wrote:

The driver currently uses the ndlp to get the local rport which is
then used to get the nvme transport remoteport pointer. There can
be cases where a stale remoteport pointer is obtained as
synchronization isn't done through the different dereferences.

Correct by using locks to synchronize the dereferences.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 


Please disregard. There was a slight error in this fix.

-- james



[PATCH v2] lpfc: Synchronize access to remoteport via rport

2018-09-13 Thread James Smart
The driver currently uses the ndlp to get the local rport which is
then used to get the nvme transport remoteport pointer. There can
be cases where a stale remoteport pointer is obtained as
synchronization isn't done through the different dereferences.

Correct by using locks to synchronize the dereferences.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 

---
v2:
  Needed to reinit nrport to NULL at start of every ndlp check
---
 drivers/scsi/lpfc/lpfc_attr.c| 15 ++-
 drivers/scsi/lpfc/lpfc_debugfs.c | 10 +-
 drivers/scsi/lpfc/lpfc_nvme.c| 11 ---
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 0169cd406518..dda7f450b96d 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -360,12 +360,12 @@ lpfc_nvme_info_show(struct device *dev, struct 
device_attribute *attr,
goto buffer_done;
 
list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+   nrport = NULL;
+   spin_lock(&vport->phba->hbalock);
rport = lpfc_ndlp_get_nrport(ndlp);
-   if (!rport)
-   continue;
-
-   /* local short-hand pointer. */
-   nrport = rport->remoteport;
+   if (rport)
+   nrport = rport->remoteport;
+   spin_unlock(&vport->phba->hbalock);
if (!nrport)
continue;
 
@@ -3386,6 +3386,7 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport)
struct lpfc_nodelist  *ndlp;
 #if (IS_ENABLED(CONFIG_NVME_FC))
struct lpfc_nvme_rport *rport;
+   struct nvme_fc_remote_port *remoteport = NULL;
 #endif
 
shost = lpfc_shost_from_vport(vport);
@@ -3396,8 +3397,12 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport)
if (ndlp->rport)
ndlp->rport->dev_loss_tmo = vport->cfg_devloss_tmo;
 #if (IS_ENABLED(CONFIG_NVME_FC))
+   spin_lock(&vport->phba->hbalock);
rport = lpfc_ndlp_get_nrport(ndlp);
if (rport)
+   remoteport = rport->remoteport;
+   spin_unlock(&vport->phba->hbalock);
+   if (remoteport)
nvme_fc_set_remoteport_devloss(rport->remoteport,
   vport->cfg_devloss_tmo);
 #endif
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 9df0c051349f..aec5b10a8c85 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -551,7 +551,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char 
*buf, int size)
unsigned char *statep;
struct nvme_fc_local_port *localport;
struct lpfc_nvmet_tgtport *tgtp;
-   struct nvme_fc_remote_port *nrport;
+   struct nvme_fc_remote_port *nrport = NULL;
struct lpfc_nvme_rport *rport;
 
cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE);
@@ -696,11 +696,11 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char 
*buf, int size)
len += snprintf(buf + len, size - len, "\tRport List:\n");
list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
/* local short-hand pointer. */
+   spin_lock(&phba->hbalock);
rport = lpfc_ndlp_get_nrport(ndlp);
-   if (!rport)
-   continue;
-
-   nrport = rport->remoteport;
+   if (rport)
+   nrport = rport->remoteport;
+   spin_unlock(&phba->hbalock);
if (!nrport)
continue;
 
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index 543873232d5a..23343a65530c 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -2718,7 +2718,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp)
rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
 
+   spin_lock_irq(&vport->phba->hbalock);
oldrport = lpfc_ndlp_get_nrport(ndlp);
+   spin_unlock_irq(&vport->phba->hbalock);
if (!oldrport)
lpfc_nlp_get(ndlp);
 
@@ -2833,7 +2835,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp)
struct nvme_fc_local_port *localport;
struct lpfc_nvme_lport *lport;
struct lpfc_nvme_rport *rport;
-   struct nvme_fc_remote_port *remoteport;
+   struct nvme_fc_remote_port *remoteport = NULL;
 
localport = vport->localport;
 
@@ -2847,11 +2849,14 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, 
struct lpfc_nodelist *ndlp)
if (!lport)
goto input_err;
 
+   spin_lock_irq(&vport->phba->hbalock);
rport = lpfc_ndlp_get_nrport(ndlp);
-   

Re: [PATCH v5] target/iblock: split T10 PI SGL across command bios

2018-09-13 Thread Mike Christie
On 09/04/2018 12:19 PM, Greg Edwards wrote:
> When T10 PI is enabled on a backing device for the iblock backstore, the
> PI SGL for the entire command is attached to the first bio only.  This
> works fine if the command is covered by a single bio, but can result in
> ref tag errors in the client for the other bios in a multi-bio command,
> e.g.
> 
> [   47.631236] sda: ref tag error at location 2048 (rcvd 0)
> [   47.637658] sda: ref tag error at location 4096 (rcvd 0)
> [   47.644228] sda: ref tag error at location 6144 (rcvd 0)
> 
> The command will be split into multiple bios if the number of data SG
> elements exceeds BIO_MAX_PAGES (see iblock_get_bio()).
> 
> The bios may later be split again in the block layer on the host after
> iblock_submit_bios(), depending on the queue limits of the backing
> device.  The block and SCSI layers will pass through the whole PI SGL
> down to the LLDD however that first bio is split up, but the LLDD may
> only use the portion that corresponds to the data length (depends on the
> LLDD, tested with scsi_debug).
> 
> Split the PI SGL across the bios in the command, so each bio's
> bio_integrity_payload contains the protection information for the data
> in the bio.  Use an sg_mapping_iter to keep track of where we are in PI
> SGL, so we know where to start with the next bio.
> 
> Signed-off-by: Greg Edwards 
> ---
> Changes from v4:
>   * use %zu for size_t in pr_debug()
> 
> Changes from v3:
>   * cast a size_t as unsigned long in a pr_debug() for 32-bit arches,
> turned up by the kbuild test robot
> 
> Changes from v2:
>   * add back min(cmd->t_prot_nents, BIO_MAX_PAGES) for bio_integrity_alloc()
> from v1
> 
> Changes from v1:
>   * expand commit message
>   * use an sg_mapping_iter to track where we are in the PI SGL
> 


Looks ok to me.

Reviewed-by: Mike Christie 



[PATCH -next] scsi: pm80xx: Remove set but not used variable 'page_code'

2018-09-13 Thread YueHaibing
Fixes gcc '-Wunused-but-set-variable' warning:

drivers/scsi/pm8001/pm80xx_hwi.c: In function 'pm8001_set_phy_profile':
drivers/scsi/pm8001/pm80xx_hwi.c:4679:6: warning:
 variable 'page_code' set but not used [-Wunused-but-set-variable]

Signed-off-by: YueHaibing 
---
 drivers/scsi/pm8001/pm80xx_hwi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index b641875..9864a3c 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -4676,9 +4676,8 @@ void mpi_set_phy_profile_req(struct pm8001_hba_info 
*pm8001_ha,
 void pm8001_set_phy_profile(struct pm8001_hba_info *pm8001_ha,
u32 length, u8 *buf)
 {
-   u32 page_code, i;
+   u32 i;
 
-   page_code = SAS_PHY_ANALOG_SETTINGS_PAGE;
for (i = 0; i < pm8001_ha->chip->n_phy; i++) {
mpi_set_phy_profile_req(pm8001_ha,
SAS_PHY_ANALOG_SETTINGS_PAGE, i, length, (u32 *)buf);



[PATCH -next] scsi: mptfusion: Use kmemdup rather than duplicating its implementation in mpt_remove_dead_ioc_func

2018-09-13 Thread YueHaibing
Use kmemdup rather than duplicating its implementation

Signed-off-by: YueHaibing 
---
 drivers/message/fusion/mptbase.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index dc1e43a..a32d3d0 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -6001,13 +6001,12 @@ static int mpt_remove_dead_ioc_func(void *arg)
if (mpt_config(ioc, &cfg) != 0)
goto out;
 
-   mem = kmalloc(iocpage2sz, GFP_KERNEL);
+   mem = kmemdup((u8 *)pIoc2, iocpage2sz, GFP_KERNEL);
if (!mem) {
rc = -ENOMEM;
goto out;
}
 
-   memcpy(mem, (u8 *)pIoc2, iocpage2sz);
ioc->raid_data.pIocPg2 = (IOCPage2_t *) mem;
 
mpt_read_ioc_pg_3(ioc);



[PATCH -next] [SCSI] aic7xxx: Use kmemdup rather than duplicating its implementation in ahc_loadseq

2018-09-13 Thread YueHaibing
Use kmemdup rather than duplicating its implementation

Signed-off-by: YueHaibing 
---
 drivers/scsi/aic7xxx/aic7xxx_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c 
b/drivers/scsi/aic7xxx/aic7xxx_core.c
index f3362f4..8549f1e 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_core.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_core.c
@@ -6901,10 +6901,10 @@ struct ahc_softc *
if (cs_count != 0) {
 
cs_count *= sizeof(struct cs);
-   ahc->critical_sections = kmalloc(cs_count, GFP_ATOMIC);
+   ahc->critical_sections = kmemdup(cs_table, cs_count,
+GFP_ATOMIC);
if (ahc->critical_sections == NULL)
panic("ahc_loadseq: Could not malloc");
-   memcpy(ahc->critical_sections, cs_table, cs_count);
}
ahc_outb(ahc, SEQCTL, PERRORDIS|FAILDIS|FASTMODE);



[PATCH -next] [SCSI] aic79xx: Use kmemdup rather than duplicating its implementation in ahd_loadseq

2018-09-13 Thread YueHaibing
Use kmemdup rather than duplicating its implementation

Signed-off-by: YueHaibing 
---
 drivers/scsi/aic7xxx/aic79xx_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c 
b/drivers/scsi/aic7xxx/aic79xx_core.c
index 9ee75c9..0970bb5 100644
--- a/drivers/scsi/aic7xxx/aic79xx_core.c
+++ b/drivers/scsi/aic7xxx/aic79xx_core.c
@@ -9438,10 +9438,10 @@ struct scb *
if (cs_count != 0) {
 
cs_count *= sizeof(struct cs);
-   ahd->critical_sections = kmalloc(cs_count, GFP_ATOMIC);
+   ahd->critical_sections = kmemdup(cs_table, cs_count,
+GFP_ATOMIC);
if (ahd->critical_sections == NULL)
panic("ahd_loadseq: Could not malloc");
-   memcpy(ahd->critical_sections, cs_table, cs_count);
}
ahd_outb(ahd, SEQCTL0, PERRORDIS|FAILDIS|FASTMODE);



Compliment of the day to you Dear Friend.

2018-09-13 Thread Mrs. Amina Kadi
 Compliment of the day to you Dear Friend.

Dear Friend.
 
  I am Mrs. Amina Kadi. am sending this brief letter to solicit your
partnership to transfer $5.5 million US Dollars. I shall send you
more information and procedures when I receive positive response from
you.

Mrs. Amina Kadi