From 164c80ed84a7669114869d9347c0f3ea7f56ea89 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 19 Sep 2016 15:12:41 -0600 Subject: [PATCH 1/3] blk-throttle: Extend slice if throttle group is not empty Right now, if a slice has expired, we start a new slice. If a bio is queued, we keep on extending the slice by the throtl_slice interval (100ms). This worked well as long as the pending timer function got executed within a few milliseconds of its scheduled time. But it looks like, with recent changes in the timer subsystem, the slack can be much longer depending on the expiry time of the scheduled timer. commit 500462a9de65 ("timers: Switch to a non-cascading wheel") This means that, by the time the timer function gets executed, the delay from the scheduled time may be more than 100ms. The current code will then conclude that the existing slice has expired and a new one needs to be started. The new slice will be 100ms by default, which will not be sufficient to meet the rate requirement of the group given the bio size, so the bio will not be dispatched and we will start a new timer to wait. When that timer expires, the same process will repeat and we will wait again, and this can easily become an infinite loop. Solve this issue by starting a new slice only if the throttle group is empty. If it is not empty, that means there should be an active slice going on. Ideally it should not be expired but, given the slack, it is possible that it has expired. Reported-by: Hou Tao Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f1aba26f4719..a3ea8260c94c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -780,9 +780,11 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, /* * If previous slice expired, start a new one otherwise renew/extend * existing slice to make sure it is at least throtl_slice interval - * long since now. 
+ * long since now. New slice is started only for empty throttle group. + * If there is queued bio, that means there should be an active + * slice and it should be extended instead. */ - if (throtl_slice_used(tg, rw)) + if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw])) throtl_start_new_slice(tg, rw); else { if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) From 3b4ac78610690bd83fb33762ef97e8b8a89285ae Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2016 19:58:17 -0600 Subject: [PATCH 2/3] nvme-rdma: only clear queue flags after successful connect Otherwise, nvme_rdma_stop_and_clear_queue() will incorrectly try to stop/free rdma qps/cm_ids that are already freed. Fixes: e89ca58f9c90 ("nvme-rdma: add DELETING queue flag") Reported-by: Steve Wise Tested-by: Steve Wise Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c2c2c28e6eb5..fbdb2267e460 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -561,7 +561,6 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; - queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -595,6 +594,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, goto out_destroy_cm_id; } + clear_bit(NVME_RDMA_Q_DELETING, &queue->flags); set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags); return 0; From c8712c6a674e3382fe4d26d108251ccfa55d08e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 23 Sep 2016 10:25:48 -0600 Subject: [PATCH 3/3] blk-mq: skip unmapped queues in blk_mq_alloc_request_hctx This gives the caller feedback that a given hctx is not mapped and thus no command can be sent on it. 
Signed-off-by: Christoph Hellwig Tested-by: Steve Wise Signed-off-by: Jens Axboe --- block/blk-mq.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 13f5a6c1de76..c207fa9870eb 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -296,17 +296,29 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, if (ret) return ERR_PTR(ret); + /* + * Check if the hardware context is actually mapped to anything. + * If not tell the caller that it should skip this queue. + */ hctx = q->queue_hw_ctx[hctx_idx]; + if (!blk_mq_hw_queue_mapped(hctx)) { + ret = -EXDEV; + goto out_queue_exit; + } ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask)); blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, rw, 0); if (!rq) { - blk_queue_exit(q); - return ERR_PTR(-EWOULDBLOCK); + ret = -EWOULDBLOCK; + goto out_queue_exit; } return rq; + +out_queue_exit: + blk_queue_exit(q); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);