virtio_blk: blk-mq support

Switch virtio-blk from the dual support for old-style requests and bios
to use the block-multiqueue.

Acked-by: Asias He <asias@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Jens Axboe 2013-11-01 10:52:52 -06:00
parent 1355b37f11
commit 1cf7e9c68f

View file

@ -11,12 +11,11 @@
#include <linux/string_helpers.h> #include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h> #include <scsi/scsi_cmnd.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/numa.h>
#define PART_BITS 4 #define PART_BITS 4
static bool use_bio;
module_param(use_bio, bool, S_IRUGO);
static int major; static int major;
static DEFINE_IDA(vd_index_ida); static DEFINE_IDA(vd_index_ida);
@ -26,13 +25,11 @@ struct virtio_blk
{ {
struct virtio_device *vdev; struct virtio_device *vdev;
struct virtqueue *vq; struct virtqueue *vq;
wait_queue_head_t queue_wait; spinlock_t vq_lock;
/* The disk structure for the kernel. */ /* The disk structure for the kernel. */
struct gendisk *disk; struct gendisk *disk;
mempool_t *pool;
/* Process context for config space updates */ /* Process context for config space updates */
struct work_struct config_work; struct work_struct config_work;
@ -47,31 +44,17 @@ struct virtio_blk
/* Ida index - used to track minor number allocations. */ /* Ida index - used to track minor number allocations. */
int index; int index;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[/*sg_elems*/];
}; };
struct virtblk_req struct virtblk_req
{ {
struct request *req; struct request *req;
struct bio *bio;
struct virtio_blk_outhdr out_hdr; struct virtio_blk_outhdr out_hdr;
struct virtio_scsi_inhdr in_hdr; struct virtio_scsi_inhdr in_hdr;
struct work_struct work;
struct virtio_blk *vblk;
int flags;
u8 status; u8 status;
struct scatterlist sg[]; struct scatterlist sg[];
}; };
enum {
VBLK_IS_FLUSH = 1,
VBLK_REQ_FLUSH = 2,
VBLK_REQ_DATA = 4,
VBLK_REQ_FUA = 8,
};
static inline int virtblk_result(struct virtblk_req *vbr) static inline int virtblk_result(struct virtblk_req *vbr)
{ {
switch (vbr->status) { switch (vbr->status) {
@ -84,22 +67,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
} }
} }
static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
gfp_t gfp_mask)
{
struct virtblk_req *vbr;
vbr = mempool_alloc(vblk->pool, gfp_mask);
if (!vbr)
return NULL;
vbr->vblk = vblk;
if (use_bio)
sg_init_table(vbr->sg, vblk->sg_elems);
return vbr;
}
static int __virtblk_add_req(struct virtqueue *vq, static int __virtblk_add_req(struct virtqueue *vq,
struct virtblk_req *vbr, struct virtblk_req *vbr,
struct scatterlist *data_sg, struct scatterlist *data_sg,
@ -143,83 +110,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
} }
static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
{
struct virtio_blk *vblk = vbr->vblk;
DEFINE_WAIT(wait);
int ret;
spin_lock_irq(vblk->disk->queue->queue_lock);
while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
have_data)) < 0)) {
prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(vblk->disk->queue->queue_lock);
io_schedule();
spin_lock_irq(vblk->disk->queue->queue_lock);
finish_wait(&vblk->queue_wait, &wait);
}
virtqueue_kick(vblk->vq);
spin_unlock_irq(vblk->disk->queue->queue_lock);
}
static void virtblk_bio_send_flush(struct virtblk_req *vbr)
{
vbr->flags |= VBLK_IS_FLUSH;
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = 0;
virtblk_add_req(vbr, false);
}
static void virtblk_bio_send_data(struct virtblk_req *vbr)
{
struct virtio_blk *vblk = vbr->vblk;
struct bio *bio = vbr->bio;
bool have_data;
vbr->flags &= ~VBLK_IS_FLUSH;
vbr->out_hdr.type = 0;
vbr->out_hdr.sector = bio->bi_sector;
vbr->out_hdr.ioprio = bio_prio(bio);
if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
have_data = true;
if (bio->bi_rw & REQ_WRITE)
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
else
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
} else
have_data = false;
virtblk_add_req(vbr, have_data);
}
static void virtblk_bio_send_data_work(struct work_struct *work)
{
struct virtblk_req *vbr;
vbr = container_of(work, struct virtblk_req, work);
virtblk_bio_send_data(vbr);
}
static void virtblk_bio_send_flush_work(struct work_struct *work)
{
struct virtblk_req *vbr;
vbr = container_of(work, struct virtblk_req, work);
virtblk_bio_send_flush(vbr);
}
static inline void virtblk_request_done(struct virtblk_req *vbr) static inline void virtblk_request_done(struct virtblk_req *vbr)
{ {
struct virtio_blk *vblk = vbr->vblk;
struct request *req = vbr->req; struct request *req = vbr->req;
int error = virtblk_result(vbr); int error = virtblk_result(vbr);
@ -231,90 +123,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
req->errors = (error != 0); req->errors = (error != 0);
} }
__blk_end_request_all(req, error); blk_mq_end_io(req, error);
mempool_free(vbr, vblk->pool);
}
static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
{
struct virtio_blk *vblk = vbr->vblk;
if (vbr->flags & VBLK_REQ_DATA) {
/* Send out the actual write data */
INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
queue_work(virtblk_wq, &vbr->work);
} else {
bio_endio(vbr->bio, virtblk_result(vbr));
mempool_free(vbr, vblk->pool);
}
}
static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
{
struct virtio_blk *vblk = vbr->vblk;
if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
/* Send out a flush before end the bio */
vbr->flags &= ~VBLK_REQ_DATA;
INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
queue_work(virtblk_wq, &vbr->work);
} else {
bio_endio(vbr->bio, virtblk_result(vbr));
mempool_free(vbr, vblk->pool);
}
}
static inline void virtblk_bio_done(struct virtblk_req *vbr)
{
if (unlikely(vbr->flags & VBLK_IS_FLUSH))
virtblk_bio_flush_done(vbr);
else
virtblk_bio_data_done(vbr);
} }
static void virtblk_done(struct virtqueue *vq) static void virtblk_done(struct virtqueue *vq)
{ {
struct virtio_blk *vblk = vq->vdev->priv; struct virtio_blk *vblk = vq->vdev->priv;
bool bio_done = false, req_done = false; bool req_done = false;
struct virtblk_req *vbr; struct virtblk_req *vbr;
unsigned long flags; unsigned long flags;
unsigned int len; unsigned int len;
spin_lock_irqsave(vblk->disk->queue->queue_lock, flags); spin_lock_irqsave(&vblk->vq_lock, flags);
do { do {
virtqueue_disable_cb(vq); virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
if (vbr->bio) {
virtblk_bio_done(vbr);
bio_done = true;
} else {
virtblk_request_done(vbr); virtblk_request_done(vbr);
req_done = true; req_done = true;
} }
}
} while (!virtqueue_enable_cb(vq)); } while (!virtqueue_enable_cb(vq));
spin_unlock_irqrestore(&vblk->vq_lock, flags);
/* In case queue is stopped waiting for more buffers. */ /* In case queue is stopped waiting for more buffers. */
if (req_done) if (req_done)
blk_start_queue(vblk->disk->queue); blk_mq_start_stopped_hw_queues(vblk->disk->queue);
spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
if (bio_done)
wake_up(&vblk->queue_wait);
} }
static bool do_req(struct request_queue *q, struct virtio_blk *vblk, static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
struct request *req)
{ {
struct virtio_blk *vblk = hctx->queue->queuedata;
struct virtblk_req *vbr = req->special;
unsigned long flags;
unsigned int num; unsigned int num;
struct virtblk_req *vbr; const bool last = (req->cmd_flags & REQ_END) != 0;
vbr = virtblk_alloc_req(vblk, GFP_ATOMIC); BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
if (!vbr)
/* When another request finishes we'll try again. */
return false;
vbr->req = req; vbr->req = req;
vbr->bio = NULL;
if (req->cmd_flags & REQ_FLUSH) { if (req->cmd_flags & REQ_FLUSH) {
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
vbr->out_hdr.sector = 0; vbr->out_hdr.sector = 0;
@ -342,7 +187,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
} }
} }
num = blk_rq_map_sg(q, vbr->req, vblk->sg); num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
if (num) { if (num) {
if (rq_data_dir(vbr->req) == WRITE) if (rq_data_dir(vbr->req) == WRITE)
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
@ -350,63 +195,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
vbr->out_hdr.type |= VIRTIO_BLK_T_IN; vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
} }
if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) { spin_lock_irqsave(&vblk->vq_lock, flags);
mempool_free(vbr, vblk->pool); if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
return false; spin_unlock_irqrestore(&vblk->vq_lock, flags);
} blk_mq_stop_hw_queue(hctx);
return true;
}
static void virtblk_request(struct request_queue *q)
{
struct virtio_blk *vblk = q->queuedata;
struct request *req;
unsigned int issued = 0;
while ((req = blk_peek_request(q)) != NULL) {
BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
/* If this request fails, stop queue and wait for something to
finish to restart it. */
if (!do_req(q, vblk, req)) {
blk_stop_queue(q);
break;
}
blk_start_request(req);
issued++;
}
if (issued)
virtqueue_kick(vblk->vq); virtqueue_kick(vblk->vq);
return BLK_MQ_RQ_QUEUE_BUSY;
} }
spin_unlock_irqrestore(&vblk->vq_lock, flags);
static void virtblk_make_request(struct request_queue *q, struct bio *bio) if (last)
{ virtqueue_kick(vblk->vq);
struct virtio_blk *vblk = q->queuedata; return BLK_MQ_RQ_QUEUE_OK;
struct virtblk_req *vbr;
BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
vbr = virtblk_alloc_req(vblk, GFP_NOIO);
if (!vbr) {
bio_endio(bio, -ENOMEM);
return;
}
vbr->bio = bio;
vbr->flags = 0;
if (bio->bi_rw & REQ_FLUSH)
vbr->flags |= VBLK_REQ_FLUSH;
if (bio->bi_rw & REQ_FUA)
vbr->flags |= VBLK_REQ_FUA;
if (bio->bi_size)
vbr->flags |= VBLK_REQ_DATA;
if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
virtblk_bio_send_flush(vbr);
else
virtblk_bio_send_data(vbr);
} }
/* return id (s/n) string for *disk to *id_str /* return id (s/n) string for *disk to *id_str
@ -680,12 +480,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
__ATTR(cache_type, S_IRUGO|S_IWUSR, __ATTR(cache_type, S_IRUGO|S_IWUSR,
virtblk_cache_type_show, virtblk_cache_type_store); virtblk_cache_type_show, virtblk_cache_type_store);
static struct blk_mq_ops virtio_mq_ops = {
.queue_rq = virtio_queue_rq,
.map_queue = blk_mq_map_queue,
.alloc_hctx = blk_mq_alloc_single_hw_queue,
.free_hctx = blk_mq_free_single_hw_queue,
};
static struct blk_mq_reg virtio_mq_reg = {
.ops = &virtio_mq_ops,
.nr_hw_queues = 1,
.queue_depth = 64,
.numa_node = NUMA_NO_NODE,
.flags = BLK_MQ_F_SHOULD_MERGE,
};
static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
struct request *rq, unsigned int nr)
{
struct virtio_blk *vblk = data;
struct virtblk_req *vbr = rq->special;
sg_init_table(vbr->sg, vblk->sg_elems);
}
static int virtblk_probe(struct virtio_device *vdev) static int virtblk_probe(struct virtio_device *vdev)
{ {
struct virtio_blk *vblk; struct virtio_blk *vblk;
struct request_queue *q; struct request_queue *q;
int err, index; int err, index;
int pool_size;
u64 cap; u64 cap;
u32 v, blk_size, sg_elems, opt_io_size; u32 v, blk_size, sg_elems, opt_io_size;
@ -709,17 +532,14 @@ static int virtblk_probe(struct virtio_device *vdev)
/* We need an extra sg elements at head and tail. */ /* We need an extra sg elements at head and tail. */
sg_elems += 2; sg_elems += 2;
vdev->priv = vblk = kmalloc(sizeof(*vblk) + vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
if (!vblk) { if (!vblk) {
err = -ENOMEM; err = -ENOMEM;
goto out_free_index; goto out_free_index;
} }
init_waitqueue_head(&vblk->queue_wait);
vblk->vdev = vdev; vblk->vdev = vdev;
vblk->sg_elems = sg_elems; vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
mutex_init(&vblk->config_lock); mutex_init(&vblk->config_lock);
INIT_WORK(&vblk->config_work, virtblk_config_changed_work); INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
@ -728,31 +548,27 @@ static int virtblk_probe(struct virtio_device *vdev)
err = init_vq(vblk); err = init_vq(vblk);
if (err) if (err)
goto out_free_vblk; goto out_free_vblk;
spin_lock_init(&vblk->vq_lock);
pool_size = sizeof(struct virtblk_req);
if (use_bio)
pool_size += sizeof(struct scatterlist) * sg_elems;
vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
if (!vblk->pool) {
err = -ENOMEM;
goto out_free_vq;
}
/* FIXME: How many partitions? How long is a piece of string? */ /* FIXME: How many partitions? How long is a piece of string? */
vblk->disk = alloc_disk(1 << PART_BITS); vblk->disk = alloc_disk(1 << PART_BITS);
if (!vblk->disk) { if (!vblk->disk) {
err = -ENOMEM; err = -ENOMEM;
goto out_mempool; goto out_free_vq;
} }
q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL); virtio_mq_reg.cmd_size =
sizeof(struct virtblk_req) +
sizeof(struct scatterlist) * sg_elems;
q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
if (!q) { if (!q) {
err = -ENOMEM; err = -ENOMEM;
goto out_put_disk; goto out_put_disk;
} }
if (use_bio) blk_mq_init_commands(q, virtblk_init_vbr, vblk);
blk_queue_make_request(q, virtblk_make_request);
q->queuedata = vblk; q->queuedata = vblk;
virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
@ -857,8 +673,6 @@ static int virtblk_probe(struct virtio_device *vdev)
blk_cleanup_queue(vblk->disk->queue); blk_cleanup_queue(vblk->disk->queue);
out_put_disk: out_put_disk:
put_disk(vblk->disk); put_disk(vblk->disk);
out_mempool:
mempool_destroy(vblk->pool);
out_free_vq: out_free_vq:
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
out_free_vblk: out_free_vblk:
@ -890,7 +704,6 @@ static void virtblk_remove(struct virtio_device *vdev)
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
put_disk(vblk->disk); put_disk(vblk->disk);
mempool_destroy(vblk->pool);
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
kfree(vblk); kfree(vblk);
@ -914,10 +727,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
flush_work(&vblk->config_work); flush_work(&vblk->config_work);
spin_lock_irq(vblk->disk->queue->queue_lock); blk_mq_stop_hw_queues(vblk->disk->queue);
blk_stop_queue(vblk->disk->queue);
spin_unlock_irq(vblk->disk->queue->queue_lock);
blk_sync_queue(vblk->disk->queue);
vdev->config->del_vqs(vdev); vdev->config->del_vqs(vdev);
return 0; return 0;
@ -930,11 +740,9 @@ static int virtblk_restore(struct virtio_device *vdev)
vblk->config_enable = true; vblk->config_enable = true;
ret = init_vq(vdev->priv); ret = init_vq(vdev->priv);
if (!ret) { if (!ret)
spin_lock_irq(vblk->disk->queue->queue_lock); blk_mq_start_stopped_hw_queues(vblk->disk->queue);
blk_start_queue(vblk->disk->queue);
spin_unlock_irq(vblk->disk->queue->queue_lock);
}
return ret; return ret;
} }
#endif #endif