Add 'discard' request handling
Some block devices benefit from a hint that they can forget the contents of certain sectors. Add basic support for this to the block core, along with a 'blkdev_issue_discard()' helper function which issues such requests. The caller doesn't get to provide an end_io function, since blkdev_issue_discard() will automatically split the request up into multiple bios if appropriate. Neither does the function wait for completion -- it's expected that callers won't care about when, or even _if_, the request completes. It's only a hint to the device anyway. By definition, the file system doesn't _care_ about these sectors any more. [With feedback from OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> and Jens Axboe <jens.axboe@oracle.com>] Signed-off-by: David Woodhouse <David.Woodhouse@intel.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
This commit is contained in:
parent
d628eaef31
commit
fb2dce862d
6 changed files with 130 additions and 11 deletions
|
@ -315,3 +315,72 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
|
|||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blkdev_issue_flush);
|
||||
|
||||
static void blkdev_discard_end_io(struct bio *bio, int err)
|
||||
{
|
||||
if (err) {
|
||||
if (err == -EOPNOTSUPP)
|
||||
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
|
||||
clear_bit(BIO_UPTODATE, &bio->bi_flags);
|
||||
}
|
||||
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkdev_issue_discard - queue a discard
|
||||
* @bdev: blockdev to issue discard for
|
||||
* @sector: start sector
|
||||
* @nr_sects: number of sectors to discard
|
||||
*
|
||||
* Description:
|
||||
* Issue a discard request for the sectors in question. Does not wait.
|
||||
*/
|
||||
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
unsigned nr_sects)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct bio *bio;
|
||||
int ret = 0;
|
||||
|
||||
if (bdev->bd_disk == NULL)
|
||||
return -ENXIO;
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (!q)
|
||||
return -ENXIO;
|
||||
|
||||
if (!q->prepare_discard_fn)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (nr_sects && !ret) {
|
||||
bio = bio_alloc(GFP_KERNEL, 0);
|
||||
if (!bio)
|
||||
return -ENOMEM;
|
||||
|
||||
bio->bi_end_io = blkdev_discard_end_io;
|
||||
bio->bi_bdev = bdev;
|
||||
|
||||
bio->bi_sector = sector;
|
||||
|
||||
if (nr_sects > q->max_hw_sectors) {
|
||||
bio->bi_size = q->max_hw_sectors << 9;
|
||||
nr_sects -= q->max_hw_sectors;
|
||||
sector += q->max_hw_sectors;
|
||||
} else {
|
||||
bio->bi_size = nr_sects << 9;
|
||||
nr_sects = 0;
|
||||
}
|
||||
bio_get(bio);
|
||||
submit_bio(WRITE_DISCARD, bio);
|
||||
|
||||
/* Check if it failed immediately */
|
||||
if (bio_flagged(bio, BIO_EOPNOTSUPP))
|
||||
ret = -EOPNOTSUPP;
|
||||
else if (!bio_flagged(bio, BIO_UPTODATE))
|
||||
ret = -EIO;
|
||||
bio_put(bio);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(blkdev_issue_discard);
|
||||
|
|
|
@ -1079,6 +1079,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
|
|||
*/
|
||||
if (unlikely(bio_barrier(bio)))
|
||||
req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
|
||||
if (unlikely(bio_discard(bio))) {
|
||||
req->cmd_flags |= (REQ_SOFTBARRIER | REQ_DISCARD);
|
||||
req->q->prepare_discard_fn(req->q, req);
|
||||
}
|
||||
|
||||
if (bio_sync(bio))
|
||||
req->cmd_flags |= REQ_RW_SYNC;
|
||||
|
@ -1095,7 +1099,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
|
|||
static int __make_request(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
struct request *req;
|
||||
int el_ret, nr_sectors, barrier, err;
|
||||
int el_ret, nr_sectors, barrier, discard, err;
|
||||
const unsigned short prio = bio_prio(bio);
|
||||
const int sync = bio_sync(bio);
|
||||
int rw_flags;
|
||||
|
@ -1115,6 +1119,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
|
|||
goto end_io;
|
||||
}
|
||||
|
||||
discard = bio_discard(bio);
|
||||
if (unlikely(discard) && !q->prepare_discard_fn) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
|
||||
if (unlikely(barrier) || elv_queue_empty(q))
|
||||
|
@ -1405,7 +1415,8 @@ static inline void __generic_make_request(struct bio *bio)
|
|||
|
||||
if (bio_check_eod(bio, nr_sectors))
|
||||
goto end_io;
|
||||
if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
|
||||
if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
|
||||
(bio_discard(bio) && !q->prepare_discard_fn)) {
|
||||
err = -EOPNOTSUPP;
|
||||
goto end_io;
|
||||
}
|
||||
|
@ -1487,7 +1498,6 @@ void submit_bio(int rw, struct bio *bio)
|
|||
* go through the normal accounting stuff before submission.
|
||||
*/
|
||||
if (bio_has_data(bio)) {
|
||||
|
||||
if (rw & WRITE) {
|
||||
count_vm_events(PGPGOUT, count);
|
||||
} else {
|
||||
|
@ -1881,7 +1891,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
|
|||
struct request_queue *q = rq->q;
|
||||
unsigned long flags = 0UL;
|
||||
|
||||
if (bio_has_data(rq->bio)) {
|
||||
if (bio_has_data(rq->bio) || blk_discard_rq(rq)) {
|
||||
if (__end_that_request_first(rq, error, nr_bytes))
|
||||
return 1;
|
||||
|
||||
|
@ -1939,7 +1949,7 @@ EXPORT_SYMBOL_GPL(blk_end_request);
|
|||
**/
|
||||
int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
|
||||
{
|
||||
if (bio_has_data(rq->bio) &&
|
||||
if ((bio_has_data(rq->bio) || blk_discard_rq(rq)) &&
|
||||
__end_that_request_first(rq, error, nr_bytes))
|
||||
return 1;
|
||||
|
||||
|
@ -2012,12 +2022,14 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
|
|||
we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
|
||||
rq->cmd_flags |= (bio->bi_rw & 3);
|
||||
|
||||
if (bio_has_data(bio)) {
|
||||
rq->nr_phys_segments = bio_phys_segments(q, bio);
|
||||
rq->nr_hw_segments = bio_hw_segments(q, bio);
|
||||
rq->buffer = bio_data(bio);
|
||||
}
|
||||
rq->current_nr_sectors = bio_cur_sectors(bio);
|
||||
rq->hard_cur_sectors = rq->current_nr_sectors;
|
||||
rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
|
||||
rq->buffer = bio_data(bio);
|
||||
rq->data_len = bio->bi_size;
|
||||
|
||||
rq->bio = rq->biotail = bio;
|
||||
|
|
|
@ -32,6 +32,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_queue_prep_rq);
|
||||
|
||||
/**
|
||||
* blk_queue_set_discard - set a discard_sectors function for queue
|
||||
* @q: queue
|
||||
* @dfn: prepare_discard function
|
||||
*
|
||||
* It's possible for a queue to register a discard callback which is used
|
||||
* to transform a discard request into the appropriate type for the
|
||||
* hardware. If none is registered, then discard requests are failed
|
||||
* with %EOPNOTSUPP.
|
||||
*
|
||||
*/
|
||||
void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
|
||||
{
|
||||
q->prepare_discard_fn = dfn;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_set_discard);
|
||||
|
||||
/**
|
||||
* blk_queue_merge_bvec - set a merge_bvec function for queue
|
||||
* @q: queue
|
||||
|
|
|
@ -149,6 +149,8 @@ struct bio {
|
|||
* bit 2 -- barrier
|
||||
* bit 3 -- fail fast, don't want low level driver retries
|
||||
* bit 4 -- synchronous I/O hint: the block layer will unplug immediately
|
||||
* bit 5 -- metadata request
|
||||
* bit 6 -- discard sectors
|
||||
*/
|
||||
#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */
|
||||
#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */
|
||||
|
@ -156,6 +158,7 @@ struct bio {
|
|||
#define BIO_RW_FAILFAST 3
|
||||
#define BIO_RW_SYNC 4
|
||||
#define BIO_RW_META 5
|
||||
#define BIO_RW_DISCARD 6
|
||||
|
||||
/*
|
||||
* upper 16 bits of bi_rw define the io priority of this bio
|
||||
|
@ -186,13 +189,14 @@ struct bio {
|
|||
#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
|
||||
#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META))
|
||||
#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio))
|
||||
#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD))
|
||||
|
||||
static inline unsigned int bio_cur_sectors(struct bio *bio)
|
||||
{
|
||||
if (bio->bi_vcnt)
|
||||
return bio_iovec(bio)->bv_len >> 9;
|
||||
|
||||
return 0;
|
||||
else /* dataless requests such as discard */
|
||||
return bio->bi_size >> 9;
|
||||
}
|
||||
|
||||
static inline void *bio_data(struct bio *bio)
|
||||
|
|
|
@ -89,6 +89,7 @@ enum {
|
|||
enum rq_flag_bits {
|
||||
__REQ_RW, /* not set, read. set, write */
|
||||
__REQ_FAILFAST, /* no low level driver retries */
|
||||
__REQ_DISCARD, /* request to discard sectors */
|
||||
__REQ_SORTED, /* elevator knows about this request */
|
||||
__REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
|
||||
__REQ_HARDBARRIER, /* may not be passed by drive either */
|
||||
|
@ -111,6 +112,7 @@ enum rq_flag_bits {
|
|||
};
|
||||
|
||||
#define REQ_RW (1 << __REQ_RW)
|
||||
#define REQ_DISCARD (1 << __REQ_DISCARD)
|
||||
#define REQ_FAILFAST (1 << __REQ_FAILFAST)
|
||||
#define REQ_SORTED (1 << __REQ_SORTED)
|
||||
#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
|
||||
|
@ -252,6 +254,7 @@ typedef void (request_fn_proc) (struct request_queue *q);
|
|||
typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
|
||||
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
|
||||
typedef void (unplug_fn) (struct request_queue *);
|
||||
typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
|
||||
|
||||
struct bio_vec;
|
||||
struct bvec_merge_data {
|
||||
|
@ -307,6 +310,7 @@ struct request_queue
|
|||
make_request_fn *make_request_fn;
|
||||
prep_rq_fn *prep_rq_fn;
|
||||
unplug_fn *unplug_fn;
|
||||
prepare_discard_fn *prepare_discard_fn;
|
||||
merge_bvec_fn *merge_bvec_fn;
|
||||
prepare_flush_fn *prepare_flush_fn;
|
||||
softirq_done_fn *softirq_done_fn;
|
||||
|
@ -546,6 +550,7 @@ enum {
|
|||
#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
|
||||
#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
|
||||
#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
|
||||
#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
|
||||
#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
|
||||
#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
|
||||
/* rq->queuelist of dequeued request must be list_empty() */
|
||||
|
@ -796,6 +801,7 @@ extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
|
|||
extern void blk_queue_dma_alignment(struct request_queue *, int);
|
||||
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
|
||||
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
|
||||
extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
|
||||
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
|
||||
extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
|
||||
extern int blk_do_ordered(struct request_queue *, struct request **);
|
||||
|
@ -837,6 +843,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
|
|||
}
|
||||
|
||||
extern int blkdev_issue_flush(struct block_device *, sector_t *);
|
||||
extern int blkdev_issue_discard(struct block_device *, sector_t sector,
|
||||
unsigned nr_sects);
|
||||
|
||||
static inline int sb_issue_discard(struct super_block *sb,
|
||||
sector_t block, unsigned nr_blocks)
|
||||
{
|
||||
block <<= (sb->s_blocksize_bits - 9);
|
||||
nr_blocks <<= (sb->s_blocksize_bits - 9);
|
||||
return blkdev_issue_discard(sb->s_bdev, block, nr_blocks);
|
||||
}
|
||||
|
||||
/*
|
||||
* command filter functions
|
||||
|
|
|
@ -86,7 +86,8 @@ extern int dir_notify_enable;
|
|||
#define READ_META (READ | (1 << BIO_RW_META))
|
||||
#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
|
||||
#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC))
|
||||
#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
|
||||
#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
|
||||
#define WRITE_DISCARD (WRITE | (1 << BIO_RW_DISCARD))
|
||||
|
||||
#define SEL_IN 1
|
||||
#define SEL_OUT 2
|
||||
|
|
Loading…
Reference in a new issue