new helper: iov_iter_get_pages()
iov_iter_get_pages(iter, pages, maxsize, &start) grabs references pinning the pages of up to maxsize of (contiguous) data from iter. Returns the amount of memory grabbed or -error. In case of success, the requested area begins at offset start in pages[0] and runs through pages[1], etc. Less than the requested amount might be returned - either because the contiguous area at the beginning of the iterator is smaller than requested, or because the kernel failed to pin that many pages. direct-io.c switched to using iov_iter_get_pages(). Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
3320c60b3a
commit
7b2c99d155
3 changed files with 67 additions and 73 deletions
111
fs/direct-io.c
111
fs/direct-io.c
|
@ -77,7 +77,6 @@ struct dio_submit {
|
|||
unsigned blocks_available; /* At block_in_file. changes */
|
||||
int reap_counter; /* rate limit reaping */
|
||||
sector_t final_block_in_request;/* doesn't change */
|
||||
unsigned first_block_in_page; /* doesn't change, Used only once */
|
||||
int boundary; /* prev block is at a boundary */
|
||||
get_block_t *get_block; /* block mapping function */
|
||||
dio_submit_t *submit_io; /* IO submition function */
|
||||
|
@ -98,19 +97,14 @@ struct dio_submit {
|
|||
sector_t cur_page_block; /* Where it starts */
|
||||
loff_t cur_page_fs_offset; /* Offset in file */
|
||||
|
||||
/*
|
||||
* Page fetching state. These variables belong to dio_refill_pages().
|
||||
*/
|
||||
int curr_page; /* changes */
|
||||
int total_pages; /* doesn't change */
|
||||
unsigned long curr_user_address;/* changes */
|
||||
|
||||
struct iov_iter *iter;
|
||||
/*
|
||||
* Page queue. These variables belong to dio_refill_pages() and
|
||||
* dio_get_page().
|
||||
*/
|
||||
unsigned head; /* next page to process */
|
||||
unsigned tail; /* last valid page + 1 */
|
||||
size_t from, to;
|
||||
};
|
||||
|
||||
/* dio_state communicated between submission path and end_io */
|
||||
|
@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
|
|||
*/
|
||||
static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
|
||||
{
|
||||
int ret;
|
||||
int nr_pages;
|
||||
ssize_t ret;
|
||||
|
||||
nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
|
||||
ret = get_user_pages_fast(
|
||||
sdio->curr_user_address, /* Where from? */
|
||||
nr_pages, /* How many pages? */
|
||||
dio->rw == READ, /* Write to memory? */
|
||||
&dio->pages[0]); /* Put results here */
|
||||
ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
|
||||
&sdio->from);
|
||||
|
||||
if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
|
||||
struct page *page = ZERO_PAGE(0);
|
||||
|
@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
|
|||
dio->pages[0] = page;
|
||||
sdio->head = 0;
|
||||
sdio->tail = 1;
|
||||
ret = 0;
|
||||
goto out;
|
||||
sdio->from = 0;
|
||||
sdio->to = PAGE_SIZE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ret >= 0) {
|
||||
sdio->curr_user_address += ret * PAGE_SIZE;
|
||||
sdio->curr_page += ret;
|
||||
iov_iter_advance(sdio->iter, ret);
|
||||
ret += sdio->from;
|
||||
sdio->head = 0;
|
||||
sdio->tail = ret;
|
||||
ret = 0;
|
||||
sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
|
||||
return 0;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -208,8 +198,9 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
|
|||
* L1 cache.
|
||||
*/
|
||||
static inline struct page *dio_get_page(struct dio *dio,
|
||||
struct dio_submit *sdio)
|
||||
struct dio_submit *sdio, size_t *from, size_t *to)
|
||||
{
|
||||
int n;
|
||||
if (dio_pages_present(sdio) == 0) {
|
||||
int ret;
|
||||
|
||||
|
@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio,
|
|||
return ERR_PTR(ret);
|
||||
BUG_ON(dio_pages_present(sdio) == 0);
|
||||
}
|
||||
return dio->pages[sdio->head++];
|
||||
n = sdio->head++;
|
||||
*from = n ? 0 : sdio->from;
|
||||
*to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
|
||||
return dio->pages[n];
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
|
|||
*/
|
||||
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
|
||||
{
|
||||
while (dio_pages_present(sdio))
|
||||
page_cache_release(dio_get_page(dio, sdio));
|
||||
while (sdio->head < sdio->tail)
|
||||
page_cache_release(dio->pages[sdio->head++]);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
|
|||
struct buffer_head *map_bh)
|
||||
{
|
||||
const unsigned blkbits = sdio->blkbits;
|
||||
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
|
||||
struct page *page;
|
||||
unsigned block_in_page;
|
||||
int ret = 0;
|
||||
|
||||
/* The I/O can start at any block offset within the first page */
|
||||
block_in_page = sdio->first_block_in_page;
|
||||
|
||||
while (sdio->block_in_file < sdio->final_block_in_request) {
|
||||
page = dio_get_page(dio, sdio);
|
||||
struct page *page;
|
||||
size_t from, to;
|
||||
page = dio_get_page(dio, sdio, &from, &to);
|
||||
if (IS_ERR(page)) {
|
||||
ret = PTR_ERR(page);
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (block_in_page < blocks_per_page) {
|
||||
unsigned offset_in_page = block_in_page << blkbits;
|
||||
while (from < to) {
|
||||
unsigned this_chunk_bytes; /* # of bytes mapped */
|
||||
unsigned this_chunk_blocks; /* # of blocks */
|
||||
unsigned u;
|
||||
|
@ -999,10 +988,9 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
|
|||
page_cache_release(page);
|
||||
goto out;
|
||||
}
|
||||
zero_user(page, block_in_page << blkbits,
|
||||
1 << blkbits);
|
||||
zero_user(page, from, 1 << blkbits);
|
||||
sdio->block_in_file++;
|
||||
block_in_page++;
|
||||
from += 1 << blkbits;
|
||||
dio->result += 1 << blkbits;
|
||||
goto next_block;
|
||||
}
|
||||
|
@ -1020,7 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
|
|||
* can add to this page
|
||||
*/
|
||||
this_chunk_blocks = sdio->blocks_available;
|
||||
u = (PAGE_SIZE - offset_in_page) >> blkbits;
|
||||
u = (to - from) >> blkbits;
|
||||
if (this_chunk_blocks > u)
|
||||
this_chunk_blocks = u;
|
||||
u = sdio->final_block_in_request - sdio->block_in_file;
|
||||
|
@ -1032,7 +1020,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
|
|||
if (this_chunk_blocks == sdio->blocks_available)
|
||||
sdio->boundary = buffer_boundary(map_bh);
|
||||
ret = submit_page_section(dio, sdio, page,
|
||||
offset_in_page,
|
||||
from,
|
||||
this_chunk_bytes,
|
||||
sdio->next_block_for_io,
|
||||
map_bh);
|
||||
|
@ -1043,9 +1031,9 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
|
|||
sdio->next_block_for_io += this_chunk_blocks;
|
||||
|
||||
sdio->block_in_file += this_chunk_blocks;
|
||||
block_in_page += this_chunk_blocks;
|
||||
from += this_chunk_bytes;
|
||||
dio->result += this_chunk_bytes;
|
||||
sdio->blocks_available -= this_chunk_blocks;
|
||||
dio->result += this_chunk_blocks << blkbits;
|
||||
next_block:
|
||||
BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
|
||||
if (sdio->block_in_file == sdio->final_block_in_request)
|
||||
|
@ -1054,7 +1042,6 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
|
|||
|
||||
/* Drop the ref which was taken in get_user_pages() */
|
||||
page_cache_release(page);
|
||||
block_in_page = 0;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
|
@ -1122,7 +1109,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
|
|||
struct dio *dio;
|
||||
struct dio_submit sdio = { 0, };
|
||||
unsigned long user_addr;
|
||||
size_t bytes;
|
||||
struct buffer_head map_bh = { 0, };
|
||||
struct blk_plug plug;
|
||||
unsigned long align = offset | iov_iter_alignment(iter);
|
||||
|
@ -1234,6 +1220,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
|
|||
spin_lock_init(&dio->bio_lock);
|
||||
dio->refcount = 1;
|
||||
|
||||
sdio.iter = iter;
|
||||
sdio.final_block_in_request =
|
||||
(offset + iov_iter_count(iter)) >> blkbits;
|
||||
|
||||
/*
|
||||
* In case of non-aligned buffers, we may need 2 more
|
||||
* pages since we need to zero out first and last block.
|
||||
|
@ -1250,34 +1240,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
|
|||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
for (seg = 0; seg < iter->nr_segs; seg++) {
|
||||
user_addr = (unsigned long)iter->iov[seg].iov_base;
|
||||
sdio.size += bytes = iter->iov[seg].iov_len;
|
||||
|
||||
/* Index into the first page of the first block */
|
||||
sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
|
||||
sdio.final_block_in_request = sdio.block_in_file +
|
||||
(bytes >> blkbits);
|
||||
/* Page fetching state */
|
||||
sdio.head = 0;
|
||||
sdio.tail = 0;
|
||||
sdio.curr_page = 0;
|
||||
|
||||
sdio.total_pages = 0;
|
||||
if (user_addr & (PAGE_SIZE-1)) {
|
||||
sdio.total_pages++;
|
||||
bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
|
||||
}
|
||||
sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
sdio.curr_user_address = user_addr;
|
||||
|
||||
retval = do_direct_IO(dio, &sdio, &map_bh);
|
||||
|
||||
if (retval) {
|
||||
dio_cleanup(dio, &sdio);
|
||||
break;
|
||||
}
|
||||
} /* end iovec loop */
|
||||
retval = do_direct_IO(dio, &sdio, &map_bh);
|
||||
if (retval)
|
||||
dio_cleanup(dio, &sdio);
|
||||
|
||||
if (retval == -ENOTBLK) {
|
||||
/*
|
||||
|
|
|
@ -71,6 +71,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
|
|||
unsigned long iov_iter_alignment(const struct iov_iter *i);
|
||||
void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
|
||||
unsigned long nr_segs, size_t count);
|
||||
ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
|
||||
size_t maxsize, size_t *start);
|
||||
|
||||
static inline size_t iov_iter_count(struct iov_iter *i)
|
||||
{
|
||||
|
|
|
@ -235,3 +235,30 @@ void iov_iter_init(struct iov_iter *i, int direction,
|
|||
i->count = count;
|
||||
}
|
||||
EXPORT_SYMBOL(iov_iter_init);
|
||||
|
||||
/*
 * iov_iter_get_pages - pin user pages backing the start of an iov_iter.
 *
 * Pins up to @maxsize bytes of the first contiguous segment of @i into
 * @pages via get_user_pages_fast(); *@start receives the sub-page offset
 * of the data within pages[0]. Returns the number of bytes covered by
 * the pinned pages (possibly less than requested) or a negative errno.
 * Does NOT advance the iterator; callers advance it separately.
 *
 * NOTE(review): the '|' and '||||' lines below are diff-view filler from
 * the web rendering of this commit, not source text.
 */
ssize_t iov_iter_get_pages(struct iov_iter *i,
|
||||
struct page **pages, size_t maxsize,
|
||||
size_t *start)
|
||||
{
|
||||
/* Bytes of the current iovec segment already consumed. */
size_t offset = i->iov_offset;
|
||||
const struct iovec *iov = i->iov;
|
||||
size_t len;
|
||||
unsigned long addr;
|
||||
int n;
|
||||
int res;
|
||||
|
||||
/* Only the first segment is considered; clamp to what remains in the
 * iterator and to the caller's maxsize. */
len = iov->iov_len - offset;
|
||||
if (len > i->count)
|
||||
len = i->count;
|
||||
if (len > maxsize)
|
||||
len = maxsize;
|
||||
addr = (unsigned long)iov->iov_base + offset;
|
||||
/* *start = offset of the data within the first page; fold it into len
 * so the page count below covers the unaligned head as well. */
len += *start = addr & (PAGE_SIZE - 1);
|
||||
/* Round the user address down to a page boundary for pinning. */
addr &= ~(PAGE_SIZE - 1);
|
||||
n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
/* write=true when the iter is a read destination, i.e. the kernel will
 * store into these pages (i->type lacks WRITE) — presumably matching
 * the dio->rw == READ flag this replaces; confirm against callers. */
res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
|
||||
if (unlikely(res < 0))
|
||||
return res;
|
||||
/* Full pin: report the clamped length; partial pin: report only the
 * bytes actually covered — in both cases minus the head offset. */
return (res == n ? len : res * PAGE_SIZE) - *start;
|
||||
}
|
||||
EXPORT_SYMBOL(iov_iter_get_pages);
|
||||
|
|
Loading…
Reference in a new issue