ext4: add a new nolock flag in ext4_map_blocks
The EXT4_GET_BLOCKS_NO_LOCK flag is added to indicate that we don't need to acquire the i_data_sem lock in ext4_map_blocks. Meanwhile, it changes ext4_get_block() to not start a new journal because when we do an overwrite dio, there isn't any metadata that needs to be modified. We define a new function called ext4_get_block_write_nolock, which is used in dio overwrite nolock. This function doesn't try to acquire the i_data_sem lock and doesn't start a new journal, because it only does a lookup. CC: Tao Ma <tm@tao.ma> CC: Eric Sandeen <sandeen@redhat.com> CC: Robin Dong <hao.bigrat@gmail.com> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
fbe104942d
commit
729f52c6be
2 changed files with 51 additions and 10 deletions
|
@ -571,6 +571,8 @@ enum {
|
||||||
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
|
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
|
||||||
/* Request will not result in inode size update (used for fallocate) */
|
/* Request will not result in inode size update (used for fallocate) */
|
||||||
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
|
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
|
||||||
|
/* Do not take i_data_sem locking in ext4_map_blocks */
|
||||||
|
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Flags used by ext4_free_blocks
|
* Flags used by ext4_free_blocks
|
||||||
|
|
|
@ -544,7 +544,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||||
* Try to see if we can get the block without requesting a new
|
* Try to see if we can get the block without requesting a new
|
||||||
* file system block.
|
* file system block.
|
||||||
*/
|
*/
|
||||||
down_read((&EXT4_I(inode)->i_data_sem));
|
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
|
||||||
|
down_read((&EXT4_I(inode)->i_data_sem));
|
||||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
|
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
|
||||||
retval = ext4_ext_map_blocks(handle, inode, map, flags &
|
retval = ext4_ext_map_blocks(handle, inode, map, flags &
|
||||||
EXT4_GET_BLOCKS_KEEP_SIZE);
|
EXT4_GET_BLOCKS_KEEP_SIZE);
|
||||||
|
@ -552,7 +553,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||||
retval = ext4_ind_map_blocks(handle, inode, map, flags &
|
retval = ext4_ind_map_blocks(handle, inode, map, flags &
|
||||||
EXT4_GET_BLOCKS_KEEP_SIZE);
|
EXT4_GET_BLOCKS_KEEP_SIZE);
|
||||||
}
|
}
|
||||||
up_read((&EXT4_I(inode)->i_data_sem));
|
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
|
||||||
|
up_read((&EXT4_I(inode)->i_data_sem));
|
||||||
|
|
||||||
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
|
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
|
||||||
int ret = check_block_validity(inode, map);
|
int ret = check_block_validity(inode, map);
|
||||||
|
@ -2818,6 +2820,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
|
||||||
EXT4_GET_BLOCKS_IO_CREATE_EXT);
|
EXT4_GET_BLOCKS_IO_CREATE_EXT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
|
||||||
|
struct buffer_head *bh_result, int flags)
|
||||||
|
{
|
||||||
|
handle_t *handle = ext4_journal_current_handle();
|
||||||
|
struct ext4_map_blocks map;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
|
||||||
|
inode->i_ino, flags);
|
||||||
|
|
||||||
|
flags = EXT4_GET_BLOCKS_NO_LOCK;
|
||||||
|
|
||||||
|
map.m_lblk = iblock;
|
||||||
|
map.m_len = bh_result->b_size >> inode->i_blkbits;
|
||||||
|
|
||||||
|
ret = ext4_map_blocks(handle, inode, &map, flags);
|
||||||
|
if (ret > 0) {
|
||||||
|
map_bh(bh_result, inode->i_sb, map.m_pblk);
|
||||||
|
bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
|
||||||
|
map.m_flags;
|
||||||
|
bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||||
ssize_t size, void *private, int ret,
|
ssize_t size, void *private, int ret,
|
||||||
bool is_async)
|
bool is_async)
|
||||||
|
@ -2966,6 +2994,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||||
|
|
||||||
loff_t final_size = offset + count;
|
loff_t final_size = offset + count;
|
||||||
if (rw == WRITE && final_size <= inode->i_size) {
|
if (rw == WRITE && final_size <= inode->i_size) {
|
||||||
|
int overwrite = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We could direct write to holes and fallocate.
|
* We could direct write to holes and fallocate.
|
||||||
*
|
*
|
||||||
|
@ -3005,13 +3035,22 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||||
EXT4_I(inode)->cur_aio_dio = iocb->private;
|
EXT4_I(inode)->cur_aio_dio = iocb->private;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
if (overwrite)
|
||||||
inode->i_sb->s_bdev, iov,
|
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||||
offset, nr_segs,
|
inode->i_sb->s_bdev, iov,
|
||||||
ext4_get_block_write,
|
offset, nr_segs,
|
||||||
ext4_end_io_dio,
|
ext4_get_block_write_nolock,
|
||||||
NULL,
|
ext4_end_io_dio,
|
||||||
DIO_LOCKING);
|
NULL,
|
||||||
|
0);
|
||||||
|
else
|
||||||
|
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||||
|
inode->i_sb->s_bdev, iov,
|
||||||
|
offset, nr_segs,
|
||||||
|
ext4_get_block_write,
|
||||||
|
ext4_end_io_dio,
|
||||||
|
NULL,
|
||||||
|
DIO_LOCKING);
|
||||||
if (iocb->private)
|
if (iocb->private)
|
||||||
EXT4_I(inode)->cur_aio_dio = NULL;
|
EXT4_I(inode)->cur_aio_dio = NULL;
|
||||||
/*
|
/*
|
||||||
|
@ -3031,7 +3070,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||||
if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
|
if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
|
||||||
ext4_free_io_end(iocb->private);
|
ext4_free_io_end(iocb->private);
|
||||||
iocb->private = NULL;
|
iocb->private = NULL;
|
||||||
} else if (ret > 0 && ext4_test_inode_state(inode,
|
} else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
|
||||||
EXT4_STATE_DIO_UNWRITTEN)) {
|
EXT4_STATE_DIO_UNWRITTEN)) {
|
||||||
int err;
|
int err;
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in a new issue