ext4: journal credits reservation fixes for DIO, fallocate
DIO and fallocate credit calculation is different than writepage, as they do start a new journal right for each call to ext4_get_blocks_wrap(). This patch uses the helper function in DIO and fallocate case, passing a flag indicating that the modified data are contigous thus could account less indirect/index blocks. This patch also fixed the journal credit reservation for direct I/O (DIO). Previously the estimated credits for DIO only was calculated for non-extent files, which was not enough if the file is extent-based. Also fixed was fallocate double-counting credits for modifying the the superblock. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
ee12b63068
commit
f3bd1f3fa8
3 changed files with 30 additions and 27 deletions
|
@ -1073,6 +1073,7 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
|
||||||
extern void ext4_set_aops(struct inode *inode);
|
extern void ext4_set_aops(struct inode *inode);
|
||||||
extern int ext4_writepage_trans_blocks(struct inode *);
|
extern int ext4_writepage_trans_blocks(struct inode *);
|
||||||
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
|
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
|
||||||
|
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
|
||||||
extern int ext4_block_truncate_page(handle_t *handle,
|
extern int ext4_block_truncate_page(handle_t *handle,
|
||||||
struct address_space *mapping, loff_t from);
|
struct address_space *mapping, loff_t from);
|
||||||
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
|
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page);
|
||||||
|
|
|
@ -1758,7 +1758,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
|
||||||
{
|
{
|
||||||
if (path) {
|
if (path) {
|
||||||
int depth = ext_depth(inode);
|
int depth = ext_depth(inode);
|
||||||
int ret;
|
int ret = 0;
|
||||||
|
|
||||||
/* probably there is space in leaf? */
|
/* probably there is space in leaf? */
|
||||||
if (le16_to_cpu(path[depth].p_hdr->eh_entries)
|
if (le16_to_cpu(path[depth].p_hdr->eh_entries)
|
||||||
|
@ -1777,7 +1777,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ext4_meta_trans_blocks(inode, num, 1);
|
return ext4_chunk_trans_blocks(inode, num);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2810,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode)
|
||||||
/*
|
/*
|
||||||
* probably first extent we're gonna free will be last in block
|
* probably first extent we're gonna free will be last in block
|
||||||
*/
|
*/
|
||||||
err = ext4_writepage_trans_blocks(inode) + 3;
|
err = ext4_writepage_trans_blocks(inode);
|
||||||
handle = ext4_journal_start(inode, err);
|
handle = ext4_journal_start(inode, err);
|
||||||
if (IS_ERR(handle))
|
if (IS_ERR(handle))
|
||||||
return;
|
return;
|
||||||
|
@ -2923,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
|
||||||
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
|
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
|
||||||
- block;
|
- block;
|
||||||
/*
|
/*
|
||||||
* credits to insert 1 extent into extent tree + buffers to be able to
|
* credits to insert 1 extent into extent tree
|
||||||
* modify 1 super block, 1 block bitmap and 1 group descriptor.
|
|
||||||
*/
|
*/
|
||||||
credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
|
credits = ext4_chunk_trans_blocks(inode, max_blocks);
|
||||||
mutex_lock(&inode->i_mutex);
|
mutex_lock(&inode->i_mutex);
|
||||||
retry:
|
retry:
|
||||||
while (ret >= 0 && ret < max_blocks) {
|
while (ret >= 0 && ret < max_blocks) {
|
||||||
|
|
|
@ -1044,18 +1044,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
|
||||||
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Maximum number of blocks we map for direct IO at once. */
|
|
||||||
#define DIO_MAX_BLOCKS 4096
|
|
||||||
/*
|
|
||||||
* Number of credits we need for writing DIO_MAX_BLOCKS:
|
|
||||||
* We need sb + group descriptor + bitmap + inode -> 4
|
|
||||||
* For B blocks with A block pointers per block we need:
|
|
||||||
* 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
|
|
||||||
* If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
|
|
||||||
*/
|
|
||||||
#define DIO_CREDITS 25
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The ext4_get_blocks_wrap() function try to look up the requested blocks,
|
* The ext4_get_blocks_wrap() function try to look up the requested blocks,
|
||||||
* and returns if the blocks are already mapped.
|
* and returns if the blocks are already mapped.
|
||||||
|
@ -1167,19 +1155,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Maximum number of blocks we map for direct IO at once. */
|
||||||
|
#define DIO_MAX_BLOCKS 4096
|
||||||
|
|
||||||
static int ext4_get_block(struct inode *inode, sector_t iblock,
|
static int ext4_get_block(struct inode *inode, sector_t iblock,
|
||||||
struct buffer_head *bh_result, int create)
|
struct buffer_head *bh_result, int create)
|
||||||
{
|
{
|
||||||
handle_t *handle = ext4_journal_current_handle();
|
handle_t *handle = ext4_journal_current_handle();
|
||||||
int ret = 0, started = 0;
|
int ret = 0, started = 0;
|
||||||
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
|
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
|
||||||
|
int dio_credits;
|
||||||
|
|
||||||
if (create && !handle) {
|
if (create && !handle) {
|
||||||
/* Direct IO write... */
|
/* Direct IO write... */
|
||||||
if (max_blocks > DIO_MAX_BLOCKS)
|
if (max_blocks > DIO_MAX_BLOCKS)
|
||||||
max_blocks = DIO_MAX_BLOCKS;
|
max_blocks = DIO_MAX_BLOCKS;
|
||||||
handle = ext4_journal_start(inode, DIO_CREDITS +
|
dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
|
||||||
2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
|
handle = ext4_journal_start(inode, dio_credits);
|
||||||
if (IS_ERR(handle)) {
|
if (IS_ERR(handle)) {
|
||||||
ret = PTR_ERR(handle);
|
ret = PTR_ERR(handle);
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -2243,7 +2235,7 @@ static int ext4_da_writepage(struct page *page,
|
||||||
* for DIO, writepages, and truncate
|
* for DIO, writepages, and truncate
|
||||||
*/
|
*/
|
||||||
#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS
|
#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS
|
||||||
#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS
|
#define EXT4_MAX_WRITEBACK_CREDITS 25
|
||||||
|
|
||||||
static int ext4_da_writepages(struct address_space *mapping,
|
static int ext4_da_writepages(struct address_space *mapping,
|
||||||
struct writeback_control *wbc)
|
struct writeback_control *wbc)
|
||||||
|
@ -4441,7 +4433,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Calulate the total number of credits to reserve to fit
|
* Calulate the total number of credits to reserve to fit
|
||||||
* the modification of a single pages into a single transaction
|
* the modification of a single pages into a single transaction,
|
||||||
|
* which may include multiple chunks of block allocations.
|
||||||
*
|
*
|
||||||
* This could be called via ext4_write_begin() or later
|
* This could be called via ext4_write_begin() or later
|
||||||
* ext4_da_writepages() in delalyed allocation case.
|
* ext4_da_writepages() in delalyed allocation case.
|
||||||
|
@ -4449,11 +4442,6 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||||
* In both case it's possible that we could allocating multiple
|
* In both case it's possible that we could allocating multiple
|
||||||
* chunks of blocks. We need to consider the worse case, when
|
* chunks of blocks. We need to consider the worse case, when
|
||||||
* one new block per extent.
|
* one new block per extent.
|
||||||
*
|
|
||||||
* For Direct IO and fallocate, the journal credits reservation
|
|
||||||
* is based on one single extent allocation, so they could use
|
|
||||||
* EXT4_DATA_TRANS_BLOCKS to get the needed credit to log a single
|
|
||||||
* chunk of allocation needs.
|
|
||||||
*/
|
*/
|
||||||
int ext4_writepage_trans_blocks(struct inode *inode)
|
int ext4_writepage_trans_blocks(struct inode *inode)
|
||||||
{
|
{
|
||||||
|
@ -4467,6 +4455,21 @@ int ext4_writepage_trans_blocks(struct inode *inode)
|
||||||
ret += bpp;
|
ret += bpp;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Calculate the journal credits for a chunk of data modification.
|
||||||
|
*
|
||||||
|
* This is called from DIO, fallocate or whoever calling
|
||||||
|
* ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks.
|
||||||
|
*
|
||||||
|
* journal buffers for data blocks are not included here, as DIO
|
||||||
|
* and fallocate do no need to journal data buffers.
|
||||||
|
*/
|
||||||
|
int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
|
||||||
|
{
|
||||||
|
return ext4_meta_trans_blocks(inode, nrblocks, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The caller must have previously called ext4_reserve_inode_write().
|
* The caller must have previously called ext4_reserve_inode_write().
|
||||||
* Give this, we know that the caller already has write access to iloc->bh.
|
* Give this, we know that the caller already has write access to iloc->bh.
|
||||||
|
|
Loading…
Reference in a new issue