ocfs2: Abstract ocfs2_extent_tree in b-tree operations.
The old extent tree operations assume that the ocfs2_extent_list in ocfs2_dinode is the tree root. Since xattrs will also use an ocfs2_extent_list to store large values for an xattr entry, we refactor the tree operations so that xattrs can use them directly. The refactoring consists of 4 steps: 1. Abstract the set/get of last_eb_blk and update_clusters, since they may be stored in different locations for dinode and xattr. 2. Add a new structure named ocfs2_extent_tree to indicate the extent tree the operation will work on. 3. Remove all uses of fe_bh and di, and use root_bh and root_el in the extent tree instead. So now every fe_bh is replaced with et->root_bh, and el with root_el accordingly. 4. Make ocfs2_lock_allocators generic. Until now it could only be used for file extend allocation, but the whole function is also useful when we want to store large EAs. Note: This patch doesn't touch ocfs2_commit_truncate(), since it is not used for anything other than truncating inode data btrees. Signed-off-by: Tao Ma <tao.ma@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
This commit is contained in:
parent
811f933df1
commit
e7d4cb6bc1
8 changed files with 456 additions and 288 deletions
508
fs/ocfs2/alloc.c
508
fs/ocfs2/alloc.c
File diff suppressed because it is too large
Load diff
|
@ -26,28 +26,37 @@
|
|||
#ifndef OCFS2_ALLOC_H
|
||||
#define OCFS2_ALLOC_H
|
||||
|
||||
enum ocfs2_extent_tree_type {
|
||||
OCFS2_DINODE_EXTENT = 0,
|
||||
};
|
||||
|
||||
struct ocfs2_alloc_context;
|
||||
int ocfs2_insert_extent(struct ocfs2_super *osb,
|
||||
handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct buffer_head *fe_bh,
|
||||
struct buffer_head *root_bh,
|
||||
u32 cpos,
|
||||
u64 start_blk,
|
||||
u32 new_clusters,
|
||||
u8 flags,
|
||||
struct ocfs2_alloc_context *meta_ac);
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
enum ocfs2_extent_tree_type et_type);
|
||||
struct ocfs2_cached_dealloc_ctxt;
|
||||
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
|
||||
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *root_bh,
|
||||
handle_t *handle, u32 cpos, u32 len, u32 phys,
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc);
|
||||
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc,
|
||||
enum ocfs2_extent_tree_type et_type);
|
||||
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *root_bh,
|
||||
u32 cpos, u32 len, handle_t *handle,
|
||||
struct ocfs2_alloc_context *meta_ac,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc);
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc,
|
||||
enum ocfs2_extent_tree_type et_type);
|
||||
int ocfs2_num_free_extents(struct ocfs2_super *osb,
|
||||
struct inode *inode,
|
||||
struct buffer_head *bh);
|
||||
struct buffer_head *root_bh,
|
||||
enum ocfs2_extent_tree_type et_type);
|
||||
|
||||
/*
|
||||
* how many new metadata chunks would an allocation need at maximum?
|
||||
*
|
||||
|
|
|
@ -1278,7 +1278,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
|
|||
} else if (unwritten) {
|
||||
ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
|
||||
wc->w_handle, cpos, 1, phys,
|
||||
meta_ac, &wc->w_dealloc);
|
||||
meta_ac, &wc->w_dealloc,
|
||||
OCFS2_DINODE_EXTENT);
|
||||
if (ret < 0) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
|
@ -1712,7 +1713,13 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
|
|||
* ocfs2_lock_allocators(). It greatly over-estimates
|
||||
* the work to be done.
|
||||
*/
|
||||
ret = ocfs2_lock_allocators(inode, wc->w_di_bh,
|
||||
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
|
||||
" clusters_to_add = %u, extents_to_split = %u\n",
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno,
|
||||
(long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
|
||||
clusters_to_alloc, extents_to_split);
|
||||
|
||||
ret = ocfs2_lock_allocators(inode, wc->w_di_bh, &di->id2.i_list,
|
||||
clusters_to_alloc, extents_to_split,
|
||||
&data_ac, &meta_ac);
|
||||
if (ret) {
|
||||
|
|
|
@ -1306,7 +1306,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
|
|||
* related blocks have been journaled already.
|
||||
*/
|
||||
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
|
||||
NULL);
|
||||
NULL, OCFS2_DINODE_EXTENT);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out_commit;
|
||||
|
@ -1338,7 +1338,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
|
|||
blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
|
||||
|
||||
ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
|
||||
len, 0, NULL);
|
||||
len, 0, NULL, OCFS2_DINODE_EXTENT);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out_commit;
|
||||
|
@ -1481,7 +1481,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
|
|||
if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
|
||||
spin_unlock(&OCFS2_I(dir)->ip_lock);
|
||||
num_free_extents = ocfs2_num_free_extents(osb, dir,
|
||||
parent_fe_bh);
|
||||
parent_fe_bh,
|
||||
OCFS2_DINODE_EXTENT);
|
||||
if (num_free_extents < 0) {
|
||||
status = num_free_extents;
|
||||
mlog_errno(status);
|
||||
|
|
104
fs/ocfs2/file.c
104
fs/ocfs2/file.c
|
@ -521,7 +521,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
if (mark_unwritten)
|
||||
flags = OCFS2_EXT_UNWRITTEN;
|
||||
|
||||
free_extents = ocfs2_num_free_extents(osb, inode, fe_bh);
|
||||
free_extents = ocfs2_num_free_extents(osb, inode, fe_bh,
|
||||
OCFS2_DINODE_EXTENT);
|
||||
if (free_extents < 0) {
|
||||
status = free_extents;
|
||||
mlog_errno(status);
|
||||
|
@ -570,7 +571,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
|
||||
status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
|
||||
*logical_offset, block, num_bits,
|
||||
flags, meta_ac);
|
||||
flags, meta_ac, OCFS2_DINODE_EXTENT);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
|
@ -599,92 +600,6 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* For a given allocation, determine which allocators will need to be
|
||||
* accessed, and lock them, reserving the appropriate number of bits.
|
||||
*
|
||||
* Sparse file systems call this from ocfs2_write_begin_nolock()
|
||||
* and ocfs2_allocate_unwritten_extents().
|
||||
*
|
||||
* File systems which don't support holes call this from
|
||||
* ocfs2_extend_allocation().
|
||||
*/
|
||||
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *di_bh,
|
||||
u32 clusters_to_add, u32 extents_to_split,
|
||||
struct ocfs2_alloc_context **data_ac,
|
||||
struct ocfs2_alloc_context **meta_ac)
|
||||
{
|
||||
int ret = 0, num_free_extents;
|
||||
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
|
||||
|
||||
*meta_ac = NULL;
|
||||
if (data_ac)
|
||||
*data_ac = NULL;
|
||||
|
||||
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
|
||||
|
||||
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
|
||||
"clusters_to_add = %u, extents_to_split = %u\n",
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
|
||||
le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
|
||||
|
||||
num_free_extents = ocfs2_num_free_extents(osb, inode, di_bh);
|
||||
if (num_free_extents < 0) {
|
||||
ret = num_free_extents;
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sparse allocation file systems need to be more conservative
|
||||
* with reserving room for expansion - the actual allocation
|
||||
* happens while we've got a journal handle open so re-taking
|
||||
* a cluster lock (because we ran out of room for another
|
||||
* extent) will violate ordering rules.
|
||||
*
|
||||
* Most of the time we'll only be seeing this 1 cluster at a time
|
||||
* anyway.
|
||||
*
|
||||
* Always lock for any unwritten extents - we might want to
|
||||
* add blocks during a split.
|
||||
*/
|
||||
if (!num_free_extents ||
|
||||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
|
||||
ret = ocfs2_reserve_new_metadata(osb, &di->id2.i_list, meta_ac);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (clusters_to_add == 0)
|
||||
goto out;
|
||||
|
||||
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret) {
|
||||
if (*meta_ac) {
|
||||
ocfs2_free_alloc_context(*meta_ac);
|
||||
*meta_ac = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot have an error and a non null *data_ac.
|
||||
*/
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
|
||||
u32 clusters_to_add, int mark_unwritten)
|
||||
{
|
||||
|
@ -725,7 +640,13 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
|
|||
restart_all:
|
||||
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
|
||||
|
||||
status = ocfs2_lock_allocators(inode, bh, clusters_to_add, 0, &data_ac,
|
||||
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
|
||||
"clusters_to_add = %u\n",
|
||||
(unsigned long long)OCFS2_I(inode)->ip_blkno,
|
||||
(long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
|
||||
clusters_to_add);
|
||||
status = ocfs2_lock_allocators(inode, bh, &fe->id2.i_list,
|
||||
clusters_to_add, 0, &data_ac,
|
||||
&meta_ac);
|
||||
if (status) {
|
||||
mlog_errno(status);
|
||||
|
@ -1397,7 +1318,8 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
|
|||
struct ocfs2_alloc_context *meta_ac = NULL;
|
||||
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
|
||||
|
||||
ret = ocfs2_lock_allocators(inode, di_bh, 0, 1, NULL, &meta_ac);
|
||||
ret = ocfs2_lock_allocators(inode, di_bh, &di->id2.i_list,
|
||||
0, 1, NULL, &meta_ac);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
return ret;
|
||||
|
@ -1428,7 +1350,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
|
|||
}
|
||||
|
||||
ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
|
||||
dealloc);
|
||||
dealloc, OCFS2_DINODE_EXTENT);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out_commit;
|
||||
|
|
|
@ -57,10 +57,6 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
|
|||
enum ocfs2_alloc_restarted *reason_ret);
|
||||
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
|
||||
u64 zero_to);
|
||||
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *fe,
|
||||
u32 clusters_to_add, u32 extents_to_split,
|
||||
struct ocfs2_alloc_context **data_ac,
|
||||
struct ocfs2_alloc_context **meta_ac);
|
||||
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat);
|
||||
|
|
|
@ -1894,3 +1894,85 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
|
|||
(unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For a given allocation, determine which allocators will need to be
|
||||
* accessed, and lock them, reserving the appropriate number of bits.
|
||||
*
|
||||
* Sparse file systems call this from ocfs2_write_begin_nolock()
|
||||
* and ocfs2_allocate_unwritten_extents().
|
||||
*
|
||||
* File systems which don't support holes call this from
|
||||
* ocfs2_extend_allocation().
|
||||
*/
|
||||
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh,
|
||||
struct ocfs2_extent_list *root_el,
|
||||
u32 clusters_to_add, u32 extents_to_split,
|
||||
struct ocfs2_alloc_context **data_ac,
|
||||
struct ocfs2_alloc_context **meta_ac)
|
||||
{
|
||||
int ret = 0, num_free_extents;
|
||||
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
|
||||
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
||||
|
||||
*meta_ac = NULL;
|
||||
if (data_ac)
|
||||
*data_ac = NULL;
|
||||
|
||||
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
|
||||
|
||||
num_free_extents = ocfs2_num_free_extents(osb, inode, root_bh,
|
||||
OCFS2_DINODE_EXTENT);
|
||||
if (num_free_extents < 0) {
|
||||
ret = num_free_extents;
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sparse allocation file systems need to be more conservative
|
||||
* with reserving room for expansion - the actual allocation
|
||||
* happens while we've got a journal handle open so re-taking
|
||||
* a cluster lock (because we ran out of room for another
|
||||
* extent) will violate ordering rules.
|
||||
*
|
||||
* Most of the time we'll only be seeing this 1 cluster at a time
|
||||
* anyway.
|
||||
*
|
||||
* Always lock for any unwritten extents - we might want to
|
||||
* add blocks during a split.
|
||||
*/
|
||||
if (!num_free_extents ||
|
||||
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
|
||||
ret = ocfs2_reserve_new_metadata(osb, root_el, meta_ac);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (clusters_to_add == 0)
|
||||
goto out;
|
||||
|
||||
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret) {
|
||||
if (*meta_ac) {
|
||||
ocfs2_free_alloc_context(*meta_ac);
|
||||
*meta_ac = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We cannot have an error and a non null *data_ac.
|
||||
*/
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -162,4 +162,9 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
|
|||
int ocfs2_check_group_descriptor(struct super_block *sb,
|
||||
struct ocfs2_dinode *di,
|
||||
struct ocfs2_group_desc *gd);
|
||||
int ocfs2_lock_allocators(struct inode *inode, struct buffer_head *root_bh,
|
||||
struct ocfs2_extent_list *root_el,
|
||||
u32 clusters_to_add, u32 extents_to_split,
|
||||
struct ocfs2_alloc_context **data_ac,
|
||||
struct ocfs2_alloc_context **meta_ac);
|
||||
#endif /* _CHAINALLOC_H_ */
|
||||
|
|
Loading…
Reference in a new issue