GFS2: Reduce file fragmentation

This patch reduces GFS2 file fragmentation by pre-reserving blocks. The
resulting improved on disk layout greatly speeds up operations in cases
which would have resulted in interlaced allocation of blocks previously.
A typical example of this is 10 parallel dd processes, each writing to a
file in a common directory.

The implementation uses an rbtree of reservations attached to each
resource group (and each inode).

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
Bob Peterson 2012-07-19 08:12:40 -04:00 committed by Steven Whitehouse
parent 294f2ad5a5
commit 8e2e004735
9 changed files with 706 additions and 90 deletions

View file

@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
if (error) if (error)
goto out_rlist; goto out_rlist;
if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
gfs2_rs_deltree(ip->i_res);
error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
RES_INDIRECT + RES_STATFS + RES_QUOTA, RES_INDIRECT + RES_STATFS + RES_QUOTA,
revokes); revokes);

View file

@ -383,6 +383,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret) if (ret)
return ret; return ret;
atomic_set(&ip->i_res->rs_sizehint,
PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh); ret = gfs2_glock_nq(&gh);
if (ret) if (ret)
@ -571,22 +574,15 @@ static int gfs2_open(struct inode *inode, struct file *file)
static int gfs2_release(struct inode *inode, struct file *file) static int gfs2_release(struct inode *inode, struct file *file)
{ {
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_file *fp;
struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_inode *ip = GFS2_I(inode);
fp = file->private_data; kfree(file->private_data);
file->private_data = NULL; file->private_data = NULL;
if ((file->f_mode & FMODE_WRITE) && ip->i_res && if ((file->f_mode & FMODE_WRITE) &&
(atomic_read(&inode->i_writecount) == 1)) (atomic_read(&inode->i_writecount) == 1))
gfs2_rs_delete(ip); gfs2_rs_delete(ip);
if (gfs2_assert_warn(sdp, fp))
return -EIO;
kfree(fp);
return 0; return 0;
} }
@ -662,14 +658,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos) unsigned long nr_segs, loff_t pos)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
size_t writesize = iov_length(iov, nr_segs);
struct dentry *dentry = file->f_dentry; struct dentry *dentry = file->f_dentry;
struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
struct gfs2_sbd *sdp;
int ret; int ret;
sdp = GFS2_SB(file->f_mapping->host);
ret = gfs2_rs_alloc(ip); ret = gfs2_rs_alloc(ip);
if (ret) if (ret)
return ret; return ret;
atomic_set(&ip->i_res->rs_sizehint, writesize / sdp->sd_sb.sb_bsize);
if (file->f_flags & O_APPEND) { if (file->f_flags & O_APPEND) {
struct gfs2_holder gh; struct gfs2_holder gh;
@ -795,6 +795,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
if (unlikely(error)) if (unlikely(error))
goto out_uninit; goto out_uninit;
atomic_set(&ip->i_res->rs_sizehint, len / sdp->sd_sb.sb_bsize);
while (len > 0) { while (len > 0) {
if (len < bytes) if (len < bytes)
bytes = len; bytes = len;
@ -803,10 +805,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
offset += bytes; offset += bytes;
continue; continue;
} }
error = gfs2_rindex_update(sdp);
if (error)
goto out_unlock;
error = gfs2_quota_lock_check(ip); error = gfs2_quota_lock_check(ip);
if (error) if (error)
goto out_unlock; goto out_unlock;

View file

@ -84,6 +84,7 @@ struct gfs2_rgrpd {
u32 rd_data; /* num of data blocks in rgrp */ u32 rd_data; /* num of data blocks in rgrp */
u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_bitbytes; /* number of bytes in data bitmaps */
u32 rd_free; u32 rd_free;
u32 rd_reserved; /* number of blocks reserved */
u32 rd_free_clone; u32 rd_free_clone;
u32 rd_dinodes; u32 rd_dinodes;
u64 rd_igeneration; u64 rd_igeneration;
@ -96,6 +97,9 @@ struct gfs2_rgrpd {
#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */
#define GFS2_RDF_ERROR 0x40000000 /* error in rg */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */
#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin; /* protects reservation related vars */
struct rb_root rd_rstree; /* multi-block reservation tree */
u32 rd_rs_cnt; /* count of current reservations */
}; };
enum gfs2_state_bits { enum gfs2_state_bits {
@ -233,6 +237,38 @@ struct gfs2_holder {
unsigned long gh_ip; unsigned long gh_ip;
}; };
/* Resource group multi-block reservation, in order of appearance:
Step 1. Function prepares to write, allocates a multi-block reservation
        (gfs2_rs_alloc) and sets the size hint.
Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
Step 4. Bits are assigned from the rgrp based on either the reservation
or wherever it can.

One of these hangs off each inode via gfs2_inode.i_res; while it holds
reserved blocks it is also linked into its rgrp's rd_rstree through rs_node.
*/
struct gfs2_blkreserv {
/* components used during write (step 1): */
atomic_t rs_sizehint; /* hint of the write size, in file system blocks */
/* components used during inplace_reserve (step 2): */
u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
/* components used during get_local_rgrp (step 3): */
struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */
/* NOTE(review): the comment below names get_local_rgrp, but in the code
   visible here the holder is initialised by gfs2_inplace_reserve() through
   gfs2_glock_nq_init() - confirm which is current. */
struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
struct rb_node rs_node; /* link to other block reservations */
/* components used during block searches and assignments (step 4): */
struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */
u32 rs_biblk; /* start block relative to the bi */
u32 rs_free; /* how many blocks are still free */
/* ancillary quota stuff */
struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
unsigned int rs_qa_qd_num;
};
enum { enum {
GLF_LOCK = 1, GLF_LOCK = 1,
GLF_DEMOTE = 3, GLF_DEMOTE = 3,
@ -290,16 +326,6 @@ struct gfs2_glock {
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
struct gfs2_blkreserv {
u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
/* ancillary quota stuff */
struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
unsigned int rs_qa_qd_num;
};
enum { enum {
GIF_INVALID = 0, GIF_INVALID = 0,
GIF_QD_LOCKED = 1, GIF_QD_LOCKED = 1,
@ -307,7 +333,6 @@ enum {
GIF_SW_PAGED = 3, GIF_SW_PAGED = 3,
}; };
struct gfs2_inode { struct gfs2_inode {
struct inode i_inode; struct inode i_inode;
u64 i_no_addr; u64 i_no_addr;
@ -318,7 +343,7 @@ struct gfs2_inode {
struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh; struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */ struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_blkreserv *i_res; /* resource group block reservation */ struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
struct gfs2_rgrpd *i_rgd; struct gfs2_rgrpd *i_rgd;
u64 i_goal; /* goal block for allocations */ u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex; struct rw_semaphore i_rw_mutex;

View file

@ -521,6 +521,9 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
int error; int error;
munge_mode_uid_gid(dip, &mode, &uid, &gid); munge_mode_uid_gid(dip, &mode, &uid, &gid);
error = gfs2_rindex_update(sdp);
if (error)
return error;
error = gfs2_quota_lock(dip, uid, gid); error = gfs2_quota_lock(dip, uid, gid);
if (error) if (error)
@ -551,6 +554,10 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
struct buffer_head *dibh; struct buffer_head *dibh;
int error; int error;
error = gfs2_rindex_update(sdp);
if (error)
return error;
error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
if (error) if (error)
goto fail; goto fail;
@ -596,7 +603,8 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
gfs2_trans_end(sdp); gfs2_trans_end(sdp);
fail_ipreserv: fail_ipreserv:
gfs2_inplace_release(dip); if (alloc_required)
gfs2_inplace_release(dip);
fail_quota_locks: fail_quota_locks:
gfs2_quota_unlock(dip); gfs2_quota_unlock(dip);
@ -647,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
const struct qstr *name = &dentry->d_name; const struct qstr *name = &dentry->d_name;
struct gfs2_holder ghs[2]; struct gfs2_holder ghs[2];
struct inode *inode = NULL; struct inode *inode = NULL;
struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
int error; int error;
@ -657,6 +665,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!name->len || name->len > GFS2_FNAMESIZE) if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG; return -ENAMETOOLONG;
/* We need a reservation to allocate the new dinode block. The
directory ip temporarily points to the reservation, but this is
being done to get a set of contiguous blocks for the new dinode.
Since this is a create, we don't have a sizehint yet, so it will
have to use the minimum reservation size. */
error = gfs2_rs_alloc(dip); error = gfs2_rs_alloc(dip);
if (error) if (error)
return error; return error;
@ -694,24 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode)) if (IS_ERR(inode))
goto fail_gunlock2; goto fail_gunlock2;
error = gfs2_inode_refresh(GFS2_I(inode)); ip = GFS2_I(inode);
error = gfs2_inode_refresh(ip);
if (error) if (error)
goto fail_gunlock2; goto fail_gunlock2;
/* the new inode needs a reservation so it can allocate xattrs. */ /* The newly created inode needs a reservation so it can allocate
error = gfs2_rs_alloc(GFS2_I(inode)); xattrs. At the same time, we want new blocks allocated to the new
if (error) dinode to be as contiguous as possible. Since we allocated the
goto fail_gunlock2; dinode block under the directory's reservation, we transfer
ownership of that reservation to the new inode. The directory
doesn't need a reservation unless it needs a new allocation. */
ip->i_res = dip->i_res;
dip->i_res = NULL;
error = gfs2_acl_create(dip, inode); error = gfs2_acl_create(dip, inode);
if (error) if (error)
goto fail_gunlock2; goto fail_gunlock2;
error = gfs2_security_init(dip, GFS2_I(inode), name); error = gfs2_security_init(dip, ip, name);
if (error) if (error)
goto fail_gunlock2; goto fail_gunlock2;
error = link_dinode(dip, name, GFS2_I(inode)); error = link_dinode(dip, name, ip);
if (error) if (error)
goto fail_gunlock2; goto fail_gunlock2;
@ -738,6 +756,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
iput(inode); iput(inode);
} }
fail: fail:
gfs2_rs_delete(dip);
if (bh) if (bh)
brelse(bh); brelse(bh);
return error; return error;

View file

@ -35,6 +35,9 @@
#define BFITNOENT ((u32)~0) #define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0) #define NO_BLOCK ((u64)~0)
#define RSRV_CONTENTION_FACTOR 4
#define RGRP_RSRV_MAX_CONTENDERS 2
#if BITS_PER_LONG == 32 #if BITS_PER_LONG == 32
#define LBITMASK (0x55555555UL) #define LBITMASK (0x55555555UL)
#define LBITSKIP55 (0x55555555UL) #define LBITSKIP55 (0x55555555UL)
@ -177,6 +180,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
return tmp; return tmp;
} }
/**
 * rs_cmp - compare a block range against an existing multi-block reservation
 * @blk: absolute file system block number at which the range starts
 * @len: number of blocks in the range
 * @rs: existing reservation to compare against
 *
 * The reservation is taken to span rs->rs_free blocks starting at
 * gfs2_rs_startblk(rs).
 *
 * Returns: 1 if the range starts at or beyond the end of the reservation,
 *          -1 if the range ends before the start of the reservation,
 *          0 if the range overlaps the reservation
 */
static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
{
	const u64 rs_start = gfs2_rs_startblk(rs);
	const u64 rs_end = rs_start + rs->rs_free; /* one past the last block */
	const u64 range_last = blk + len - 1;

	if (blk >= rs_end)
		return 1;

	return (range_last < rs_start) ? -1 : 0;
}
/**
 * rs_find - Find a rgrp multi-block reservation that contains a given block
 * @rgd: The rgrp whose reservation tree is searched
 * @rgblk: The block we're looking for, relative to the rgrp
 *
 * The block is converted to an absolute block number (rgd->rd_data0 is
 * added) and rd_rstree is descended using rs_cmp() with a length of one.
 * The walk is done under rd_rsspin; the lock is released before returning.
 *
 * Returns: the reservation covering the block, or NULL if there is none
 */
static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
{
	const u64 fsblk = rgblk + rgd->rd_data0;
	struct gfs2_blkreserv *found = NULL;
	struct rb_node *node;

	spin_lock(&rgd->rd_rsspin);
	node = rgd->rd_rstree.rb_node;
	while (node) {
		struct gfs2_blkreserv *cur =
			rb_entry(node, struct gfs2_blkreserv, rs_node);
		int rc = rs_cmp(fsblk, 1, cur);

		if (rc == 0) {
			found = cur;
			break;
		}
		/* Block lies below the reservation: go left, else right */
		node = (rc < 0) ? node->rb_left : node->rb_right;
	}
	spin_unlock(&rgd->rd_rsspin);

	return found;
}
/** /**
* gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
* a block in a given allocation state. * a block in a given allocation state.
@ -424,19 +478,93 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
int gfs2_rs_alloc(struct gfs2_inode *ip) int gfs2_rs_alloc(struct gfs2_inode *ip)
{ {
int error = 0; int error = 0;
struct gfs2_blkreserv *res;
if (ip->i_res)
return 0;
res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
if (!res)
error = -ENOMEM;
down_write(&ip->i_rw_mutex); down_write(&ip->i_rw_mutex);
if (!ip->i_res) { if (ip->i_res)
ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); kmem_cache_free(gfs2_rsrv_cachep, res);
if (!ip->i_res) else
error = -ENOMEM; ip->i_res = res;
}
up_write(&ip->i_rw_mutex); up_write(&ip->i_rw_mutex);
return error; return error;
} }
/*
 * dump_rs - print a one-line summary of a multi-block reservation
 * @seq: the seq_file handed on to gfs2_print_dbg()
 * @rs: the reservation to describe
 *
 * Fields printed: r = address of the owning rgrp (rd_addr), s = start block
 * as computed by gfs2_rs_startblk(), b = rs_biblk, f = rs_free.
 *
 * The 64-bit values are cast to unsigned long long so they always match
 * the %llu conversions, the same way gfs2_rgrp_dump() prints rd_addr.
 */
static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
{
	gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n",
		       (unsigned long long)rs->rs_rgd->rd_addr,
		       (unsigned long long)gfs2_rs_startblk(rs),
		       rs->rs_biblk, rs->rs_free);
}
/** /**
* gfs2_rs_delete - delete a reservation * __rs_deltree - remove a multi-block reservation from the rgd tree
* @rs: The reservation to remove
*
*/
static void __rs_deltree(struct gfs2_blkreserv *rs)
{
	struct gfs2_rgrpd *rgd;

	/* Nothing to undo for a reservation that is not active. */
	if (!gfs2_rs_active(rs))
		return;

	rgd = rs->rs_rgd;

	/*
	 * The rgrp glock cannot be asserted as held by us here: when the
	 * rgrp is invalidated, it's in the "middle" of acquiring the glock,
	 * but the HOLDER bit isn't set yet.
	 */
	trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);

	/* Unlink from the rgrp's tree and drop it from the rgrp's count. */
	if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
		rb_erase(&rs->rs_node, &rgd->rd_rstree);
	BUG_ON(!rgd->rd_rs_cnt);
	rgd->rd_rs_cnt--;

	if (rs->rs_free) {
		/* return reserved blocks to the rgrp and the ip */
		BUG_ON(rgd->rd_reserved < rs->rs_free);
		rgd->rd_reserved -= rs->rs_free;
		rs->rs_free = 0;
		/* Blocks came back to this bitmap: clear its GBF_FULL flag */
		clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
		smp_mb__after_clear_bit();
	}

	/*
	 * Only the step 3/4 search state is reset. The step 1 and step 2
	 * components must stay as they are: rs_sizehint and rs_requested
	 * are left alone, and rs_rgd cannot be set to NULL because the rgd
	 * glock is held and dequeued through that pointer.
	 */
	rs->rs_bi = NULL;
	rs->rs_biblk = 0;
}
/**
 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
 * @rs: The reservation to remove
 *
 * Locking wrapper: if the reservation is active, takes the owning rgrp's
 * rd_rsspin around __rs_deltree(). An inactive reservation is left alone.
 */
void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
{
	if (gfs2_rs_active(rs)) {
		struct gfs2_rgrpd *owner = rs->rs_rgd;

		spin_lock(&owner->rd_rsspin);
		__rs_deltree(rs);
		spin_unlock(&owner->rd_rsspin);
	}
}
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation * @ip: The inode for this reservation
* *
*/ */
@ -444,12 +572,36 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
{ {
down_write(&ip->i_rw_mutex); down_write(&ip->i_rw_mutex);
if (ip->i_res) { if (ip->i_res) {
gfs2_rs_deltree(ip->i_res);
trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
BUG_ON(ip->i_res->rs_free);
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
ip->i_res = NULL; ip->i_res = NULL;
} }
up_write(&ip->i_rw_mutex); up_write(&ip->i_rw_mutex);
} }
/**
 * return_all_reservations - return all reserved blocks back to the rgrp.
 * @rgd: the rgrp that needs its space back
 *
 * We previously reserved a bunch of blocks for allocation. Now we need to
 * give them back. This leaves the reservation structures intact, but removes
 * all of their corresponding "no-fly zones".
 *
 * Every node is taken off rd_rstree via __rs_deltree() while rd_rsspin is
 * held. The loop only makes progress if __rs_deltree() really erases the
 * node, i.e. if every reservation found in the tree is active.
 *
 * NOTE(review): __rs_deltree() writes to rs->rs_bi->bi_flags, and rs_bi
 * points into rgd->rd_bits, so this has to run while rd_bits is still
 * allocated - check the ordering against kfree(rgd->rd_bits) in the caller.
 */
static void return_all_reservations(struct gfs2_rgrpd *rgd)
{
	struct rb_node *first;

	spin_lock(&rgd->rd_rsspin);
	for (first = rb_first(&rgd->rd_rstree); first;
	     first = rb_first(&rgd->rd_rstree))
		__rs_deltree(rb_entry(first, struct gfs2_blkreserv, rs_node));
	spin_unlock(&rgd->rd_rsspin);
}
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{ {
struct rb_node *n; struct rb_node *n;
@ -472,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
gfs2_free_clones(rgd); gfs2_free_clones(rgd);
kfree(rgd->rd_bits); kfree(rgd->rd_bits);
return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd);
} }
} }
@ -649,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data0 = be64_to_cpu(buf.ri_data0); rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
spin_lock_init(&rgd->rd_rsspin);
error = compute_bitstructs(rgd); error = compute_bitstructs(rgd);
if (error) if (error)
@ -1114,30 +1268,213 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
return ret; return ret;
} }
/**
 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
 * @bi: the bitmap with the blocks
 * @ip: the inode structure; ip->i_res must exist, must not already be
 *      active, and must have rs_rgd pointing at the target rgrp
 * @biblk: the 32-bit block number relative to the start of the bitmap
 * @amount: the number of blocks to reserve
 *
 * The tree is searched and updated under the rgrp's rd_rsspin. On success
 * the reservation's rs_bi/rs_biblk/rs_free are filled in and the rgrp's
 * rd_reserved and rd_rs_cnt are increased.
 *
 * Returns: NULL - the range overlaps an existing reservation, nothing was
 *                 inserted
 *          pointer to the inserted reservation (ip->i_res) otherwise
 */
static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
					struct gfs2_inode *ip, u32 biblk,
					int amount)
{
	struct rb_node **newn, *parent = NULL;
	struct gfs2_blkreserv *rs = ip->i_res;
	struct gfs2_rgrpd *rgd;
	u64 fsblock;

	/* Has to be checked before rs is dereferenced for rs_rgd below. */
	BUG_ON(!rs);
	rgd = rs->rs_rgd;
	fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;

	spin_lock(&rgd->rd_rsspin);
	BUG_ON(gfs2_rs_active(rs));

	/* Figure out where to put new node */
	newn = &rgd->rd_rstree.rb_node;
	while (*newn) {
		struct gfs2_blkreserv *cur =
			rb_entry(*newn, struct gfs2_blkreserv, rs_node);
		int rc = rs_cmp(fsblock, amount, cur);

		parent = *newn;
		if (rc > 0) {
			newn = &((*newn)->rb_right);
		} else if (rc < 0) {
			newn = &((*newn)->rb_left);
		} else {
			spin_unlock(&rgd->rd_rsspin);
			return NULL; /* reservation already in use */
		}
	}

	/* Do our reservation work */
	rs->rs_free = amount;
	rs->rs_biblk = biblk;
	rs->rs_bi = bi;
	rb_link_node(&rs->rs_node, parent, newn);
	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);

	/* Do our rgrp accounting for the reservation */
	rgd->rd_reserved += amount; /* blocks reserved */
	rgd->rd_rs_cnt++; /* number of in-tree reservations */
	spin_unlock(&rgd->rd_rsspin);

	trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
	return rs;
}
/**
 * unclaimed_blocks - return number of blocks that aren't spoken for
 * @rgd: the resource group to examine
 *
 * Returns: rd_free_clone minus rd_reserved. The subtraction is unsigned
 *          32-bit, so it wraps if rd_reserved is larger than rd_free_clone.
 *          NOTE(review): confirm that cannot happen.
 */
static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
{
	const u32 free_blocks = rgd->rd_free_clone;
	const u32 reserved_blocks = rgd->rd_reserved;

	return free_blocks - reserved_blocks;
}
/**
 * rg_mblk_search - find a group of multiple free blocks
 * @rgd: the resource group descriptor
 * @ip: pointer to the inode for which we're reserving blocks; its i_res
 *      supplies rs_sizehint and rs_requested and receives the reservation
 *
 * This is very similar to rgblk_search, except we're looking for whole
 * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
 * on aligned dwords for speed's sake.
 *
 * The scan starts in the bitmap that holds the goal block (ip->i_goal if it
 * lies in this rgrp, otherwise rgd->rd_last_alloc) and wraps around the
 * rgrp's bitmaps. Runs of all-zero bitmap bytes are offered to rs_insert();
 * the first run it accepts ends the search.
 *
 * Returns: 0 if a reservation was inserted, or BFITNOENT if no suitable run
 *          of free blocks was found
 */
static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
struct gfs2_bitmap *bi = rgd->rd_bits;
const u32 length = rgd->rd_length;
u32 blk;
unsigned int buf, x, search_bytes;
u8 *buffer = NULL;
u8 *ptr, *end, *nonzero;
u32 goal, rsv_bytes;
struct gfs2_blkreserv *rs;
u32 best_rs_bytes, unclaimed;
int best_rs_blocks;
/* Find bitmap block that contains bits for goal block */
if (rgrp_contains_block(rgd, ip->i_goal))
goal = ip->i_goal - rgd->rd_data0;
else
goal = rgd->rd_last_alloc;
for (buf = 0; buf < length; buf++) {
bi = rgd->rd_bits + buf;
/* Convert scope of "goal" from rgrp-wide to within
found bit block */
if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
goal -= bi->bi_start * GFS2_NBBY;
goto do_search;
}
}
/* Goal is not covered by any bitmap: start from the very beginning */
buf = 0;
goal = 0;
do_search:
/* Size of the run to look for, in blocks: the larger of the inode's
size hint and RGRP_RSRV_MINBLKS * rd_length. */
best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
(RGRP_RSRV_MINBLKS * rgd->rd_length));
/* Scale by the number of reservations already in this rgrp and convert
from blocks to bitmap bytes (GFS2_NBBY blocks per byte). */
best_rs_bytes = (best_rs_blocks *
(1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
/* Never ask for more than the rgrp has unclaimed. Note that the clamped
value is no longer rounded up to a u64 multiple. */
unclaimed = unclaimed_blocks(rgd);
if (best_rs_bytes * GFS2_NBBY > unclaimed)
best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
/* length + 1 passes: goal is reset to 0 after each bitmap, so the extra
pass revisits the starting bitmap from its beginning. */
for (x = 0; x <= length; x++) {
bi = rgd->rd_bits + buf;
if (test_bit(GBF_FULL, &bi->bi_flags))
goto skip;
WARN_ON(!buffer_uptodate(bi->bi_bh));
/* Search the clone bitmap when one exists, else the buffer itself */
if (bi->bi_clone)
buffer = bi->bi_clone + bi->bi_offset;
else
buffer = bi->bi_bh->b_data + bi->bi_offset;
/* We have to keep the reservations aligned on u64 boundaries
otherwise we could get situations where a byte can't be
used because it's after a reservation, but a free bit still
is within the reservation's area. */
ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
end = (buffer + bi->bi_len);
while (ptr < end) {
rsv_bytes = 0;
/* Look at best_rs_bytes, or at whatever is left of this bitmap */
if ((ptr + best_rs_bytes) <= end)
search_bytes = best_rs_bytes;
else
search_bytes = end - ptr;
BUG_ON(!search_bytes);
nonzero = memchr_inv(ptr, 0, search_bytes);
/* If the lot is all zeroes, reserve the whole size. If
there's enough zeroes to satisfy the request, use
what we can. If there's not enough, keep looking. */
if (nonzero == NULL)
rsv_bytes = search_bytes;
else if ((nonzero - ptr) * GFS2_NBBY >=
ip->i_res->rs_requested)
rsv_bytes = (nonzero - ptr);
if (rsv_bytes) {
blk = ((ptr - buffer) * GFS2_NBBY);
BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
rs = rs_insert(bi, ip, blk,
rsv_bytes * GFS2_NBBY);
/* NOTE(review): rs_insert() as visible here returns only NULL
or a valid pointer, so this branch looks unreachable. */
if (IS_ERR(rs))
return PTR_ERR(rs);
/* NULL means the range overlaps another reservation: keep
scanning. */
if (rs)
return 0;
}
ptr += ALIGN(search_bytes, sizeof(u64));
}
skip:
/* Try next bitmap block (wrap back to rgrp header
if at end) */
buf++;
buf %= length;
goal = 0;
}
return BFITNOENT;
}
/** /**
* try_rgrp_fit - See if a given reservation will fit in a given RG * try_rgrp_fit - See if a given reservation will fit in a given RG
* @rgd: the RG data * @rgd: the RG data
* @ip: the inode * @ip: the inode
* *
* If there's room for the requested blocks to be allocated from the RG: * If there's room for the requested blocks to be allocated from the RG:
* This will try to get a multi-block reservation first, and if that doesn't
* fit, it will take what it can.
* *
* Returns: 1 on success (it fits), 0 on failure (it doesn't fit) * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
*/ */
static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{ {
const struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_blkreserv *rs = ip->i_res;
if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
return 0; return 0;
if (rgd->rd_free_clone >= rs->rs_requested) /* Look for a multi-block reservation. */
if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
rg_mblk_search(rgd, ip) != BFITNOENT)
return 1;
if (unclaimed_blocks(rgd) >= rs->rs_requested)
return 1; return 1;
return 0;
}
static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) return 0;
{
return (bi->bi_start * GFS2_NBBY) + blk;
} }
/** /**
@ -1217,7 +1554,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
{ {
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd, *begin = NULL; struct gfs2_rgrpd *begin = NULL;
struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_blkreserv *rs = ip->i_res;
int error = 0, rg_locked, flags = LM_FLAG_TRY; int error = 0, rg_locked, flags = LM_FLAG_TRY;
u64 last_unlinked = NO_BLOCK; u64 last_unlinked = NO_BLOCK;
@ -1225,32 +1562,40 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
if (sdp->sd_args.ar_rgrplvb) if (sdp->sd_args.ar_rgrplvb)
flags |= GL_SKIP; flags |= GL_SKIP;
rs = ip->i_res;
rs->rs_requested = requested; rs->rs_requested = requested;
if (gfs2_assert_warn(sdp, requested)) { if (gfs2_assert_warn(sdp, requested)) {
error = -EINVAL; error = -EINVAL;
goto out; goto out;
} }
if (gfs2_rs_active(rs)) {
if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) begin = rs->rs_rgd;
rgd = begin = ip->i_rgd; flags = 0; /* Yoda: Do or do not. There is no try */
else } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); rs->rs_rgd = begin = ip->i_rgd;
} else {
if (rgd == NULL) rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
}
if (rs->rs_rgd == NULL)
return -EBADSLT; return -EBADSLT;
while (loops < 3) { while (loops < 3) {
rg_locked = 0; rg_locked = 0;
if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
rg_locked = 1; rg_locked = 1;
error = 0; error = 0;
} else if (!loops && !gfs2_rs_active(rs) &&
rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
/* If the rgrp already is maxed out for contenders,
we can eliminate it as a "first pass" without even
requesting the rgrp glock. */
error = GLR_TRYFAILED;
} else { } else {
error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
flags, &rs->rs_rgd_gh); LM_ST_EXCLUSIVE, flags,
&rs->rs_rgd_gh);
if (!error && sdp->sd_args.ar_rgrplvb) { if (!error && sdp->sd_args.ar_rgrplvb) {
error = update_rgrp_lvb(rgd); error = update_rgrp_lvb(rs->rs_rgd);
if (error) { if (error) {
gfs2_glock_dq_uninit(&rs->rs_rgd_gh); gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
return error; return error;
@ -1259,25 +1604,37 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
} }
switch (error) { switch (error) {
case 0: case 0:
if (try_rgrp_fit(rgd, ip)) { if (gfs2_rs_active(rs)) {
if (unclaimed_blocks(rs->rs_rgd) +
rs->rs_free >= rs->rs_requested) {
ip->i_rgd = rs->rs_rgd;
return 0;
}
/* We have a multi-block reservation, but the
rgrp doesn't have enough free blocks to
satisfy the request. Free the reservation
and look for a suitable rgrp. */
gfs2_rs_deltree(rs);
}
if (try_rgrp_fit(rs->rs_rgd, ip)) {
if (sdp->sd_args.ar_rgrplvb) if (sdp->sd_args.ar_rgrplvb)
gfs2_rgrp_bh_get(rgd); gfs2_rgrp_bh_get(rs->rs_rgd);
ip->i_rgd = rgd; ip->i_rgd = rs->rs_rgd;
return 0; return 0;
} }
if (rgd->rd_flags & GFS2_RDF_CHECK) { if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
if (sdp->sd_args.ar_rgrplvb) if (sdp->sd_args.ar_rgrplvb)
gfs2_rgrp_bh_get(rgd); gfs2_rgrp_bh_get(rs->rs_rgd);
try_rgrp_unlink(rgd, &last_unlinked, try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
ip->i_no_addr); ip->i_no_addr);
} }
if (!rg_locked) if (!rg_locked)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh); gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
/* fall through */ /* fall through */
case GLR_TRYFAILED: case GLR_TRYFAILED:
rgd = gfs2_rgrpd_get_next(rgd); rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
rgd = rgd ? : begin; /* if NULL, wrap */ rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
if (rgd != begin) /* If we didn't wrap */ if (rs->rs_rgd != begin) /* If we didn't wrap */
break; break;
flags &= ~LM_FLAG_TRY; flags &= ~LM_FLAG_TRY;
@ -1315,6 +1672,12 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
{ {
struct gfs2_blkreserv *rs = ip->i_res; struct gfs2_blkreserv *rs = ip->i_res;
if (!rs)
return;
if (!rs->rs_free)
gfs2_rs_deltree(rs);
if (rs->rs_rgd_gh.gh_gl) if (rs->rs_rgd_gh.gh_gl)
gfs2_glock_dq_uninit(&rs->rs_rgd_gh); gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
rs->rs_requested = 0; rs->rs_requested = 0;
@ -1413,7 +1776,27 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state,
if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
buffer = bi->bi_clone + bi->bi_offset; buffer = bi->bi_clone + bi->bi_offset;
biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); while (1) {
struct gfs2_blkreserv *rs;
u32 rgblk;
biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
if (biblk == BFITNOENT)
break;
/* Check if this block is reserved() */
rgblk = gfs2_bi2rgd_blk(bi, biblk);
rs = rs_find(rgd, rgblk);
if (rs == NULL)
break;
BUG_ON(rs->rs_bi != bi);
biblk = BFITNOENT;
/* This should jump to the first block after the
reservation. */
goal = rs->rs_biblk + rs->rs_free;
if (goal >= bi->bi_len * GFS2_NBBY)
break;
}
if (biblk != BFITNOENT) if (biblk != BFITNOENT)
break; break;
@ -1449,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
u32 blk, bool dinode, unsigned int *n) u32 blk, bool dinode, unsigned int *n)
{ {
const unsigned int elen = *n; const unsigned int elen = *n;
u32 goal; u32 goal, rgblk;
const u8 *buffer = NULL; const u8 *buffer = NULL;
struct gfs2_blkreserv *rs;
*n = 0; *n = 0;
buffer = bi->bi_bh->b_data + bi->bi_offset; buffer = bi->bi_bh->b_data + bi->bi_offset;
@ -1463,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
goal++; goal++;
if (goal >= (bi->bi_len * GFS2_NBBY)) if (goal >= (bi->bi_len * GFS2_NBBY))
break; break;
rgblk = gfs2_bi2rgd_blk(bi, goal);
rs = rs_find(rgd, rgblk);
if (rs) /* Oops, we bumped into someone's reservation */
break;
if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
GFS2_BLKST_FREE) GFS2_BLKST_FREE)
break; break;
@ -1538,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
{ {
const struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_rgrpd *rgd = gl->gl_object;
struct gfs2_blkreserv *trs;
const struct rb_node *n;
if (rgd == NULL) if (rgd == NULL)
return 0; return 0;
gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
(unsigned long long)rgd->rd_addr, rgd->rd_flags, (unsigned long long)rgd->rd_addr, rgd->rd_flags,
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
rgd->rd_reserved);
spin_lock(&rgd->rd_rsspin);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
dump_rs(seq, trs);
}
spin_unlock(&rgd->rd_rsspin);
return 0; return 0;
} }
@ -1557,11 +1955,64 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
rgd->rd_flags |= GFS2_RDF_ERROR; rgd->rd_flags |= GFS2_RDF_ERROR;
} }
/**
 * claim_reserved_blks - Claim previously reserved blocks
 * @ip: the inode that's claiming the reservation
 * @dinode: 1 if the first block is a dinode block, otherwise data block
 * @nblocks: desired extent length (in); number of blocks claimed (out)
 *
 * Lay claim to blocks from the inode's existing multi-block reservation.
 * Blocks are taken from the front of the reservation, at most rs_free of
 * them; only the first one can be marked as a dinode, the rest are marked
 * as used. When the reservation is used up it is removed from the rgrp's
 * tree and, if the rgrp still has enough unclaimed blocks, a new search is
 * started for the inode.
 *
 * Returns: Starting block number of the blocks claimed.
 *          Sets *nblocks to the actual extent length allocated.
 */
static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
			       unsigned int *nblocks)
{
	struct gfs2_blkreserv *rs = ip->i_res;
	struct gfs2_rgrpd *rgd = rs->rs_rgd;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct gfs2_bitmap *bi;
	u64 start_block = gfs2_rs_startblk(rs);
	const unsigned int elen = *nblocks;

	gfs2_assert_withdraw(sdp, rgd);
	bi = rs->rs_bi;
	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);

	for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
		/* Mark the next reserved block as in use in the bitmap */
		gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
			    dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
		rs->rs_biblk++;
		rs->rs_free--;

		BUG_ON(!rgd->rd_reserved);
		rgd->rd_reserved--;
		dinode = false; /* only the first block can be the dinode */
		trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
	}

	if (!rs->rs_free) {
		/* __rs_deltree() leaves rs_rgd untouched, so rgd stays valid */
		gfs2_rs_deltree(rs);
		/* -nblocks because we haven't returned to do the math yet.
		   I'm doing the math backwards to prevent negative numbers,
		   but think of it as:
		   if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
		if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
			rg_mblk_search(rgd, ip);
	}
	return start_block;
}
/** /**
* gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
* @ip: the inode to allocate the block for * @ip: the inode to allocate the block for
* @bn: Used to return the starting block number * @bn: Used to return the starting block number
* @ndata: requested number of blocks/extent length (value/result) * @nblocks: requested number of blocks/extent length (value/result)
* @dinode: 1 if we're allocating a dinode block, else 0 * @dinode: 1 if we're allocating a dinode block, else 0
* @generation: the generation number of the inode * @generation: the generation number of the inode
* *
@ -1586,20 +2037,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
if (ip->i_res->rs_requested == 0) if (ip->i_res->rs_requested == 0)
return -ECANCELED; return -ECANCELED;
rgd = ip->i_rgd; /* Check if we have a multi-block reservation, and if so, claim the
next free block from it. */
if (gfs2_rs_active(ip->i_res)) {
BUG_ON(!ip->i_res->rs_free);
rgd = ip->i_res->rs_rgd;
block = claim_reserved_blks(ip, dinode, nblocks);
} else {
rgd = ip->i_rgd;
if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
goal = ip->i_goal - rgd->rd_data0; goal = ip->i_goal - rgd->rd_data0;
else else
goal = rgd->rd_last_alloc; goal = rgd->rd_last_alloc;
blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
/* Since all blocks are reserved in advance, this shouldn't happen */ /* Since all blocks are reserved in advance, this shouldn't
if (blk == BFITNOENT) happen */
goto rgrp_error; if (blk == BFITNOENT) {
printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
*nblocks);
printk(KERN_WARNING "FULL=%d\n",
test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
goto rgrp_error;
}
block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
}
ndata = *nblocks; ndata = *nblocks;
if (dinode) if (dinode)
ndata--; ndata--;
@ -1616,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
brelse(dibh); brelse(dibh);
} }
} }
if (rgd->rd_free < *nblocks) if (rgd->rd_free < *nblocks) {
printk(KERN_WARNING "nblocks=%u\n", *nblocks);
goto rgrp_error; goto rgrp_error;
}
rgd->rd_free -= *nblocks; rgd->rd_free -= *nblocks;
if (dinode) { if (dinode) {
@ -1877,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
for (x = 0; x < rlist->rl_rgrps; x++) for (x = 0; x < rlist->rl_rgrps; x++)
gfs2_holder_uninit(&rlist->rl_ghs[x]); gfs2_holder_uninit(&rlist->rl_ghs[x]);
kfree(rlist->rl_ghs); kfree(rlist->rl_ghs);
rlist->rl_ghs = NULL;
} }
} }

View file

@ -13,6 +13,14 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
/* Since each block in the file system is represented by two bits in the
* bitmap, one 64-bit word in the bitmap will represent 32 blocks.
* By reserving 32 blocks at a time, we can optimize / shortcut how we search
* through the bitmaps by looking a word at a time.
*/
#define RGRP_RSRV_MINBYTES 8
#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
struct gfs2_rgrpd; struct gfs2_rgrpd;
struct gfs2_sbd; struct gfs2_sbd;
struct gfs2_holder; struct gfs2_holder;
@ -29,6 +37,8 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
extern void gfs2_inplace_release(struct gfs2_inode *ip); extern void gfs2_inplace_release(struct gfs2_inode *ip);
@ -36,6 +46,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation); bool dinode, u64 *generation);
extern int gfs2_rs_alloc(struct gfs2_inode *ip); extern int gfs2_rs_alloc(struct gfs2_inode *ip);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
@ -62,7 +73,7 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
extern int gfs2_fitrim(struct file *filp, void __user *argp); extern int gfs2_fitrim(struct file *filp, void __user *argp);
/* This is how to tell if a reservation is "inplace" reserved: */ /* This is how to tell if a multi-block reservation is "inplace" reserved: */
static inline int gfs2_mb_reserved(struct gfs2_inode *ip) static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
{ {
if (ip->i_res && ip->i_res->rs_requested) if (ip->i_res && ip->i_res->rs_requested)
@ -70,4 +81,22 @@ static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
return 0; return 0;
} }
/*
 * Report whether a multi-block reservation is in the rgrp tree.
 * Returns 1 when @rs is non-NULL and has a bitmap (rs_bi) attached,
 * otherwise 0.
 */
static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
{
	if (!rs || !rs->rs_bi)
		return 0;
	return 1;
}
/*
 * Convert a block offset within bitmap @bi into a block offset within the
 * whole resource group: bi_start scaled by GFS2_NBBY, plus @blk.
 */
static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
{
	const u32 bitmap_first_blk = bi->bi_start * GFS2_NBBY;

	return bitmap_first_blk + blk;
}
static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
{
return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
}
#endif /* __RGRP_DOT_H__ */ #endif /* __RGRP_DOT_H__ */

View file

@ -1420,6 +1420,10 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
return -EIO; return -EIO;
} }
error = gfs2_rindex_update(sdp);
if (error)
return error;
error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
if (error) if (error)
return error; return error;
@ -1550,6 +1554,9 @@ static void gfs2_evict_inode(struct inode *inode)
out_unlock: out_unlock:
/* Error path for case 1 */ /* Error path for case 1 */
if (gfs2_rs_active(ip->i_res))
gfs2_rs_deltree(ip->i_res);
if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
gfs2_glock_dq(&ip->i_iopen_gh); gfs2_glock_dq(&ip->i_iopen_gh);
gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh);

View file

@ -14,6 +14,7 @@
#include <linux/ktime.h> #include <linux/ktime.h>
#include "incore.h" #include "incore.h"
#include "glock.h" #include "glock.h"
#include "rgrp.h"
#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
#define glock_trace_name(x) __print_symbolic(x, \ #define glock_trace_name(x) __print_symbolic(x, \
@ -31,6 +32,17 @@
{ GFS2_BLKST_DINODE, "dinode" }, \ { GFS2_BLKST_DINODE, "dinode" }, \
{ GFS2_BLKST_UNLINKED, "unlinked" }) { GFS2_BLKST_UNLINKED, "unlinked" })
/* Operation codes passed as the "func" argument of the gfs2_rs trace event */
#define TRACE_RS_DELETE  0
#define TRACE_RS_TREEDEL 1
#define TRACE_RS_INSERT  2
#define TRACE_RS_CLAIM   3

/*
 * Printable names for the TRACE_RS_* codes. The table is keyed on the named
 * constants rather than bare numbers so the two cannot drift apart.
 */
#define rs_func_name(x) __print_symbolic(x,	\
	{ TRACE_RS_DELETE,  "del " },		\
	{ TRACE_RS_TREEDEL, "tdel" },		\
	{ TRACE_RS_INSERT,  "ins " },		\
	{ TRACE_RS_CLAIM,   "clm " })
#define show_glock_flags(flags) __print_flags(flags, "", \ #define show_glock_flags(flags) __print_flags(flags, "", \
{(1UL << GLF_LOCK), "l" }, \ {(1UL << GLF_LOCK), "l" }, \
{(1UL << GLF_DEMOTE), "D" }, \ {(1UL << GLF_DEMOTE), "D" }, \
@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc,
__field( u8, block_state ) __field( u8, block_state )
__field( u64, rd_addr ) __field( u64, rd_addr )
__field( u32, rd_free_clone ) __field( u32, rd_free_clone )
__field( u32, rd_reserved )
), ),
TP_fast_assign( TP_fast_assign(
@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc,
__entry->block_state = block_state; __entry->block_state = block_state;
__entry->rd_addr = rgd->rd_addr; __entry->rd_addr = rgd->rd_addr;
__entry->rd_free_clone = rgd->rd_free_clone; __entry->rd_free_clone = rgd->rd_free_clone;
__entry->rd_reserved = rgd->rd_reserved;
), ),
TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u", TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->inum, (unsigned long long)__entry->inum,
(unsigned long long)__entry->start, (unsigned long long)__entry->start,
(unsigned long)__entry->len, (unsigned long)__entry->len,
block_state_name(__entry->block_state), block_state_name(__entry->block_state),
(unsigned long long)__entry->rd_addr, (unsigned long long)__entry->rd_addr,
__entry->rd_free_clone) __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
);
/*
 * Keep track of multi-block reservations as they are allocated/freed.
 *
 * @ip:   inode owning the reservation; may be NULL, in which case the
 *        inode number is recorded as 0
 * @rs:   the reservation being traced
 * @func: which operation is being traced; printed through rs_func_name()
 *
 * All rgrp-derived fields are recorded as 0 when the reservation has no
 * rgrp attached.
 */
TRACE_EVENT(gfs2_rs,

	TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
		 u8 func),

	TP_ARGS(ip, rs, func),

	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	u64,	rd_addr			)
		__field(	u32,	rd_free_clone		)
		__field(	u32,	rd_reserved		)
		__field(	u64,	inum			)
		__field(	u64,	start			)
		__field(	u32,	free			)
		__field(	u8,	func			)
	),

	TP_fast_assign(
		__entry->dev		= rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
		__entry->rd_addr	= rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
		__entry->rd_free_clone	= rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
		__entry->rd_reserved	= rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
		__entry->inum		= ip ? ip->i_no_addr : 0;
		/*
		 * gfs2_rs_startblk() dereferences both rs_bi and rs_rgd, so
		 * it may only be called when both are attached; record 0
		 * otherwise, matching the other rgrp-derived fields.
		 */
		__entry->start		= (rs->rs_rgd && rs->rs_bi) ?
					  gfs2_rs_startblk(rs) : 0;
		__entry->free		= rs->rs_free;
		__entry->func		= func;
	),

	TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
		  "f:%lu",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  (unsigned long long)__entry->inum,
		  (unsigned long long)__entry->start,
		  (unsigned long long)__entry->rd_addr,
		  (unsigned long)__entry->rd_free_clone,
		  (unsigned long)__entry->rd_reserved,
		  rs_func_name(__entry->func), (unsigned long)__entry->free)
);
#endif /* _TRACE_GFS2_H */ #endif /* _TRACE_GFS2_H */

View file

@ -327,6 +327,10 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
{ {
int error; int error;
error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
if (error)
return error;
error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
if (error) if (error)
goto out_alloc; goto out_alloc;
@ -710,6 +714,10 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
struct buffer_head *dibh; struct buffer_head *dibh;
int error; int error;
error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
if (error)
return error;
error = gfs2_quota_lock_check(ip); error = gfs2_quota_lock_check(ip);
if (error) if (error)
return error; return error;
@ -1483,6 +1491,10 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
{ {
int error; int error;
error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
if (error)
return error;
error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
if (error) if (error)
return error; return error;