diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e80a464850c8..d6526347d386 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, unsigned int data_blocks = 0, ind_blocks = 0, rblocks; int alloc_required; int error = 0; - struct gfs2_qadata *qa = NULL; pgoff_t index = pos >> PAGE_CACHE_SHIFT; unsigned from = pos & (PAGE_CACHE_SIZE - 1); struct page *page; @@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { - qa = gfs2_qadata_get(ip); - if (!qa) { - error = -ENOMEM; - goto out_unlock; - } - error = gfs2_quota_lock_check(ip); if (error) - goto out_alloc_put; + goto out_unlock; error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (error) @@ -708,8 +701,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_qadata_put(ip); } out_unlock: if (&ip->i_inode == sdp->sd_rindex) { @@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct buffer_head *dibh; - struct gfs2_qadata *qa = ip->i_qadata; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; @@ -878,12 +868,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (ip->i_res) + if (gfs2_mb_reserved(ip)) gfs2_inplace_release(ip); - if (qa) { + if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); - gfs2_qadata_put(ip); - } if (inode == sdp->sd_rindex) { gfs2_glock_dq(&m_ip->i_gh); gfs2_holder_uninit(&m_ip->i_gh); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index dab54099dd98..49cd7dd4a9fa 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, if (error) goto out_rlist; + if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ + gfs2_rs_deltree(ip->i_res); + error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, revokes); @@ -1045,12 +1048,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; find_metapath(sdp, lblock, &mp, ip->i_height); - if (!gfs2_qadata_get(ip)) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out; + return error; while (height--) { struct strip_mine sm; @@ -1064,8 +1068,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) gfs2_quota_unhold(ip); -out: - gfs2_qadata_put(ip); return error; } @@ -1167,19 +1169,14 @@ static int do_grow(struct inode *inode, u64 size) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; - struct gfs2_qadata *qa = NULL; int error; int unstuff = 0; if (gfs2_is_stuffed(ip) && (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { - qa = gfs2_qadata_get(ip); - if (qa == NULL) - return -ENOMEM; - error = gfs2_quota_lock_check(ip); if (error) - goto do_grow_alloc_put; + return error; error = gfs2_inplace_reserve(ip, 1); if (error) @@ -1214,8 +1211,6 @@ static int do_grow(struct inode *inode, u64 size) gfs2_inplace_release(ip); do_grow_qunlock: gfs2_quota_unlock(ip); -do_grow_alloc_put: - gfs2_qadata_put(ip); } return error; } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 8aaeb07a07b5..259b088cfc4c 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (!ht) return -ENOMEM; - if (!gfs2_qadata_get(dip)) { - error = -ENOMEM; - goto out; - } - error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out_put; + goto out; /* Count the number of leaves */ bh = leaf_bh; @@ -1942,8 +1937,6 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); -out_put: - gfs2_qadata_put(dip); out: kfree(ht); return error; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 31b199f6efc1..9aa6af13823c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = { [7] = GFS2_DIF_NOATIME, [12] = GFS2_DIF_EXHASH, [14] = GFS2_DIF_INHERIT_JDATA, + [17] = GFS2_DIF_TOPDIR, }; static const u32 gfs2_to_fsflags[32] = { @@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = { [gfs2fl_AppendOnly] = FS_APPEND_FL, [gfs2fl_NoAtime] = FS_NOATIME_FL, [gfs2fl_ExHash] = FS_INDEX_FL, + [gfs2fl_TopLevel] = FS_TOPDIR_FL, [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, }; @@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode) GFS2_DIF_NOATIME| \ GFS2_DIF_SYNC| \ GFS2_DIF_SYSTEM| \ + GFS2_DIF_TOPDIR| \ GFS2_DIF_INHERIT_JDATA) /** @@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); if (!S_ISDIR(inode->i_mode)) { + gfsflags &= ~GFS2_DIF_TOPDIR; if (gfsflags & GFS2_DIF_INHERIT_JDATA) gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); return do_gfs2_set_flags(filp, gfsflags, ~0); @@ -366,7 +370,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; struct gfs2_holder gh; - struct gfs2_qadata *qa; loff_t size; int ret; @@ -376,6 +379,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) */ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; + + atomic_set(&ip->i_res->rs_sizehint, + PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) @@ -393,14 +403,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out_unlock; } - ret = -ENOMEM; - qa = gfs2_qadata_get(ip); - if (qa == NULL) + ret = gfs2_rindex_update(sdp); + if (ret) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) - goto out_alloc_put; + goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); if (ret) @@ -447,8 +456,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&gh); out: @@ -567,16 +574,14 @@ static int gfs2_open(struct inode *inode, struct file *file) static int gfs2_release(struct inode *inode, struct file *file) { - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_file *fp; + struct gfs2_inode *ip = GFS2_I(inode); - fp = file->private_data; + kfree(file->private_data); file->private_data = NULL; - if (gfs2_assert_warn(sdp, fp)) - return -EIO; - - kfree(fp); + if ((file->f_mode & FMODE_WRITE) && + (atomic_read(&inode->i_writecount) == 1)) + gfs2_rs_delete(ip); return 0; } @@ -653,12 +658,20 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + size_t writesize = iov_length(iov, nr_segs); + struct dentry *dentry = file->f_dentry; + struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct gfs2_sbd *sdp; + int ret; + sdp = GFS2_SB(file->f_mapping->host); + ret = gfs2_rs_alloc(ip); + if (ret) + return ret; + + atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift); if (file->f_flags & O_APPEND) { - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder gh; - int ret; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) @@ -751,7 +764,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, struct gfs2_inode *ip = GFS2_I(inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; - struct gfs2_qadata *qa; int error; const loff_t pos = offset; const loff_t count = len; @@ -774,11 +786,17 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); if (unlikely(error)) goto out_uninit; + atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift); + while (len > 0) { if (len < bytes) bytes = len; @@ -787,15 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, offset += bytes; continue; } - qa = gfs2_qadata_get(ip); - if (!qa) { - error = -ENOMEM; - goto out_unlock; - } - error = gfs2_quota_lock_check(ip); if (error) - goto out_alloc_put; + goto out_unlock; retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); @@ -835,7 +847,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, offset += max_bytes; gfs2_inplace_release(ip); gfs2_quota_unlock(ip); - gfs2_qadata_put(ip); } if (error == 0) @@ -846,8 +857,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); -out_alloc_put: - gfs2_qadata_put(ip); out_unlock: gfs2_glock_dq(&ip->i_gh); out_uninit: diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index dab2526071cc..1ed81f40da0d 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -46,10 +46,11 @@ #include "trace_gfs2.h" struct gfs2_glock_iter { - int hash; /* hash bucket index */ - struct gfs2_sbd *sdp; /* incore superblock */ - struct gfs2_glock *gl; /* current glock struct */ - char string[512]; /* scratch space */ + int hash; /* hash bucket index */ + unsigned nhash; /* Index within current bucket */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + loff_t last_pos; /* last position */ }; typedef void (*glock_examiner) (struct gfs2_glock * gl); @@ -767,6 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); + memset(gl->gl_lvb, 0, 32 * sizeof(char)); gl->gl_lksb.sb_lvbptr = gl->gl_lvb; gl->gl_tchange = jiffies; gl->gl_object = NULL; @@ -948,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) va_start(args, fmt); if (seq) { - struct gfs2_glock_iter *gi = seq->private; - vsprintf(gi->string, fmt, args); - seq_printf(seq, gi->string); + seq_vprintf(seq, fmt, args); } else { vaf.fmt = fmt; vaf.va = &args; @@ -1854,8 +1854,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gl = gi->gl; if (gl) { gi->gl = glock_hash_next(gl); + gi->nhash++; } else { + if (gi->hash >= GFS2_GL_HASH_SIZE) { + rcu_read_unlock(); + return 1; + } gi->gl = glock_hash_chain(gi->hash); + gi->nhash = 0; } while (gi->gl == NULL) { gi->hash++; @@ -1864,6 +1870,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) return 1; } gi->gl = glock_hash_chain(gi->hash); + gi->nhash = 0; } /* Skip entries for other sb and dead entries */ } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); @@ -1876,7 +1883,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) struct gfs2_glock_iter *gi = seq->private; loff_t n = *pos; - gi->hash = 0; + if (gi->last_pos <= *pos) + n = gi->nhash + (*pos - gi->last_pos); + else + gi->hash = 0; + + gi->nhash = 0; rcu_read_lock(); do { @@ -1884,6 +1896,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) return NULL; } while (n--); + gi->last_pos = *pos; return gi->gl; } @@ -1893,7 +1906,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, struct gfs2_glock_iter *gi = seq->private; (*pos)++; - + gi->last_pos = *pos; if (gfs2_glock_iter_next(gi)) return NULL; @@ -1964,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = { .show = gfs2_sbstats_seq_show, }; +#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL) + static int gfs2_glocks_open(struct inode *inode, struct file *file) { int ret = seq_open_private(file, &gfs2_glock_seq_ops, @@ -1972,6 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = GFS2_SEQ_GOODSIZE; } return ret; } @@ -1984,6 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct gfs2_glock_iter *gi = seq->private; gi->sdp = inode->i_private; + seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); + if (seq->buf) + seq->size = GFS2_SEQ_GOODSIZE; } return ret; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 67fd6beffece..aaecc8085fc5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -84,17 +84,22 @@ struct gfs2_rgrpd { u32 rd_data; /* num of data blocks in rgrp */ u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; + u32 rd_reserved; /* number of blocks reserved */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; struct gfs2_bitmap *rd_bits; struct gfs2_sbd *rd_sbd; + struct gfs2_rgrp_lvb *rd_rgl; u32 rd_last_alloc; u32 rd_flags; #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ + spinlock_t rd_rsspin; /* protects reservation related vars */ + struct rb_root rd_rstree; /* multi-block reservation tree */ + u32 rd_rs_cnt; /* count of current reservations */ }; enum gfs2_state_bits { @@ -232,6 +237,38 @@ struct gfs2_holder { unsigned long gh_ip; }; +/* Resource group multi-block reservation, in order of appearance: + + Step 1. Function prepares to write, allocates a mb, sets the size hint. + Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info + Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use + Step 4. Bits are assigned from the rgrp based on either the reservation + or wherever it can. +*/ + +struct gfs2_blkreserv { + /* components used during write (step 1): */ + atomic_t rs_sizehint; /* hint of the write size */ + + /* components used during inplace_reserve (step 2): */ + u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ + + /* components used during get_local_rgrp (step 3): */ + struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ + struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ + struct rb_node rs_node; /* link to other block reservations */ + + /* components used during block searches and assignments (step 4): */ + struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ + u32 rs_biblk; /* start block relative to the bi */ + u32 rs_free; /* how many blocks are still free */ + + /* ancillary quota stuff */ + struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; + struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; + unsigned int rs_qa_qd_num; +}; + enum { GLF_LOCK = 1, GLF_DEMOTE = 3, @@ -289,18 +326,6 @@ struct gfs2_glock { #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ -struct gfs2_qadata { /* quota allocation data */ - /* Quota stuff */ - struct gfs2_quota_data *qa_qd[2*MAXQUOTAS]; - struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS]; - unsigned int qa_qd_num; -}; - -struct gfs2_blkreserv { - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */ -}; - enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, @@ -308,7 +333,6 @@ enum { GIF_SW_PAGED = 3, }; - struct gfs2_inode { struct inode i_inode; u64 i_no_addr; @@ -319,8 +343,7 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ - struct gfs2_qadata *i_qadata; /* quota allocation data */ - struct gfs2_blkreserv *i_res; /* resource group block reservation */ + struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */ struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; @@ -473,6 +496,7 @@ struct gfs2_args { unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ + unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ int ar_commit; /* Commit interval */ int ar_statfs_quantum; /* The fast statfs interval */ int ar_quota_quantum; /* The quota interval */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 867674785fcf..4ce22e547308 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -521,12 +521,13 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, int error; munge_mode_uid_gid(dip, &mode, &uid, &gid); - if (!gfs2_qadata_get(dip)) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_lock(dip, uid, gid); if (error) - goto out; + return error; error = gfs2_quota_check(dip, uid, gid); if (error) @@ -542,8 +543,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, out_quota: gfs2_quota_unlock(dip); -out: - gfs2_qadata_put(dip); return error; } @@ -551,14 +550,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_qadata *qa; int alloc_required; struct buffer_head *dibh; int error; - qa = gfs2_qadata_get(dip); - if (!qa) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) @@ -605,13 +603,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, gfs2_trans_end(sdp); fail_ipreserv: - gfs2_inplace_release(dip); + if (alloc_required) + gfs2_inplace_release(dip); fail_quota_locks: gfs2_quota_unlock(dip); fail: - gfs2_qadata_put(dip); return error; } @@ -657,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, const struct qstr *name = &dentry->d_name; struct gfs2_holder ghs[2]; struct inode *inode = NULL; - struct gfs2_inode *dip = GFS2_I(dir); + struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; @@ -667,6 +665,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; + /* We need a reservation to allocate the new dinode block. The + directory ip temporarily points to the reservation, but this is + being done to get a set of contiguous blocks for the new dinode. + Since this is a create, we don't have a sizehint yet, so it will + have to use the minimum reservation size. */ + error = gfs2_rs_alloc(dip); + if (error) + return error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -700,19 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto fail_gunlock2; - error = gfs2_inode_refresh(GFS2_I(inode)); + ip = GFS2_I(inode); + error = gfs2_inode_refresh(ip); if (error) goto fail_gunlock2; + /* The newly created inode needs a reservation so it can allocate + xattrs. At the same time, we want new blocks allocated to the new + dinode to be as contiguous as possible. Since we allocated the + dinode block under the directory's reservation, we transfer + ownership of that reservation to the new inode. The directory + doesn't need a reservation unless it needs a new allocation. */ + ip->i_res = dip->i_res; + dip->i_res = NULL; + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; - error = gfs2_security_init(dip, GFS2_I(inode), name); + error = gfs2_security_init(dip, ip, name); if (error) goto fail_gunlock2; - error = link_dinode(dip, name, GFS2_I(inode)); + error = link_dinode(dip, name, ip); if (error) goto fail_gunlock2; @@ -722,10 +739,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_trans_end(sdp); /* Check if we reserved space in the rgrp. Function link_dinode may not, depending on whether alloc is required. */ - if (dip->i_res) + if (gfs2_mb_reserved(dip)) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); - gfs2_qadata_put(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); d_instantiate(dentry, inode); @@ -740,6 +756,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, iput(inode); } fail: + gfs2_rs_delete(dip); if (bh) brelse(bh); return error; @@ -816,6 +833,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (S_ISDIR(inode->i_mode)) return -EPERM; + error = gfs2_rs_alloc(dip); + if (error) + return error; + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); @@ -867,16 +888,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, error = 0; if (alloc_required) { - struct gfs2_qadata *qa = gfs2_qadata_get(dip); - - if (!qa) { - error = -ENOMEM; - goto out_gunlock; - } - error = gfs2_quota_lock_check(dip); if (error) - goto out_alloc; + goto out_gunlock; error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); if (error) @@ -919,9 +933,6 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, out_gunlock_q: if (alloc_required) gfs2_quota_unlock(dip); -out_alloc: - if (alloc_required) - gfs2_qadata_put(dip); out_gunlock: gfs2_glock_dq(ghs + 1); out_child: @@ -1231,6 +1242,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) return error; + error = gfs2_rs_alloc(ndip); + if (error) + return error; + if (odip != ndip) { error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, &r_gh); @@ -1354,16 +1369,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; if (alloc_required) { - struct gfs2_qadata *qa = gfs2_qadata_get(ndip); - - if (!qa) { - error = -ENOMEM; - goto out_gunlock; - } - error = gfs2_quota_lock_check(ndip); if (error) - goto out_alloc; + goto out_gunlock; error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); if (error) @@ -1424,9 +1432,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, out_gunlock_q: if (alloc_required) gfs2_quota_unlock(ndip); -out_alloc: - if (alloc_required) - gfs2_qadata_put(ndip); out_gunlock: while (x--) { gfs2_glock_dq(ghs + x); @@ -1587,12 +1592,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) ogid = ngid = NO_QUOTA_CHANGE; - if (!gfs2_qadata_get(ip)) - return -ENOMEM; - error = gfs2_quota_lock(ip, nuid, ngid); if (error) - goto out_alloc; + return error; if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { error = gfs2_quota_check(ip, nuid, ngid); @@ -1618,8 +1620,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); -out_alloc: - gfs2_qadata_put(ip); return error; } @@ -1641,6 +1641,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) struct gfs2_holder i_gh; int error; + error = gfs2_rs_alloc(ip); + if (error) + return error; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) return error; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 852c1be1dd3b..8ff95a2d54ee 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) goto out; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); - gfs2_meta_check(sdp, bd->bd_bh); - gfs2_pin(sdp, bd->bd_bh); mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { + printk(KERN_ERR + "Attempting to add uninitialised block to journal (inplace block=%lld)\n", + (unsigned long long)bd->bd_bh->b_blocknr); + BUG(); + } + gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 6cdb0f2a1b09..e04d0e09ee7b 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo) inode_init_once(&ip->i_inode); init_rwsem(&ip->i_rw_mutex); INIT_LIST_HEAD(&ip->i_trunc_list); - ip->i_qadata = NULL; ip->i_res = NULL; ip->i_hash_cache = NULL; } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 6c1e5d1c404a..3a56c8d94de0 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct gfs2_sbd *sdp = gl->gl_sbd; struct buffer_head *bh; - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + *bhp = NULL; return -EIO; + } *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); @@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, if (tr && tr->tr_touched) gfs2_io_error_bh(sdp, bh); brelse(bh); + *bhp = NULL; return -EIO; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 6c906078f657..e5af9dc420ef 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent } error = init_names(sdp, silent); - if (error) - goto fail; + if (error) { + /* In this case, we haven't initialized sysfs, so we have to + manually free the sdp. */ + free_percpu(sdp->sd_lkstats); + kfree(sdp); + sb->s_fs_info = NULL; + return error; + } snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); - gfs2_create_debugfs_file(sdp); - error = gfs2_sys_fs_add(sdp); + /* + * If we hit an error here, gfs2_sys_fs_add will have called function + * kobject_put which causes the sysfs usage count to go to zero, which + * causes sysfs to call function gfs2_sbd_release, which frees sdp. + * Subsequent error paths here will call gfs2_sys_fs_del, which also + * kobject_put to free sdp. + */ if (error) - goto fail; + return error; + + gfs2_create_debugfs_file(sdp); error = gfs2_lm_mount(sdp, silent); if (error) - goto fail_sys; + goto fail_debug; error = init_locking(sdp, &mount_gh, DO); if (error) @@ -1215,12 +1228,12 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent fail_lm: gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); -fail_sys: - gfs2_sys_fs_del(sdp); -fail: +fail_debug: gfs2_delete_debugfs_file(sdp); free_percpu(sdp->sd_lkstats); - kfree(sdp); + /* gfs2_sys_fs_del must be the last thing we do, since it causes + * sysfs to call function gfs2_sbd_release, which frees sdp. */ + gfs2_sys_fs_del(sdp); sb->s_fs_info = NULL; return error; } @@ -1389,10 +1402,9 @@ static void gfs2_kill_sb(struct super_block *sb) sdp->sd_root_dir = NULL; sdp->sd_master_dir = NULL; shrink_dcache_sb(sb); - kill_block_super(sb); gfs2_delete_debugfs_file(sdp); free_percpu(sdp->sd_lkstats); - kfree(sdp); + kill_block_super(sb); } struct file_system_type gfs2_fs_type = { diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 27b5cc7d6881..a3bde91645c2 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd) int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; - struct gfs2_quota_data **qd = qa->qa_qd; + struct gfs2_quota_data **qd; int error; - if (gfs2_assert_warn(sdp, !qa->qa_qd_num) || + if (ip->i_res == NULL) + gfs2_rs_alloc(ip); + + qd = ip->i_res->rs_qa_qd; + + if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) || gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) return -EIO; @@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { error = qdsb_get(sdp, QUOTA_USER, uid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; } @@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); if (error) goto out; - qa->qa_qd_num++; + ip->i_res->rs_qa_qd_num++; qd++; } @@ -542,16 +546,17 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) void gfs2_quota_unhold(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; unsigned int x; + if (ip->i_res == NULL) + return; gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); - for (x = 0; x < qa->qa_qd_num; x++) { - qdsb_put(qa->qa_qd[x]); - qa->qa_qd[x] = NULL; + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + qdsb_put(ip->i_res->rs_qa_qd[x]); + ip->i_res->rs_qa_qd[x] = NULL; } - qa->qa_qd_num = 0; + ip->i_res->rs_qa_qd_num = 0; } static int sort_qd(const void *a, const void *b) @@ -764,6 +769,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) unsigned int nalloc = 0, blocks; int error; + error = gfs2_rs_alloc(ip); + if (error) + return error; + gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); @@ -915,7 +924,6 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; unsigned int x; int error = 0; @@ -928,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *), - sort_qd, NULL); + sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num, + sizeof(struct gfs2_quota_data *), sort_qd, NULL); - for (x = 0; x < qa->qa_qd_num; x++) { + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { int force = NO_FORCE; - qd = qa->qa_qd[x]; + qd = ip->i_res->rs_qa_qd[x]; if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) force = FORCE; - error = do_glock(qd, force, &qa->qa_qd_ghs[x]); + error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]); if (error) break; } @@ -945,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) set_bit(GIF_QD_LOCKED, &ip->i_flags); else { while (x--) - gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); + gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); gfs2_quota_unhold(ip); } @@ -990,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; @@ -998,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; - for (x = 0; x < qa->qa_qd_num; x++) { + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { struct gfs2_quota_data *qd; int sync; - qd = qa->qa_qd[x]; + qd = ip->i_res->rs_qa_qd[x]; sync = need_sync(qd); - gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); + gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); if (sync && qd_trylock(qd)) qda[count++] = qd; @@ -1038,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type) int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; s64 value; unsigned int x; @@ -1050,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; - for (x = 0; x < qa->qa_qd_num; x++) { - qd = qa->qa_qd[x]; + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + qd = ip->i_res->rs_qa_qd[x]; if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) @@ -1089,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid) { - struct gfs2_qadata *qa = ip->i_qadata; struct gfs2_quota_data *qd; unsigned int x; @@ -1098,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - for (x = 0; x < qa->qa_qd_num; x++) { - qd = qa->qa_qd[x]; + for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { + qd = ip->i_res->rs_qa_qd[x]; if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { @@ -1549,10 +1554,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out_put; + mutex_lock(&ip->i_inode.i_mutex); error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); if (error) - goto out_put; + goto out_unlockput; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); if (error) goto out_q; @@ -1609,8 +1618,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, gfs2_glock_dq_uninit(&i_gh); out_q: gfs2_glock_dq_uninit(&q_gh); -out_put: +out_unlockput: mutex_unlock(&ip->i_inode.i_mutex); +out_put: qd_put(qd); return error; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f74fb9bd1973..4d34887a601d 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,6 +35,9 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +#define RSRV_CONTENTION_FACTOR 4 +#define RGRP_RSRV_MAX_CONTENDERS 2 + #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -177,6 +180,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) return tmp; } +/** + * rs_cmp - multi-block reservation range compare + * @blk: absolute file system block number of the new reservation + * @len: number of blocks in the new reservation + * @rs: existing reservation to compare against + * + * returns: 1 if the block range is beyond the reach of the reservation + * -1 if the block range is before the start of the reservation + * 0 if the block range overlaps with the reservation + */ +static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +{ + u64 startblk = gfs2_rs_startblk(rs); + + if (blk >= startblk + rs->rs_free) + return 1; + if (blk + len - 1 < startblk) + return -1; + return 0; +} + +/** + * rs_find - Find a rgrp multi-block reservation that contains a given block + * @rgd: The rgrp + * @rgblk: The block we're looking for, relative to the rgrp + */ +static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) +{ + struct rb_node **newn; + int rc; + u64 fsblk = rgblk + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + rc = rs_cmp(fsblk, 1, cur); + if (rc < 0) + newn = &((*newn)->rb_left); + else if (rc > 0) + newn = &((*newn)->rb_right); + else { + spin_unlock(&rgd->rd_rsspin); + return cur; + } + } + spin_unlock(&rgd->rd_rsspin); + return NULL; +} + /** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. @@ -417,6 +471,137 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd) } } +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ + int error = 0; + struct gfs2_blkreserv *res; + + if (ip->i_res) + return 0; + + res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); + if (!res) + error = -ENOMEM; + + down_write(&ip->i_rw_mutex); + if (ip->i_res) + kmem_cache_free(gfs2_rsrv_cachep, res); + else + ip->i_res = res; + up_write(&ip->i_rw_mutex); + return error; +} + +static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +{ + gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", + rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, + rs->rs_free); +} + +/** + * __rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +static void __rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + /* We can't do this: The reason is that when the rgrp is invalidated, + it's in the "middle" of acquiring the glock, but the HOLDER bit + isn't set yet: + BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ + trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); + + if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) + rb_erase(&rs->rs_node, &rgd->rd_rstree); + BUG_ON(!rgd->rd_rs_cnt); + rgd->rd_rs_cnt--; + + if (rs->rs_free) { + /* return reserved blocks to the rgrp and the ip */ + BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); + rs->rs_rgd->rd_reserved -= rs->rs_free; + rs->rs_free = 0; + clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + smp_mb__after_clear_bit(); + } + /* We can't change any of the step 1 or step 2 components of the rs. + E.g. We can't set rs_rgd to NULL because the rgd glock is held and + dequeued through this pointer. + Can't: atomic_set(&rs->rs_sizehint, 0); + Can't: rs->rs_requested = 0; + Can't: rs->rs_rgd = NULL;*/ + rs->rs_bi = NULL; + rs->rs_biblk = 0; +} + +/** + * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +{ + struct gfs2_rgrpd *rgd; + + if (!gfs2_rs_active(rs)) + return; + + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + __rs_deltree(rs); + spin_unlock(&rgd->rd_rsspin); +} + +/** + * gfs2_rs_delete - delete a multi-block reservation + * @ip: The inode for this reservation + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip) +{ + down_write(&ip->i_rw_mutex); + if (ip->i_res) { + gfs2_rs_deltree(ip->i_res); + trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + BUG_ON(ip->i_res->rs_free); + kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); + ip->i_res = NULL; + } + up_write(&ip->i_rw_mutex); +} + +/** + * return_all_reservations - return all reserved blocks back to the rgrp. + * @rgd: the rgrp that needs its space back + * + * We previously reserved a bunch of blocks for allocation. Now we need to + * give them back. This leave the reservation structures in tact, but removes + * all of their corresponding "no-fly zones". + */ +static void return_all_reservations(struct gfs2_rgrpd *rgd) +{ + struct rb_node *n; + struct gfs2_blkreserv *rs; + + spin_lock(&rgd->rd_rsspin); + while ((n = rb_first(&rgd->rd_rstree))) { + rs = rb_entry(n, struct gfs2_blkreserv, rs_node); + __rs_deltree(rs); + } + spin_unlock(&rgd->rd_rsspin); +} + void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) { struct rb_node *n; @@ -439,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) gfs2_free_clones(rgd); kfree(rgd->rd_bits); + return_all_reservations(rgd); kmem_cache_free(gfs2_rgrpd_cachep, rgd); } } @@ -616,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data0 = be64_to_cpu(buf.ri_data0); rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); + spin_lock_init(&rgd->rd_rsspin); error = compute_bitstructs(rgd); if (error) @@ -627,6 +814,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; + rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; @@ -736,9 +924,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); } +static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + + if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || + rgl->rl_dinodes != str->rg_dinodes || + rgl->rl_igeneration != str->rg_igeneration) + return 0; + return 1; +} + +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ + const struct gfs2_rgrp *str = buf; + + rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); + rgl->rl_flags = str->rg_flags; + rgl->rl_free = str->rg_free; + rgl->rl_dinodes = str->rg_dinodes; + rgl->rl_igeneration = str->rg_igeneration; + rgl->__pad = 0UL; +} + +static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) +{ + struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; + u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; + rgl->rl_unlinked = cpu_to_be32(unlinked); +} + +static u32 count_unlinked(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + const u32 length = rgd->rd_length; + const u8 *buffer = NULL; + u32 i, goal, count = 0; + + for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { + goal = 0; + buffer = bi->bi_bh->b_data + bi->bi_offset; + WARN_ON(!buffer_uptodate(bi->bi_bh)); + while (goal < bi->bi_len * GFS2_NBBY) { + goal = gfs2_bitfit(buffer, bi->bi_len, goal, + GFS2_BLKST_UNLINKED); + if (goal == BFITNOENT) + break; + count++; + goal++; + } + } + + return count; +} + + /** - * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps - * @gh: The glock holder for the resource group + * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps + * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. @@ -746,9 +990,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) * Returns: errno */ -int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { - struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; unsigned int length = rgd->rd_length; @@ -756,6 +999,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) unsigned int x, y; int error; + if (rgd->rd_bits[0].bi_bh != NULL) + return 0; + for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); @@ -782,7 +1028,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, + rgd->rd_bits[0].bi_bh->b_data); + } + else if (sdp->sd_args.ar_rgrplvb) { + if (!gfs2_rgrp_lvb_valid(rgd)){ + gfs2_consist_rgrpd(rgd); + error = -EIO; + goto fail; + } + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + } return 0; fail: @@ -796,6 +1055,39 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) return error; } +int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +{ + u32 rl_flags; + + if (rgd->rd_flags & GFS2_RDF_UPTODATE) + return 0; + + if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + return gfs2_rgrp_bh_get(rgd); + + rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); + rl_flags &= ~GFS2_RDF_MASK; + rgd->rd_flags &= GFS2_RDF_MASK; + rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); + if (rgd->rd_rgl->rl_unlinked == 0) + rgd->rd_flags &= ~GFS2_RDF_CHECK; + rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgd->rd_free_clone = rgd->rd_free; + rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); + rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); + return 0; +} + +int gfs2_rgrp_go_lock(struct gfs2_holder *gh) +{ + struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object; + struct gfs2_sbd *sdp = rgd->rd_sbd; + + if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) + return 0; + return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); +} + /** * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() * @gh: The glock holder for the resource group @@ -809,8 +1101,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh) for (x = 0; x < length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; - brelse(bi->bi_bh); - bi->bi_bh = NULL; + if (bi->bi_bh) { + brelse(bi->bi_bh); + bi->bi_bh = NULL; + } } } @@ -954,6 +1248,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, bh, 1); gfs2_rgrp_out(rgd, bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); } } @@ -974,38 +1269,184 @@ int gfs2_fitrim(struct file *filp, void __user *argp) } /** - * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode - * @ip: the incore GFS2 inode structure + * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree + * @bi: the bitmap with the blocks + * @ip: the inode structure + * @biblk: the 32-bit block number relative to the start of the bitmap + * @amount: the number of blocks to reserve * - * Returns: the struct gfs2_qadata + * Returns: NULL - reservation was already taken, so not inserted + * pointer to the inserted reservation */ - -struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) +static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, + struct gfs2_inode *ip, u32 biblk, + int amount) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - int error; - BUG_ON(ip->i_qadata != NULL); - ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS); - error = gfs2_rindex_update(sdp); - if (error) - fs_warn(sdp, "rindex update returns %d\n", error); - return ip->i_qadata; + struct rb_node **newn, *parent = NULL; + int rc; + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + + spin_lock(&rgd->rd_rsspin); + newn = &rgd->rd_rstree.rb_node; + BUG_ON(!ip->i_res); + BUG_ON(gfs2_rs_active(rs)); + /* Figure out where to put new node */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + while (*newn) { + struct gfs2_blkreserv *cur = + rb_entry(*newn, struct gfs2_blkreserv, rs_node); + + parent = *newn; + rc = rs_cmp(fsblock, amount, cur); + if (rc > 0) + newn = &((*newn)->rb_right); + else if (rc < 0) + newn = &((*newn)->rb_left); + else { + spin_unlock(&rgd->rd_rsspin); + return NULL; /* reservation already in use */ + } + } + + /* Do our reservation work */ + rs = ip->i_res; + rs->rs_free = amount; + rs->rs_biblk = biblk; + rs->rs_bi = bi; + rb_link_node(&rs->rs_node, parent, newn); + rb_insert_color(&rs->rs_node, &rgd->rd_rstree); + + /* Do our inode accounting for the reservation */ + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + + /* Do our rgrp accounting for the reservation */ + rgd->rd_reserved += amount; /* blocks reserved */ + rgd->rd_rs_cnt++; /* number of in-tree reservations */ + spin_unlock(&rgd->rd_rsspin); + trace_gfs2_rs(ip, rs, TRACE_RS_INSERT); + return rs; } /** - * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode - * @ip: the incore GFS2 inode structure + * unclaimed_blocks - return number of blocks that aren't spoken for + */ +static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd) +{ + return rgd->rd_free_clone - rgd->rd_reserved; +} + +/** + * rg_mblk_search - find a group of multiple free blocks + * @rgd: the resource group descriptor + * @rs: the block reservation + * @ip: pointer to the inode for which we're reserving blocks * - * Returns: the struct gfs2_qadata + * This is very similar to rgblk_search, except we're looking for whole + * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing + * on aligned dwords for speed's sake. + * + * Returns: 0 if successful or BFITNOENT if there isn't enough free space */ -static int gfs2_blkrsv_get(struct gfs2_inode *ip) +static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - BUG_ON(ip->i_res != NULL); - ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); - if (!ip->i_res) - return -ENOMEM; - return 0; + struct gfs2_bitmap *bi = rgd->rd_bits; + const u32 length = rgd->rd_length; + u32 blk; + unsigned int buf, x, search_bytes; + u8 *buffer = NULL; + u8 *ptr, *end, *nonzero; + u32 goal, rsv_bytes; + struct gfs2_blkreserv *rs; + u32 best_rs_bytes, unclaimed; + int best_rs_blocks; + + /* Find bitmap block that contains bits for goal block */ + if (rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + /* Convert scope of "goal" from rgrp-wide to within + found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } + } + buf = 0; + goal = 0; + +do_search: + best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint), + (RGRP_RSRV_MINBLKS * rgd->rd_length)); + best_rs_bytes = (best_rs_blocks * + (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) / + GFS2_NBBY; /* 1 + is for our not-yet-created reservation */ + best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64)); + unclaimed = unclaimed_blocks(rgd); + if (best_rs_bytes * GFS2_NBBY > unclaimed) + best_rs_bytes = unclaimed >> GFS2_BIT_SIZE; + + for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags)) + goto skip; + + WARN_ON(!buffer_uptodate(bi->bi_bh)); + if (bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; + else + buffer = bi->bi_bh->b_data + bi->bi_offset; + + /* We have to keep the reservations aligned on u64 boundaries + otherwise we could get situations where a byte can't be + used because it's after a reservation, but a free bit still + is within the reservation's area. */ + ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64)); + end = (buffer + bi->bi_len); + while (ptr < end) { + rsv_bytes = 0; + if ((ptr + best_rs_bytes) <= end) + search_bytes = best_rs_bytes; + else + search_bytes = end - ptr; + BUG_ON(!search_bytes); + nonzero = memchr_inv(ptr, 0, search_bytes); + /* If the lot is all zeroes, reserve the whole size. If + there's enough zeroes to satisfy the request, use + what we can. If there's not enough, keep looking. */ + if (nonzero == NULL) + rsv_bytes = search_bytes; + else if ((nonzero - ptr) * GFS2_NBBY >= + ip->i_res->rs_requested) + rsv_bytes = (nonzero - ptr); + + if (rsv_bytes) { + blk = ((ptr - buffer) * GFS2_NBBY); + BUG_ON(blk >= bi->bi_len * GFS2_NBBY); + rs = rs_insert(bi, ip, blk, + rsv_bytes * GFS2_NBBY); + if (IS_ERR(rs)) + return PTR_ERR(rs); + if (rs) + return 0; + } + ptr += ALIGN(search_bytes, sizeof(u64)); + } +skip: + /* Try next bitmap block (wrap back to rgrp header + if at end) */ + buf++; + buf %= length; + goal = 0; + } + + return BFITNOENT; } /** @@ -1014,24 +1455,26 @@ static int gfs2_blkrsv_get(struct gfs2_inode *ip) * @ip: the inode * * If there's room for the requested blocks to be allocated from the RG: + * This will try to get a multi-block reservation first, and if that doesn't + * fit, it will take what it can. * * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) */ -static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { - const struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_blkreserv *rs = ip->i_res; if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; - if (rgd->rd_free_clone >= rs->rs_requested) + /* Look for a multi-block reservation. */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS && + rg_mblk_search(rgd, ip) != BFITNOENT) + return 1; + if (unclaimed_blocks(rgd) >= rs->rs_requested) return 1; - return 0; -} -static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) -{ - return (bi->bi_start * GFS2_NBBY) + blk; + return 0; } /** @@ -1100,75 +1543,6 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip return; } -/** - * get_local_rgrp - Choose and lock a rgrp for allocation - * @ip: the inode to reserve space for - * @last_unlinked: the last unlinked block - * - * Try to acquire rgrp in way which avoids contending with others. - * - * Returns: errno - */ - -static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_rgrpd *rgd, *begin = NULL; - struct gfs2_blkreserv *rs = ip->i_res; - int error, rg_locked, flags = LM_FLAG_TRY; - int loops = 0; - - if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) - rgd = begin = ip->i_rgd; - else - rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); - - if (rgd == NULL) - return -EBADSLT; - - while (loops < 3) { - rg_locked = 0; - - if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { - rg_locked = 1; - error = 0; - } else { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - flags, &rs->rs_rgd_gh); - } - switch (error) { - case 0: - if (try_rgrp_fit(rgd, ip)) { - ip->i_rgd = rgd; - return 0; - } - if (rgd->rd_flags & GFS2_RDF_CHECK) - try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); - if (!rg_locked) - gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - /* fall through */ - case GLR_TRYFAILED: - rgd = gfs2_rgrpd_get_next(rgd); - if (rgd == begin) { - flags = 0; - loops++; - } - break; - default: - return error; - } - } - - return -ENOSPC; -} - -static void gfs2_blkrsv_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_res == NULL); - kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); - ip->i_res = NULL; -} - /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for @@ -1180,40 +1554,110 @@ static void gfs2_blkrsv_put(struct gfs2_inode *ip) int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_blkreserv *rs; - int error; + struct gfs2_rgrpd *begin = NULL; + struct gfs2_blkreserv *rs = ip->i_res; + int error = 0, rg_locked, flags = LM_FLAG_TRY; u64 last_unlinked = NO_BLOCK; - int tries = 0; + int loops = 0; - error = gfs2_blkrsv_get(ip); - if (error) - return error; - - rs = ip->i_res; + if (sdp->sd_args.ar_rgrplvb) + flags |= GL_SKIP; rs->rs_requested = requested; if (gfs2_assert_warn(sdp, requested)) { error = -EINVAL; goto out; } + if (gfs2_rs_active(rs)) { + begin = rs->rs_rgd; + flags = 0; /* Yoda: Do or do not. There is no try */ + } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { + rs->rs_rgd = begin = ip->i_rgd; + } else { + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + } + if (rs->rs_rgd == NULL) + return -EBADSLT; - do { - error = get_local_rgrp(ip, &last_unlinked); - if (error != -ENOSPC) - break; - /* Check that fs hasn't grown if writing to rindex */ - if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { - error = gfs2_ri_update(ip); - if (error) - break; - continue; + while (loops < 3) { + rg_locked = 0; + + if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { + rg_locked = 1; + error = 0; + } else if (!loops && !gfs2_rs_active(rs) && + rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) { + /* If the rgrp already is maxed out for contenders, + we can eliminate it as a "first pass" without even + requesting the rgrp glock. */ + error = GLR_TRYFAILED; + } else { + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, + LM_ST_EXCLUSIVE, flags, + &rs->rs_rgd_gh); + if (!error && sdp->sd_args.ar_rgrplvb) { + error = update_rgrp_lvb(rs->rs_rgd); + if (error) { + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + return error; + } + } } - /* Flushing the log may release space */ - gfs2_log_flush(sdp, NULL); - } while (tries++ < 3); + switch (error) { + case 0: + if (gfs2_rs_active(rs)) { + if (unclaimed_blocks(rs->rs_rgd) + + rs->rs_free >= rs->rs_requested) { + ip->i_rgd = rs->rs_rgd; + return 0; + } + /* We have a multi-block reservation, but the + rgrp doesn't have enough free blocks to + satisfy the request. Free the reservation + and look for a suitable rgrp. */ + gfs2_rs_deltree(rs); + } + if (try_rgrp_fit(rs->rs_rgd, ip)) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rgd); + ip->i_rgd = rs->rs_rgd; + return 0; + } + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) { + if (sdp->sd_args.ar_rgrplvb) + gfs2_rgrp_bh_get(rs->rs_rgd); + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, + ip->i_no_addr); + } + if (!rg_locked) + gfs2_glock_dq_uninit(&rs->rs_rgd_gh); + /* fall through */ + case GLR_TRYFAILED: + rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd); + rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */ + if (rs->rs_rgd != begin) /* If we didn't wrap */ + break; + + flags &= ~LM_FLAG_TRY; + loops++; + /* Check that fs hasn't grown if writing to rindex */ + if (ip == GFS2_I(sdp->sd_rindex) && + !sdp->sd_rindex_uptodate) { + error = gfs2_ri_update(ip); + if (error) + goto out; + } else if (loops == 2) + /* Flushing the log may release space */ + gfs2_log_flush(sdp, NULL); + break; + default: + goto out; + } + } + error = -ENOSPC; out: if (error) - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; return error; } @@ -1228,9 +1672,15 @@ void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_blkreserv *rs = ip->i_res; + if (!rs) + return; + + if (!rs->rs_free) + gfs2_rs_deltree(rs); + if (rs->rs_rgd_gh.gh_gl) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); - gfs2_blkrsv_put(ip); + rs->rs_requested = 0; } /** @@ -1326,7 +1776,27 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, unsigned char state, if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + while (1) { + struct gfs2_blkreserv *rs; + u32 rgblk; + + biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); + if (biblk == BFITNOENT) + break; + /* Check if this block is reserved() */ + rgblk = gfs2_bi2rgd_blk(bi, biblk); + rs = rs_find(rgd, rgblk); + if (rs == NULL) + break; + + BUG_ON(rs->rs_bi != bi); + biblk = BFITNOENT; + /* This should jump to the first block after the + reservation. */ + goal = rs->rs_biblk + rs->rs_free; + if (goal >= bi->bi_len * GFS2_NBBY) + break; + } if (biblk != BFITNOENT) break; @@ -1362,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, u32 blk, bool dinode, unsigned int *n) { const unsigned int elen = *n; - u32 goal; + u32 goal, rgblk; const u8 *buffer = NULL; + struct gfs2_blkreserv *rs; *n = 0; buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1376,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi, goal++; if (goal >= (bi->bi_len * GFS2_NBBY)) break; + rgblk = gfs2_bi2rgd_blk(bi, goal); + rs = rs_find(rgd, rgblk); + if (rs) /* Oops, we bumped into someone's reservation */ + break; if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != GFS2_BLKST_FREE) break; @@ -1451,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) { - const struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_rgrpd *rgd = gl->gl_object; + struct gfs2_blkreserv *trs; + const struct rb_node *n; + if (rgd == NULL) return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, + rgd->rd_reserved); + spin_lock(&rgd->rd_rsspin); + for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { + trs = rb_entry(n, struct gfs2_blkreserv, rs_node); + dump_rs(seq, trs); + } + spin_unlock(&rgd->rd_rsspin); return 0; } @@ -1470,11 +1955,64 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) rgd->rd_flags |= GFS2_RDF_ERROR; } +/** + * claim_reserved_blks - Claim previously reserved blocks + * @ip: the inode that's claiming the reservation + * @dinode: 1 if this block is a dinode block, otherwise data block + * @nblocks: desired extent length + * + * Lay claim to previously allocated block reservation blocks. + * Returns: Starting block number of the blocks claimed. + * Sets *nblocks to the actual extent length allocated. + */ +static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode, + unsigned int *nblocks) +{ + struct gfs2_blkreserv *rs = ip->i_res; + struct gfs2_rgrpd *rgd = rs->rs_rgd; + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_bitmap *bi; + u64 start_block = gfs2_rs_startblk(rs); + const unsigned int elen = *nblocks; + + /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ + gfs2_assert_withdraw(sdp, rgd); + /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ + bi = rs->rs_bi; + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); + + for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) { + /* Make sure the bitmap hasn't changed */ + gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk, + dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); + rs->rs_biblk++; + rs->rs_free--; + + BUG_ON(!rgd->rd_reserved); + rgd->rd_reserved--; + dinode = false; + trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM); + } + + if (!rs->rs_free) { + struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd; + + gfs2_rs_deltree(rs); + /* -nblocks because we haven't returned to do the math yet. + I'm doing the math backwards to prevent negative numbers, + but think of it as: + if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */ + if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks) + rg_mblk_search(rgd, ip); + } + return start_block; +} + /** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number - * @ndata: requested number of blocks/extent length (value/result) + * @nblocks: requested number of blocks/extent length (value/result) * @dinode: 1 if we're allocating a dinode block, else 0 * @generation: the generation number of the inode * @@ -1496,23 +2034,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, /* Only happens if there is a bug in gfs2, return something distinctive * to ensure that it is noticed. */ - if (ip->i_res == NULL) + if (ip->i_res->rs_requested == 0) return -ECANCELED; - rgd = ip->i_rgd; + /* Check if we have a multi-block reservation, and if so, claim the + next free block from it. */ + if (gfs2_rs_active(ip->i_res)) { + BUG_ON(!ip->i_res->rs_free); + rgd = ip->i_res->rs_rgd; + block = claim_reserved_blks(ip, dinode, nblocks); + } else { + rgd = ip->i_rgd; - if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) - goal = ip->i_goal - rgd->rd_data0; - else - goal = rgd->rd_last_alloc; + if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) + goal = ip->i_goal - rgd->rd_data0; + else + goal = rgd->rd_last_alloc; - blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); + blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); - /* Since all blocks are reserved in advance, this shouldn't happen */ - if (blk == BFITNOENT) - goto rgrp_error; + /* Since all blocks are reserved in advance, this shouldn't + happen */ + if (blk == BFITNOENT) { + printk(KERN_WARNING "BFITNOENT, nblocks=%u\n", + *nblocks); + printk(KERN_WARNING "FULL=%d\n", + test_bit(GBF_FULL, &rgd->rd_bits->bi_flags)); + goto rgrp_error; + } - block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); + } ndata = *nblocks; if (dinode) ndata--; @@ -1529,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rgd->rd_free < *nblocks) + if (rgd->rd_free < *nblocks) { + printk(KERN_WARNING "nblocks=%u\n", *nblocks); goto rgrp_error; + } rgd->rd_free -= *nblocks; if (dinode) { @@ -1542,6 +2096,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -1588,6 +2143,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth) @@ -1624,6 +2180,8 @@ void gfs2_unlink_di(struct inode *inode) trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, 1); } static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) @@ -1643,6 +2201,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); + update_rgrp_lvb_unlinked(rgd, -1); gfs2_statfs_change(sdp, 0, +1, -1); } @@ -1784,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) for (x = 0; x < rlist->rl_rgrps; x++) gfs2_holder_uninit(&rlist->rl_ghs[x]); kfree(rlist->rl_ghs); + rlist->rl_ghs = NULL; } } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index b4b10f4de25f..ca6e26729b86 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -13,6 +13,14 @@ #include #include +/* Since each block in the file system is represented by two bits in the + * bitmap, one 64-bit word in the bitmap will represent 32 blocks. + * By reserving 32 blocks at a time, we can optimize / shortcut how we search + * through the bitmaps by looking a word at a time. + */ +#define RGRP_RSRV_MINBYTES 8 +#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY)) + struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; @@ -29,13 +37,7 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd); extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); -extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip); -static inline void gfs2_qadata_put(struct gfs2_inode *ip) -{ - BUG_ON(ip->i_qadata == NULL); - kfree(ip->i_qadata); - ip->i_qadata = NULL; -} +extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); extern void gfs2_inplace_release(struct gfs2_inode *ip); @@ -43,6 +45,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); +extern int gfs2_rs_alloc(struct gfs2_inode *ip); +extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,4 +73,30 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); extern int gfs2_fitrim(struct file *filp, void __user *argp); +/* This is how to tell if a multi-block reservation is "inplace" reserved: */ +static inline int gfs2_mb_reserved(struct gfs2_inode *ip) +{ + if (ip->i_res && ip->i_res->rs_requested) + return 1; + return 0; +} + +/* This is how to tell if a multi-block reservation is in the rgrp tree: */ +static inline int gfs2_rs_active(struct gfs2_blkreserv *rs) +{ + if (rs && rs->rs_bi) + return 1; + return 0; +} + +static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk) +{ + return (bi->bi_start * GFS2_NBBY) + blk; +} + +static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs) +{ + return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0; +} + #endif /* __RGRP_DOT_H__ */ diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index f3d6bbfb32c5..fc3168f47a14 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -78,6 +78,8 @@ enum { Opt_quota_quantum, Opt_barrier, Opt_nobarrier, + Opt_rgrplvb, + Opt_norgrplvb, Opt_error, }; @@ -115,6 +117,8 @@ static const match_table_t tokens = { {Opt_quota_quantum, "quota_quantum=%d"}, {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, + {Opt_rgrplvb, "rgrplvb"}, + {Opt_norgrplvb, "norgrplvb"}, {Opt_error, NULL} }; @@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) case Opt_nobarrier: args->ar_nobarrier = 1; break; + case Opt_rgrplvb: + args->ar_rgrplvb = 1; + break; + case Opt_norgrplvb: + args->ar_rgrplvb = 0; + break; case Opt_error: default: printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); @@ -1381,6 +1391,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",nobarrier"); if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) seq_printf(s, ",demote_interface_used"); + if (args->ar_rgrplvb) + seq_printf(s, ",rgrplvb"); return 0; } @@ -1401,7 +1413,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip) static int gfs2_dinode_dealloc(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_qadata *qa; struct gfs2_rgrpd *rgd; struct gfs2_holder gh; int error; @@ -1411,13 +1422,13 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) return -EIO; } - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; + error = gfs2_rindex_update(sdp); + if (error) + return error; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out; + return error; rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); if (!rgd) { @@ -1445,8 +1456,6 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) gfs2_glock_dq_uninit(&gh); out_qs: gfs2_quota_unhold(ip); -out: - gfs2_qadata_put(ip); return error; } @@ -1547,6 +1556,9 @@ static void gfs2_evict_inode(struct inode *inode) out_unlock: /* Error path for case 1 */ + if (gfs2_rs_active(ip->i_res)) + gfs2_rs_deltree(ip->i_res); + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); @@ -1556,6 +1568,7 @@ static void gfs2_evict_inode(struct inode *inode) out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); + gfs2_rs_delete(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; @@ -1578,6 +1591,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb) ip->i_flags = 0; ip->i_gl = NULL; ip->i_rgd = NULL; + ip->i_res = NULL; } return &ip->i_inode; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 73ecc34c4342..8056b7b7238e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = { NULL, }; +static void gfs2_sbd_release(struct kobject *kobj) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + + kfree(sdp); +} + static struct kobj_type gfs2_ktype = { + .release = gfs2_sbd_release, .default_attrs = gfs2_attrs, .sysfs_ops = &gfs2_attr_ops, }; @@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) char ro[20]; char spectator[20]; char *envp[] = { ro, spectator, NULL }; + int sysfs_frees_sdp = 0; sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); @@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, "%s", sdp->sd_table_name); if (error) - goto fail; + goto fail_reg; + sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling + function gfs2_sbd_release. */ error = sysfs_create_group(&sdp->sd_kobj, &tune_group); if (error) goto fail_reg; @@ -615,9 +626,13 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp) fail_tune: sysfs_remove_group(&sdp->sd_kobj, &tune_group); fail_reg: - kobject_put(&sdp->sd_kobj); -fail: + free_percpu(sdp->sd_lkstats); fs_err(sdp, "error %d adding sysfs files", error); + if (sysfs_frees_sdp) + kobject_put(&sdp->sd_kobj); + else + kfree(sdp); + sb->s_fs_info = NULL; return error; } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 1b8b81588199..a25c252fe412 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -14,6 +14,7 @@ #include #include "incore.h" #include "glock.h" +#include "rgrp.h" #define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } #define glock_trace_name(x) __print_symbolic(x, \ @@ -31,6 +32,17 @@ { GFS2_BLKST_DINODE, "dinode" }, \ { GFS2_BLKST_UNLINKED, "unlinked" }) +#define TRACE_RS_DELETE 0 +#define TRACE_RS_TREEDEL 1 +#define TRACE_RS_INSERT 2 +#define TRACE_RS_CLAIM 3 + +#define rs_func_name(x) __print_symbolic(x, \ + { 0, "del " }, \ + { 1, "tdel" }, \ + { 2, "ins " }, \ + { 3, "clm " }) + #define show_glock_flags(flags) __print_flags(flags, "", \ {(1UL << GLF_LOCK), "l" }, \ {(1UL << GLF_DEMOTE), "D" }, \ @@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u8, block_state ) __field( u64, rd_addr ) __field( u32, rd_free_clone ) + __field( u32, rd_reserved ) ), TP_fast_assign( @@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc, __entry->block_state = block_state; __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; + __entry->rd_reserved = rgd->rd_reserved; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone) + __entry->rd_free_clone, (unsigned long)__entry->rd_reserved) +); + +/* Keep track of multi-block reservations as they are allocated/freed */ +TRACE_EVENT(gfs2_rs, + + TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs, + u8 func), + + TP_ARGS(ip, rs, func), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, rd_addr ) + __field( u32, rd_free_clone ) + __field( u32, rd_reserved ) + __field( u64, inum ) + __field( u64, start ) + __field( u32, free ) + __field( u8, func ) + ), + + TP_fast_assign( + __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0; + __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0; + __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0; + __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0; + __entry->inum = ip ? ip->i_no_addr : 0; + __entry->start = gfs2_rs_startblk(rs); + __entry->free = rs->rs_free; + __entry->func = func; + ), + + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s " + "f:%lu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->start, + (unsigned long long)__entry->rd_addr, + (unsigned long)__entry->rd_free_clone, + (unsigned long)__entry->rd_reserved, + rs_func_name(__entry->func), (unsigned long)__entry->free) ); #endif /* _TRACE_GFS2_H */ diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 125d4572e1c0..41f42cdccbb8 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -31,7 +31,7 @@ struct gfs2_glock; static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) { const struct gfs2_blkreserv *rs = ip->i_res; - if (rs->rs_requested < ip->i_rgd->rd_length) + if (rs && rs->rs_requested < ip->i_rgd->rd_length) return rs->rs_requested + 1; return ip->i_rgd->rd_length; } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 3586b0dd6aa7..80535739ac7b 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *type, const char *function, char *file, unsigned int line); -static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, - struct buffer_head *bh, - const char *function, - char *file, unsigned int line) +static inline int gfs2_meta_check(struct gfs2_sbd *sdp, + struct buffer_head *bh) { struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; u32 magic = be32_to_cpu(mh->mh_magic); - if (unlikely(magic != GFS2_MAGIC)) - return gfs2_meta_check_ii(sdp, bh, "magic number", function, - file, line); + if (unlikely(magic != GFS2_MAGIC)) { + printk(KERN_ERR "GFS2: Magic number missing at %llu\n", + (unsigned long long)bh->b_blocknr); + return -EIO; + } return 0; } -#define gfs2_meta_check(sdp, bh) \ -gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__) - - int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, u16 type, u16 t, const char *function, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 927f4df874ae..27a0b4a901f5 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -325,12 +325,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, int leave) { - struct gfs2_qadata *qa; int error; - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); + if (error) + return error; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) @@ -340,7 +339,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, gfs2_quota_unhold(ip); out_alloc: - gfs2_qadata_put(ip); return error; } @@ -713,17 +711,16 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { - struct gfs2_qadata *qa; struct buffer_head *dibh; int error; - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); + if (error) + return error; error = gfs2_quota_lock_check(ip); if (error) - goto out; + return error; error = gfs2_inplace_reserve(ip, blks); if (error) @@ -753,8 +750,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, gfs2_inplace_release(ip); out_gunlock_q: gfs2_quota_unlock(ip); -out: - gfs2_qadata_put(ip); return error; } @@ -1494,16 +1489,15 @@ static int ea_dealloc_block(struct gfs2_inode *ip) int gfs2_ea_dealloc(struct gfs2_inode *ip) { - struct gfs2_qadata *qa; int error; - qa = gfs2_qadata_get(ip); - if (!qa) - return -ENOMEM; + error = gfs2_rindex_update(GFS2_SB(&ip->i_inode)); + if (error) + return error; error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); if (error) - goto out_alloc; + return error; error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); if (error) @@ -1519,8 +1513,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) out_quota: gfs2_quota_unhold(ip); -out_alloc: - gfs2_qadata_put(ip); return error; } diff --git a/fs/seq_file.c b/fs/seq_file.c index 0cbd0494b79e..14cf9de1dbe1 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc) } EXPORT_SYMBOL(seq_escape); -int seq_printf(struct seq_file *m, const char *f, ...) +int seq_vprintf(struct seq_file *m, const char *f, va_list args) { - va_list args; int len; if (m->count < m->size) { - va_start(args, f); len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); - va_end(args); if (m->count + len < m->size) { m->count += len; return 0; @@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...) seq_set_overflow(m); return -1; } +EXPORT_SYMBOL(seq_vprintf); + +int seq_printf(struct seq_file *m, const char *f, ...) +{ + int ret; + va_list args; + + va_start(args, f); + ret = seq_vprintf(m, f, args); + va_end(args); + + return ret; +} EXPORT_SYMBOL(seq_printf); /** diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index fa98bdb073b9..b2de1f9a88d6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -170,6 +170,16 @@ struct gfs2_rindex { #define GFS2_RGF_NOALLOC 0x00000008 #define GFS2_RGF_TRIMMED 0x00000010 +struct gfs2_rgrp_lvb { + __be32 rl_magic; + __be32 rl_flags; + __be32 rl_free; + __be32 rl_dinodes; + __be64 rl_igeneration; + __be32 rl_unlinked; + __be32 __pad; +}; + struct gfs2_rgrp { struct gfs2_meta_header rg_header; @@ -214,6 +224,7 @@ enum { gfs2fl_NoAtime = 7, gfs2fl_Sync = 8, gfs2fl_System = 9, + gfs2fl_TopLevel = 10, gfs2fl_TruncInProg = 29, gfs2fl_InheritDirectio = 30, gfs2fl_InheritJdata = 31, @@ -230,8 +241,9 @@ enum { #define GFS2_DIF_NOATIME 0x00000080 #define GFS2_DIF_SYNC 0x00000100 #define GFS2_DIF_SYSTEM 0x00000200 /* New in gfs2 */ +#define GFS2_DIF_TOPDIR 0x00000400 /* New in gfs2 */ #define GFS2_DIF_TRUNC_IN_PROG 0x20000000 /* New in gfs2 */ -#define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 +#define GFS2_DIF_INHERIT_DIRECTIO 0x40000000 /* only in gfs1 */ #define GFS2_DIF_INHERIT_JDATA 0x80000000 struct gfs2_dinode { diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index fc61854f6224..83c44eefe698 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -86,6 +86,7 @@ int seq_puts(struct seq_file *m, const char *s); int seq_write(struct seq_file *seq, const void *data, size_t len); __printf(2, 3) int seq_printf(struct seq_file *, const char *, ...); +__printf(2, 0) int seq_vprintf(struct seq_file *, const char *, va_list args); int seq_path(struct seq_file *, const struct path *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *);