From 95c8e17f2f00f6af7474fac0e4050a79db6c3cea Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 22 Mar 2011 10:49:12 -0400 Subject: [PATCH 01/32] GFS2: Dump better debug info if a bitmap inconsistency is detected On rare occasions we encounter gfs2 problems where an invalid bitmap state transition is attempted. For example, trying to "unlink" a free block. In these cases, there is really no useful information logged to debug the problem. This patch adds more debug details that should allow us to more closely examine the problem and possibly solve it. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6fcae8469f6d..b643c14caff9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, unsigned char *buf2, unsigned int offset, - unsigned int buflen, u32 block, + struct gfs2_bitmap *bi, u32 block, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; + unsigned int buflen = bi->bi_len; const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; byte1 = buf1 + offset + (block / GFS2_NBBY); @@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { + printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " + "new_state=%d\n", + (unsigned long long)block, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", + (unsigned long long)rgd->rd_addr, + (unsigned long)bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", + (unsigned long)bi->bi_offset, + (unsigned long)bi->bi_len); + dump_stack(); gfs2_consist_rgrpd(rgd); return; } @@ -1365,7 +1376,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, blk, new_state); + bi, blk, new_state); goal = blk; while (*n < elen) { goal++; @@ -1375,7 +1386,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, GFS2_BLKST_FREE) break; gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, - bi->bi_len, goal, new_state); + bi, goal, new_state); (*n)++; } out: @@ -1432,7 +1443,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, } gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, - bi->bi_len, buf_blk, new_state); + bi, buf_blk, new_state); } return rgd; From 0d95326d9bd39f6eae80b91392b308c5fa8b1a0f Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 22 Mar 2011 13:52:44 -0400 Subject: [PATCH 02/32] GFS2: remove *leaf_call_t and simplify leaf_dealloc Since foreach_leaf is only called with leaf_dealloc as its only possible call function, we can simplify the code by making it call leaf_dealloc directly. This simplifies the code and eliminates the need for leaf_call_t, the generic call method. This is a first small step in simplifying the directory leaf deallocation code. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index f789c5732b7c..0bb5f6bed591 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -82,11 +82,11 @@ struct qstr gfs2_qdot __read_mostly; struct qstr gfs2_qdotdot __read_mostly; -typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, void *data); typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); +static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, + u64 leaf_no); int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) @@ -1770,13 +1770,11 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, /** * foreach_leaf - call a function for each leaf in a directory * @dip: the directory - * @lc: the function to call for each each - * @data: private data to pass to it * * Returns: errno */ -static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) +static int foreach_leaf(struct gfs2_inode *dip) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct buffer_head *bh; @@ -1823,7 +1821,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); brelse(bh); - error = lc(dip, index, len, leaf_no, data); + error = leaf_dealloc(dip, index, len, leaf_no); if (error) goto out; @@ -1855,7 +1853,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) */ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, void *data) + u64 leaf_no) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_leaf *tmp_leaf; @@ -1978,7 +1976,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) int error; /* Dealloc on-disk leaves to FREEMETA state */ - error = foreach_leaf(dip, leaf_dealloc, NULL); + error = foreach_leaf(dip); if (error) return error; From d24a7a439a329b60f8e168c03e80566519e09be2 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 22 Mar 2011 13:54:03 -0400 Subject: [PATCH 03/32] GFS2: Combine transaction from gfs2_dir_exhash_dealloc At the end of function gfs2_dir_exhash_dealloc, it was setting the dinode type to "file" to prevent directory corruption in case of a crash. It was doing so in its own journal transaction. This patch makes the change occur when the last call is make to leaf_dealloc, since it needs to rewrite the directory dinode at that time anyway. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 49 ++++++++++++++----------------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0bb5f6bed591..bd575871f0f2 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -86,7 +86,7 @@ typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no); + u64 leaf_no, int last_dealloc); int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) @@ -1781,10 +1781,10 @@ static int foreach_leaf(struct gfs2_inode *dip) struct gfs2_leaf *leaf; u32 hsize, len; u32 ht_offset, lp_offset, ht_offset_cur = -1; - u32 index = 0; + u32 index = 0, next_index; __be64 *lp; u64 leaf_no; - int error = 0; + int error = 0, last; hsize = 1 << dip->i_depth; if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { @@ -1819,13 +1819,13 @@ static int foreach_leaf(struct gfs2_inode *dip) goto out; leaf = (struct gfs2_leaf *)bh->b_data; len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); + next_index = (index & ~(len - 1)) + len; + last = ((next_index >= hsize) ? 1 : 0); brelse(bh); - - error = leaf_dealloc(dip, index, len, leaf_no); + error = leaf_dealloc(dip, index, len, leaf_no, last); if (error) goto out; - - index = (index & ~(len - 1)) + len; + index = next_index; } else index++; } @@ -1847,13 +1847,13 @@ static int foreach_leaf(struct gfs2_inode *dip) * @index: the hash table offset in the directory * @len: the number of pointers to this leaf * @leaf_no: the leaf number - * @data: not used + * last_dealloc: 1 if this is the final dealloc for the leaf, else 0 * * Returns: errno */ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no) + u64 leaf_no, int last_dealloc) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_leaf *tmp_leaf; @@ -1940,6 +1940,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, goto out_end_trans; gfs2_trans_add_bh(dip->i_gl, dibh, 1); + /* On the last dealloc, make this a regular file in case we crash. + (We don't want to free these blocks a second time.) */ + if (last_dealloc) + dip->i_inode.i_mode = S_IFREG; gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); @@ -1971,33 +1975,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct buffer_head *bh; - int error; - /* Dealloc on-disk leaves to FREEMETA state */ - error = foreach_leaf(dip); - if (error) - return error; - - /* Make this a regular file in case we crash. - (We don't want to free these blocks a second time.) */ - - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - return error; - - error = gfs2_meta_inode_buffer(dip, &bh); - if (!error) { - gfs2_trans_add_bh(dip->i_gl, bh, 1); - ((struct gfs2_dinode *)bh->b_data)->di_mode = - cpu_to_be32(S_IFREG); - brelse(bh); - } - - gfs2_trans_end(sdp); - - return error; + return foreach_leaf(dip); } /** From ec038c826b5c3c163ad1673390f10e869020c28c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 22 Mar 2011 13:55:23 -0400 Subject: [PATCH 04/32] GFS2: pass leaf_bh into leaf_dealloc Function foreach_leaf used to look up the leaf block address and get a buffer_head. Then it would call leaf_dealloc which did the same lookup. This patch combines the two operations by making foreach_leaf pass the leaf bh to leaf_dealloc. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index bd575871f0f2..1f5a7ac97f7d 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -86,7 +86,8 @@ typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, int last_dealloc); + u64 leaf_no, struct buffer_head *leaf_bh, + int last_dealloc); int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) @@ -1821,8 +1822,9 @@ static int foreach_leaf(struct gfs2_inode *dip) len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); next_index = (index & ~(len - 1)) + len; last = ((next_index >= hsize) ? 1 : 0); + error = leaf_dealloc(dip, index, len, leaf_no, bh, + last); brelse(bh); - error = leaf_dealloc(dip, index, len, leaf_no, last); if (error) goto out; index = next_index; @@ -1847,13 +1849,15 @@ static int foreach_leaf(struct gfs2_inode *dip) * @index: the hash table offset in the directory * @len: the number of pointers to this leaf * @leaf_no: the leaf number + * @leaf_bh: buffer_head for the starting leaf * last_dealloc: 1 if this is the final dealloc for the leaf, else 0 * * Returns: errno */ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, int last_dealloc) + u64 leaf_no, struct buffer_head *leaf_bh, + int last_dealloc) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_leaf *tmp_leaf; @@ -1885,14 +1889,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, goto out_qs; /* Count the number of leaves */ + bh = leaf_bh; for (blk = leaf_no; blk; blk = nblk) { - error = get_leaf(dip, blk, &bh); - if (error) - goto out_rlist; + if (blk != leaf_no) { + error = get_leaf(dip, blk, &bh); + if (error) + goto out_rlist; + } tmp_leaf = (struct gfs2_leaf *)bh->b_data; nblk = be64_to_cpu(tmp_leaf->lf_next); - brelse(bh); + if (blk != leaf_no) + brelse(bh); gfs2_rlist_add(sdp, &rlist, blk); l_blocks++; @@ -1916,13 +1924,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (error) goto out_rg_gunlock; + bh = leaf_bh; + for (blk = leaf_no; blk; blk = nblk) { - error = get_leaf(dip, blk, &bh); - if (error) - goto out_end_trans; + if (blk != leaf_no) { + error = get_leaf(dip, blk, &bh); + if (error) + goto out_end_trans; + } tmp_leaf = (struct gfs2_leaf *)bh->b_data; nblk = be64_to_cpu(tmp_leaf->lf_next); - brelse(bh); + if (blk != leaf_no) + brelse(bh); gfs2_free_meta(dip, blk, 1); gfs2_add_inode_blocks(&dip->i_inode, -1); From 556bb17998a37dabf7e9e96aa545bcea899be745 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 22 Mar 2011 13:56:37 -0400 Subject: [PATCH 05/32] GFS2: move function foreach_leaf to gfs2_dir_exhash_dealloc The previous patches made function gfs2_dir_exhash_dealloc do nothing but call function foreach_leaf. This patch simplifies the code by moving the entire function foreach_leaf into gfs2_dir_exhash_dealloc. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 146 ++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 81 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 1f5a7ac97f7d..f7a31374ff82 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -85,10 +85,6 @@ struct qstr gfs2_qdotdot __read_mostly; typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, const struct qstr *name, void *opaque); -static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, - u64 leaf_no, struct buffer_head *leaf_bh, - int last_dealloc); - int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp) { @@ -1768,81 +1764,6 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, return 0; } -/** - * foreach_leaf - call a function for each leaf in a directory - * @dip: the directory - * - * Returns: errno - */ - -static int foreach_leaf(struct gfs2_inode *dip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct buffer_head *bh; - struct gfs2_leaf *leaf; - u32 hsize, len; - u32 ht_offset, lp_offset, ht_offset_cur = -1; - u32 index = 0, next_index; - __be64 *lp; - u64 leaf_no; - int error = 0, last; - - hsize = 1 << dip->i_depth; - if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { - gfs2_consist_inode(dip); - return -EIO; - } - - lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); - if (!lp) - return -ENOMEM; - - while (index < hsize) { - lp_offset = index & (sdp->sd_hash_ptrs - 1); - ht_offset = index - lp_offset; - - if (ht_offset_cur != ht_offset) { - error = gfs2_dir_read_data(dip, (char *)lp, - ht_offset * sizeof(__be64), - sdp->sd_hash_bsize, 1); - if (error != sdp->sd_hash_bsize) { - if (error >= 0) - error = -EIO; - goto out; - } - ht_offset_cur = ht_offset; - } - - leaf_no = be64_to_cpu(lp[lp_offset]); - if (leaf_no) { - error = get_leaf(dip, leaf_no, &bh); - if (error) - goto out; - leaf = (struct gfs2_leaf *)bh->b_data; - len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); - next_index = (index & ~(len - 1)) + len; - last = ((next_index >= hsize) ? 1 : 0); - error = leaf_dealloc(dip, index, len, leaf_no, bh, - last); - brelse(bh); - if (error) - goto out; - index = next_index; - } else - index++; - } - - if (index != hsize) { - gfs2_consist_inode(dip); - error = -EIO; - } - -out: - kfree(lp); - - return error; -} - /** * leaf_dealloc - Deallocate a directory leaf * @dip: the directory @@ -1988,8 +1909,71 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) { - /* Dealloc on-disk leaves to FREEMETA state */ - return foreach_leaf(dip); + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct buffer_head *bh; + struct gfs2_leaf *leaf; + u32 hsize, len; + u32 ht_offset, lp_offset, ht_offset_cur = -1; + u32 index = 0, next_index; + __be64 *lp; + u64 leaf_no; + int error = 0, last; + + hsize = 1 << dip->i_depth; + if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { + gfs2_consist_inode(dip); + return -EIO; + } + + lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); + if (!lp) + return -ENOMEM; + + while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs2_dir_read_data(dip, (char *)lp, + ht_offset * sizeof(__be64), + sdp->sd_hash_bsize, 1); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + leaf_no = be64_to_cpu(lp[lp_offset]); + if (leaf_no) { + error = get_leaf(dip, leaf_no, &bh); + if (error) + goto out; + leaf = (struct gfs2_leaf *)bh->b_data; + len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth)); + + next_index = (index & ~(len - 1)) + len; + last = ((next_index >= hsize) ? 1 : 0); + error = leaf_dealloc(dip, index, len, leaf_no, bh, + last); + brelse(bh); + if (error) + goto out; + index = next_index; + } else + index++; + } + + if (index != hsize) { + gfs2_consist_inode(dip); + error = -EIO; + } + +out: + kfree(lp); + + return error; } /** From 1027efaa238e1b65c07b6c2d9e270e548c2bdb07 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 30 Mar 2011 16:13:25 +0100 Subject: [PATCH 06/32] GFS2: Make ->write_inode() really write The GFS2 ->write_inode function should be more aggressive at writing back to the filesystem. This adopts the XFS system of returning -EAGAIN when the writeback has not been completely done. Also, we now kick off in-place writeback when called with WB_SYNC_NONE, but we only wait for it and flush the log when WB_SYNC_ALL is requested. Signed-off-by: Steven Whitehouse --- fs/gfs2/super.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b9f28e66dad1..d827b934cbd3 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -704,7 +704,7 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) /** * gfs2_write_inode - Make sure the inode is stable on the disk * @inode: The inode - * @sync: synchronous write flag + * @wbc: The writeback control structure * * Returns: errno */ @@ -713,15 +713,16 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); struct gfs2_holder gh; struct buffer_head *bh; struct timespec atime; struct gfs2_dinode *di; - int ret = 0; + int ret = -EAGAIN; - /* Check this is a "normal" inode, etc */ + /* Skip timestamp update, if this is from a memalloc */ if (current->flags & PF_MEMALLOC) - return 0; + goto do_flush; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); if (ret) goto do_flush; @@ -745,6 +746,11 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) do_flush: if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + filemap_fdatawrite(metamapping); + if (!ret && (wbc->sync_mode == WB_SYNC_ALL)) + ret = filemap_fdatawait(metamapping); + if (ret) + mark_inode_dirty_sync(inode); return ret; } @@ -874,8 +880,9 @@ static void gfs2_put_super(struct super_block *sb) static int gfs2_sync_fs(struct super_block *sb, int wait) { - if (wait && sb->s_fs_info) - gfs2_log_flush(sb->s_fs_info, NULL); + struct gfs2_sbd *sdp = sb->s_fs_info; + if (wait && sdp) + gfs2_log_flush(sdp, NULL); return 0; } From 5ac048bb7ea6e87b06504b999017cfa1f38f4092 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 30 Mar 2011 16:25:51 +0100 Subject: [PATCH 07/32] GFS2: Use filemap_fdatawrite() to write back the AIL In order to ensure that the mapping stats (and thus the bdi) are correctly updated, this patch changes the AIL writeback to use the filemap_datawrite function. This helps prevent stalls in balance_dirty_pages() due to large amounts of dirty metadata when there is little or no dirty data around. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 5b102c1887fd..3ebafa1efad0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -91,6 +91,7 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { + struct gfs2_glock *gl = NULL; struct gfs2_bufdata *bd, *s; struct buffer_head *bh; int retry; @@ -113,19 +114,13 @@ __acquires(&sdp->sd_ail_lock) if (!buffer_dirty(bh)) continue; - + if (gl == bd->bd_gl) + continue; + gl = bd->bd_gl; list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); - get_bh(bh); spin_unlock(&sdp->sd_ail_lock); - lock_buffer(bh); - if (test_clear_buffer_dirty(bh)) { - bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE_SYNC, bh); - } else { - unlock_buffer(bh); - brelse(bh); - } + filemap_fdatawrite(gfs2_glock2aspace(gl)); spin_lock(&sdp->sd_ail_lock); retry = 1; From 29687a2ac8dfcd5363e515ea715ec226aef8c26b Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 30 Mar 2011 16:33:25 +0100 Subject: [PATCH 08/32] GFS2: Alter point of entry to glock lru list for glocks with an address_space Rather than allowing the glocks to be scheduled for possible reclaim as soon as they have exited the journal, this patch delays their entry to the list until the glocks in question are no longer in use. This means that we will rely on the vm for writeback of all dirty data and metadata from now on. When glocks are added to the lru list they should be freeable much faster since all the I/O required to free them should have already been completed. This should lead to much better I/O patterns under low memory conditions. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 33 +++++++++++++++------------------ fs/gfs2/glock.h | 3 +-- fs/gfs2/lops.c | 12 +++++------- fs/gfs2/rgrp.c | 1 + fs/gfs2/super.c | 1 + 5 files changed, 23 insertions(+), 27 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f07643e21bfa..1019183232fe 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -160,6 +160,19 @@ static int demote_ok(const struct gfs2_glock *gl) } +void gfs2_glock_add_to_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + + if (!list_empty(&gl->gl_lru)) + list_del_init(&gl->gl_lru); + else + atomic_inc(&lru_count); + + list_add_tail(&gl->gl_lru, &lru_list); + spin_unlock(&lru_lock); +} + /** * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list * @gl: the glock @@ -170,24 +183,8 @@ static int demote_ok(const struct gfs2_glock *gl) static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) { - if (demote_ok(gl)) { - spin_lock(&lru_lock); - - if (!list_empty(&gl->gl_lru)) - list_del_init(&gl->gl_lru); - else - atomic_inc(&lru_count); - - list_add_tail(&gl->gl_lru, &lru_list); - spin_unlock(&lru_lock); - } -} - -void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) -{ - spin_lock(&gl->gl_spin); - __gfs2_glock_schedule_for_reclaim(gl); - spin_unlock(&gl->gl_spin); + if (demote_ok(gl)) + gfs2_glock_add_to_lru(gl); } /** diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index aea160690e94..6b2f757b9281 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); -extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp); extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); -extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); +extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); extern void gfs2_glock_free(struct gfs2_glock *gl); extern int __init gfs2_glock_init(void); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 51d27f00ebb4..611a51d476b2 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) { struct gfs2_bufdata *bd; - gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); + BUG_ON(!current->journal_info); clear_buffer_dirty(bh); if (test_set_buffer_pinned(bh)) @@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) * @sdp: the filesystem the buffer belongs to * @bh: The buffer to unpin * @ai: + * @flags: The inode dirty flags * */ @@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, { struct gfs2_bufdata *bd = bh->b_private; - gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); - - if (!buffer_pinned(bh)) - gfs2_assert_withdraw(sdp, 0); + BUG_ON(!buffer_uptodate(bh)); + BUG_ON(!buffer_pinned(bh)); lock_buffer(bh); mark_buffer_dirty(bh); @@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); spin_unlock(&sdp->sd_ail_lock); - if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) - gfs2_glock_schedule_for_reclaim(bd->bd_gl); + clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); trace_gfs2_pin(bd, 0); unlock_buffer(bh); atomic_dec(&sdp->sd_log_pinned); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b643c14caff9..7273ad3c85ba 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -392,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp) if (gl) { gl->gl_object = NULL; + gfs2_glock_add_to_lru(gl); gfs2_glock_put(gl); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d827b934cbd3..b62c8427672c 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1401,6 +1401,7 @@ static void gfs2_evict_inode(struct inode *inode) end_writeback(inode); ip->i_gl->gl_object = NULL; + gfs2_glock_add_to_lru(ip->i_gl); gfs2_glock_put(ip->i_gl); ip->i_gl = NULL; if (ip->i_iopen_gh.gh_gl) { From efc1a9c2a70e4e49f4cf179a7ed8064b7a406e4a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 Apr 2011 13:03:34 +0100 Subject: [PATCH 09/32] GFS2: Remove unused macro The buffer_in_io() macro has been unused for some time, so remove it. Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 6a1d9ba16411..22c526593131 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen); #define buffer_busy(bh) \ ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) -#define buffer_in_io(bh) \ -((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock))) #endif /* __DIO_DOT_H__ */ From dba898b02defa66e5fe493d58ec0293a940f9c93 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Apr 2011 09:54:02 +0100 Subject: [PATCH 10/32] GFS2: Clean up fsync() This patch is designed to clean up GFS2's fsync implementation and ensure that it really does get everything on disk. Since ->write_inode() has been updated, we can call that via the vfs library function sync_inode_metadata() and the only remaining thing that has to be done is to ensure that we get any revoke records in the log after the inode has been written back. Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 38 ++++++++++++------------------- fs/gfs2/glops.c | 59 +++++++++++++++++++++++++++++++++++-------------- fs/gfs2/glops.h | 2 ++ 3 files changed, 58 insertions(+), 41 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index e48310885c48..23eab473f09d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file) /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry (we ignore this) - * @dentry: the dentry that points to the inode to sync + * @datasync: set if we can ignore timestamp changes * - * The VFS will flush "normal" data for us. We only need to worry - * about metadata here. For journaled data, we just do a log flush - * as we can't avoid it. Otherwise we can just bale out if datasync - * is set. For stuffed inodes we must flush the log in order to - * ensure that all data is on disk. - * - * The call to write_inode_now() is there to write back metadata and - * the inode itself. It does also try and write the data, but thats - * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite() - * for us. + * The VFS will flush data for us. We only need to worry + * about metadata here. * * Returns: errno */ @@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync) { struct inode *inode = file->f_mapping->host; int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); - int ret = 0; + struct gfs2_inode *ip = GFS2_I(inode); + int ret; - if (gfs2_is_jdata(GFS2_I(inode))) { - gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); - return 0; + if (datasync) + sync_state &= ~I_DIRTY_SYNC; + + if (sync_state) { + ret = sync_inode_metadata(inode, 1); + if (ret) + return ret; + gfs2_ail_flush(ip->i_gl); } - if (sync_state != 0) { - if (!datasync) - ret = write_inode_now(inode, 0); - - if (gfs2_is_stuffed(GFS2_I(inode))) - gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); - } - - return ret; + return 0; } /** diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 25eeb2bcee47..7c1b08f63ddb 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -28,33 +28,18 @@ #include "trans.h" /** - * ail_empty_gl - remove all buffers for a given lock from the AIL + * __gfs2_ail_flush - remove all buffers for a given lock from the AIL * @gl: the glock * * None of the buffers should be dirty, locked, or pinned. */ -static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +static void __gfs2_ail_flush(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *head = &gl->gl_ail_list; struct gfs2_bufdata *bd; struct buffer_head *bh; - struct gfs2_trans tr; - - memset(&tr, 0, sizeof(tr)); - tr.tr_revokes = atomic_read(&gl->gl_ail_count); - - if (!tr.tr_revokes) - return; - - /* A shortened, inline version of gfs2_trans_begin() */ - tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); - tr.tr_ip = (unsigned long)__builtin_return_address(0); - INIT_LIST_HEAD(&tr.tr_list_buf); - gfs2_log_reserve(sdp, tr.tr_reserved); - BUG_ON(current->journal_info); - current->journal_info = &tr; spin_lock(&sdp->sd_ail_lock); while (!list_empty(head)) { @@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) } gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); +} + +static void gfs2_ail_empty_gl(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_trans tr; + + memset(&tr, 0, sizeof(tr)); + tr.tr_revokes = atomic_read(&gl->gl_ail_count); + + if (!tr.tr_revokes) + return; + + /* A shortened, inline version of gfs2_trans_begin() */ + tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); + tr.tr_ip = (unsigned long)__builtin_return_address(0); + INIT_LIST_HEAD(&tr.tr_list_buf); + gfs2_log_reserve(sdp, tr.tr_reserved); + BUG_ON(current->journal_info); + current->journal_info = &tr; + + __gfs2_ail_flush(gl); + + gfs2_trans_end(sdp); + gfs2_log_flush(sdp, NULL); +} + +void gfs2_ail_flush(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int revokes = atomic_read(&gl->gl_ail_count); + int ret; + + if (!revokes) + return; + + ret = gfs2_trans_begin(sdp, 0, revokes); + if (ret) + return; + __gfs2_ail_flush(gl); gfs2_trans_end(sdp); gfs2_log_flush(sdp, NULL); } diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index b3aa2e3210fd..6fce409b5a50 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops; extern const struct gfs2_glock_operations gfs2_journal_glops; extern const struct gfs2_glock_operations *gfs2_glops_list[]; +extern void gfs2_ail_flush(struct gfs2_glock *gl); + #endif /* __GLOPS_DOT_H__ */ From 627c10b7e471b5dcfb7101d6cc74d219619c9bc4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Apr 2011 14:09:52 +0100 Subject: [PATCH 11/32] GFS2: Improve tracing support (adds two flags) This adds support for two new flags. One keeps track of whether the glock is on the LRU list or not. The other isn't really a flag as such, but an indication of whether the glock has an attached object or not. This indication is reported without any locking, which is ok since we do not dereference the object pointer but merely report whether it is NULL or not. Also, this fixes one place where a tracepoint was missing, which was at the point we remove deallocated blocks from the journal. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 13 +++++++++++-- fs/gfs2/incore.h | 2 ++ fs/gfs2/meta_io.c | 2 ++ fs/gfs2/trace_gfs2.h | 10 ++++++---- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1019183232fe..0c6c69090140 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -170,6 +170,7 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) atomic_inc(&lru_count); list_add_tail(&gl->gl_lru, &lru_list); + set_bit(GLF_LRU, &gl->gl_flags); spin_unlock(&lru_lock); } @@ -1364,6 +1365,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m while(nr && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); + clear_bit(GLF_LRU, &gl->gl_flags); atomic_dec(&lru_count); /* Test for being demotable */ @@ -1386,6 +1388,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m } nr_skipped++; list_add(&gl->gl_lru, &skipped); + set_bit(GLF_LRU, &gl->gl_flags); } list_splice(&skipped, &lru_list); atomic_add(nr_skipped, &lru_count); @@ -1598,9 +1601,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) return 0; } -static const char *gflags2str(char *buf, const unsigned long *gflags) +static const char *gflags2str(char *buf, const struct gfs2_glock *gl) { + const unsigned long *gflags = &gl->gl_flags; char *p = buf; + if (test_bit(GLF_LOCK, gflags)) *p++ = 'l'; if (test_bit(GLF_DEMOTE, gflags)) @@ -1623,6 +1628,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) *p++ = 'F'; if (test_bit(GLF_QUEUED, gflags)) *p++ = 'q'; + if (test_bit(GLF_LRU, gflags)) + *p++ = 'L'; + if (gl->gl_object) + *p++ = 'o'; *p = 0; return buf; } @@ -1661,7 +1670,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, - gflags2str(gflags_buf, &gl->gl_flags), + gflags2str(gflags_buf, gl), state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 870a89d6d4dc..48eb1eed51b5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -200,6 +200,8 @@ enum { GLF_INITIAL = 10, GLF_FROZEN = 11, GLF_QUEUED = 12, + GLF_LRU = 13, + GLF_OBJECT = 14, /* Used only for tracing */ }; struct gfs2_glock { diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 675349b5a133..747238cd9f96 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -31,6 +31,7 @@ #include "rgrp.h" #include "trans.h" #include "util.h" +#include "trace_gfs2.h" static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { @@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int struct gfs2_bufdata *bd = bh->b_private; if (test_clear_buffer_pinned(bh)) { + trace_gfs2_pin(bd, 0); atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_le.le_list); if (meta) { diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index cedb0bb96d96..9133d0aaac26 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -40,7 +40,9 @@ {(1UL << GLF_REPLY_PENDING), "r" }, \ {(1UL << GLF_INITIAL), "I" }, \ {(1UL << GLF_FROZEN), "F" }, \ - {(1UL << GLF_QUEUED), "q" }) + {(1UL << GLF_QUEUED), "q" }, \ + {(1UL << GLF_LRU), "L" }, \ + {(1UL << GLF_OBJECT), "o" }) #ifndef NUMPTY #define NUMPTY @@ -94,7 +96,7 @@ TRACE_EVENT(gfs2_glock_state_change, __entry->new_state = glock_trace_state(new_state); __entry->tgt_state = glock_trace_state(gl->gl_target); __entry->dmt_state = glock_trace_state(gl->gl_demote_state); - __entry->flags = gl->gl_flags; + __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<gltype = gl->gl_name.ln_type; __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); - __entry->flags = gl->gl_flags; + __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL< %s flags:%s", @@ -161,7 +163,7 @@ TRACE_EVENT(gfs2_demote_rq, __entry->glnum = gl->gl_name.ln_number; __entry->cur_state = glock_trace_state(gl->gl_state); __entry->dmt_state = glock_trace_state(gl->gl_demote_state); - __entry->flags = gl->gl_flags; + __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL< Date: Thu, 14 Apr 2011 16:50:31 +0100 Subject: [PATCH 12/32] GFS2: Optimise glock lru and end of life inodes The GLF_LRU flag introduced in the previous patch can be used to check if a glock is on the lru list when a new holder is queued and if so remove it, without having first to get the lru_lock. The main purpose of this patch however is to optimise the glocks left over when an inode at end of life is being evicted. Previously such glocks were left with the GLF_LFLUSH flag set, so that when reclaimed, each one required a log flush. This patch resets the GLF_LFLUSH flag when there is nothing left to flush thus preventing later log flushes as glocks are reused or demoted. In order to do this, we need to keep track of the number of revokes which are outstanding, and also to clear the GLF_LFLUSH bit after a log commit when only revokes have been processed. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 41 ++++++++++++------------- fs/gfs2/incore.h | 1 + fs/gfs2/inode.c | 59 ------------------------------------ fs/gfs2/inode.h | 1 - fs/gfs2/lops.c | 27 +++++++++++++---- fs/gfs2/main.c | 1 + fs/gfs2/super.c | 78 +++++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 119 insertions(+), 89 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 0c6c69090140..cb8776f0102e 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -145,14 +145,9 @@ static int demote_ok(const struct gfs2_glock *gl) { const struct gfs2_glock_operations *glops = gl->gl_ops; - /* assert_spin_locked(&gl->gl_spin); */ - if (gl->gl_state == LM_ST_UNLOCKED) return 0; - if (test_bit(GLF_LFLUSH, &gl->gl_flags)) - return 0; - if ((gl->gl_name.ln_type != LM_TYPE_INODE) && - !list_empty(&gl->gl_holders)) + if (!list_empty(&gl->gl_holders)) return 0; if (glops->go_demote_ok) return glops->go_demote_ok(gl); @@ -174,6 +169,17 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl) spin_unlock(&lru_lock); } +static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) +{ + spin_lock(&lru_lock); + if (!list_empty(&gl->gl_lru)) { + list_del_init(&gl->gl_lru); + atomic_dec(&lru_count); + clear_bit(GLF_LRU, &gl->gl_flags); + } + spin_unlock(&lru_lock); +} + /** * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list * @gl: the glock @@ -217,12 +223,7 @@ void gfs2_glock_put(struct gfs2_glock *gl) spin_lock_bucket(gl->gl_hash); hlist_bl_del_rcu(&gl->gl_list); spin_unlock_bucket(gl->gl_hash); - spin_lock(&lru_lock); - if (!list_empty(&gl->gl_lru)) { - list_del_init(&gl->gl_lru); - atomic_dec(&lru_count); - } - spin_unlock(&lru_lock); + gfs2_glock_remove_from_lru(gl); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); GLOCK_BUG_ON(gl, mapping && mapping->nrpages); trace_gfs2_glock_put(gl); @@ -1025,6 +1026,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) return -EIO; + if (test_bit(GLF_LRU, &gl->gl_flags)) + gfs2_glock_remove_from_lru(gl); + spin_lock(&gl->gl_spin); add_to_queue(gh); if ((LM_FLAG_NOEXP & gh->gh_flags) && @@ -1082,7 +1086,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - __gfs2_glock_schedule_for_reclaim(gl); + if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) + __gfs2_glock_schedule_for_reclaim(gl); trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) @@ -1461,12 +1466,7 @@ static void thaw_glock(struct gfs2_glock *gl) static void clear_glock(struct gfs2_glock *gl) { - spin_lock(&lru_lock); - if (!list_empty(&gl->gl_lru)) { - list_del_init(&gl->gl_lru); - atomic_dec(&lru_count); - } - spin_unlock(&lru_lock); + gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_spin); if (gl->gl_state != LM_ST_UNLOCKED) @@ -1666,7 +1666,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) dtime *= 1000000/HZ; /* demote time in uSec */ if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) dtime = 0; - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", state2str(gl->gl_state), gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, @@ -1674,6 +1674,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_target), state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), + atomic_read(&gl->gl_revokes), atomic_read(&gl->gl_ref)); list_for_each_entry(gh, &gl->gl_holders, gh_list) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 48eb1eed51b5..5067beaffa68 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -236,6 +236,7 @@ struct gfs2_glock { struct list_head gl_ail_list; atomic_t gl_ail_count; + atomic_t gl_revokes; struct delayed_work gl_work; struct work_struct gl_delete; struct rcu_head gl_rcu; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9134dcb89479..9b7b9e40073b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -341,65 +341,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) return error; } -int gfs2_dinode_dealloc(struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al; - struct gfs2_rgrpd *rgd; - int error; - - if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - al = gfs2_alloc_get(ip); - if (!al) - return -ENOMEM; - - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto out; - - error = gfs2_rindex_hold(sdp, &al->al_ri_gh); - if (error) - goto out_qs; - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - if (!rgd) { - gfs2_consist_inode(ip); - error = -EIO; - goto out_rindex_relse; - } - - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, - &al->al_rgd_gh); - if (error) - goto out_rindex_relse; - - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); - if (error) - goto out_rg_gunlock; - - set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); - set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); - - gfs2_free_di(rgd, ip); - - gfs2_trans_end(sdp); - -out_rg_gunlock: - gfs2_glock_dq_uninit(&al->al_rgd_gh); -out_rindex_relse: - gfs2_glock_dq_uninit(&al->al_ri_gh); -out_qs: - gfs2_quota_unhold(ip); -out: - gfs2_alloc_put(ip); - return error; -} - /** * gfs2_change_nlink - Change nlink count on inode * @ip: The GFS2 inode diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 099ca305e518..842346eae836 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -106,7 +106,6 @@ extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); extern int gfs2_inode_refresh(struct gfs2_inode *ip); -extern int gfs2_dinode_dealloc(struct gfs2_inode *inode); extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff); extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 611a51d476b2..05bbb124699f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -320,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_glock *gl = bd->bd_gl; struct gfs2_trans *tr; tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_revoke++; sdp->sd_log_num_revoke++; + atomic_inc(&gl->gl_revokes); + set_bit(GLF_LFLUSH, &gl->gl_flags); list_add(&le->le_list, &sdp->sd_log_le_revoke); } @@ -348,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); offset = sizeof(struct gfs2_log_descriptor); - while (!list_empty(head)) { - bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); - list_del_init(&bd->bd_le.le_list); + list_for_each_entry(bd, head, bd_le.le_list) { sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { @@ -365,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); - kmem_cache_free(gfs2_bufdata_cachep, bd); - offset += sizeof(u64); } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); @@ -374,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) submit_bh(WRITE_SYNC, bh); } +static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_revoke; + struct gfs2_bufdata *bd; + struct gfs2_glock *gl; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); + gl = bd->bd_gl; + atomic_dec(&gl->gl_revokes); + clear_bit(GLF_LFLUSH, &gl->gl_flags); + kmem_cache_free(gfs2_bufdata_cachep, bd); + } +} + static void revoke_lo_before_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass) { @@ -747,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = { const struct gfs2_log_operations gfs2_revoke_lops = { .lo_add = revoke_lo_add, .lo_before_commit = revoke_lo_before_commit, + .lo_after_commit = revoke_lo_after_commit, .lo_before_scan = revoke_lo_before_scan, .lo_scan_elements = revoke_lo_scan_elements, .lo_after_scan = revoke_lo_after_scan, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 888a5f5a1a58..cfa327d33194 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo) INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); atomic_set(&gl->gl_ail_count, 0); + atomic_set(&gl->gl_revokes, 0); } static void gfs2_init_gl_aspace_once(void *foo) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b62c8427672c..215c37bfc2a4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1315,6 +1315,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) return 0; } +static void gfs2_final_release_pages(struct gfs2_inode *ip) +{ + struct inode *inode = &ip->i_inode; + struct gfs2_glock *gl = ip->i_gl; + + truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0); + truncate_inode_pages(&inode->i_data, 0); + + if (atomic_read(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } +} + +static int gfs2_dinode_dealloc(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_alloc *al; + struct gfs2_rgrpd *rgd; + int error; + + if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(ip); + return -EIO; + } + + al = gfs2_alloc_get(ip); + if (!al) + return -ENOMEM; + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + if (error) + goto out_qs; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_rindex_relse; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, + &al->al_rgd_gh); + if (error) + goto out_rindex_relse; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); + if (error) + goto out_rg_gunlock; + + gfs2_free_di(rgd, ip); + + gfs2_final_release_pages(ip); + + gfs2_trans_end(sdp); + +out_rg_gunlock: + gfs2_glock_dq_uninit(&al->al_rgd_gh); +out_rindex_relse: + gfs2_glock_dq_uninit(&al->al_ri_gh); +out_qs: + gfs2_quota_unhold(ip); +out: + gfs2_alloc_put(ip); + return error; +} + /* * We have to (at the moment) hold the inodes main lock to cover * the gap between unlocking the shared lock on the iopen lock and @@ -1378,15 +1450,13 @@ static void gfs2_evict_inode(struct inode *inode) } error = gfs2_dinode_dealloc(ip); - if (error) - goto out_unlock; + goto out_unlock; out_truncate: error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) goto out_unlock; - /* Needs to be done before glock release & also in a transaction */ - truncate_inode_pages(&inode->i_data, 0); + gfs2_final_release_pages(ip); gfs2_trans_end(sdp); out_unlock: From 4667a0ec32867865fd4deccf834594b3ea831baf Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 18 Apr 2011 14:18:09 +0100 Subject: [PATCH 13/32] GFS2: Make writeback more responsive to system conditions This patch adds writeback_control to writing back the AIL list. This means that we can then take advantage of the information we get in ->write_inode() in order to set off some pre-emptive writeback. In addition, the AIL code is cleaned up a bit to make it a bit simpler to understand. There is still more which can usefully be done in this area, but this is a good start at least. Signed-off-by: Steven Whitehouse --- fs/gfs2/export.c | 2 +- fs/gfs2/glock.c | 2 +- fs/gfs2/incore.h | 4 -- fs/gfs2/inode.c | 4 +- fs/gfs2/inode.h | 2 +- fs/gfs2/log.c | 173 ++++++++++++++++++++++++----------------------- fs/gfs2/log.h | 2 + fs/gfs2/super.c | 7 +- 8 files changed, 102 insertions(+), 94 deletions(-) diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index b5a5e60df0d5..fe9945f2ff72 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, struct gfs2_sbd *sdp = sb->s_fs_info; struct inode *inode; - inode = gfs2_ilookup(sb, inum->no_addr); + inode = gfs2_ilookup(sb, inum->no_addr, 0); if (inode) { if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { iput(inode); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index cb8776f0102e..eed4b6855614 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -649,7 +649,7 @@ static void delete_work_func(struct work_struct *work) /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ if (ip) - inode = gfs2_ilookup(sdp->sd_vfs, no_addr); + inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1); else inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); if (inode && !IS_ERR(inode)) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5067beaffa68..69a63823f7c5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -20,7 +20,6 @@ #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 -#define DIO_ALL 0x00000100 struct gfs2_log_operations; struct gfs2_log_element; @@ -377,8 +376,6 @@ struct gfs2_ail { unsigned int ai_first; struct list_head ai_ail1_list; struct list_head ai_ail2_list; - - u64 ai_sync_gen; }; struct gfs2_journal_extent { @@ -657,7 +654,6 @@ struct gfs2_sbd { spinlock_t sd_ail_lock; struct list_head sd_ail1_list; struct list_head sd_ail2_list; - u64 sd_ail_sync_gen; /* Replay stuff */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9b7b9e40073b..94c3a7db1116 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -74,14 +74,14 @@ static int iget_set(struct inode *inode, void *opaque) return 0; } -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block) { unsigned long hash = (unsigned long)no_addr; struct gfs2_skip_data data; data.no_addr = no_addr; data.skipped = 0; - data.non_block = 0; + data.non_block = non_block; return ilookup5(sb, hash, iget_test, &data); } diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 842346eae836..8d1344a4e673 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -102,7 +102,7 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, u64 *no_formal_ino, unsigned int blktype); -extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); +extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock); extern int gfs2_inode_refresh(struct gfs2_inode *ip); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3ebafa1efad0..03e00417061b 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -83,50 +84,90 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) /** * gfs2_ail1_start_one - Start I/O on a part of the AIL * @sdp: the filesystem - * @tr: the part of the AIL + * @wbc: The writeback control structure + * @ai: The ail structure * */ -static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, + struct writeback_control *wbc, + struct gfs2_ail *ai) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { struct gfs2_glock *gl = NULL; + struct address_space *mapping; struct gfs2_bufdata *bd, *s; struct buffer_head *bh; - int retry; - do { - retry = 0; +restart: + list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) { + bh = bd->bd_bh; - list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, - bd_ail_st_list) { - bh = bd->bd_bh; + gfs2_assert(sdp, bd->bd_ail == ai); - gfs2_assert(sdp, bd->bd_ail == ai); - - if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); - continue; - } - - if (!buffer_dirty(bh)) - continue; - if (gl == bd->bd_gl) - continue; - gl = bd->bd_gl; - list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); - - spin_unlock(&sdp->sd_ail_lock); - filemap_fdatawrite(gfs2_glock2aspace(gl)); - spin_lock(&sdp->sd_ail_lock); - - retry = 1; - break; + if (!buffer_busy(bh)) { + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + continue; } - } while (retry); + + if (!buffer_dirty(bh)) + continue; + if (gl == bd->bd_gl) + continue; + gl = bd->bd_gl; + list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); + mapping = bh->b_page->mapping; + spin_unlock(&sdp->sd_ail_lock); + generic_writepages(mapping, wbc); + spin_lock(&sdp->sd_ail_lock); + if (wbc->nr_to_write <= 0) + break; + goto restart; + } +} + + +/** + * gfs2_ail1_flush - start writeback of some ail1 entries + * @sdp: The super block + * @wbc: The writeback control structure + * + * Writes back some ail1 entries, according to the limits in the + * writeback control structure + */ + +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) +{ + struct list_head *head = &sdp->sd_ail1_list; + struct gfs2_ail *ai; + + spin_lock(&sdp->sd_ail_lock); + list_for_each_entry_reverse(ai, head, ai_list) { + if (wbc->nr_to_write <= 0) + break; + gfs2_ail1_start_one(sdp, wbc, ai); /* This may drop ail lock */ + } + spin_unlock(&sdp->sd_ail_lock); +} + +/** + * gfs2_ail1_start - start writeback of all ail1 entries + * @sdp: The superblock + */ + +static void gfs2_ail1_start(struct gfs2_sbd *sdp) +{ + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = LONG_MAX, + .range_start = 0, + .range_end = LLONG_MAX, + }; + + return gfs2_ail1_flush(sdp, &wbc); } /** @@ -136,7 +177,7 @@ __acquires(&sdp->sd_ail_lock) * */ -static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; @@ -144,71 +185,37 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; - gfs2_assert(sdp, bd->bd_ail == ai); - - if (buffer_busy(bh)) { - if (flags & DIO_ALL) - continue; - else - break; - } - + if (buffer_busy(bh)) + continue; if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); } - return list_empty(&ai->ai_ail1_list); } -static void gfs2_ail1_start(struct gfs2_sbd *sdp) -{ - struct list_head *head; - u64 sync_gen; - struct gfs2_ail *ai; - int done = 0; +/** + * gfs2_ail1_empty - Try to empty the ail1 lists + * @sdp: The superblock + * + * Tries to empty the ail1 lists, starting with the oldest first + */ - spin_lock(&sdp->sd_ail_lock); - head = &sdp->sd_ail1_list; - if (list_empty(head)) { - spin_unlock(&sdp->sd_ail_lock); - return; - } - sync_gen = sdp->sd_ail_sync_gen++; - - while(!done) { - done = 1; - list_for_each_entry_reverse(ai, head, ai_list) { - if (ai->ai_sync_gen >= sync_gen) - continue; - ai->ai_sync_gen = sync_gen; - gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */ - done = 0; - break; - } - } - - spin_unlock(&sdp->sd_ail_lock); -} - -static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp) { struct gfs2_ail *ai, *s; int ret; spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { - if (gfs2_ail1_empty_one(sdp, ai, flags)) + gfs2_ail1_empty_one(sdp, ai); + if (list_empty(&ai->ai_ail1_list)) list_move(&ai->ai_list, &sdp->sd_ail2_list); - else if (!(flags & DIO_ALL)) + else break; } - ret = list_empty(&sdp->sd_ail1_list); - spin_unlock(&sdp->sd_ail_lock); return ret; @@ -569,7 +576,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) set_buffer_uptodate(bh); clear_buffer_dirty(bh); - gfs2_ail1_empty(sdp, 0); + gfs2_ail1_empty(sdp); tail = current_tail(sdp); lh = (struct gfs2_log_header *)bh->b_data; @@ -864,7 +871,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL); for (;;) { gfs2_ail1_start(sdp); - if (gfs2_ail1_empty(sdp, DIO_ALL)) + if (gfs2_ail1_empty(sdp)) break; msleep(10); } @@ -900,17 +907,15 @@ int gfs2_logd(void *data) preflush = atomic_read(&sdp->sd_log_pinned); if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { - gfs2_ail1_empty(sdp, DIO_ALL); + gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL); - gfs2_ail1_empty(sdp, DIO_ALL); } if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); io_schedule(); - gfs2_ail1_empty(sdp, 0); + gfs2_ail1_empty(sdp); gfs2_log_flush(sdp, NULL); - gfs2_ail1_empty(sdp, DIO_ALL); } wake_up(&sdp->sd_log_waitq); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 0d007f920234..ab0621698b73 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -12,6 +12,7 @@ #include #include +#include #include "incore.h" /** @@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 215c37bfc2a4..58fe3a4ac829 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -714,6 +715,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); + struct backing_dev_info *bdi = metamapping->backing_dev_info; struct gfs2_holder gh; struct buffer_head *bh; struct timespec atime; @@ -747,6 +749,8 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); filemap_fdatawrite(metamapping); + if (bdi->dirty_exceeded) + gfs2_ail1_flush(sdp, wbc); if (!ret && (wbc->sync_mode == WB_SYNC_ALL)) ret = filemap_fdatawait(metamapping); if (ret) @@ -1366,7 +1370,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (error) goto out_rindex_relse; - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1); + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, + sdp->sd_jdesc->jd_blocks); if (error) goto out_rg_gunlock; From c83ae9cad8776bab153a05cc466be39f14011091 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 18 Apr 2011 14:18:38 +0100 Subject: [PATCH 14/32] GFS2: Add an AIL writeback tracepoint Add a tracepoint for monitoring writeback of the AIL. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 2 ++ fs/gfs2/trace_gfs2.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 03e00417061b..ad1f1887633f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -144,6 +144,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct list_head *head = &sdp->sd_ail1_list; struct gfs2_ail *ai; + trace_gfs2_ail_flush(sdp, wbc, 1); spin_lock(&sdp->sd_ail_lock); list_for_each_entry_reverse(ai, head, ai_list) { if (wbc->nr_to_write <= 0) @@ -151,6 +152,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) gfs2_ail1_start_one(sdp, wbc, ai); /* This may drop ail lock */ } spin_unlock(&sdp->sd_ail_lock); + trace_gfs2_ail_flush(sdp, wbc, 0); } /** diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 9133d0aaac26..5d07609ec57d 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "incore.h" #include "glock.h" @@ -320,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks, MINOR(__entry->dev), __entry->blocks) ); +/* Writing back the AIL */ +TRACE_EVENT(gfs2_ail_flush, + + TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start), + + TP_ARGS(sdp, wbc, start), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( int, start ) + __field( int, sync_mode ) + __field( long, nr_to_write ) + ), + + TP_fast_assign( + __entry->dev = sdp->sd_vfs->s_dev; + __entry->start = start; + __entry->sync_mode = wbc->sync_mode; + __entry->nr_to_write = wbc->nr_to_write; + ), + + TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->start ? "start" : "end", + __entry->sync_mode == WB_SYNC_ALL ? "all" : "none", + __entry->nr_to_write) +); + /* Section 3 - bmap * * Objectives: From 6905d9e4dda6112f007e9090bca80507da158e63 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 26 Apr 2011 01:13:24 -0500 Subject: [PATCH 15/32] GFS2: make sure fallocate bytes is a multiple of blksize The GFS2 fallocate code chooses a target size to for allocating chunks of space. Whenever it can't find any resource groups with enough space free, it halves its target. Since this target is in bytes, eventually it will no longer be a multiple of blksize. As long as there is more space available in the resource group than the target, this isn't a problem, since gfs2 will use the actual space available, which is always a multiple of blksize. However, when gfs couldn't fallocate a bigger chunk than the target, it was using the non-blksize aligned number. This caused a BUG in later code that required blksize aligned offsets. GFS2 now ensures that bytes is always a multiple of blksize Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 23eab473f09d..a9f5cbe45cd9 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -816,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t bytes, max_bytes; struct gfs2_alloc *al; int error; + loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; @@ -823,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (mode & ~FALLOC_FL_KEEP_SIZE) return -EOPNOTSUPP; - offset = (offset >> sdp->sd_sb.sb_bsize_shift) << - sdp->sd_sb.sb_bsize_shift; + offset &= bsize_mask; len = next - offset; bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; if (!bytes) bytes = UINT_MAX; + bytes &= bsize_mask; + if (bytes == 0) + bytes = sdp->sd_sb.sb_bsize; gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); error = gfs2_glock_nq(&ip->i_gh); @@ -860,6 +863,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; + bytes &= bsize_mask; + if (bytes == 0) + bytes = sdp->sd_sb.sb_bsize; goto retry; } goto out_qunlock; From 4f1de018215fb56940ce5793e11becd1e8cd6e44 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 26 Apr 2011 10:23:56 +0100 Subject: [PATCH 16/32] GFS2: Fix ail list traversal In the recent patches to update the AIL list code, I managed to forget that the ail list lock got dropped, even though I added a comment specifically to remind myself :( Reported-by: Barry Marson Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ad1f1887633f..cec26c00b50d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -89,9 +89,9 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) * */ -static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, - struct writeback_control *wbc, - struct gfs2_ail *ai) +static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, + struct writeback_control *wbc, + struct gfs2_ail *ai) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { @@ -100,7 +100,6 @@ __acquires(&sdp->sd_ail_lock) struct gfs2_bufdata *bd, *s; struct buffer_head *bh; -restart: list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; @@ -120,13 +119,17 @@ __acquires(&sdp->sd_ail_lock) gl = bd->bd_gl; list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); mapping = bh->b_page->mapping; + if (!mapping) + continue; spin_unlock(&sdp->sd_ail_lock); generic_writepages(mapping, wbc); spin_lock(&sdp->sd_ail_lock); if (wbc->nr_to_write <= 0) break; - goto restart; + return 1; } + + return 0; } @@ -146,10 +149,12 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) trace_gfs2_ail_flush(sdp, wbc, 1); spin_lock(&sdp->sd_ail_lock); +restart: list_for_each_entry_reverse(ai, head, ai_list) { if (wbc->nr_to_write <= 0) break; - gfs2_ail1_start_one(sdp, wbc, ai); /* This may drop ail lock */ + if (gfs2_ail1_start_one(sdp, wbc, ai)) + goto restart; } spin_unlock(&sdp->sd_ail_lock); trace_gfs2_ail_flush(sdp, wbc, 0); From 8f065d36508f283ee6cbeb05829f032d0b782a16 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 3 May 2011 11:49:19 +0100 Subject: [PATCH 17/32] GFS2: Improve bug trap code in ->releasepage() If the buffer is dirty or pinned, then as well as printing a warning, we should also refuse to release the page in question. Currently this can occur if there is a race between mmap()ed writers and O_DIRECT on the same file. With the addition of ->launder_page() in the future, we should be able to close this gap. Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 0f5c4f9d5d62..802ac5eeba28 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bd = bh->b_private; if (bd && bd->bd_ail) goto cannot_release; - gfs2_assert_warn(sdp, !buffer_pinned(bh)); - gfs2_assert_warn(sdp, !buffer_dirty(bh)); + if (buffer_pinned(bh) || buffer_dirty(bh)) + goto not_possible; bh = bh->b_this_page; } while(bh != head); gfs2_log_unlock(sdp); @@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) } while (bh != head); return try_to_free_buffers(page); + +not_possible: /* Should never happen */ + WARN_ON(buffer_dirty(bh)); + WARN_ON(buffer_pinned(bh)); cannot_release: gfs2_log_unlock(sdp); return 0; From d192a8e5c6fec4fe8cdafebccc415db4074dee88 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 5 May 2011 12:35:40 +0100 Subject: [PATCH 18/32] GFS2: Double check link count under glock To avoid any possible races relating to the link count, we need to recheck it under the inode's glock in all cases where it matters. Also to ensure we never get any nasty surprises, this patch also ensures that once the link count has hit zero it can never be elevated by rereading in data from disk. The only place we cannot provide a proper solution is in rename in the case where we are removing a target inode and we discover that the target inode has been already unlinked on another node. The race window is very small, and we return EAGAIN in this case to indicate what has happened. The proper solution would be to move the lookup parts of rename from the vfs into library calls which the fs could call directly, but that is potentially a very big job and this fix should cover most cases for now. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 35 ++++++++++++++++++++++++++++------- fs/gfs2/ops_inode.c | 23 ++++++++++++++++++++++- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 94c3a7db1116..5a02606b68c0 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -248,6 +248,32 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, goto fail; } +/** + * gfs2_set_nlink - Set the inode's link count based on on-disk info + * @inode: The inode in question + * @nlink: The link count + * + * If the link count has hit zero, it must never be raised, whatever the + * on-disk inode might say. When new struct inodes are created the link + * count is set to 1, so that we can safely use this test even when reading + * in on disk information for the first time. + */ + +static void gfs2_set_nlink(struct inode *inode, u32 nlink) +{ + /* + * We will need to review setting the nlink count here in the + * light of the forthcoming ro bind mount work. This is a reminder + * to do that. + */ + if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { + if (nlink == 0) + clear_nlink(inode); + else + inode->i_nlink = nlink; + } +} + static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { const struct gfs2_dinode *str = buf; @@ -269,12 +295,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_inode.i_uid = be32_to_cpu(str->di_uid); ip->i_inode.i_gid = be32_to_cpu(str->di_gid); - /* - * We will need to review setting the nlink count here in the - * light of the forthcoming ro bind mount work. This is a reminder - * to do that. - */ - ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); + gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); atime.tv_sec = be64_to_cpu(str->di_atime); @@ -484,7 +505,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, /* Don't create entries in an unlinked directory */ if (!dip->i_inode.i_nlink) - return -EPERM; + return -ENOENT; error = gfs2_dir_check(&dip->i_inode, name, NULL); switch (error) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 09e436a50723..1005f9eb456e 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -162,6 +162,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_child; + error = -ENOENT; + if (inode->i_nlink == 0) + goto out_gunlock; + error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); if (error) goto out_gunlock; @@ -335,6 +339,10 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) if (error) goto out_child; + error = -ENOENT; + if (ip->i_inode.i_nlink == 0) + goto out_rgrp; + error = gfs2_glock_nq(ghs + 2); /* rgrp */ if (error) goto out_rgrp; @@ -589,6 +597,10 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out_child; + error = -ENOENT; + if (ip->i_inode.i_nlink == 0) + goto out_rgrp; + error = gfs2_glock_nq(ghs + 2); /* rgrp */ if (error) goto out_rgrp; @@ -792,6 +804,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; } + error = -ENOENT; + if (ip->i_inode.i_nlink == 0) + goto out_gunlock; + /* Check out the old directory */ error = gfs2_unlink_ok(odip, &odentry->d_name, ip); @@ -805,6 +821,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_gunlock; + if (nip->i_inode.i_nlink == 0) { + error = -EAGAIN; + goto out_gunlock; + } + if (S_ISDIR(nip->i_inode.i_mode)) { if (nip->i_entries < 2) { if (gfs2_consist_inode(nip)) @@ -835,7 +856,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (odip != ndip) { if (!ndip->i_inode.i_nlink) { - error = -EINVAL; + error = -ENOENT; goto out_gunlock; } if (ndip->i_entries == (u32)-1) { From 588da3b3be8b3225c2dd192aa782bf6c5c32eb84 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 5 May 2011 12:36:38 +0100 Subject: [PATCH 19/32] GFS2: Don't use a try lock when promoting to a higher mode Previously we marked all locks being promoted to a higher mode with the try flag to avoid any potential deadlocks issues. The DLM is able to detect these and report them in way that GFS2 can deal with them correctly. So we can just request the required mode and wait for a response without needing to perform this check. Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index eed4b6855614..ee2178d661f5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -543,11 +543,6 @@ __acquires(&gl->gl_spin) clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_hold(gl); - if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED || - gl->gl_state == LM_ST_DEFERRED) && - !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) - lck_flags |= LM_FLAG_TRY_1CB; - if (sdp->sd_lockstruct.ls_ops->lm_lock) { /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); From 2baee03fb916563d7cc597e5460e4cb938815c52 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 12:08:36 +0100 Subject: [PATCH 20/32] GFS2: Don't use gfs2_change_nlink in link syscall There are three users of gfs2_change_nlink which add to the link count. Two of these are about to be removed in later patches, so this means that there will no callers, when that happens allowing removal of that function, also in a later patch. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_inode.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1005f9eb456e..acb6f69b02ed 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -145,6 +145,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, struct inode *inode = old_dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder ghs[2]; + struct buffer_head *dibh; int alloc_required; int error; @@ -230,12 +231,22 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_ipres; } - error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode)); + error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; - error = gfs2_change_nlink(ip, +1); + error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode)); + if (error) + goto out_brelse; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + inc_nlink(&ip->i_inode); + ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_dinode_out(ip, dibh->b_data); + mark_inode_dirty(&ip->i_inode); + +out_brelse: + brelse(dibh); out_end_trans: gfs2_trans_end(sdp); out_ipres: From 855d23ce2665c56437bd88fa6a0d45b6713bd194 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 16:42:37 +0100 Subject: [PATCH 21/32] GFS2: Make gfs2_dir_del update link count when required When we remove an entry from a directory, we can save ourselves some trouble if we know the type of the entry in question, since if it is itself a directory, we can update the link count of the parent at the same time as removing the directory entry. In addition this patch also merges the rmdir and unlink code which was almost identical anyway. This eliminates the calls to remove the . and .. directory entries on each rmdir (not needed since the directory will be deallocated, anyway) which was the only thing preventing passing the dentry to gfs2_dir_del(). The passing of the dentry rather than just the name allows us to figure out the type of the entry which is being removed, and thus adjust the link count when required. Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 5 +- fs/gfs2/dir.h | 2 +- fs/gfs2/ops_inode.c | 221 +++++++++++++------------------------------- 3 files changed, 71 insertions(+), 157 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index f7a31374ff82..410265151ad1 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1669,8 +1669,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, * Returns: 0 on success, error code on failure */ -int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) +int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) { + const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; int error; @@ -1711,6 +1712,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_entries--; dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; + if (S_ISDIR(dentry->d_inode->i_mode)) + drop_nlink(&dip->i_inode); gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index a98f644bd3df..66831f1d23ea 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -23,7 +23,7 @@ extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, const struct gfs2_inode *ip); extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, const struct gfs2_inode *ip, unsigned int type); -extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); +extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir); extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index acb6f69b02ed..765da06c8f55 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -312,11 +312,52 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, } /** - * gfs2_unlink - Unlink a file - * @dir: The inode of the directory containing the file to unlink + * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it + * @dip: The parent directory + * @name: The name of the entry in the parent directory + * @bh: The inode buffer for the inode to be removed + * @inode: The inode to be removed + * + * Called with all the locks and in a transaction. This will only be + * called for a directory after it has been checked to ensure it is empty. + * + * Returns: 0 on success, or an error + */ + +static int gfs2_unlink_inode(struct gfs2_inode *dip, + const struct dentry *dentry, + struct buffer_head *bh) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + int error; + + error = gfs2_dir_del(dip, dentry); + if (error) + return error; + + ip->i_entries = 0; + inode->i_ctime = CURRENT_TIME; + if (S_ISDIR(inode->i_mode)) + clear_nlink(inode); + else + drop_nlink(inode); + gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_dinode_out(ip, bh->b_data); + mark_inode_dirty(inode); + if (inode->i_nlink == 0) + gfs2_unlink_di(inode); + return 0; +} + + +/** + * gfs2_unlink - Unlink an inode (this does rmdir as well) + * @dir: The inode of the directory containing the inode to unlink * @dentry: The file itself * - * Unlink a file. Call gfs2_unlinki() + * This routine uses the type of the inode as a flag to figure out + * whether this is an unlink or an rmdir. * * Returns: errno */ @@ -325,7 +366,9 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) { struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct buffer_head *bh; struct gfs2_holder ghs[3]; struct gfs2_rgrpd *rgd; struct gfs2_holder ri_gh; @@ -351,9 +394,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) goto out_child; error = -ENOENT; - if (ip->i_inode.i_nlink == 0) + if (inode->i_nlink == 0) goto out_rgrp; + if (S_ISDIR(inode->i_mode)) { + error = -ENOTEMPTY; + if (ip->i_entries > 2 || inode->i_nlink > 2) + goto out_rgrp; + } + error = gfs2_glock_nq(ghs + 2); /* rgrp */ if (error) goto out_rgrp; @@ -362,15 +411,16 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) if (error) goto out_gunlock; - error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); + error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0); if (error) goto out_gunlock; - error = gfs2_dir_del(dip, &dentry->d_name); - if (error) - goto out_end_trans; + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_end_trans; - error = gfs2_change_nlink(ip, -1); + error = gfs2_unlink_inode(dip, dentry, bh); + brelse(bh); out_end_trans: gfs2_trans_end(sdp); @@ -521,138 +571,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) return 0; } -/** - * gfs2_rmdiri - Remove a directory - * @dip: The parent directory of the directory to be removed - * @name: The name of the directory to be removed - * @ip: The GFS2 inode of the directory to be removed - * - * Assumes Glocks on dip and ip are held - * - * Returns: errno - */ - -static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip) -{ - int error; - - if (ip->i_entries != 2) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - error = gfs2_dir_del(dip, name); - if (error) - return error; - - error = gfs2_change_nlink(dip, -1); - if (error) - return error; - - error = gfs2_dir_del(ip, &gfs2_qdot); - if (error) - return error; - - error = gfs2_dir_del(ip, &gfs2_qdotdot); - if (error) - return error; - - /* It looks odd, but it really should be done twice */ - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - error = gfs2_change_nlink(ip, -1); - if (error) - return error; - - return error; -} - -/** - * gfs2_rmdir - Remove a directory - * @dir: The parent directory of the directory to be removed - * @dentry: The dentry of the directory to remove - * - * Remove a directory. Call gfs2_rmdiri() - * - * Returns: errno - */ - -static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); - struct gfs2_holder ghs[3]; - struct gfs2_rgrpd *rgd; - struct gfs2_holder ri_gh; - int error; - - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - return error; - gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); - - rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); - gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); - - error = gfs2_glock_nq(ghs); /* parent */ - if (error) - goto out_parent; - - error = gfs2_glock_nq(ghs + 1); /* child */ - if (error) - goto out_child; - - error = -ENOENT; - if (ip->i_inode.i_nlink == 0) - goto out_rgrp; - - error = gfs2_glock_nq(ghs + 2); /* rgrp */ - if (error) - goto out_rgrp; - - error = gfs2_unlink_ok(dip, &dentry->d_name, ip); - if (error) - goto out_gunlock; - - if (ip->i_entries < 2) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - error = -EIO; - goto out_gunlock; - } - if (ip->i_entries > 2) { - error = -ENOTEMPTY; - goto out_gunlock; - } - - error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0); - if (error) - goto out_gunlock; - - error = gfs2_rmdiri(dip, &dentry->d_name, ip); - - gfs2_trans_end(sdp); - -out_gunlock: - gfs2_glock_dq(ghs + 2); -out_rgrp: - gfs2_holder_uninit(ghs + 2); - gfs2_glock_dq(ghs + 1); -out_child: - gfs2_holder_uninit(ghs + 1); - gfs2_glock_dq(ghs); -out_parent: - gfs2_holder_uninit(ghs); - gfs2_glock_dq_uninit(&ri_gh); - return error; -} - /** * gfs2_mknod - Make a special file * @dir: The directory in which the special file will reside @@ -930,23 +848,16 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Remove the target file, if it exists */ if (nip) { - if (S_ISDIR(nip->i_inode.i_mode)) - error = gfs2_rmdiri(ndip, &ndentry->d_name, nip); - else { - error = gfs2_dir_del(ndip, &ndentry->d_name); - if (error) - goto out_end_trans; - error = gfs2_change_nlink(nip, -1); - } + struct buffer_head *bh; + error = gfs2_meta_inode_buffer(nip, &bh); if (error) goto out_end_trans; + error = gfs2_unlink_inode(ndip, ndentry, bh); + brelse(bh); } if (dir_rename) { error = gfs2_change_nlink(ndip, +1); - if (error) - goto out_end_trans; - error = gfs2_change_nlink(odip, -1); if (error) goto out_end_trans; @@ -964,7 +875,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, brelse(dibh); } - error = gfs2_dir_del(odip, &odentry->d_name); + error = gfs2_dir_del(odip, odentry); if (error) goto out_end_trans; @@ -1347,7 +1258,7 @@ const struct inode_operations gfs2_dir_iops = { .unlink = gfs2_unlink, .symlink = gfs2_symlink, .mkdir = gfs2_mkdir, - .rmdir = gfs2_rmdir, + .rmdir = gfs2_unlink, .mknod = gfs2_mknod, .rename = gfs2_rename, .permission = gfs2_permission, From 3d6ecb7d16fd4248fce58387a982a0756ad3fcc2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 13:30:08 +0100 Subject: [PATCH 22/32] GFS2: When adding a new dir entry, inc link count if it is a subdir This adds an increment of the link count when we add a new directory entry, if that entry is itself a directory. This means that we no longer need separate code to perform this operation. Now that both adding and removing directory entries automatically update the parent directory's link count if required, that makes the code shorter and simpler than before. Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 6 ++++-- fs/gfs2/dir.h | 2 +- fs/gfs2/inode.c | 48 +-------------------------------------------- fs/gfs2/inode.h | 1 - fs/gfs2/ops_inode.c | 11 ++--------- 5 files changed, 8 insertions(+), 60 deletions(-) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 410265151ad1..091ee4779538 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1597,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inode *nip, unsigned type) + const struct gfs2_inode *nip) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1613,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, return PTR_ERR(dent); dent = gfs2_init_dirent(inode, dent, name, bh); gfs2_inum_out(nip, dent); - dent->de_type = cpu_to_be16(type); + dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); @@ -1625,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_entries++; ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + if (S_ISDIR(nip->i_inode.i_mode)) + inc_nlink(&ip->i_inode); gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 66831f1d23ea..e686af11becd 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -22,7 +22,7 @@ extern struct inode *gfs2_dir_search(struct inode *dir, extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, const struct gfs2_inode *ip); extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip, unsigned int type); + const struct gfs2_inode *ip); extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5a02606b68c0..a8c14c9985e2 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -362,52 +362,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) return error; } -/** - * gfs2_change_nlink - Change nlink count on inode - * @ip: The GFS2 inode - * @diff: The change in the nlink count required - * - * Returns: errno - */ -int gfs2_change_nlink(struct gfs2_inode *ip, int diff) -{ - struct buffer_head *dibh; - u32 nlink; - int error; - - BUG_ON(diff != 1 && diff != -1); - nlink = ip->i_inode.i_nlink + diff; - - /* If we are reducing the nlink count, but the new value ends up being - bigger than the old one, we must have underflowed. */ - if (diff < 0 && nlink > ip->i_inode.i_nlink) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (diff > 0) - inc_nlink(&ip->i_inode); - else - drop_nlink(&ip->i_inode); - - ip->i_inode.i_ctime = CURRENT_TIME; - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - mark_inode_dirty(&ip->i_inode); - - if (ip->i_inode.i_nlink == 0) - gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ - - return error; -} - struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) { struct qstr qstr; @@ -723,7 +677,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); + error = gfs2_dir_add(&dip->i_inode, name, ip); if (error) goto fail_end_trans; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 8d1344a4e673..f9b8289deec5 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -106,7 +106,6 @@ extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonbl extern int gfs2_inode_refresh(struct gfs2_inode *ip); -extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff); extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); extern struct inode *gfs2_createi(struct gfs2_holder *ghs, diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 765da06c8f55..6b8c2bdb5a0c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -235,7 +235,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_end_trans; - error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode)); + error = gfs2_dir_add(dir, &dentry->d_name, ip); if (error) goto out_brelse; @@ -554,9 +554,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) brelse(dibh); } - error = gfs2_change_nlink(dip, +1); - gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ - gfs2_trans_end(sdp); if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); @@ -857,10 +854,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, } if (dir_rename) { - error = gfs2_change_nlink(ndip, +1); - if (error) - goto out_end_trans; - error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); if (error) goto out_end_trans; @@ -879,7 +872,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; - error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode)); + error = gfs2_dir_add(ndir, &ndentry->d_name, ip); if (error) goto out_end_trans; From 94fb763b1a76a2000ad21f3119b05c90040acaf0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 13:36:10 +0100 Subject: [PATCH 23/32] GFS2: Remove gfs2_dinode_print() function This function was intended for debugging purposes, but it is not very useful. If we want to know what is on disk then all we need is a block number and gfs2_edit can give us much better information about what is there. Otherwise, if we are interested in what is stored in the in-core inode, it doesn't help us out there either. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 24 +----------------------- fs/gfs2/inode.h | 1 - fs/gfs2/ops_inode.c | 3 +-- fs/gfs2/super.c | 3 +-- 4 files changed, 3 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a8c14c9985e2..7c2121fe10df 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -329,8 +329,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) return 0; corrupt: - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); + gfs2_consist_inode(ip); return -EIO; } @@ -901,24 +900,3 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); } - -void gfs2_dinode_print(const struct gfs2_inode *ip) -{ - printk(KERN_INFO " no_formal_ino = %llu\n", - (unsigned long long)ip->i_no_formal_ino); - printk(KERN_INFO " no_addr = %llu\n", - (unsigned long long)ip->i_no_addr); - printk(KERN_INFO " i_size = %llu\n", - (unsigned long long)i_size_read(&ip->i_inode)); - printk(KERN_INFO " blocks = %llu\n", - (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); - printk(KERN_INFO " i_goal = %llu\n", - (unsigned long long)ip->i_goal); - printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); - printk(KERN_INFO " i_height = %u\n", ip->i_height); - printk(KERN_INFO " i_depth = %u\n", ip->i_depth); - printk(KERN_INFO " i_entries = %u\n", ip->i_entries); - printk(KERN_INFO " i_eattr = %llu\n", - (unsigned long long)ip->i_eattr); -} - diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index f9b8289deec5..7ed60aa1b61f 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -115,7 +115,6 @@ extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); -extern void gfs2_dinode_print(const struct gfs2_inode *ip); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 6b8c2bdb5a0c..2607c2c6de2b 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -754,8 +754,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (S_ISDIR(nip->i_inode.i_mode)) { if (nip->i_entries < 2) { - if (gfs2_consist_inode(nip)) - gfs2_dinode_print(nip); + gfs2_consist_inode(nip); error = -EIO; goto out_gunlock; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 58fe3a4ac829..3061ac64f81e 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1341,8 +1341,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) int error; if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); + gfs2_consist_inode(ip); return -EIO; } From d4b2cf1b0566eebfe39a6d70e9e4b5fa01ddaace Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 13:49:59 +0100 Subject: [PATCH 24/32] GFS2: Move gfs2_refresh_inode() and friends into glops.c Eventually there will only be a single caller of this code, so lets move it where it can be made static at some future date. Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/inode.c | 117 ------------------------------------------------ 2 files changed, 113 insertions(+), 117 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 7c1b08f63ddb..8ef70f464731 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -251,6 +251,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl) return 1; } +/** + * gfs2_set_nlink - Set the inode's link count based on on-disk info + * @inode: The inode in question + * @nlink: The link count + * + * If the link count has hit zero, it must never be raised, whatever the + * on-disk inode might say. When new struct inodes are created the link + * count is set to 1, so that we can safely use this test even when reading + * in on disk information for the first time. + */ + +static void gfs2_set_nlink(struct inode *inode, u32 nlink) +{ + /* + * We will need to review setting the nlink count here in the + * light of the forthcoming ro bind mount work. This is a reminder + * to do that. + */ + if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { + if (nlink == 0) + clear_nlink(inode); + else + inode->i_nlink = nlink; + } +} + +static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) +{ + const struct gfs2_dinode *str = buf; + struct timespec atime; + u16 height, depth; + + if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) + goto corrupt; + ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); + ip->i_inode.i_mode = be32_to_cpu(str->di_mode); + ip->i_inode.i_rdev = 0; + switch (ip->i_inode.i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); + break; + }; + + ip->i_inode.i_uid = be32_to_cpu(str->di_uid); + ip->i_inode.i_gid = be32_to_cpu(str->di_gid); + gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); + i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); + gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); + atime.tv_sec = be64_to_cpu(str->di_atime); + atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); + if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) + ip->i_inode.i_atime = atime; + ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); + ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); + + ip->i_goal = be64_to_cpu(str->di_goal_meta); + ip->i_generation = be64_to_cpu(str->di_generation); + + ip->i_diskflags = be32_to_cpu(str->di_flags); + gfs2_set_inode_flags(&ip->i_inode); + height = be16_to_cpu(str->di_height); + if (unlikely(height > GFS2_MAX_META_HEIGHT)) + goto corrupt; + ip->i_height = (u8)height; + + depth = be16_to_cpu(str->di_depth); + if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) + goto corrupt; + ip->i_depth = (u8)depth; + ip->i_entries = be32_to_cpu(str->di_entries); + + ip->i_eattr = be64_to_cpu(str->di_eattr); + if (S_ISREG(ip->i_inode.i_mode)) + gfs2_set_aops(&ip->i_inode); + + return 0; +corrupt: + gfs2_consist_inode(ip); + return -EIO; +} + +/** + * gfs2_inode_refresh - Refresh the incore copy of the dinode + * @ip: The GFS2 inode + * + * Returns: errno + */ + +int gfs2_inode_refresh(struct gfs2_inode *ip) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { + brelse(dibh); + return -EIO; + } + + error = gfs2_dinode_in(ip, dibh->b_data); + brelse(dibh); + clear_bit(GIF_INVALID, &ip->i_flags); + + return error; +} + /** * inode_go_lock - operation done after an inode lock is locked by a process * @gl: the glock diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7c2121fe10df..5d48baf46457 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -35,11 +35,6 @@ #include "trans.h" #include "util.h" -struct gfs2_inum_range_host { - u64 ir_start; - u64 ir_length; -}; - struct gfs2_skip_data { u64 no_addr; int skipped; @@ -248,118 +243,6 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, goto fail; } -/** - * gfs2_set_nlink - Set the inode's link count based on on-disk info - * @inode: The inode in question - * @nlink: The link count - * - * If the link count has hit zero, it must never be raised, whatever the - * on-disk inode might say. When new struct inodes are created the link - * count is set to 1, so that we can safely use this test even when reading - * in on disk information for the first time. - */ - -static void gfs2_set_nlink(struct inode *inode, u32 nlink) -{ - /* - * We will need to review setting the nlink count here in the - * light of the forthcoming ro bind mount work. This is a reminder - * to do that. - */ - if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) { - if (nlink == 0) - clear_nlink(inode); - else - inode->i_nlink = nlink; - } -} - -static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) -{ - const struct gfs2_dinode *str = buf; - struct timespec atime; - u16 height, depth; - - if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) - goto corrupt; - ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino); - ip->i_inode.i_mode = be32_to_cpu(str->di_mode); - ip->i_inode.i_rdev = 0; - switch (ip->i_inode.i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), - be32_to_cpu(str->di_minor)); - break; - }; - - ip->i_inode.i_uid = be32_to_cpu(str->di_uid); - ip->i_inode.i_gid = be32_to_cpu(str->di_gid); - gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); - i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); - gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); - atime.tv_sec = be64_to_cpu(str->di_atime); - atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); - if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) - ip->i_inode.i_atime = atime; - ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); - ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); - ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); - ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec); - - ip->i_goal = be64_to_cpu(str->di_goal_meta); - ip->i_generation = be64_to_cpu(str->di_generation); - - ip->i_diskflags = be32_to_cpu(str->di_flags); - gfs2_set_inode_flags(&ip->i_inode); - height = be16_to_cpu(str->di_height); - if (unlikely(height > GFS2_MAX_META_HEIGHT)) - goto corrupt; - ip->i_height = (u8)height; - - depth = be16_to_cpu(str->di_depth); - if (unlikely(depth > GFS2_DIR_MAX_DEPTH)) - goto corrupt; - ip->i_depth = (u8)depth; - ip->i_entries = be32_to_cpu(str->di_entries); - - ip->i_eattr = be64_to_cpu(str->di_eattr); - if (S_ISREG(ip->i_inode.i_mode)) - gfs2_set_aops(&ip->i_inode); - - return 0; -corrupt: - gfs2_consist_inode(ip); - return -EIO; -} - -/** - * gfs2_inode_refresh - Refresh the incore copy of the dinode - * @ip: The GFS2 inode - * - * Returns: errno - */ - -int gfs2_inode_refresh(struct gfs2_inode *ip) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) { - brelse(dibh); - return -EIO; - } - - error = gfs2_dinode_in(ip, dibh->b_data); - brelse(dibh); - clear_bit(GIF_INVALID, &ip->i_flags); - - return error; -} struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) { From 194c011fc4650d0dd1eecbc35bc26045108aca51 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 14:06:38 +0100 Subject: [PATCH 25/32] GFS2: Move most of the remaining inode.c into ops_inode.c This is in preparation to remove inode.c and rename ops_inode.c to inode.c. Also most of the functions which were left in inode.c relate to the creation and lookup of inodes. I'm intending to work on consolidating some of that code, and its easier when its all in one place. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 711 -------------------------------------------- fs/gfs2/ops_inode.c | 711 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 711 insertions(+), 711 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5d48baf46457..b95ee5dc46ca 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -35,717 +35,6 @@ #include "trans.h" #include "util.h" -struct gfs2_skip_data { - u64 no_addr; - int skipped; - int non_block; -}; - -static int iget_test(struct inode *inode, void *opaque) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_skip_data *data = opaque; - - if (ip->i_no_addr == data->no_addr) { - if (data->non_block && - inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { - data->skipped = 1; - return 0; - } - return 1; - } - return 0; -} - -static int iget_set(struct inode *inode, void *opaque) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_skip_data *data = opaque; - - if (data->skipped) - return -ENOENT; - inode->i_ino = (unsigned long)(data->no_addr); - ip->i_no_addr = data->no_addr; - return 0; -} - -struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block) -{ - unsigned long hash = (unsigned long)no_addr; - struct gfs2_skip_data data; - - data.no_addr = no_addr; - data.skipped = 0; - data.non_block = non_block; - return ilookup5(sb, hash, iget_test, &data); -} - -static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, - int non_block) -{ - struct gfs2_skip_data data; - unsigned long hash = (unsigned long)no_addr; - - data.no_addr = no_addr; - data.skipped = 0; - data.non_block = non_block; - return iget5_locked(sb, hash, iget_test, iget_set, &data); -} - -/** - * gfs2_set_iop - Sets inode operations - * @inode: The inode with correct i_mode filled in - * - * GFS2 lookup code fills in vfs inode contents based on info obtained - * from directory entry inside gfs2_inode_lookup(). - */ - -static void gfs2_set_iop(struct inode *inode) -{ - struct gfs2_sbd *sdp = GFS2_SB(inode); - umode_t mode = inode->i_mode; - - if (S_ISREG(mode)) { - inode->i_op = &gfs2_file_iops; - if (gfs2_localflocks(sdp)) - inode->i_fop = &gfs2_file_fops_nolock; - else - inode->i_fop = &gfs2_file_fops; - } else if (S_ISDIR(mode)) { - inode->i_op = &gfs2_dir_iops; - if (gfs2_localflocks(sdp)) - inode->i_fop = &gfs2_dir_fops_nolock; - else - inode->i_fop = &gfs2_dir_fops; - } else if (S_ISLNK(mode)) { - inode->i_op = &gfs2_symlink_iops; - } else { - inode->i_op = &gfs2_file_iops; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - } -} - -/** - * gfs2_inode_lookup - Lookup an inode - * @sb: The super block - * @no_addr: The inode number - * @type: The type of the inode - * non_block: Can we block on inodes that are being freed? - * - * Returns: A VFS inode, or an error - */ - -struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, - u64 no_addr, u64 no_formal_ino, int non_block) -{ - struct inode *inode; - struct gfs2_inode *ip; - struct gfs2_glock *io_gl = NULL; - int error; - - inode = gfs2_iget(sb, no_addr, non_block); - ip = GFS2_I(inode); - - if (!inode) - return ERR_PTR(-ENOBUFS); - - if (inode->i_state & I_NEW) { - struct gfs2_sbd *sdp = GFS2_SB(inode); - ip->i_no_formal_ino = no_formal_ino; - - error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); - if (unlikely(error)) - goto fail; - ip->i_gl->gl_object = ip; - - error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); - if (unlikely(error)) - goto fail_put; - - set_bit(GIF_INVALID, &ip->i_flags); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); - if (unlikely(error)) - goto fail_iopen; - - ip->i_iopen_gh.gh_gl->gl_object = ip; - gfs2_glock_put(io_gl); - io_gl = NULL; - - if (type == DT_UNKNOWN) { - /* Inode glock must be locked already */ - error = gfs2_inode_refresh(GFS2_I(inode)); - if (error) - goto fail_refresh; - } else { - inode->i_mode = DT2IF(type); - } - - gfs2_set_iop(inode); - unlock_new_inode(inode); - } - - return inode; - -fail_refresh: - ip->i_iopen_gh.gh_gl->gl_object = NULL; - gfs2_glock_dq_uninit(&ip->i_iopen_gh); -fail_iopen: - if (io_gl) - gfs2_glock_put(io_gl); -fail_put: - ip->i_gl->gl_object = NULL; - gfs2_glock_put(ip->i_gl); -fail: - iget_failed(inode); - return ERR_PTR(error); -} - -struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, - u64 *no_formal_ino, unsigned int blktype) -{ - struct super_block *sb = sdp->sd_vfs; - struct gfs2_holder i_gh; - struct inode *inode = NULL; - int error; - - /* Must not read in block until block type is verified */ - error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, - LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); - if (error) - return ERR_PTR(error); - - error = gfs2_check_blk_type(sdp, no_addr, blktype); - if (error) - goto fail; - - inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); - if (IS_ERR(inode)) - goto fail; - - /* Two extra checks for NFS only */ - if (no_formal_ino) { - error = -ESTALE; - if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) - goto fail_iput; - - error = -EIO; - if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) - goto fail_iput; - - error = 0; - } - -fail: - gfs2_glock_dq_uninit(&i_gh); - return error ? ERR_PTR(error) : inode; -fail_iput: - iput(inode); - goto fail; -} - - -struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) -{ - struct qstr qstr; - struct inode *inode; - gfs2_str2qstr(&qstr, name); - inode = gfs2_lookupi(dip, &qstr, 1); - /* gfs2_lookupi has inconsistent callers: vfs - * related routines expect NULL for no entry found, - * gfs2_lookup_simple callers expect ENOENT - * and do not check for NULL. - */ - if (inode == NULL) - return ERR_PTR(-ENOENT); - else - return inode; -} - - -/** - * gfs2_lookupi - Look up a filename in a directory and return its inode - * @d_gh: An initialized holder for the directory glock - * @name: The name of the inode to look for - * @is_root: If 1, ignore the caller's permissions - * @i_gh: An uninitialized holder for the new inode glock - * - * This can be called via the VFS filldir function when NFS is doing - * a readdirplus and the inode which its intending to stat isn't - * already in cache. In this case we must not take the directory glock - * again, since the readdir call will have already taken that lock. - * - * Returns: errno - */ - -struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root) -{ - struct super_block *sb = dir->i_sb; - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_holder d_gh; - int error = 0; - struct inode *inode = NULL; - int unlock = 0; - - if (!name->len || name->len > GFS2_FNAMESIZE) - return ERR_PTR(-ENAMETOOLONG); - - if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || - (name->len == 2 && memcmp(name->name, "..", 2) == 0 && - dir == sb->s_root->d_inode)) { - igrab(dir); - return dir; - } - - if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - if (error) - return ERR_PTR(error); - unlock = 1; - } - - if (!is_root) { - error = gfs2_permission(dir, MAY_EXEC, 0); - if (error) - goto out; - } - - inode = gfs2_dir_search(dir, name); - if (IS_ERR(inode)) - error = PTR_ERR(inode); -out: - if (unlock) - gfs2_glock_dq_uninit(&d_gh); - if (error == -ENOENT) - return NULL; - return inode ? inode : ERR_PTR(error); -} - -/** - * create_ok - OK to create a new on-disk inode here? - * @dip: Directory in which dinode is to be created - * @name: Name of new dinode - * @mode: - * - * Returns: errno - */ - -static int create_ok(struct gfs2_inode *dip, const struct qstr *name, - unsigned int mode) -{ - int error; - - error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); - if (error) - return error; - - /* Don't create entries in an unlinked directory */ - if (!dip->i_inode.i_nlink) - return -ENOENT; - - error = gfs2_dir_check(&dip->i_inode, name, NULL); - switch (error) { - case -ENOENT: - error = 0; - break; - case 0: - return -EEXIST; - default: - return error; - } - - if (dip->i_entries == (u32)-1) - return -EFBIG; - if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) - return -EMLINK; - - return 0; -} - -static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, - unsigned int *uid, unsigned int *gid) -{ - if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && - (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { - if (S_ISDIR(*mode)) - *mode |= S_ISUID; - else if (dip->i_inode.i_uid != current_fsuid()) - *mode &= ~07111; - *uid = dip->i_inode.i_uid; - } else - *uid = current_fsuid(); - - if (dip->i_inode.i_mode & S_ISGID) { - if (S_ISDIR(*mode)) - *mode |= S_ISGID; - *gid = dip->i_inode.i_gid; - } else - *gid = current_fsgid(); -} - -static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - int error; - - if (gfs2_alloc_get(dip) == NULL) - return -ENOMEM; - - dip->i_alloc->al_requested = RES_DINODE; - error = gfs2_inplace_reserve(dip); - if (error) - goto out; - - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); - if (error) - goto out_ipreserv; - - error = gfs2_alloc_di(dip, no_addr, generation); - - gfs2_trans_end(sdp); - -out_ipreserv: - gfs2_inplace_release(dip); -out: - gfs2_alloc_put(dip); - return error; -} - -/** - * init_dinode - Fill in a new dinode structure - * @dip: the directory this inode is being created in - * @gl: The glock covering the new inode - * @inum: the inode number - * @mode: the file permissions - * @uid: - * @gid: - * - */ - -static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - const struct gfs2_inum_host *inum, unsigned int mode, - unsigned int uid, unsigned int gid, - const u64 *generation, dev_t dev, struct buffer_head **bhp) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_dinode *di; - struct buffer_head *dibh; - struct timespec tv = CURRENT_TIME; - - dibh = gfs2_meta_new(gl, inum->no_addr); - gfs2_trans_add_bh(gl, dibh, 1); - gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); - gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); - di = (struct gfs2_dinode *)dibh->b_data; - - di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); - di->di_num.no_addr = cpu_to_be64(inum->no_addr); - di->di_mode = cpu_to_be32(mode); - di->di_uid = cpu_to_be32(uid); - di->di_gid = cpu_to_be32(gid); - di->di_nlink = 0; - di->di_size = 0; - di->di_blocks = cpu_to_be64(1); - di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); - di->di_major = cpu_to_be32(MAJOR(dev)); - di->di_minor = cpu_to_be32(MINOR(dev)); - di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); - di->di_generation = cpu_to_be64(*generation); - di->di_flags = 0; - - if (S_ISREG(mode)) { - if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || - gfs2_tune_get(sdp, gt_new_files_jdata)) - di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - } else if (S_ISDIR(mode)) { - di->di_flags |= cpu_to_be32(dip->i_diskflags & - GFS2_DIF_INHERIT_JDATA); - } - - di->__pad1 = 0; - di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); - di->di_height = 0; - di->__pad2 = 0; - di->__pad3 = 0; - di->di_depth = 0; - di->di_entries = 0; - memset(&di->__pad4, 0, sizeof(di->__pad4)); - di->di_eattr = 0; - di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); - di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); - di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); - memset(&di->di_reserved, 0, sizeof(di->di_reserved)); - - set_buffer_uptodate(dibh); - - *bhp = dibh; -} - -static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - unsigned int mode, const struct gfs2_inum_host *inum, - const u64 *generation, dev_t dev, struct buffer_head **bhp) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - unsigned int uid, gid; - int error; - - munge_mode_uid_gid(dip, &mode, &uid, &gid); - if (!gfs2_alloc_get(dip)) - return -ENOMEM; - - error = gfs2_quota_lock(dip, uid, gid); - if (error) - goto out; - - error = gfs2_quota_check(dip, uid, gid); - if (error) - goto out_quota; - - error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); - if (error) - goto out_quota; - - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); - gfs2_quota_change(dip, +1, uid, gid); - gfs2_trans_end(sdp); - -out_quota: - gfs2_quota_unlock(dip); -out: - gfs2_alloc_put(dip); - return error; -} - -static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_alloc *al; - int alloc_required; - struct buffer_head *dibh; - int error; - - al = gfs2_alloc_get(dip); - if (!al) - return -ENOMEM; - - error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto fail; - - error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); - if (alloc_required < 0) - goto fail_quota_locks; - if (alloc_required) { - error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); - if (error) - goto fail_quota_locks; - - al->al_requested = sdp->sd_max_dirres; - - error = gfs2_inplace_reserve(dip); - if (error) - goto fail_quota_locks; - - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - al->al_rgd->rd_length + - 2 * RES_DINODE + - RES_STATFS + RES_QUOTA, 0); - if (error) - goto fail_ipreserv; - } else { - error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); - if (error) - goto fail_quota_locks; - } - - error = gfs2_dir_add(&dip->i_inode, name, ip); - if (error) - goto fail_end_trans; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - ip->i_inode.i_nlink = 1; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - return 0; - -fail_end_trans: - gfs2_trans_end(sdp); - -fail_ipreserv: - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - -fail_quota_locks: - gfs2_quota_unlock(dip); - -fail: - gfs2_alloc_put(dip); - return error; -} - -static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, - const struct qstr *qstr) -{ - int err; - size_t len; - void *value; - char *name; - - err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, - &name, &value, &len); - - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; - } - - err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0, - GFS2_EATYPE_SECURITY); - kfree(value); - kfree(name); - - return err; -} - -/** - * gfs2_createi - Create a new inode - * @ghs: An array of two holders - * @name: The name of the new file - * @mode: the permissions on the new inode - * - * @ghs[0] is an initialized holder for the directory - * @ghs[1] is the holder for the inode lock - * - * If the return value is not NULL, the glocks on both the directory and the new - * file are held. A transaction has been started and an inplace reservation - * is held, as well. - * - * Returns: An inode - */ - -struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode, dev_t dev) -{ - struct inode *inode = NULL; - struct gfs2_inode *dip = ghs->gh_gl->gl_object; - struct inode *dir = &dip->i_inode; - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; - int error; - u64 generation; - struct buffer_head *bh = NULL; - - if (!name->len || name->len > GFS2_FNAMESIZE) - return ERR_PTR(-ENAMETOOLONG); - - gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); - error = gfs2_glock_nq(ghs); - if (error) - goto fail; - - error = create_ok(dip, name, mode); - if (error) - goto fail_gunlock; - - error = alloc_dinode(dip, &inum.no_addr, &generation); - if (error) - goto fail_gunlock; - inum.no_formal_ino = generation; - - error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, - LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); - if (error) - goto fail_gunlock; - - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); - if (error) - goto fail_gunlock2; - - inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, - inum.no_formal_ino, 0); - if (IS_ERR(inode)) - goto fail_gunlock2; - - error = gfs2_inode_refresh(GFS2_I(inode)); - if (error) - goto fail_gunlock2; - - error = gfs2_acl_create(dip, inode); - if (error) - goto fail_gunlock2; - - error = gfs2_security_init(dip, GFS2_I(inode), name); - if (error) - goto fail_gunlock2; - - error = link_dinode(dip, name, GFS2_I(inode)); - if (error) - goto fail_gunlock2; - - if (bh) - brelse(bh); - return inode; - -fail_gunlock2: - gfs2_glock_dq_uninit(ghs + 1); - if (inode && !IS_ERR(inode)) - iput(inode); -fail_gunlock: - gfs2_glock_dq(ghs); -fail: - if (bh) - brelse(bh); - return ERR_PTR(error); -} - -static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) -{ - struct inode *inode = &ip->i_inode; - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - setattr_copy(inode, attr); - mark_inode_dirty(inode); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - return 0; -} - -/** - * gfs2_setattr_simple - - * @ip: - * @attr: - * - * Called with a reference on the vnode. - * - * Returns: errno - */ - -int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) -{ - int error; - - if (current->journal_info) - return __gfs2_setattr_simple(ip, attr); - - error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); - if (error) - return error; - - error = __gfs2_setattr_simple(ip, attr); - gfs2_trans_end(GFS2_SB(&ip->i_inode)); - return error; -} void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 2607c2c6de2b..a783a7a8c0ca 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "gfs2.h" @@ -34,7 +35,675 @@ #include "trans.h" #include "util.h" #include "super.h" +#include "glops.h" +struct gfs2_skip_data { + u64 no_addr; + int skipped; + int non_block; +}; + +static int iget_test(struct inode *inode, void *opaque) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_skip_data *data = opaque; + + if (ip->i_no_addr == data->no_addr) { + if (data->non_block && + inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + data->skipped = 1; + return 0; + } + return 1; + } + return 0; +} + +static int iget_set(struct inode *inode, void *opaque) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_skip_data *data = opaque; + + if (data->skipped) + return -ENOENT; + inode->i_ino = (unsigned long)(data->no_addr); + ip->i_no_addr = data->no_addr; + return 0; +} + +struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block) +{ + unsigned long hash = (unsigned long)no_addr; + struct gfs2_skip_data data; + + data.no_addr = no_addr; + data.skipped = 0; + data.non_block = non_block; + return ilookup5(sb, hash, iget_test, &data); +} + +static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr, + int non_block) +{ + struct gfs2_skip_data data; + unsigned long hash = (unsigned long)no_addr; + + data.no_addr = no_addr; + data.skipped = 0; + data.non_block = non_block; + return iget5_locked(sb, hash, iget_test, iget_set, &data); +} + +/** + * gfs2_set_iop - Sets inode operations + * @inode: The inode with correct i_mode filled in + * + * GFS2 lookup code fills in vfs inode contents based on info obtained + * from directory entry inside gfs2_inode_lookup(). + */ + +static void gfs2_set_iop(struct inode *inode) +{ + struct gfs2_sbd *sdp = GFS2_SB(inode); + umode_t mode = inode->i_mode; + + if (S_ISREG(mode)) { + inode->i_op = &gfs2_file_iops; + if (gfs2_localflocks(sdp)) + inode->i_fop = &gfs2_file_fops_nolock; + else + inode->i_fop = &gfs2_file_fops; + } else if (S_ISDIR(mode)) { + inode->i_op = &gfs2_dir_iops; + if (gfs2_localflocks(sdp)) + inode->i_fop = &gfs2_dir_fops_nolock; + else + inode->i_fop = &gfs2_dir_fops; + } else if (S_ISLNK(mode)) { + inode->i_op = &gfs2_symlink_iops; + } else { + inode->i_op = &gfs2_file_iops; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + } +} + +/** + * gfs2_inode_lookup - Lookup an inode + * @sb: The super block + * @no_addr: The inode number + * @type: The type of the inode + * non_block: Can we block on inodes that are being freed? + * + * Returns: A VFS inode, or an error + */ + +struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, + u64 no_addr, u64 no_formal_ino, int non_block) +{ + struct inode *inode; + struct gfs2_inode *ip; + struct gfs2_glock *io_gl = NULL; + int error; + + inode = gfs2_iget(sb, no_addr, non_block); + ip = GFS2_I(inode); + + if (!inode) + return ERR_PTR(-ENOBUFS); + + if (inode->i_state & I_NEW) { + struct gfs2_sbd *sdp = GFS2_SB(inode); + ip->i_no_formal_ino = no_formal_ino; + + error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); + if (unlikely(error)) + goto fail; + ip->i_gl->gl_object = ip; + + error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl); + if (unlikely(error)) + goto fail_put; + + set_bit(GIF_INVALID, &ip->i_flags); + error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); + if (unlikely(error)) + goto fail_iopen; + + ip->i_iopen_gh.gh_gl->gl_object = ip; + gfs2_glock_put(io_gl); + io_gl = NULL; + + if (type == DT_UNKNOWN) { + /* Inode glock must be locked already */ + error = gfs2_inode_refresh(GFS2_I(inode)); + if (error) + goto fail_refresh; + } else { + inode->i_mode = DT2IF(type); + } + + gfs2_set_iop(inode); + unlock_new_inode(inode); + } + + return inode; + +fail_refresh: + ip->i_iopen_gh.gh_gl->gl_object = NULL; + gfs2_glock_dq_uninit(&ip->i_iopen_gh); +fail_iopen: + if (io_gl) + gfs2_glock_put(io_gl); +fail_put: + ip->i_gl->gl_object = NULL; + gfs2_glock_put(ip->i_gl); +fail: + iget_failed(inode); + return ERR_PTR(error); +} + +struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, + u64 *no_formal_ino, unsigned int blktype) +{ + struct super_block *sb = sdp->sd_vfs; + struct gfs2_holder i_gh; + struct inode *inode = NULL; + int error; + + /* Must not read in block until block type is verified */ + error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh); + if (error) + return ERR_PTR(error); + + error = gfs2_check_blk_type(sdp, no_addr, blktype); + if (error) + goto fail; + + inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1); + if (IS_ERR(inode)) + goto fail; + + /* Two extra checks for NFS only */ + if (no_formal_ino) { + error = -ESTALE; + if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino) + goto fail_iput; + + error = -EIO; + if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) + goto fail_iput; + + error = 0; + } + +fail: + gfs2_glock_dq_uninit(&i_gh); + return error ? ERR_PTR(error) : inode; +fail_iput: + iput(inode); + goto fail; +} + + +struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) +{ + struct qstr qstr; + struct inode *inode; + gfs2_str2qstr(&qstr, name); + inode = gfs2_lookupi(dip, &qstr, 1); + /* gfs2_lookupi has inconsistent callers: vfs + * related routines expect NULL for no entry found, + * gfs2_lookup_simple callers expect ENOENT + * and do not check for NULL. + */ + if (inode == NULL) + return ERR_PTR(-ENOENT); + else + return inode; +} + + +/** + * gfs2_lookupi - Look up a filename in a directory and return its inode + * @d_gh: An initialized holder for the directory glock + * @name: The name of the inode to look for + * @is_root: If 1, ignore the caller's permissions + * @i_gh: An uninitialized holder for the new inode glock + * + * This can be called via the VFS filldir function when NFS is doing + * a readdirplus and the inode which its intending to stat isn't + * already in cache. In this case we must not take the directory glock + * again, since the readdir call will have already taken that lock. + * + * Returns: errno + */ + +struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, + int is_root) +{ + struct super_block *sb = dir->i_sb; + struct gfs2_inode *dip = GFS2_I(dir); + struct gfs2_holder d_gh; + int error = 0; + struct inode *inode = NULL; + int unlock = 0; + + if (!name->len || name->len > GFS2_FNAMESIZE) + return ERR_PTR(-ENAMETOOLONG); + + if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) || + (name->len == 2 && memcmp(name->name, "..", 2) == 0 && + dir == sb->s_root->d_inode)) { + igrab(dir); + return dir; + } + + if (gfs2_glock_is_locked_by_me(dip->i_gl) == NULL) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + return ERR_PTR(error); + unlock = 1; + } + + if (!is_root) { + error = gfs2_permission(dir, MAY_EXEC, 0); + if (error) + goto out; + } + + inode = gfs2_dir_search(dir, name); + if (IS_ERR(inode)) + error = PTR_ERR(inode); +out: + if (unlock) + gfs2_glock_dq_uninit(&d_gh); + if (error == -ENOENT) + return NULL; + return inode ? inode : ERR_PTR(error); +} + +/** + * create_ok - OK to create a new on-disk inode here? + * @dip: Directory in which dinode is to be created + * @name: Name of new dinode + * @mode: + * + * Returns: errno + */ + +static int create_ok(struct gfs2_inode *dip, const struct qstr *name, + unsigned int mode) +{ + int error; + + error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); + if (error) + return error; + + /* Don't create entries in an unlinked directory */ + if (!dip->i_inode.i_nlink) + return -ENOENT; + + error = gfs2_dir_check(&dip->i_inode, name, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + return -EEXIST; + default: + return error; + } + + if (dip->i_entries == (u32)-1) + return -EFBIG; + if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) + return -EMLINK; + + return 0; +} + +static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, + unsigned int *uid, unsigned int *gid) +{ + if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && + (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { + if (S_ISDIR(*mode)) + *mode |= S_ISUID; + else if (dip->i_inode.i_uid != current_fsuid()) + *mode &= ~07111; + *uid = dip->i_inode.i_uid; + } else + *uid = current_fsuid(); + + if (dip->i_inode.i_mode & S_ISGID) { + if (S_ISDIR(*mode)) + *mode |= S_ISGID; + *gid = dip->i_inode.i_gid; + } else + *gid = current_fsgid(); +} + +static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + int error; + + if (gfs2_alloc_get(dip) == NULL) + return -ENOMEM; + + dip->i_alloc->al_requested = RES_DINODE; + error = gfs2_inplace_reserve(dip); + if (error) + goto out; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); + if (error) + goto out_ipreserv; + + error = gfs2_alloc_di(dip, no_addr, generation); + + gfs2_trans_end(sdp); + +out_ipreserv: + gfs2_inplace_release(dip); +out: + gfs2_alloc_put(dip); + return error; +} + +/** + * init_dinode - Fill in a new dinode structure + * @dip: the directory this inode is being created in + * @gl: The glock covering the new inode + * @inum: the inode number + * @mode: the file permissions + * @uid: + * @gid: + * + */ + +static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, + const struct gfs2_inum_host *inum, unsigned int mode, + unsigned int uid, unsigned int gid, + const u64 *generation, dev_t dev, struct buffer_head **bhp) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct gfs2_dinode *di; + struct buffer_head *dibh; + struct timespec tv = CURRENT_TIME; + + dibh = gfs2_meta_new(gl, inum->no_addr); + gfs2_trans_add_bh(gl, dibh, 1); + gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + di = (struct gfs2_dinode *)dibh->b_data; + + di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino); + di->di_num.no_addr = cpu_to_be64(inum->no_addr); + di->di_mode = cpu_to_be32(mode); + di->di_uid = cpu_to_be32(uid); + di->di_gid = cpu_to_be32(gid); + di->di_nlink = 0; + di->di_size = 0; + di->di_blocks = cpu_to_be64(1); + di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); + di->di_major = cpu_to_be32(MAJOR(dev)); + di->di_minor = cpu_to_be32(MINOR(dev)); + di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); + di->di_generation = cpu_to_be64(*generation); + di->di_flags = 0; + + if (S_ISREG(mode)) { + if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || + gfs2_tune_get(sdp, gt_new_files_jdata)) + di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); + } else if (S_ISDIR(mode)) { + di->di_flags |= cpu_to_be32(dip->i_diskflags & + GFS2_DIF_INHERIT_JDATA); + } + + di->__pad1 = 0; + di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); + di->di_height = 0; + di->__pad2 = 0; + di->__pad3 = 0; + di->di_depth = 0; + di->di_entries = 0; + memset(&di->__pad4, 0, sizeof(di->__pad4)); + di->di_eattr = 0; + di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); + di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); + di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); + memset(&di->di_reserved, 0, sizeof(di->di_reserved)); + + set_buffer_uptodate(dibh); + + *bhp = dibh; +} + +static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, + unsigned int mode, const struct gfs2_inum_host *inum, + const u64 *generation, dev_t dev, struct buffer_head **bhp) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + unsigned int uid, gid; + int error; + + munge_mode_uid_gid(dip, &mode, &uid, &gid); + if (!gfs2_alloc_get(dip)) + return -ENOMEM; + + error = gfs2_quota_lock(dip, uid, gid); + if (error) + goto out; + + error = gfs2_quota_check(dip, uid, gid); + if (error) + goto out_quota; + + error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); + if (error) + goto out_quota; + + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); + gfs2_quota_change(dip, +1, uid, gid); + gfs2_trans_end(sdp); + +out_quota: + gfs2_quota_unlock(dip); +out: + gfs2_alloc_put(dip); + return error; +} + +static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, + struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct gfs2_alloc *al; + int alloc_required; + struct buffer_head *dibh; + int error; + + al = gfs2_alloc_get(dip); + if (!al) + return -ENOMEM; + + error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); + if (alloc_required < 0) + goto fail_quota_locks; + if (alloc_required) { + error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); + if (error) + goto fail_quota_locks; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(dip); + if (error) + goto fail_quota_locks; + + error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + + al->al_rgd->rd_length + + 2 * RES_DINODE + + RES_STATFS + RES_QUOTA, 0); + if (error) + goto fail_ipreserv; + } else { + error = gfs2_trans_begin(sdp, RES_LEAF + 2 * RES_DINODE, 0); + if (error) + goto fail_quota_locks; + } + + error = gfs2_dir_add(&dip->i_inode, name, ip); + if (error) + goto fail_end_trans; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + ip->i_inode.i_nlink = 1; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + return 0; + +fail_end_trans: + gfs2_trans_end(sdp); + +fail_ipreserv: + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + +fail_quota_locks: + gfs2_quota_unlock(dip); + +fail: + gfs2_alloc_put(dip); + return error; +} + +static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, + const struct qstr *qstr) +{ + int err; + size_t len; + void *value; + char *name; + + err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, + &name, &value, &len); + + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0, + GFS2_EATYPE_SECURITY); + kfree(value); + kfree(name); + + return err; +} + +/** + * gfs2_createi - Create a new inode + * @ghs: An array of two holders + * @name: The name of the new file + * @mode: the permissions on the new inode + * + * @ghs[0] is an initialized holder for the directory + * @ghs[1] is the holder for the inode lock + * + * If the return value is not NULL, the glocks on both the directory and the new + * file are held. A transaction has been started and an inplace reservation + * is held, as well. + * + * Returns: An inode + */ + +struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, + unsigned int mode, dev_t dev) +{ + struct inode *inode = NULL; + struct gfs2_inode *dip = ghs->gh_gl->gl_object; + struct inode *dir = &dip->i_inode; + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; + int error; + u64 generation; + struct buffer_head *bh = NULL; + + if (!name->len || name->len > GFS2_FNAMESIZE) + return ERR_PTR(-ENAMETOOLONG); + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq(ghs); + if (error) + goto fail; + + error = create_ok(dip, name, mode); + if (error) + goto fail_gunlock; + + error = alloc_dinode(dip, &inum.no_addr, &generation); + if (error) + goto fail_gunlock; + inum.no_formal_ino = generation; + + error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); + if (error) + goto fail_gunlock; + + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); + if (error) + goto fail_gunlock2; + + inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, + inum.no_formal_ino, 0); + if (IS_ERR(inode)) + goto fail_gunlock2; + + error = gfs2_inode_refresh(GFS2_I(inode)); + if (error) + goto fail_gunlock2; + + error = gfs2_acl_create(dip, inode); + if (error) + goto fail_gunlock2; + + error = gfs2_security_init(dip, GFS2_I(inode), name); + if (error) + goto fail_gunlock2; + + error = link_dinode(dip, name, GFS2_I(inode)); + if (error) + goto fail_gunlock2; + + if (bh) + brelse(bh); + return inode; + +fail_gunlock2: + gfs2_glock_dq_uninit(ghs + 1); + if (inode && !IS_ERR(inode)) + iput(inode); +fail_gunlock: + gfs2_glock_dq(ghs); +fail: + if (bh) + brelse(bh); + return ERR_PTR(error); +} /** * gfs2_create - Create a file * @dir: The directory in which to create the file @@ -1000,6 +1669,48 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags) return error; } +static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +{ + struct inode *inode = &ip->i_inode; + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + setattr_copy(inode, attr); + mark_inode_dirty(inode); + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + return 0; +} + +/** + * gfs2_setattr_simple - + * @ip: + * @attr: + * + * Returns: errno + */ + +int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +{ + int error; + + if (current->journal_info) + return __gfs2_setattr_simple(ip, attr); + + error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0); + if (error) + return error; + + error = __gfs2_setattr_simple(ip, attr); + gfs2_trans_end(GFS2_SB(&ip->i_inode)); + return error; +} + static int setattr_chown(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); From 9eed04cd99b0a497cf0da22658808a7f5b10d734 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 9 May 2011 14:11:40 +0100 Subject: [PATCH 26/32] GFS2: Move final part of inode.c into super.c Now inode.c is empty. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 36 ------------------------------------ fs/gfs2/super.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b95ee5dc46ca..9076fbe23416 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -36,39 +36,3 @@ #include "util.h" -void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) -{ - struct gfs2_dinode *str = buf; - - str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); - str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); - str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); - str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(ip->i_inode.i_uid); - str->di_gid = cpu_to_be32(ip->i_inode.i_gid); - str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); - str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); - str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); - str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); - str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); - str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); - - str->di_goal_meta = cpu_to_be64(ip->i_goal); - str->di_goal_data = cpu_to_be64(ip->i_goal); - str->di_generation = cpu_to_be64(ip->i_generation); - - str->di_flags = cpu_to_be32(ip->i_diskflags); - str->di_height = cpu_to_be16(ip->i_height); - str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && - !(ip->i_diskflags & GFS2_DIF_EXHASH) ? - GFS2_FORMAT_DE : 0); - str->di_depth = cpu_to_be16(ip->i_depth); - str->di_entries = cpu_to_be32(ip->i_entries); - - str->di_eattr = cpu_to_be64(ip->i_eattr); - str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); - str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); - str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); -} diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3061ac64f81e..ed540e7018be 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -701,6 +701,42 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) mutex_unlock(&sdp->sd_freeze_lock); } +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) +{ + struct gfs2_dinode *str = buf; + + str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); + str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); + str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); + str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); + str->di_mode = cpu_to_be32(ip->i_inode.i_mode); + str->di_uid = cpu_to_be32(ip->i_inode.i_uid); + str->di_gid = cpu_to_be32(ip->i_inode.i_gid); + str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); + str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); + str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); + str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); + str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); + + str->di_goal_meta = cpu_to_be64(ip->i_goal); + str->di_goal_data = cpu_to_be64(ip->i_goal); + str->di_generation = cpu_to_be64(ip->i_generation); + + str->di_flags = cpu_to_be32(ip->i_diskflags); + str->di_height = cpu_to_be16(ip->i_height); + str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + !(ip->i_diskflags & GFS2_DIF_EXHASH) ? + GFS2_FORMAT_DE : 0); + str->di_depth = cpu_to_be16(ip->i_depth); + str->di_entries = cpu_to_be32(ip->i_entries); + + str->di_eattr = cpu_to_be64(ip->i_eattr); + str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); + str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); + str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); +} /** * gfs2_write_inode - Make sure the inode is stable on the disk From 64ea5402581485237d3b6e3a0cf2b364ad8bd580 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 10 May 2011 13:09:53 +0100 Subject: [PATCH 27/32] GFS2: Inode.c is empty now, remove it Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 2 +- fs/gfs2/inode.c | 38 -------------------------------------- 2 files changed, 1 insertion(+), 39 deletions(-) delete mode 100644 fs/gfs2/inode.c diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index f3d23ef4e876..d22edbb0108f 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -1,7 +1,7 @@ ccflags-y := -I$(src) obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ - glops.o inode.o log.o lops.o main.o meta_io.o \ + glops.o log.o lops.o main.o meta_io.o \ aops.o dentry.o export.o file.o \ ops_fstype.o ops_inode.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c deleted file mode 100644 index 9076fbe23416..000000000000 --- a/fs/gfs2/inode.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "acl.h" -#include "bmap.h" -#include "dir.h" -#include "xattr.h" -#include "glock.h" -#include "glops.h" -#include "inode.h" -#include "log.h" -#include "meta_io.h" -#include "quota.h" -#include "rgrp.h" -#include "trans.h" -#include "util.h" - - From 2ab9cd1c63b519e37b21b504376822be983badba Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 10 May 2011 13:12:49 +0100 Subject: [PATCH 28/32] GFS2: Rename ops_inode.c to inode.c This is the final part of the ops_inode.c/inode.c reordering. We are left with a single file called inode.c which now contains all the inode operations, as expected. Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 2 +- fs/gfs2/{ops_inode.c => inode.c} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename fs/gfs2/{ops_inode.c => inode.c} (100%) diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index d22edbb0108f..86128202384f 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ glops.o log.o lops.o main.o meta_io.o \ aops.o dentry.o export.o file.o \ - ops_fstype.o ops_inode.o quota.o \ + ops_fstype.o inode.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/inode.c similarity index 100% rename from fs/gfs2/ops_inode.c rename to fs/gfs2/inode.c From 32e471ef1057e812856739d26b4a87d929fb8aa1 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 10 May 2011 15:01:59 +0100 Subject: [PATCH 29/32] GFS2: Use UUID field in generic superblock The VFS superblock structure now has a UUID field, so we can use that in preference to the UUID field in the GFS2 superblock now. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 - fs/gfs2/ops_fstype.c | 32 +++++++++++++------------------- fs/gfs2/sys.c | 6 ++++-- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 69a63823f7c5..0a064e91ac70 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -488,7 +488,6 @@ struct gfs2_sb_host { char sb_lockproto[GFS2_LOCKNAME_LEN]; char sb_locktable[GFS2_LOCKNAME_LEN]; - u8 sb_uuid[16]; }; /* diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index d3c69eb91c74..8ac9ae189b53 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) * changed. */ -static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) +static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) { + struct gfs2_sb_host *sb = &sdp->sd_sb; + if (sb->sb_magic != GFS2_MAGIC || sb->sb_type != GFS2_METATYPE_SB) { if (!silent) @@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error) unlock_page(page); } -static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) +static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) { + struct gfs2_sb_host *sb = &sdp->sd_sb; + struct super_block *s = sdp->sd_vfs; const struct gfs2_sb *str = buf; sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); @@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); - memcpy(sb->sb_uuid, str->sb_uuid, 16); + memcpy(s->s_uuid, str->sb_uuid, 16); } /** @@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) * Returns: 0 on success or error */ -static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) +static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) { struct super_block *sb = sdp->sd_vfs; struct gfs2_sb *p; @@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) return -EIO; } p = kmap(page); - gfs2_sb_in(&sdp->sd_sb, p); + gfs2_sb_in(sdp, p); kunmap(page); __free_page(page); - return 0; + return gfs2_check_sb(sdp, silent); } /** @@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) unsigned int x; int error; - error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent); if (error) { if (!silent) fs_err(sdp, "can't read superblock\n"); return error; } - error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); - if (error) - return error; - sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT; sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; @@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent) /* Try to autodetect */ if (!proto[0] || !table[0]) { - error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent); if (error) return error; - error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); - if (error) - goto out; - if (!proto[0]) proto = sdp->sd_sb.sb_lockproto; if (!table[0]) @@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent) while ((table = strchr(table, '/'))) *table = '_'; -out: return error; } @@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent if (sdp->sd_args.ar_statfs_quantum) { sdp->sd_tune.gt_statfs_slow = 0; sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; - } - else { + } else { sdp->sd_tune.gt_statfs_slow = 1; sdp->sd_tune.gt_statfs_quantum = 30; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 748ccb557c18..e20eab37bc80 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid) static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) { - const u8 *uuid = sdp->sd_sb.sb_uuid; + struct super_block *s = sdp->sd_vfs; + const u8 *uuid = s->s_uuid; buf[0] = '\0'; if (!gfs2_uuid_valid(uuid)) return 0; @@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env) { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - const u8 *uuid = sdp->sd_sb.sb_uuid; + struct super_block *s = sdp->sd_vfs; + const u8 *uuid = s->s_uuid; add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); From e2d0a13bba051d7a9618b0952d91fac68175a71a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 13 May 2011 09:55:55 +0100 Subject: [PATCH 30/32] GFS2: Clean up mkdir This moves the initialisation of the directory into the inode creation functions to avoid having to duplicate the lookup of the inode's buffer. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 77 +++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a783a7a8c0ca..4aee9dd464f7 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -413,6 +413,22 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) return error; } +static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *parent) +{ + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); + + gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); + dent->de_inum = di->di_num; /* already GFS2 endian */ + dent->de_type = cpu_to_be16(DT_DIR); + + dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); + gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); + gfs2_inum_out(parent, dent); + dent->de_type = cpu_to_be16(DT_DIR); + +} + /** * init_dinode - Fill in a new dinode structure * @dip: the directory this inode is being created in @@ -454,16 +470,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); di->di_generation = cpu_to_be64(*generation); di->di_flags = 0; - - if (S_ISREG(mode)) { - if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || - gfs2_tune_get(sdp, gt_new_files_jdata)) - di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - } else if (S_ISDIR(mode)) { - di->di_flags |= cpu_to_be32(dip->i_diskflags & - GFS2_DIF_INHERIT_JDATA); - } - di->__pad1 = 0; di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); di->di_height = 0; @@ -478,6 +484,19 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); + if (S_ISREG(mode)) { + if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || + gfs2_tune_get(sdp, gt_new_files_jdata)) + di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); + } else if (S_ISDIR(mode)) { + di->di_flags |= cpu_to_be32(dip->i_diskflags & + GFS2_DIF_INHERIT_JDATA); + di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); + di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); + di->di_entries = cpu_to_be32(2); + gfs2_init_dir(dibh, dip); + } + set_buffer_uptodate(dibh); *bhp = dibh; @@ -568,7 +587,9 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - ip->i_inode.i_nlink = 1; + inc_nlink(&ip->i_inode); + if (S_ISDIR(ip->i_inode.i_mode)) + inc_nlink(&ip->i_inode); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1178,12 +1199,10 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - struct gfs2_inode *dip = GFS2_I(dir), *ip; + struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_holder ghs[2]; struct inode *inode; - struct buffer_head *dibh; - int error; gfs2_holder_init(dip->i_gl, 0, 0, ghs); @@ -1193,36 +1212,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) return PTR_ERR(inode); } - ip = ghs[1].gh_gl->gl_object; - - ip->i_inode.i_nlink = 2; - i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); - ip->i_diskflags |= GFS2_DIF_JDATA; - ip->i_entries = 2; - - error = gfs2_meta_inode_buffer(ip, &dibh); - - if (!gfs2_assert_withdraw(sdp, !error)) { - struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; - struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); - dent->de_inum = di->di_num; /* already GFS2 endian */ - dent->de_type = cpu_to_be16(DT_DIR); - di->di_entries = cpu_to_be32(1); - - dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); - gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); - - gfs2_inum_out(dip, dent); - dent->de_type = cpu_to_be16(DT_DIR); - - gfs2_dinode_out(ip, di); - - brelse(dibh); - } - gfs2_trans_end(sdp); if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); From 160b4026dc3e75c0693d0123eca805e88cd200b6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 13 May 2011 10:34:59 +0100 Subject: [PATCH 31/32] GFS2: Clean up symlink creation This moves the symlink specific parts of inode creation into the function where we initialise the rest of the dinode. As a result we have one less place where we need to look up the inode's buffer. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 65 ++++++++++++++++++++++--------------------------- fs/gfs2/inode.h | 3 --- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 4aee9dd464f7..c2a3d9cd0bc8 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -443,7 +443,8 @@ static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *par static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, const struct gfs2_inum_host *inum, unsigned int mode, unsigned int uid, unsigned int gid, - const u64 *generation, dev_t dev, struct buffer_head **bhp) + const u64 *generation, dev_t dev, const char *symname, + unsigned size, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; @@ -462,7 +463,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_uid = cpu_to_be32(uid); di->di_gid = cpu_to_be32(gid); di->di_nlink = 0; - di->di_size = 0; + di->di_size = cpu_to_be64(size); di->di_blocks = cpu_to_be64(1); di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); di->di_major = cpu_to_be32(MAJOR(dev)); @@ -483,18 +484,24 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); - - if (S_ISREG(mode)) { + + switch(mode & S_IFMT) { + case S_IFREG: if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || gfs2_tune_get(sdp, gt_new_files_jdata)) di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - } else if (S_ISDIR(mode)) { + break; + case S_IFDIR: di->di_flags |= cpu_to_be32(dip->i_diskflags & GFS2_DIF_INHERIT_JDATA); di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); di->di_entries = cpu_to_be32(2); gfs2_init_dir(dibh, dip); + break; + case S_IFLNK: + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); + break; } set_buffer_uptodate(dibh); @@ -504,7 +511,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, unsigned int mode, const struct gfs2_inum_host *inum, - const u64 *generation, dev_t dev, struct buffer_head **bhp) + const u64 *generation, dev_t dev, const char *symname, + unsigned int size, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); unsigned int uid, gid; @@ -526,7 +534,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if (error) goto out_quota; - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp); gfs2_quota_change(dip, +1, uid, gid); gfs2_trans_end(sdp); @@ -651,8 +659,10 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, * Returns: An inode */ -struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode, dev_t dev) +static struct inode *gfs2_createi(struct gfs2_holder *ghs, + const struct qstr *name, unsigned int mode, + dev_t dev, const char *symname, + unsigned int size) { struct inode *inode = NULL; struct gfs2_inode *dip = ghs->gh_gl->gl_object; @@ -685,7 +695,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock; - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh); if (error) goto fail_gunlock2; @@ -745,7 +755,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, gfs2_holder_init(dip->i_gl, 0, 0, ghs); for (;;) { - inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0, NULL, 0); if (!IS_ERR(inode)) { gfs2_trans_end(sdp); if (dip->i_alloc->al_rgd) @@ -1140,40 +1150,24 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) static int gfs2_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct gfs2_inode *dip = GFS2_I(dir), *ip; + struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_holder ghs[2]; struct inode *inode; - struct buffer_head *dibh; - int size; - int error; + unsigned int size; - /* Must be stuffed with a null terminator for gfs2_follow_link() */ size = strlen(symname); if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) return -ENAMETOOLONG; gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0, symname, size); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); } - ip = ghs[1].gh_gl->gl_object; - - i_size_write(inode, size); - - error = gfs2_meta_inode_buffer(ip, &dibh); - - if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(ip, dibh->b_data); - memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, - size); - brelse(dibh); - } - gfs2_trans_end(sdp); if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); @@ -1206,7 +1200,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0, NULL, 0); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); @@ -1245,7 +1239,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, mode, dev); + inode = gfs2_createi(ghs, &dentry->d_name, mode, dev, NULL, 0); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); @@ -1572,7 +1566,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) struct gfs2_inode *ip = GFS2_I(dentry->d_inode); struct gfs2_holder i_gh; struct buffer_head *dibh; - unsigned int x, size; + unsigned int size; char *buf; int error; @@ -1597,12 +1591,11 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) goto out; } - x = size + 1; - buf = kmalloc(x, GFP_NOFS); + buf = kzalloc(size + 1, GFP_NOFS); if (!buf) buf = ERR_PTR(-ENOMEM); else - memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x); + memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size); brelse(dibh); out: gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 7ed60aa1b61f..31606076f701 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -108,9 +108,6 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip); extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root); -extern struct inode *gfs2_createi(struct gfs2_holder *ghs, - const struct qstr *name, - unsigned int mode, dev_t dev); extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); From f2741d9898269e565c220ec295a8f5c3756c7585 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 13 May 2011 12:11:17 +0100 Subject: [PATCH 32/32] GFS2: Move all locking inside the inode creation function Now that there are no longer any exceptions to the normal inode creation code path, we can move the parts of the locking code which were duplicated in mkdir/mknod/create/symlink into the inode create function. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 184 ++++++++++++++---------------------------------- 1 file changed, 52 insertions(+), 132 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c2a3d9cd0bc8..03e0c529063e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -413,7 +413,8 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation) return error; } -static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *parent) +static void gfs2_init_dir(struct buffer_head *dibh, + const struct gfs2_inode *parent) { struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); @@ -431,12 +432,17 @@ static void gfs2_init_dir(struct buffer_head *dibh, const struct gfs2_inode *par /** * init_dinode - Fill in a new dinode structure - * @dip: the directory this inode is being created in + * @dip: The directory this inode is being created in * @gl: The glock covering the new inode - * @inum: the inode number - * @mode: the file permissions - * @uid: - * @gid: + * @inum: The inode number + * @mode: The file permissions + * @uid: The uid of the new inode + * @gid: The gid of the new inode + * @generation: The generation number of the new inode + * @dev: The device number (if a device node) + * @symname: The symlink destination (if a symlink) + * @size: The inode size (ignored for directories) + * @bhp: The buffer head (returned to caller) * */ @@ -644,29 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, } /** - * gfs2_createi - Create a new inode - * @ghs: An array of two holders - * @name: The name of the new file - * @mode: the permissions on the new inode + * gfs2_create_inode - Create a new inode + * @dir: The parent directory + * @dentry: The new dentry + * @mode: The permissions on the new inode + * @dev: For device nodes, this is the device number + * @symname: For symlinks, this is the link destination + * @size: The initial size of the inode (ignored for directories) * - * @ghs[0] is an initialized holder for the directory - * @ghs[1] is the holder for the inode lock - * - * If the return value is not NULL, the glocks on both the directory and the new - * file are held. A transaction has been started and an inplace reservation - * is held, as well. - * - * Returns: An inode + * Returns: 0 on success, or error code */ -static struct inode *gfs2_createi(struct gfs2_holder *ghs, - const struct qstr *name, unsigned int mode, - dev_t dev, const char *symname, - unsigned int size) +static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + unsigned int mode, dev_t dev, const char *symname, + unsigned int size) { + const struct qstr *name = &dentry->d_name; + struct gfs2_holder ghs[2]; struct inode *inode = NULL; - struct gfs2_inode *dip = ghs->gh_gl->gl_object; - struct inode *dir = &dip->i_inode; + struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; @@ -674,10 +676,9 @@ static struct inode *gfs2_createi(struct gfs2_holder *ghs, struct buffer_head *bh = NULL; if (!name->len || name->len > GFS2_FNAMESIZE) - return ERR_PTR(-ENAMETOOLONG); + return -ENAMETOOLONG; - gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); - error = gfs2_glock_nq(ghs); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; @@ -722,19 +723,29 @@ static struct inode *gfs2_createi(struct gfs2_holder *ghs, if (bh) brelse(bh); - return inode; + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + gfs2_glock_dq_uninit_m(2, ghs); + mark_inode_dirty(inode); + d_instantiate(dentry, inode); + return 0; fail_gunlock2: gfs2_glock_dq_uninit(ghs + 1); if (inode && !IS_ERR(inode)) iput(inode); fail_gunlock: - gfs2_glock_dq(ghs); + gfs2_glock_dq_uninit(ghs); fail: if (bh) brelse(bh); - return ERR_PTR(error); + return error; } + /** * gfs2_create - Create a file * @dir: The directory in which to create the file @@ -747,44 +758,23 @@ static struct inode *gfs2_createi(struct gfs2_holder *ghs, static int gfs2_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; struct inode *inode; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); + int ret; for (;;) { - inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0, NULL, 0); - if (!IS_ERR(inode)) { - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - gfs2_glock_dq_uninit_m(2, ghs); - mark_inode_dirty(inode); - break; - } else if (PTR_ERR(inode) != -EEXIST || - (nd && nd->flags & LOOKUP_EXCL)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } + ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0); + if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL))) + return ret; inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode) { - if (!IS_ERR(inode)) { - gfs2_holder_uninit(ghs); + if (!IS_ERR(inode)) break; - } else { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } + return PTR_ERR(inode); } } d_instantiate(dentry, inode); - return 0; } @@ -1150,36 +1140,14 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) static int gfs2_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; unsigned int size; size = strlen(symname); if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) return -ENAMETOOLONG; - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0, symname, size); - if (IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - - gfs2_glock_dq_uninit_m(2, ghs); - - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - - return 0; + return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size); } /** @@ -1193,31 +1161,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0, NULL, 0); - if (IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - - gfs2_glock_dq_uninit_m(2, ghs); - - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - - return 0; + return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0); } /** @@ -1225,38 +1169,14 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) * @dir: The directory in which the special file will reside * @dentry: The dentry of the special file * @mode: The mode of the special file - * @rdev: The device specification of the special file + * @dev: The device specification of the special file * */ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - struct gfs2_inode *dip = GFS2_I(dir); - struct gfs2_sbd *sdp = GFS2_SB(dir); - struct gfs2_holder ghs[2]; - struct inode *inode; - - gfs2_holder_init(dip->i_gl, 0, 0, ghs); - - inode = gfs2_createi(ghs, &dentry->d_name, mode, dev, NULL, 0); - if (IS_ERR(inode)) { - gfs2_holder_uninit(ghs); - return PTR_ERR(inode); - } - - gfs2_trans_end(sdp); - if (dip->i_alloc->al_rgd) - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); - gfs2_alloc_put(dip); - - gfs2_glock_dq_uninit_m(2, ghs); - - d_instantiate(dentry, inode); - mark_inode_dirty(inode); - - return 0; + return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0); } /*