Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: fix race between write_metadata_buffer and get_write_access
  ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle()
  jbd: Fix a race between checkpointing code and journal_get_write_access()
  ext3: Fix truncation of symlinks after failed write
  jbd: Fail to load a journal if it is too short
This commit is contained in:
Linus Torvalds 2009-07-27 12:12:10 -07:00
commit 2bc20d09b0
5 changed files with 67 additions and 64 deletions

View file

@@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp,
struct buffer_head *bh = NULL;
map_bh.b_state = 0;
err = ext3_get_blocks_handle(NULL, inode, blk, 1,
&map_bh, 0, 0);
err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
if (err > 0) {
pgoff_t index = map_bh.b_blocknr >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);

View file

@@ -788,7 +788,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
sector_t iblock, unsigned long maxblocks,
struct buffer_head *bh_result,
int create, int extend_disksize)
int create)
{
int err = -EIO;
int offsets[4];
@@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
if (!err)
err = ext3_splice_branch(handle, inode, iblock,
partial, indirect_blks, count);
/*
* i_disksize growing is protected by truncate_mutex. Don't forget to
* protect it if you're about to implement concurrent
* ext3_get_block() -bzzz
*/
if (!err && extend_disksize && inode->i_size > ei->i_disksize)
ei->i_disksize = inode->i_size;
mutex_unlock(&ei->truncate_mutex);
if (err)
goto cleanup;
@@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
}
ret = ext3_get_blocks_handle(handle, inode, iblock,
max_blocks, bh_result, create, 0);
max_blocks, bh_result, create);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
@@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
dummy.b_blocknr = -1000;
buffer_trace_init(&dummy.b_history);
err = ext3_get_blocks_handle(handle, inode, block, 1,
&dummy, create, 1);
&dummy, create);
/*
* ext3_get_blocks_handle() returns number of blocks
* mapped. 0 in case of a HOLE.
@@ -1193,15 +1186,16 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
* i_size_read because we hold i_mutex.
*
* Add inode to orphan list in case we crash before truncate
* finishes.
* finishes. Do this only if ext3_can_truncate() agrees so
* that orphan processing code is happy.
*/
if (pos + len > inode->i_size)
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ext3_journal_stop(handle);
unlock_page(page);
page_cache_release(page);
if (pos + len > inode->i_size)
vmtruncate(inode, inode->i_size);
ext3_truncate(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -1287,7 +1281,7 @@ static int ext3_ordered_write_end(struct file *file,
* There may be allocated blocks outside of i_size because
* we failed to copy some data. Prepare for truncate.
*/
if (pos + len > inode->i_size)
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ret2 = ext3_journal_stop(handle);
if (!ret)
@@ -1296,7 +1290,7 @@ static int ext3_ordered_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
vmtruncate(inode, inode->i_size);
ext3_truncate(inode);
return ret ? ret : copied;
}
@@ -1315,14 +1309,14 @@ static int ext3_writeback_write_end(struct file *file,
* There may be allocated blocks outside of i_size because
* we failed to copy some data. Prepare for truncate.
*/
if (pos + len > inode->i_size)
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ret = ext3_journal_stop(handle);
unlock_page(page);
page_cache_release(page);
if (pos + len > inode->i_size)
vmtruncate(inode, inode->i_size);
ext3_truncate(inode);
return ret ? ret : copied;
}
@@ -1358,7 +1352,7 @@ static int ext3_journalled_write_end(struct file *file,
* There may be allocated blocks outside of i_size because
* we failed to copy some data. Prepare for truncate.
*/
if (pos + len > inode->i_size)
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
if (inode->i_size > EXT3_I(inode)->i_disksize) {
@@ -1375,7 +1369,7 @@ static int ext3_journalled_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
vmtruncate(inode, inode->i_size);
ext3_truncate(inode);
return ret ? ret : copied;
}

View file

@@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
journal_t *journal = transaction->t_journal;
/*
* The buffer really shouldn't be locked: only the current committing
@@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
/* keep subsequent assertions sane */
new_bh->b_state = 0;
init_buffer(new_bh, NULL, NULL);
atomic_set(&new_bh->b_count, 1);
new_jh = journal_add_journal_head(new_bh); /* This sleeps */
/*
* If a new transaction has already done a buffer copy-out, then
@@ -361,14 +367,6 @@ int journal_write_metadata_buffer(transaction_t *transaction,
kunmap_atomic(mapped_data, KM_USER0);
}
/* keep subsequent assertions sane */
new_bh->b_state = 0;
init_buffer(new_bh, NULL, NULL);
atomic_set(&new_bh->b_count, 1);
jbd_unlock_bh_state(bh_in);
new_jh = journal_add_journal_head(new_bh); /* This sleeps */
set_bh_page(new_bh, new_page, new_offset);
new_jh->b_transaction = NULL;
new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -385,7 +383,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
* copying is moved to the transaction's shadow queue.
*/
JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
journal_file_buffer(jh_in, transaction, BJ_Shadow);
spin_lock(&journal->j_list_lock);
__journal_file_buffer(jh_in, transaction, BJ_Shadow);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh_in);
JBUFFER_TRACE(new_jh, "file as BJ_IO");
journal_file_buffer(new_jh, transaction, BJ_IO);
@@ -848,6 +850,12 @@ static int journal_reset(journal_t *journal)
first = be32_to_cpu(sb->s_first);
last = be32_to_cpu(sb->s_maxlen);
if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
first, last);
journal_fail_superblock(journal);
return -EINVAL;
}
journal->j_first = first;
journal->j_last = last;

View file

@@ -489,34 +489,15 @@ void journal_unlock_updates (journal_t *journal)
wake_up(&journal->j_wait_transaction_locked);
}
/*
* Report any unexpected dirty buffers which turn up. Normally those
* indicate an error, but they can occur if the user is running (say)
* tune2fs to modify the live filesystem, so we need the option of
* continuing as gracefully as possible.
*
* The caller should already hold the journal lock and
* j_list_lock spinlock: most callers will need those anyway
* in order to probe the buffer's journaling state safely.
*/
static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
static void warn_dirty_buffer(struct buffer_head *bh)
{
int jlist;
char b[BDEVNAME_SIZE];
/* If this buffer is one which might reasonably be dirty
* --- ie. data, or not part of this journal --- then
* we're OK to leave it alone, but otherwise we need to
* move the dirty bit to the journal's own internal
* JBDDirty bit. */
jlist = jh->b_jlist;
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
jlist == BJ_Shadow || jlist == BJ_Forget) {
struct buffer_head *bh = jh2bh(jh);
if (test_clear_buffer_dirty(bh))
set_buffer_jbddirty(bh);
}
printk(KERN_WARNING
"JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
"There's a risk of filesystem corruption in case of system "
"crash.\n",
bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}
/*
@@ -583,14 +564,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (jh->b_next_transaction)
J_ASSERT_JH(jh, jh->b_next_transaction ==
transaction);
warn_dirty_buffer(bh);
}
/*
* In any case we need to clean the dirty flag and we must
* do it under the buffer lock to be sure we don't race
* with running write-out.
*/
JBUFFER_TRACE(jh, "Unexpected dirty buffer");
jbd_unexpected_dirty_buffer(jh);
JBUFFER_TRACE(jh, "Journalling dirty buffer");
clear_buffer_dirty(bh);
set_buffer_jbddirty(bh);
}
unlock_buffer(bh);
@@ -826,6 +809,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
if (jh->b_transaction == NULL) {
/*
* Previous journal_forget() could have left the buffer
* with jbddirty bit set because it was being committed. When
* the commit finished, we've filed the buffer for
* checkpointing and marked it dirty. Now we are reallocating
* the buffer so the transaction freeing it must have
* committed and so it's safe to clear the dirty bit.
*/
clear_buffer_dirty(jh2bh(jh));
jh->b_transaction = transaction;
/* first access by this transaction */
@@ -1782,8 +1774,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
* __journal_file_buffer
*/
clear_buffer_dirty(bh);
__journal_file_buffer(jh, transaction, BJ_Forget);
clear_buffer_jbddirty(bh);
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
@@ -2041,12 +2038,17 @@ void __journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction && jh->b_jlist == jlist)
return;
/* The following list of buffer states needs to be consistent
* with jbd_unexpected_dirty_buffer()'s handling of dirty
* state. */
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
jlist == BJ_Shadow || jlist == BJ_Forget) {
/*
* For metadata buffers, we track dirty bit in buffer_jbddirty
* instead of buffer_dirty. We should not see a dirty bit set
* here because we clear it in do_get_write_access but e.g.
* tune2fs can modify the sb and set the dirty bit at any time
* so we try to gracefully handle that.
*/
if (buffer_dirty(bh))
warn_dirty_buffer(bh);
if (test_clear_buffer_dirty(bh) ||
test_clear_buffer_jbddirty(bh))
was_dirty = 1;

View file

@@ -874,7 +874,7 @@ struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
int create, int extend_disksize);
int create);
extern struct inode *ext3_iget(struct super_block *, unsigned long);
extern int ext3_write_inode (struct inode *, int);