Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  jbd: fix race between write_metadata_buffer and get_write_access
  ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle()
  jbd: Fix a race between checkpointing code and journal_get_write_access()
  ext3: Fix truncation of symlinks after failed write
  jbd: Fail to load a journal if it is too short
This commit is contained in:
commit
2bc20d09b0
5 changed files with 67 additions and 64 deletions
|
@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp,
|
|||
struct buffer_head *bh = NULL;
|
||||
|
||||
map_bh.b_state = 0;
|
||||
err = ext3_get_blocks_handle(NULL, inode, blk, 1,
|
||||
&map_bh, 0, 0);
|
||||
err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
|
||||
if (err > 0) {
|
||||
pgoff_t index = map_bh.b_blocknr >>
|
||||
(PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||
|
|
|
@ -788,7 +788,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
|
|||
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
|
||||
sector_t iblock, unsigned long maxblocks,
|
||||
struct buffer_head *bh_result,
|
||||
int create, int extend_disksize)
|
||||
int create)
|
||||
{
|
||||
int err = -EIO;
|
||||
int offsets[4];
|
||||
|
@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
|
|||
if (!err)
|
||||
err = ext3_splice_branch(handle, inode, iblock,
|
||||
partial, indirect_blks, count);
|
||||
/*
|
||||
* i_disksize growing is protected by truncate_mutex. Don't forget to
|
||||
* protect it if you're about to implement concurrent
|
||||
* ext3_get_block() -bzzz
|
||||
*/
|
||||
if (!err && extend_disksize && inode->i_size > ei->i_disksize)
|
||||
ei->i_disksize = inode->i_size;
|
||||
mutex_unlock(&ei->truncate_mutex);
|
||||
if (err)
|
||||
goto cleanup;
|
||||
|
@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
|
|||
}
|
||||
|
||||
ret = ext3_get_blocks_handle(handle, inode, iblock,
|
||||
max_blocks, bh_result, create, 0);
|
||||
max_blocks, bh_result, create);
|
||||
if (ret > 0) {
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
ret = 0;
|
||||
|
@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
|
|||
dummy.b_blocknr = -1000;
|
||||
buffer_trace_init(&dummy.b_history);
|
||||
err = ext3_get_blocks_handle(handle, inode, block, 1,
|
||||
&dummy, create, 1);
|
||||
&dummy, create);
|
||||
/*
|
||||
* ext3_get_blocks_handle() returns number of blocks
|
||||
* mapped. 0 in case of a HOLE.
|
||||
|
@ -1193,15 +1186,16 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
|
|||
* i_size_read because we hold i_mutex.
|
||||
*
|
||||
* Add inode to orphan list in case we crash before truncate
|
||||
* finishes.
|
||||
* finishes. Do this only if ext3_can_truncate() agrees so
|
||||
* that orphan processing code is happy.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
if (pos + len > inode->i_size && ext3_can_truncate(inode))
|
||||
ext3_orphan_add(handle, inode);
|
||||
ext3_journal_stop(handle);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
ext3_truncate(inode);
|
||||
}
|
||||
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
|
||||
goto retry;
|
||||
|
@ -1287,7 +1281,7 @@ static int ext3_ordered_write_end(struct file *file,
|
|||
* There may be allocated blocks outside of i_size because
|
||||
* we failed to copy some data. Prepare for truncate.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
if (pos + len > inode->i_size && ext3_can_truncate(inode))
|
||||
ext3_orphan_add(handle, inode);
|
||||
ret2 = ext3_journal_stop(handle);
|
||||
if (!ret)
|
||||
|
@ -1296,7 +1290,7 @@ static int ext3_ordered_write_end(struct file *file,
|
|||
page_cache_release(page);
|
||||
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
ext3_truncate(inode);
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
|
@ -1315,14 +1309,14 @@ static int ext3_writeback_write_end(struct file *file,
|
|||
* There may be allocated blocks outside of i_size because
|
||||
* we failed to copy some data. Prepare for truncate.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
if (pos + len > inode->i_size && ext3_can_truncate(inode))
|
||||
ext3_orphan_add(handle, inode);
|
||||
ret = ext3_journal_stop(handle);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
ext3_truncate(inode);
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
|
@ -1358,7 +1352,7 @@ static int ext3_journalled_write_end(struct file *file,
|
|||
* There may be allocated blocks outside of i_size because
|
||||
* we failed to copy some data. Prepare for truncate.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
if (pos + len > inode->i_size && ext3_can_truncate(inode))
|
||||
ext3_orphan_add(handle, inode);
|
||||
EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
|
||||
if (inode->i_size > EXT3_I(inode)->i_disksize) {
|
||||
|
@ -1375,7 +1369,7 @@ static int ext3_journalled_write_end(struct file *file,
|
|||
page_cache_release(page);
|
||||
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
ext3_truncate(inode);
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
|
|
|
@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction,
|
|||
struct page *new_page;
|
||||
unsigned int new_offset;
|
||||
struct buffer_head *bh_in = jh2bh(jh_in);
|
||||
journal_t *journal = transaction->t_journal;
|
||||
|
||||
/*
|
||||
* The buffer really shouldn't be locked: only the current committing
|
||||
|
@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
|
|||
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
|
||||
|
||||
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
|
||||
/* keep subsequent assertions sane */
|
||||
new_bh->b_state = 0;
|
||||
init_buffer(new_bh, NULL, NULL);
|
||||
atomic_set(&new_bh->b_count, 1);
|
||||
new_jh = journal_add_journal_head(new_bh); /* This sleeps */
|
||||
|
||||
/*
|
||||
* If a new transaction has already done a buffer copy-out, then
|
||||
|
@ -361,14 +367,6 @@ int journal_write_metadata_buffer(transaction_t *transaction,
|
|||
kunmap_atomic(mapped_data, KM_USER0);
|
||||
}
|
||||
|
||||
/* keep subsequent assertions sane */
|
||||
new_bh->b_state = 0;
|
||||
init_buffer(new_bh, NULL, NULL);
|
||||
atomic_set(&new_bh->b_count, 1);
|
||||
jbd_unlock_bh_state(bh_in);
|
||||
|
||||
new_jh = journal_add_journal_head(new_bh); /* This sleeps */
|
||||
|
||||
set_bh_page(new_bh, new_page, new_offset);
|
||||
new_jh->b_transaction = NULL;
|
||||
new_bh->b_size = jh2bh(jh_in)->b_size;
|
||||
|
@ -385,7 +383,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
|
|||
* copying is moved to the transaction's shadow queue.
|
||||
*/
|
||||
JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
|
||||
journal_file_buffer(jh_in, transaction, BJ_Shadow);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
__journal_file_buffer(jh_in, transaction, BJ_Shadow);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
jbd_unlock_bh_state(bh_in);
|
||||
|
||||
JBUFFER_TRACE(new_jh, "file as BJ_IO");
|
||||
journal_file_buffer(new_jh, transaction, BJ_IO);
|
||||
|
||||
|
@ -848,6 +850,12 @@ static int journal_reset(journal_t *journal)
|
|||
|
||||
first = be32_to_cpu(sb->s_first);
|
||||
last = be32_to_cpu(sb->s_maxlen);
|
||||
if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
|
||||
printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
|
||||
first, last);
|
||||
journal_fail_superblock(journal);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
journal->j_first = first;
|
||||
journal->j_last = last;
|
||||
|
|
|
@ -489,34 +489,15 @@ void journal_unlock_updates (journal_t *journal)
|
|||
wake_up(&journal->j_wait_transaction_locked);
|
||||
}
|
||||
|
||||
/*
|
||||
* Report any unexpected dirty buffers which turn up. Normally those
|
||||
* indicate an error, but they can occur if the user is running (say)
|
||||
* tune2fs to modify the live filesystem, so we need the option of
|
||||
* continuing as gracefully as possible. #
|
||||
*
|
||||
* The caller should already hold the journal lock and
|
||||
* j_list_lock spinlock: most callers will need those anyway
|
||||
* in order to probe the buffer's journaling state safely.
|
||||
*/
|
||||
static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
|
||||
static void warn_dirty_buffer(struct buffer_head *bh)
|
||||
{
|
||||
int jlist;
|
||||
char b[BDEVNAME_SIZE];
|
||||
|
||||
/* If this buffer is one which might reasonably be dirty
|
||||
* --- ie. data, or not part of this journal --- then
|
||||
* we're OK to leave it alone, but otherwise we need to
|
||||
* move the dirty bit to the journal's own internal
|
||||
* JBDDirty bit. */
|
||||
jlist = jh->b_jlist;
|
||||
|
||||
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
|
||||
jlist == BJ_Shadow || jlist == BJ_Forget) {
|
||||
struct buffer_head *bh = jh2bh(jh);
|
||||
|
||||
if (test_clear_buffer_dirty(bh))
|
||||
set_buffer_jbddirty(bh);
|
||||
}
|
||||
printk(KERN_WARNING
|
||||
"JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
|
||||
"There's a risk of filesystem corruption in case of system "
|
||||
"crash.\n",
|
||||
bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -583,14 +564,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
|
|||
if (jh->b_next_transaction)
|
||||
J_ASSERT_JH(jh, jh->b_next_transaction ==
|
||||
transaction);
|
||||
warn_dirty_buffer(bh);
|
||||
}
|
||||
/*
|
||||
* In any case we need to clean the dirty flag and we must
|
||||
* do it under the buffer lock to be sure we don't race
|
||||
* with running write-out.
|
||||
*/
|
||||
JBUFFER_TRACE(jh, "Unexpected dirty buffer");
|
||||
jbd_unexpected_dirty_buffer(jh);
|
||||
JBUFFER_TRACE(jh, "Journalling dirty buffer");
|
||||
clear_buffer_dirty(bh);
|
||||
set_buffer_jbddirty(bh);
|
||||
}
|
||||
|
||||
unlock_buffer(bh);
|
||||
|
@ -826,6 +809,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
|
|||
J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
|
||||
|
||||
if (jh->b_transaction == NULL) {
|
||||
/*
|
||||
* Previous journal_forget() could have left the buffer
|
||||
* with jbddirty bit set because it was being committed. When
|
||||
* the commit finished, we've filed the buffer for
|
||||
* checkpointing and marked it dirty. Now we are reallocating
|
||||
* the buffer so the transaction freeing it must have
|
||||
* committed and so it's safe to clear the dirty bit.
|
||||
*/
|
||||
clear_buffer_dirty(jh2bh(jh));
|
||||
jh->b_transaction = transaction;
|
||||
|
||||
/* first access by this transaction */
|
||||
|
@ -1782,8 +1774,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
|
|||
|
||||
if (jh->b_cp_transaction) {
|
||||
JBUFFER_TRACE(jh, "on running+cp transaction");
|
||||
/*
|
||||
* We don't want to write the buffer anymore, clear the
|
||||
* bit so that we don't confuse checks in
|
||||
* __journal_file_buffer
|
||||
*/
|
||||
clear_buffer_dirty(bh);
|
||||
__journal_file_buffer(jh, transaction, BJ_Forget);
|
||||
clear_buffer_jbddirty(bh);
|
||||
may_free = 0;
|
||||
} else {
|
||||
JBUFFER_TRACE(jh, "on running transaction");
|
||||
|
@ -2041,12 +2038,17 @@ void __journal_file_buffer(struct journal_head *jh,
|
|||
if (jh->b_transaction && jh->b_jlist == jlist)
|
||||
return;
|
||||
|
||||
/* The following list of buffer states needs to be consistent
|
||||
* with __jbd_unexpected_dirty_buffer()'s handling of dirty
|
||||
* state. */
|
||||
|
||||
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
|
||||
jlist == BJ_Shadow || jlist == BJ_Forget) {
|
||||
/*
|
||||
* For metadata buffers, we track dirty bit in buffer_jbddirty
|
||||
* instead of buffer_dirty. We should not see a dirty bit set
|
||||
* here because we clear it in do_get_write_access but e.g.
|
||||
* tune2fs can modify the sb and set the dirty bit at any time
|
||||
* so we try to gracefully handle that.
|
||||
*/
|
||||
if (buffer_dirty(bh))
|
||||
warn_dirty_buffer(bh);
|
||||
if (test_clear_buffer_dirty(bh) ||
|
||||
test_clear_buffer_jbddirty(bh))
|
||||
was_dirty = 1;
|
||||
|
|
|
@ -874,7 +874,7 @@ struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
|
|||
struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
|
||||
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
|
||||
sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
|
||||
int create, int extend_disksize);
|
||||
int create);
|
||||
|
||||
extern struct inode *ext3_iget(struct super_block *, unsigned long);
|
||||
extern int ext3_write_inode (struct inode *, int);
|
||||
|
|
Loading…
Reference in a new issue