btrfs: fix dead lock while running replace and defrag concurrently
This can be reproduced by fstests: btrfs/070 The scenario is like the following: replace worker thread defrag thread --------------------- ------------- copy_nocow_pages_worker btrfs_defrag_file copy_nocow_pages_for_inode ... btrfs_writepages |A| lock_extent_bits extent_write_cache_pages |B| lock_page __extent_writepage ... writepage_delalloc find_lock_delalloc_range |B| lock_extent_bits find_or_create_page pagecache_get_page |A| lock_page This leads to an ABBA pattern deadlock. To fix it, o we just change it to an AABB pattern which means to @unlock_extent_bits() before we @lock_page(), and in this way the @extent_read_full_page_nolock() is no longer in an locked context, so change it back to @extent_read_full_page() to regain protection. o Since we @unlock_extent_bits() earlier, then before @write_page_nocow(), the extent may not really point at the physical block we want, so we have to check it before write. Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com> Tested-by: David Sterba <dsterba@suse.cz> Signed-off-by: Chris Mason <clm@fb.com>
This commit is contained in:
parent
5f5bc6b1e2
commit
321592427c
1 changed files with 60 additions and 30 deletions
|
@ -3310,6 +3310,50 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
|
|||
scrub_pending_trans_workers_dec(sctx);
|
||||
}
|
||||
|
||||
static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
|
||||
u64 logical)
|
||||
{
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct extent_io_tree *io_tree;
|
||||
struct extent_map *em;
|
||||
u64 lockstart = start, lockend = start + len - 1;
|
||||
int ret = 0;
|
||||
|
||||
io_tree = &BTRFS_I(inode)->io_tree;
|
||||
|
||||
lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
|
||||
if (ordered) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* This extent does not actually cover the logical extent anymore,
|
||||
* move on to the next inode.
|
||||
*/
|
||||
if (em->block_start > logical ||
|
||||
em->block_start + em->block_len < logical + len) {
|
||||
free_extent_map(em);
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
out_unlock:
|
||||
unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
|
||||
GFP_NOFS);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
|
||||
struct scrub_copy_nocow_ctx *nocow_ctx)
|
||||
{
|
||||
|
@ -3318,13 +3362,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
|
|||
struct inode *inode;
|
||||
struct page *page;
|
||||
struct btrfs_root *local_root;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct extent_map *em;
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct extent_io_tree *io_tree;
|
||||
u64 physical_for_dev_replace;
|
||||
u64 nocow_ctx_logical;
|
||||
u64 len = nocow_ctx->len;
|
||||
u64 lockstart = offset, lockend = offset + len - 1;
|
||||
unsigned long index;
|
||||
int srcu_index;
|
||||
int ret = 0;
|
||||
|
@ -3356,31 +3397,14 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
|
|||
|
||||
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
|
||||
io_tree = &BTRFS_I(inode)->io_tree;
|
||||
nocow_ctx_logical = nocow_ctx->logical;
|
||||
|
||||
lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
|
||||
if (ordered) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
goto out_unlock;
|
||||
ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
|
||||
if (ret) {
|
||||
ret = ret > 0 ? 0 : ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* This extent does not actually cover the logical extent anymore,
|
||||
* move on to the next inode.
|
||||
*/
|
||||
if (em->block_start > nocow_ctx->logical ||
|
||||
em->block_start + em->block_len < nocow_ctx->logical + len) {
|
||||
free_extent_map(em);
|
||||
goto out_unlock;
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
while (len >= PAGE_CACHE_SIZE) {
|
||||
index = offset >> PAGE_CACHE_SHIFT;
|
||||
again:
|
||||
|
@ -3396,7 +3420,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
|
|||
goto next_page;
|
||||
} else {
|
||||
ClearPageError(page);
|
||||
err = extent_read_full_page_nolock(io_tree, page,
|
||||
err = extent_read_full_page(io_tree, page,
|
||||
btrfs_get_extent,
|
||||
nocow_ctx->mirror_num);
|
||||
if (err) {
|
||||
|
@ -3421,6 +3445,14 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
|
|||
goto next_page;
|
||||
}
|
||||
}
|
||||
|
||||
ret = check_extent_to_block(inode, offset, len,
|
||||
nocow_ctx_logical);
|
||||
if (ret) {
|
||||
ret = ret > 0 ? 0 : ret;
|
||||
goto next_page;
|
||||
}
|
||||
|
||||
err = write_page_nocow(nocow_ctx->sctx,
|
||||
physical_for_dev_replace, page);
|
||||
if (err)
|
||||
|
@ -3434,12 +3466,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
|
|||
|
||||
offset += PAGE_CACHE_SIZE;
|
||||
physical_for_dev_replace += PAGE_CACHE_SIZE;
|
||||
nocow_ctx_logical += PAGE_CACHE_SIZE;
|
||||
len -= PAGE_CACHE_SIZE;
|
||||
}
|
||||
ret = COPY_COMPLETE;
|
||||
out_unlock:
|
||||
unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
|
||||
GFP_NOFS);
|
||||
out:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
iput(inode);
|
||||
|
|
Loading…
Reference in a new issue