btrfs: fix deadlock while running replace and defrag concurrently
This can be reproduced by fstests: btrfs/070

The scenario is as follows:

    replace worker thread              defrag thread
    ---------------------              -------------
    copy_nocow_pages_worker            btrfs_defrag_file
      copy_nocow_pages_for_inode         ...
                                         btrfs_writepages
    |A| lock_extent_bits                   extent_write_cache_pages
                                       |B|   lock_page
                                               __extent_writepage
              ...                                writepage_delalloc
                                                   find_lock_delalloc_range
                                       |B|           lock_extent_bits
      find_or_create_page
        pagecache_get_page
    |A| lock_page

This leads to an ABBA-pattern deadlock. To fix it:

o Change it to an AABB pattern, i.e. @unlock_extent_bits() before we
  @lock_page(). With that, @extent_read_full_page_nolock() is no longer
  called in a locked context, so switch back to @extent_read_full_page()
  to regain protection.

o Since we @unlock_extent_bits() earlier, by the time we reach
  @write_page_nocow() the extent may no longer point at the physical
  block we want, so we have to re-check it before the write.

Signed-off-by: Gui Hecheng <guihc.fnst@cn.fujitsu.com>
Tested-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
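To make the ABBA/AABB terminology concrete, here is a minimal user-space sketch (my illustration, not part of the patch): the two pthread mutexes, the thread function names, and the main() driver are invented stand-ins, with lock_a playing the extent lock (A) and lock_b the page lock (B).

/*
 * Minimal user-space sketch of the ABBA deadlock described above.
 * Build with: cc abba.c -o abba -pthread
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* "extent lock" */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* "page lock" */

/* Buggy ordering, as in the old replace worker: take A, then B. */
static void *replace_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_a);   /* lock_extent_bits() */
	pthread_mutex_lock(&lock_b);   /* lock_page() -- may deadlock */
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

/* The defrag path takes them the other way around: B, then A. */
static void *defrag_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_b);   /* lock_page() */
	pthread_mutex_lock(&lock_a);   /* lock_extent_bits() */
	pthread_mutex_unlock(&lock_a);
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

/*
 * The fixed worker drops A before taking B (the AABB pattern), then
 * revalidates what A protected, since it may have changed meanwhile.
 */
static void *replace_worker_fixed(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_a);
	pthread_mutex_unlock(&lock_a); /* unlock_extent_bits() before ... */
	pthread_mutex_lock(&lock_b);   /* ... lock_page() */
	/* revalidate here, cf. check_extent_to_block() in the patch */
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	/* Swap in replace_worker instead to risk the hang. */
	pthread_create(&t1, NULL, replace_worker_fixed, NULL);
	pthread_create(&t2, NULL, defrag_thread, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("no deadlock\n");
	return 0;
}

The patch applies the same classic "drop A, take B, revalidate" pattern, which is why it introduces the check_extent_to_block() helper shown in the first hunk below.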
parent 5f5bc6b1e2
commit 321592427c

1 changed file with 60 additions and 30 deletions
@@ -3310,6 +3310,50 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
 	scrub_pending_trans_workers_dec(sctx);
 }
 
+static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
+				 u64 logical)
+{
+	struct extent_state *cached_state = NULL;
+	struct btrfs_ordered_extent *ordered;
+	struct extent_io_tree *io_tree;
+	struct extent_map *em;
+	u64 lockstart = start, lockend = start + len - 1;
+	int ret = 0;
+
+	io_tree = &BTRFS_I(inode)->io_tree;
+
+	lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+	if (ordered) {
+		btrfs_put_ordered_extent(ordered);
+		ret = 1;
+		goto out_unlock;
+	}
+
+	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out_unlock;
+	}
+
+	/*
+	 * This extent does not actually cover the logical extent anymore,
+	 * move on to the next inode.
+	 */
+	if (em->block_start > logical ||
+	    em->block_start + em->block_len < logical + len) {
+		free_extent_map(em);
+		ret = 1;
+		goto out_unlock;
+	}
+	free_extent_map(em);
+
+out_unlock:
+	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+			     GFP_NOFS);
+	return ret;
+}
+
 static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 				      struct scrub_copy_nocow_ctx *nocow_ctx)
 {
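A note on the helper's return convention, as read from the hunk above: check_extent_to_block() re-takes the extent lock for the range, returns 1 when an ordered extent covers it or the extent map no longer spans the wanted logical block (the caller should skip the range), a negative errno when the extent lookup fails, and 0 when it is still safe to write. The callers added in the hunks below fold the positive "skip" case back into success with "ret = ret > 0 ? 0 : ret;".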
@@ -3318,13 +3362,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 	struct inode *inode;
 	struct page *page;
 	struct btrfs_root *local_root;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_map *em;
-	struct extent_state *cached_state = NULL;
 	struct extent_io_tree *io_tree;
 	u64 physical_for_dev_replace;
+	u64 nocow_ctx_logical;
 	u64 len = nocow_ctx->len;
-	u64 lockstart = offset, lockend = offset + len - 1;
 	unsigned long index;
 	int srcu_index;
 	int ret = 0;
@@ -3356,31 +3397,14 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 
 	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
 	io_tree = &BTRFS_I(inode)->io_tree;
+	nocow_ctx_logical = nocow_ctx->logical;
 
-	lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
-	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-	if (ordered) {
-		btrfs_put_ordered_extent(ordered);
-		goto out_unlock;
+	ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
+	if (ret) {
+		ret = ret > 0 ? 0 : ret;
+		goto out;
 	}
-
-	em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto out_unlock;
-	}
-
-	/*
-	 * This extent does not actually cover the logical extent anymore,
-	 * move on to the next inode.
-	 */
-	if (em->block_start > nocow_ctx->logical ||
-	    em->block_start + em->block_len < nocow_ctx->logical + len) {
-		free_extent_map(em);
-		goto out_unlock;
-	}
-	free_extent_map(em);
 
 	while (len >= PAGE_CACHE_SIZE) {
 		index = offset >> PAGE_CACHE_SHIFT;
again:
@@ -3396,7 +3420,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 			goto next_page;
 		} else {
 			ClearPageError(page);
-			err = extent_read_full_page_nolock(io_tree, page,
+			err = extent_read_full_page(io_tree, page,
 							   btrfs_get_extent,
 							   nocow_ctx->mirror_num);
 			if (err) {
@@ -3421,6 +3445,14 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 				goto next_page;
 			}
 		}
+
+		ret = check_extent_to_block(inode, offset, len,
+					    nocow_ctx_logical);
+		if (ret) {
+			ret = ret > 0 ? 0 : ret;
+			goto next_page;
+		}
+
 		err = write_page_nocow(nocow_ctx->sctx,
 				       physical_for_dev_replace, page);
 		if (err)
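Because the extent lock is no longer held across the page read, the mapping may change between reading the page and writing it out; re-running check_extent_to_block() right before write_page_nocow() closes that window, which is the second point of the commit message.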
@@ -3434,12 +3466,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 
 		offset += PAGE_CACHE_SIZE;
 		physical_for_dev_replace += PAGE_CACHE_SIZE;
+		nocow_ctx_logical += PAGE_CACHE_SIZE;
 		len -= PAGE_CACHE_SIZE;
 	}
 	ret = COPY_COMPLETE;
-out_unlock:
-	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
-			     GFP_NOFS);
 out:
 	mutex_unlock(&inode->i_mutex);
 	iput(inode);