Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "It turns out that we had two crc bugs when running fsx-linux in a
  loop.  Many thanks to Josef, Miao Xie, and Dave Sterba for nailing it
  all down.  Miao also has a new OOM fix in this v2 pull as well.

  Ilya fixed a regression Liu Bo found in the balance ioctls for pausing
  and resuming a running balance across drives.

  Josef's orphan truncate patch fixes an obscure corruption we'd see
  during xfstests.

  Arne's patches address problems with subvolume quotas.  Previously, if
  the user destroyed quota groups incorrectly, the FS would refuse to mount.

  The rest are smaller fixes and plugs for memory leaks."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (30 commits)
  Btrfs: fix repeated delalloc work allocation
  Btrfs: fix wrong max device number for single profile
  Btrfs: fix missed transaction->aborted check
  Btrfs: Add ACCESS_ONCE() to transaction->abort accesses
  Btrfs: put csums on the right ordered extent
  Btrfs: use right range to find checksum for compressed extents
  Btrfs: fix panic when recovering tree log
  Btrfs: do not allow logged extents to be merged or removed
  Btrfs: fix a regression in balance usage filter
  Btrfs: prevent qgroup destroy when there are still relations
  Btrfs: ignore orphan qgroup relations
  Btrfs: reorder locks and sanity checks in btrfs_ioctl_defrag
  Btrfs: fix unlock order in btrfs_ioctl_rm_dev
  Btrfs: fix unlock order in btrfs_ioctl_resize
  Btrfs: fix "mutually exclusive op is running" error code
  Btrfs: bring back balance pause/resume logic
  btrfs: update timestamps on truncate()
  btrfs: fix btrfs_cont_expand() freeing IS_ERR em
  Btrfs: fix a bug when llseek for delalloc bytes behind prealloc extents
  Btrfs: fix off-by-one in lseek
  ...
This commit is contained in:
Linus Torvalds 2013-01-25 10:55:21 -08:00
commit d7df025eb4
14 changed files with 300 additions and 98 deletions

View file

@ -3997,7 +3997,7 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
* We make the other tasks wait for the flush only when we can flush * We make the other tasks wait for the flush only when we can flush
* all things. * all things.
*/ */
if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
flushing = true; flushing = true;
space_info->flush = 1; space_info->flush = 1;
} }
@ -5560,7 +5560,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int empty_cluster = 2 * 1024 * 1024; int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info; struct btrfs_space_info *space_info;
int loop = 0; int loop = 0;
int index = 0; int index = __get_raid_index(data);
int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool found_uncached_bg = false; bool found_uncached_bg = false;
@ -6788,11 +6788,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
&wc->flags[level]); &wc->flags[level]);
if (ret < 0) { if (ret < 0) {
btrfs_tree_unlock_rw(eb, path->locks[level]); btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
return ret; return ret;
} }
BUG_ON(wc->refs[level] == 0); BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) { if (wc->refs[level] == 1) {
btrfs_tree_unlock_rw(eb, path->locks[level]); btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
return 1; return 1;
} }
} }

View file

@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
return 0; return 0;
if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
test_bit(EXTENT_FLAG_LOGGING, &next->flags))
return 0;
if (extent_map_end(prev) == next->start && if (extent_map_end(prev) == next->start &&
prev->flags == next->flags && prev->flags == next->flags &&
prev->bdev == next->bdev && prev->bdev == next->bdev &&
@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
if (!em) if (!em)
goto out; goto out;
list_move(&em->list, &tree->modified_extents); if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
list_move(&em->list, &tree->modified_extents);
em->generation = gen; em->generation = gen;
clear_bit(EXTENT_FLAG_PINNED, &em->flags); clear_bit(EXTENT_FLAG_PINNED, &em->flags);
em->mod_start = em->start; em->mod_start = em->start;
@ -280,6 +285,12 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
} }
/*
 * clear_em_logging - clear EXTENT_FLAG_LOGGING on @em, then try to merge it
 * @tree: extent map tree handed to try_merge_map()
 * @em:   extent map whose logging flag is cleared
 *
 * The flag is cleared before the merge attempt.  mergable_maps() refuses
 * any pair in which either map still has EXTENT_FLAG_LOGGING set.
 * NOTE(review): presumably try_merge_map() relies on mergable_maps(), which
 * is why the order of the two calls matters -- confirm against its body.
 */
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
try_merge_map(tree, em);
}
/** /**
* add_extent_mapping - add new extent map to the extent tree * add_extent_mapping - add new extent map to the extent tree
* @tree: tree to insert new map in * @tree: tree to insert new map in

View file

@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
int __init extent_map_init(void); int __init extent_map_init(void);
void extent_map_exit(void); void extent_map_exit(void);
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree, struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len); u64 start, u64 len);
#endif #endif

View file

@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
if (!contig) if (!contig)
offset = page_offset(bvec->bv_page) + bvec->bv_offset; offset = page_offset(bvec->bv_page) + bvec->bv_offset;
if (!contig && (offset >= ordered->file_offset + ordered->len || if (offset >= ordered->file_offset + ordered->len ||
offset < ordered->file_offset)) { offset < ordered->file_offset) {
unsigned long bytes_left; unsigned long bytes_left;
sums->len = this_sum_bytes; sums->len = this_sum_bytes;
this_sum_bytes = 0; this_sum_bytes = 0;

View file

@ -2241,6 +2241,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
if (lockend <= lockstart) if (lockend <= lockstart)
lockend = lockstart + root->sectorsize; lockend = lockstart + root->sectorsize;
lockend--;
len = lockend - lockstart + 1; len = lockend - lockstart + 1;
len = max_t(u64, len, root->sectorsize); len = max_t(u64, len, root->sectorsize);
@ -2307,9 +2308,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
} }
} }
*offset = start; if (!test_bit(EXTENT_FLAG_PREALLOC,
free_extent_map(em); &em->flags)) {
break; *offset = start;
free_extent_map(em);
break;
}
} }
} }

View file

@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
{ {
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info; struct btrfs_free_space *info;
int ret = 0; int ret;
bool re_search = false;
spin_lock(&ctl->tree_lock); spin_lock(&ctl->tree_lock);
again: again:
ret = 0;
if (!bytes) if (!bytes)
goto out_lock; goto out_lock;
@ -1879,17 +1881,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0); 1, 0);
if (!info) { if (!info) {
/* the tree logging code might be calling us before we /*
* have fully loaded the free space rbtree for this * If we found a partial bit of our free space in a
* block group. So it is possible the entry won't * bitmap but then couldn't find the other part this may
* be in the rbtree yet at all. The caching code * be a problem, so WARN about it.
* will make sure not to put it in the rbtree if
* the logging code has pinned it.
*/ */
WARN_ON(re_search);
goto out_lock; goto out_lock;
} }
} }
re_search = false;
if (!info->bitmap) { if (!info->bitmap) {
unlink_free_space(ctl, info); unlink_free_space(ctl, info);
if (offset == info->offset) { if (offset == info->offset) {
@ -1935,8 +1937,10 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
} }
ret = remove_from_bitmap(ctl, info, &offset, &bytes); ret = remove_from_bitmap(ctl, info, &offset, &bytes);
if (ret == -EAGAIN) if (ret == -EAGAIN) {
re_search = true;
goto again; goto again;
}
BUG_ON(ret); /* logic error */ BUG_ON(ret); /* logic error */
out_lock: out_lock:
spin_unlock(&ctl->tree_lock); spin_unlock(&ctl->tree_lock);

View file

@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
}; };
static int btrfs_setsize(struct inode *inode, loff_t newsize); static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode); static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode, static noinline int cow_file_range(struct inode *inode,
@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue; continue;
} }
nr_truncate++; nr_truncate++;
/* 1 for the orphan item deletion. */
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
ret = btrfs_orphan_add(trans, inode);
btrfs_end_transaction(trans, root);
if (ret)
goto out;
ret = btrfs_truncate(inode); ret = btrfs_truncate(inode);
} else { } else {
nr_unlink++; nr_unlink++;
@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
block_end - cur_offset, 0); block_end - cur_offset, 0);
if (IS_ERR(em)) { if (IS_ERR(em)) {
err = PTR_ERR(em); err = PTR_ERR(em);
em = NULL;
break; break;
} }
last_byte = min(extent_map_end(em), block_end); last_byte = min(extent_map_end(em), block_end);
@ -3748,16 +3761,27 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
return err; return err;
} }
static int btrfs_setsize(struct inode *inode, loff_t newsize) static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{ {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode); loff_t oldsize = i_size_read(inode);
loff_t newsize = attr->ia_size;
int mask = attr->ia_valid;
int ret; int ret;
if (newsize == oldsize) if (newsize == oldsize)
return 0; return 0;
/*
* The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
* special case where we need to update the times despite not having
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
if (newsize > oldsize) { if (newsize > oldsize) {
truncate_pagecache(inode, oldsize, newsize); truncate_pagecache(inode, oldsize, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize); ret = btrfs_cont_expand(inode, oldsize, newsize);
@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags); &BTRFS_I(inode)->runtime_flags);
/*
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion.
*/
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans))
return PTR_ERR(trans);
/*
* We need to do this in case we fail at _any_ point during the
* actual truncate. Once we do the truncate_setsize we could
* invalidate pages which forces any outstanding ordered io to
* be instantly completed which will give us extents that need
* to be truncated. If we fail to get an orphan inode down we
* could have left over extents that were never meant to live,
* so we need to guarantee from this point on that everything
* will be consistent.
*/
ret = btrfs_orphan_add(trans, inode);
btrfs_end_transaction(trans, root);
if (ret)
return ret;
/* we don't support swapfiles, so vmtruncate shouldn't fail */ /* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize); truncate_setsize(inode, newsize);
ret = btrfs_truncate(inode); ret = btrfs_truncate(inode);
if (ret && inode->i_nlink)
btrfs_orphan_del(NULL, inode);
} }
return ret; return ret;
@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err; return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
err = btrfs_setsize(inode, attr->ia_size); err = btrfs_setsize(inode, attr);
if (err) if (err)
return err; return err;
} }
@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
return em; return em;
if (em) { if (em) {
/* /*
* if our em maps to a hole, there might * if our em maps to
* actually be delalloc bytes behind it * - a hole or
* - a pre-alloc extent,
* there might actually be delalloc bytes behind it.
*/ */
if (em->block_start != EXTENT_MAP_HOLE) if (em->block_start != EXTENT_MAP_HOLE &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return em; return em;
else else
hole_em = em; hole_em = em;
@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
*/ */
em->block_start = hole_em->block_start; em->block_start = hole_em->block_start;
em->block_len = hole_len; em->block_len = hole_len;
if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
} else { } else {
em->start = range_start; em->start = range_start;
em->len = found; em->len = found;
@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
/* /*
* 1 for the truncate slack space * 1 for the truncate slack space
* 1 for the orphan item we're going to add
* 1 for the orphan item deletion
* 1 for updating the inode. * 1 for updating the inode.
*/ */
trans = btrfs_start_transaction(root, 4); trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
err = PTR_ERR(trans); err = PTR_ERR(trans);
goto out; goto out;
@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
min_size); min_size);
BUG_ON(ret); BUG_ON(ret);
ret = btrfs_orphan_add(trans, inode);
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
}
/* /*
* setattr is responsible for setting the ordered_data_close flag, * setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That * but that is only tested during the last file release. That
@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_orphan_del(trans, inode); ret = btrfs_orphan_del(trans, inode);
if (ret) if (ret)
err = ret; err = ret;
} else if (ret && inode->i_nlink > 0) {
/*
* Failed to do the truncate, remove us from the in memory
* orphan list.
*/
ret = btrfs_orphan_del(NULL, inode);
} }
if (trans) { if (trans) {
@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
*/ */
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{ {
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode; struct btrfs_inode *binode;
struct inode *inode; struct inode *inode;
struct btrfs_delalloc_work *work, *next; struct btrfs_delalloc_work *work, *next;
struct list_head works; struct list_head works;
struct list_head splice;
int ret = 0; int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY) if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS; return -EROFS;
INIT_LIST_HEAD(&works); INIT_LIST_HEAD(&works);
INIT_LIST_HEAD(&splice);
again:
spin_lock(&root->fs_info->delalloc_lock); spin_lock(&root->fs_info->delalloc_lock);
while (!list_empty(head)) { list_splice_init(&root->fs_info->delalloc_inodes, &splice);
binode = list_entry(head->next, struct btrfs_inode, while (!list_empty(&splice)) {
binode = list_entry(splice.next, struct btrfs_inode,
delalloc_inodes); delalloc_inodes);
list_del_init(&binode->delalloc_inodes);
inode = igrab(&binode->vfs_inode); inode = igrab(&binode->vfs_inode);
if (!inode) if (!inode)
list_del_init(&binode->delalloc_inodes); continue;
list_add_tail(&binode->delalloc_inodes,
&root->fs_info->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock); spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
if (!work) { if (unlikely(!work)) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
}
list_add_tail(&work->list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&work->work);
} }
list_add_tail(&work->list, &works);
btrfs_queue_worker(&root->fs_info->flush_workers,
&work->work);
cond_resched(); cond_resched();
spin_lock(&root->fs_info->delalloc_lock); spin_lock(&root->fs_info->delalloc_lock);
} }
spin_unlock(&root->fs_info->delalloc_lock); spin_unlock(&root->fs_info->delalloc_lock);
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work);
}
spin_lock(&root->fs_info->delalloc_lock);
if (!list_empty(&root->fs_info->delalloc_inodes)) {
spin_unlock(&root->fs_info->delalloc_lock);
goto again;
}
spin_unlock(&root->fs_info->delalloc_lock);
/* the filemap_flush will queue IO into the worker threads, but /* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that * we have to make sure the IO is actually started and that
* ordered extents get created before we return * ordered extents get created before we return
@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
atomic_read(&root->fs_info->async_delalloc_pages) == 0)); atomic_read(&root->fs_info->async_delalloc_pages) == 0));
} }
atomic_dec(&root->fs_info->async_submit_draining); atomic_dec(&root->fs_info->async_submit_draining);
return 0;
out: out:
list_for_each_entry_safe(work, next, &works, list) { list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list); list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work); btrfs_wait_and_free_delalloc_work(work);
} }
if (!list_empty_careful(&splice)) {
spin_lock(&root->fs_info->delalloc_lock);
list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
}
return ret; return ret;
} }

View file

@ -1339,7 +1339,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) { 1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
return -EINPROGRESS; mnt_drop_write_file(file);
return -EINVAL;
} }
mutex_lock(&root->fs_info->volume_mutex); mutex_lock(&root->fs_info->volume_mutex);
@ -1362,6 +1363,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
printk(KERN_INFO "btrfs: resizing devid %llu\n", printk(KERN_INFO "btrfs: resizing devid %llu\n",
(unsigned long long)devid); (unsigned long long)devid);
} }
device = btrfs_find_device(root->fs_info, devid, NULL, NULL); device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
if (!device) { if (!device) {
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@ -1369,9 +1371,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = -EINVAL; ret = -EINVAL;
goto out_free; goto out_free;
} }
if (device->fs_devices && device->fs_devices->seeding) {
if (!device->writeable) {
printk(KERN_INFO "btrfs: resizer unable to apply on " printk(KERN_INFO "btrfs: resizer unable to apply on "
"seeding device %llu\n", "readonly device %llu\n",
(unsigned long long)devid); (unsigned long long)devid);
ret = -EINVAL; ret = -EINVAL;
goto out_free; goto out_free;
@ -1443,8 +1446,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
kfree(vol_args); kfree(vol_args);
out: out:
mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret; return ret;
} }
@ -2095,13 +2098,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = inode_permission(inode, MAY_WRITE | MAY_EXEC); err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
if (err) if (err)
goto out_dput; goto out_dput;
/* check if subvolume may be deleted by a non-root user */
err = btrfs_may_delete(dir, dentry, 1);
if (err)
goto out_dput;
} }
/* check if subvolume may be deleted by a user */
err = btrfs_may_delete(dir, dentry, 1);
if (err)
goto out_dput;
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
err = -EINVAL; err = -EINVAL;
goto out_dput; goto out_dput;
@ -2183,19 +2186,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
struct btrfs_ioctl_defrag_range_args *range; struct btrfs_ioctl_defrag_range_args *range;
int ret; int ret;
if (btrfs_root_readonly(root)) ret = mnt_want_write_file(file);
return -EROFS; if (ret)
return ret;
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) { 1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
return -EINPROGRESS; mnt_drop_write_file(file);
return -EINVAL;
} }
ret = mnt_want_write_file(file);
if (ret) { if (btrfs_root_readonly(root)) {
atomic_set(&root->fs_info->mutually_exclusive_operation_running, ret = -EROFS;
0); goto out;
return ret;
} }
switch (inode->i_mode & S_IFMT) { switch (inode->i_mode & S_IFMT) {
@ -2247,8 +2251,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EINVAL; ret = -EINVAL;
} }
out: out:
mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret; return ret;
} }
@ -2263,7 +2267,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) { 1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
return -EINPROGRESS; return -EINVAL;
} }
mutex_lock(&root->fs_info->volume_mutex); mutex_lock(&root->fs_info->volume_mutex);
@ -2300,7 +2304,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
1)) { 1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file); mnt_drop_write_file(file);
return -EINPROGRESS; return -EINVAL;
} }
mutex_lock(&root->fs_info->volume_mutex); mutex_lock(&root->fs_info->volume_mutex);
@ -2316,8 +2320,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
kfree(vol_args); kfree(vol_args);
out: out:
mutex_unlock(&root->fs_info->volume_mutex); mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret; return ret;
} }
@ -3437,8 +3441,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs; struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl; struct btrfs_balance_control *bctl;
bool need_unlock; /* for mut. excl. ops lock */
int ret; int ret;
int need_to_clear_lock = 0;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
@ -3447,14 +3451,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
if (ret) if (ret)
return ret; return ret;
mutex_lock(&fs_info->volume_mutex); again:
if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
need_unlock = true;
goto locked;
}
/*
* mut. excl. ops lock is locked. Three possibilities:
* (1) some other op is running
* (2) balance is running
* (3) balance is paused -- special case (think resume)
*/
mutex_lock(&fs_info->balance_mutex); mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl) {
/* this is either (2) or (3) */
if (!atomic_read(&fs_info->balance_running)) {
mutex_unlock(&fs_info->balance_mutex);
if (!mutex_trylock(&fs_info->volume_mutex))
goto again;
mutex_lock(&fs_info->balance_mutex);
if (fs_info->balance_ctl &&
!atomic_read(&fs_info->balance_running)) {
/* this is (3) */
need_unlock = false;
goto locked;
}
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
goto again;
} else {
/* this is (2) */
mutex_unlock(&fs_info->balance_mutex);
ret = -EINPROGRESS;
goto out;
}
} else {
/* this is (1) */
mutex_unlock(&fs_info->balance_mutex);
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
ret = -EINVAL;
goto out;
}
locked:
BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
if (arg) { if (arg) {
bargs = memdup_user(arg, sizeof(*bargs)); bargs = memdup_user(arg, sizeof(*bargs));
if (IS_ERR(bargs)) { if (IS_ERR(bargs)) {
ret = PTR_ERR(bargs); ret = PTR_ERR(bargs);
goto out; goto out_unlock;
} }
if (bargs->flags & BTRFS_BALANCE_RESUME) { if (bargs->flags & BTRFS_BALANCE_RESUME) {
@ -3474,13 +3525,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
bargs = NULL; bargs = NULL;
} }
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, if (fs_info->balance_ctl) {
1)) {
pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
ret = -EINPROGRESS; ret = -EINPROGRESS;
goto out_bargs; goto out_bargs;
} }
need_to_clear_lock = 1;
bctl = kzalloc(sizeof(*bctl), GFP_NOFS); bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) { if (!bctl) {
@ -3501,11 +3549,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
} }
do_balance: do_balance:
ret = btrfs_balance(bctl, bargs);
/* /*
* bctl is freed in __cancel_balance or in free_fs_info if * Ownership of bctl and mutually_exclusive_operation_running
* restriper was paused all the way until unmount * goes to btrfs_balance. bctl is freed in __cancel_balance,
* or, if restriper was paused all the way until unmount, in
* free_fs_info. mutually_exclusive_operation_running is
* cleared in __cancel_balance.
*/ */
need_unlock = false;
ret = btrfs_balance(bctl, bargs);
if (arg) { if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs))) if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT; ret = -EFAULT;
@ -3513,12 +3567,12 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
out_bargs: out_bargs:
kfree(bargs); kfree(bargs);
out: out_unlock:
if (need_to_clear_lock)
atomic_set(&root->fs_info->mutually_exclusive_operation_running,
0);
mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex); mutex_unlock(&fs_info->volume_mutex);
if (need_unlock)
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
out:
mnt_drop_write_file(file); mnt_drop_write_file(file);
return ret; return ret;
} }
@ -3698,6 +3752,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto drop_write; goto drop_write;
} }
if (!sa->qgroupid) {
ret = -EINVAL;
goto out;
}
trans = btrfs_join_transaction(root); trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) { if (IS_ERR(trans)) {
ret = PTR_ERR(trans); ret = PTR_ERR(trans);

View file

@ -379,6 +379,13 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
ret = add_relation_rb(fs_info, found_key.objectid, ret = add_relation_rb(fs_info, found_key.objectid,
found_key.offset); found_key.offset);
if (ret == -ENOENT) {
printk(KERN_WARNING
"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
(unsigned long long)found_key.objectid,
(unsigned long long)found_key.offset);
ret = 0; /* ignore the error */
}
if (ret) if (ret)
goto out; goto out;
next2: next2:
@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid) struct btrfs_fs_info *fs_info, u64 qgroupid)
{ {
struct btrfs_root *quota_root; struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0; int ret = 0;
quota_root = fs_info->quota_root; quota_root = fs_info->quota_root;
if (!quota_root) if (!quota_root)
return -EINVAL; return -EINVAL;
/* check if there are no relations to this qgroup */
spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, qgroupid);
if (qgroup) {
if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
spin_unlock(&fs_info->qgroup_lock);
return -EBUSY;
}
}
spin_unlock(&fs_info->qgroup_lock);
ret = del_qgroup_item(trans, quota_root, qgroupid); ret = del_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock); spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid); del_qgroup_rb(quota_root->fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock); spin_unlock(&fs_info->qgroup_lock);
return ret; return ret;

View file

@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
(unsigned long)nce->ino); (unsigned long)nce->ino);
if (!nce_head) { if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
if (!nce_head) if (!nce_head) {
kfree(nce);
return -ENOMEM; return -ENOMEM;
}
INIT_LIST_HEAD(nce_head); INIT_LIST_HEAD(nce_head);
ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);

View file

@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
function, line, errstr); function, line, errstr);
return; return;
} }
trans->transaction->aborted = errno; ACCESS_ONCE(trans->transaction->aborted) = errno;
__btrfs_std_error(root->fs_info, function, line, errno, NULL); __btrfs_std_error(root->fs_info, function, line, errno, NULL);
} }
/* /*

View file

@ -1468,7 +1468,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction; goto cleanup_transaction;
} }
if (cur_trans->aborted) { /* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted; ret = cur_trans->aborted;
goto cleanup_transaction; goto cleanup_transaction;
} }
@ -1574,6 +1575,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_event(cur_trans->writer_wait, wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1); atomic_read(&cur_trans->num_writers) == 1);
/* ->aborted might be set after the previous check, so check it */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto cleanup_transaction;
}
/* /*
* the reloc mutex makes sure that we stop * the reloc mutex makes sure that we stop
* the balancing code from coming in and moving * the balancing code from coming in and moving
@ -1657,6 +1663,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
goto cleanup_transaction; goto cleanup_transaction;
} }
/*
* The tasks which save the space cache and inode cache may also
* update ->aborted, check it.
*/
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
mutex_unlock(&root->fs_info->tree_log_mutex);
mutex_unlock(&root->fs_info->reloc_mutex);
goto cleanup_transaction;
}
btrfs_prepare_extent_commit(trans, root); btrfs_prepare_extent_commit(trans, root);
cur_trans = root->fs_info->running_transaction; cur_trans = root->fs_info->running_transaction;

View file

@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (skip_csum) if (skip_csum)
return 0; return 0;
if (em->compress_type) {
csum_offset = 0;
csum_len = block_len;
}
/* block start is already adjusted for the file extent offset. */ /* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root, ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
em->block_start + csum_offset, em->block_start + csum_offset,
@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
em = list_entry(extents.next, struct extent_map, list); em = list_entry(extents.next, struct extent_map, list);
list_del_init(&em->list); list_del_init(&em->list);
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
/* /*
* If we had an error we just need to delete everybody from our * If we had an error we just need to delete everybody from our
* private list. * private list.
*/ */
if (ret) { if (ret) {
clear_em_logging(tree, em);
free_extent_map(em); free_extent_map(em);
continue; continue;
} }
@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
write_unlock(&tree->lock); write_unlock(&tree->lock);
ret = log_one_extent(trans, inode, root, em, path); ret = log_one_extent(trans, inode, root, em, path);
free_extent_map(em);
write_lock(&tree->lock); write_lock(&tree->lock);
clear_em_logging(tree, em);
free_extent_map(em);
} }
WARN_ON(!list_empty(&extents)); WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock); write_unlock(&tree->lock);

View file

@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} }
} else { } else {
ret = btrfs_get_bdev_and_sb(device_path, ret = btrfs_get_bdev_and_sb(device_path,
FMODE_READ | FMODE_EXCL, FMODE_WRITE | FMODE_EXCL,
root->fs_info->bdev_holder, 0, root->fs_info->bdev_holder, 0,
&bdev, &bh); &bdev, &bh);
if (ret) if (ret)
@ -2614,7 +2614,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
cache = btrfs_lookup_block_group(fs_info, chunk_offset); cache = btrfs_lookup_block_group(fs_info, chunk_offset);
chunk_used = btrfs_block_group_used(&cache->item); chunk_used = btrfs_block_group_used(&cache->item);
user_thresh = div_factor_fine(cache->key.offset, bargs->usage); if (bargs->usage == 0)
user_thresh = 0;
else if (bargs->usage > 100)
user_thresh = cache->key.offset;
else
user_thresh = div_factor_fine(cache->key.offset,
bargs->usage);
if (chunk_used < user_thresh) if (chunk_used < user_thresh)
ret = 0; ret = 0;
@ -2959,6 +2966,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
unset_balance_control(fs_info); unset_balance_control(fs_info);
ret = del_balance_item(fs_info->tree_root); ret = del_balance_item(fs_info->tree_root);
BUG_ON(ret); BUG_ON(ret);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
} }
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@ -3138,8 +3147,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
out: out:
if (bctl->flags & BTRFS_BALANCE_RESUME) if (bctl->flags & BTRFS_BALANCE_RESUME)
__cancel_balance(fs_info); __cancel_balance(fs_info);
else else {
kfree(bctl); kfree(bctl);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
return ret; return ret;
} }
@ -3156,7 +3167,6 @@ static int balance_kthread(void *data)
ret = btrfs_balance(fs_info->balance_ctl, NULL); ret = btrfs_balance(fs_info->balance_ctl, NULL);
} }
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex); mutex_unlock(&fs_info->volume_mutex);
@ -3179,7 +3189,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
return 0; return 0;
} }
WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
if (IS_ERR(tsk)) if (IS_ERR(tsk))
return PTR_ERR(tsk); return PTR_ERR(tsk);
@ -3233,6 +3242,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex); mutex_lock(&fs_info->balance_mutex);
@ -3496,7 +3507,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
{ 1, 1, 2, 2, 2, 2 /* raid1 */ }, { 1, 1, 2, 2, 2, 2 /* raid1 */ },
{ 1, 2, 1, 1, 1, 2 /* dup */ }, { 1, 2, 1, 1, 1, 2 /* dup */ },
{ 1, 1, 0, 2, 1, 1 /* raid0 */ }, { 1, 1, 0, 2, 1, 1 /* raid0 */ },
{ 1, 1, 0, 1, 1, 1 /* single */ }, { 1, 1, 1, 1, 1, 1 /* single */ },
}; };
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,