Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (62 commits)
  Btrfs: use larger system chunks
  Btrfs: add a delalloc mutex to inodes for delalloc reservations
  Btrfs: space leak tracepoints
  Btrfs: protect orphan block rsv with spin_lock
  Btrfs: add allocator tracepoints
  Btrfs: don't call btrfs_throttle in file write
  Btrfs: release space on error in page_mkwrite
  Btrfs: fix btrfsck error 400 when truncating a compressed
  Btrfs: do not use btrfs_end_transaction_throttle everywhere
  Btrfs: add balance progress reporting
  Btrfs: allow for resuming restriper after it was paused
  Btrfs: allow for canceling restriper
  Btrfs: allow for pausing restriper
  Btrfs: add skip_balance mount option
  Btrfs: recover balance on mount
  Btrfs: save balance parameters to disk
  Btrfs: soft profile changing mode (aka soft convert)
  Btrfs: implement online profile changing
  Btrfs: do not reduce profile in do_chunk_alloc()
  Btrfs: virtual address space subset filter
  ...

Fix up trivial conflict in fs/btrfs/ioctl.c due to the use of the new
mnt_drop_write_file() helper.
This commit is contained in:
Linus Torvalds 2012-01-17 15:49:54 -08:00
commit f9156c7288
34 changed files with 6951 additions and 923 deletions

View file

@ -31,3 +31,22 @@ config BTRFS_FS_POSIX_ACL
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N
config BTRFS_FS_CHECK_INTEGRITY
bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
depends on BTRFS_FS
help
Adds code that examines all block write requests (including
writes of the super block). The goal is to verify that the
state of the filesystem on disk is always consistent, i.e.,
after a power-loss or kernel panic event the filesystem is
in a consistent state.
If the integrity check tool is included and activated in
the mount options, plenty of kernel memory is used, and
plenty of additional CPU cycles are spent. Enabling this
functionality is not intended for normal use.
In most cases, unless you are a btrfs developer who needs
to verify the integrity of (super)-block write requests
during the run of a regression test, say N

View file

@ -8,6 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o
reada.o backref.o ulist.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

File diff suppressed because it is too large Load diff

View file

@ -20,6 +20,7 @@
#define __BTRFS_BACKREF__
#include "ioctl.h"
#include "ulist.h"
struct inode_fs_paths {
struct btrfs_path *btrfs_path;
@ -54,6 +55,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 seq, struct ulist **roots);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
struct btrfs_path *path);

View file

@ -51,6 +51,9 @@ struct btrfs_inode {
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
/* held while doing delalloc reservations */
struct mutex delalloc_mutex;
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;

3068
fs/btrfs/check-integrity.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
/*
* Copyright (C) STRATO AG 2011. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#if !defined(__BTRFS_CHECK_INTEGRITY__)
#define __BTRFS_CHECK_INTEGRITY__
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
int btrfsic_submit_bh(int rw, struct buffer_head *bh);
void btrfsic_submit_bio(int rw, struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
#endif
int btrfsic_mount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices,
int including_extent_data, u32 print_mask);
void btrfsic_unmount(struct btrfs_root *root,
struct btrfs_fs_devices *fs_devices);
#endif

View file

@ -240,7 +240,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
new_root_objectid, &disk_key, level,
buf->start, 0);
buf->start, 0, 1);
if (IS_ERR(cow))
return PTR_ERR(cow);
@ -261,9 +261,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
if (ret)
return ret;
@ -350,14 +350,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
ret = btrfs_inc_ref(trans, root, buf, 1);
ret = btrfs_inc_ref(trans, root, buf, 1, 1);
BUG_ON(ret);
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_dec_ref(trans, root, buf, 0);
ret = btrfs_dec_ref(trans, root, buf, 0, 1);
BUG_ON(ret);
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
BUG_ON(ret);
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@ -365,9 +365,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
BUG_ON(ret);
}
if (new_flags != 0) {
@ -381,11 +381,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
ret = btrfs_inc_ref(trans, root, cow, 1, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
ret = btrfs_inc_ref(trans, root, cow, 0, 1);
BUG_ON(ret);
ret = btrfs_dec_ref(trans, root, buf, 1);
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
@ -446,7 +446,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
root->root_key.objectid, &disk_key,
level, search_start, empty_size);
level, search_start, empty_size, 1);
if (IS_ERR(cow))
return PTR_ERR(cow);
@ -484,7 +484,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
last_ref, 1);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@ -500,7 +500,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid);
btrfs_mark_buffer_dirty(parent);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
last_ref, 1);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@ -957,7 +957,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
free_extent_buffer(mid);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
/* once for the root ptr */
free_extent_buffer(mid);
return 0;
@ -1015,7 +1015,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
btrfs_free_tree_block(trans, root, right, 0, 1, 0);
free_extent_buffer(right);
right = NULL;
} else {
@ -1055,7 +1055,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (wret)
ret = wret;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
free_extent_buffer(mid);
mid = NULL;
} else {
@ -2089,7 +2089,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
root->root_key.objectid, &lower_key,
level, root->node->start, 0);
level, root->node->start, 0, 0);
if (IS_ERR(c))
return PTR_ERR(c);
@ -2216,7 +2216,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
root->root_key.objectid,
&disk_key, level, c->start, 0);
&disk_key, level, c->start, 0, 0);
if (IS_ERR(split))
return PTR_ERR(split);
@ -2970,7 +2970,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
&disk_key, 0, l->start, 0, 0);
if (IS_ERR(right))
return PTR_ERR(right);
@ -3781,7 +3781,7 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
return 0;
}
/*

View file

@ -86,6 +86,9 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
@ -692,6 +695,54 @@ struct btrfs_root_ref {
__le16 name_len;
} __attribute__ ((__packed__));
struct btrfs_disk_balance_args {
/*
* profiles to operate on, single is denoted by
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
*/
__le64 profiles;
/* usage filter */
__le64 usage;
/* devid filter */
__le64 devid;
/* devid subset filter [pstart..pend) */
__le64 pstart;
__le64 pend;
/* btrfs virtual address space subset filter [vstart..vend) */
__le64 vstart;
__le64 vend;
/*
* profile to convert to, single is denoted by
* BTRFS_AVAIL_ALLOC_BIT_SINGLE
*/
__le64 target;
/* BTRFS_BALANCE_ARGS_* */
__le64 flags;
__le64 unused[8];
} __attribute__ ((__packed__));
/*
* store balance parameters to disk so that balance can be properly
* resumed after crash or unmount
*/
struct btrfs_balance_item {
/* BTRFS_BALANCE_* */
__le64 flags;
struct btrfs_disk_balance_args data;
struct btrfs_disk_balance_args meta;
struct btrfs_disk_balance_args sys;
__le64 unused[4];
} __attribute__ ((__packed__));
#define BTRFS_FILE_EXTENT_INLINE 0
#define BTRFS_FILE_EXTENT_REG 1
#define BTRFS_FILE_EXTENT_PREALLOC 2
@ -751,14 +802,32 @@ struct btrfs_csum_item {
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1 << 4)
#define BTRFS_BLOCK_GROUP_DUP (1 << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1 << 6)
#define BTRFS_NR_RAID_TYPES 5
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_NR_RAID_TYPES 5
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
BTRFS_BLOCK_GROUP_SYSTEM | \
BTRFS_BLOCK_GROUP_METADATA)
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \
BTRFS_BLOCK_GROUP_DUP | \
BTRFS_BLOCK_GROUP_RAID10)
/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in
* fs_info->avail_*_alloc_bits (in-memory) and balance item fields
* (on-disk). The corresponding on-disk bit in chunk.type is reserved
* to avoid remappings between two formats in future.
*/
#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
struct btrfs_block_group_item {
__le64 used;
@ -916,6 +985,7 @@ struct btrfs_block_group_cache {
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
@ -971,7 +1041,7 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
unsigned long mount_opt:20;
unsigned long mount_opt:21;
unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
@ -1132,12 +1202,23 @@ struct btrfs_fs_info {
spinlock_t ref_cache_lock;
u64 total_ref_cache_size;
/*
* these three are in extended format (availability of single
* chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
* types are denoted by corresponding BTRFS_BLOCK_GROUP_* bits)
*/
u64 avail_data_alloc_bits;
u64 avail_metadata_alloc_bits;
u64 avail_system_alloc_bits;
u64 data_alloc_profile;
u64 metadata_alloc_profile;
u64 system_alloc_profile;
/* restriper state */
spinlock_t balance_lock;
struct mutex balance_mutex;
atomic_t balance_running;
atomic_t balance_pause_req;
atomic_t balance_cancel_req;
struct btrfs_balance_control *balance_ctl;
wait_queue_head_t balance_wait_q;
unsigned data_chunk_allocations;
unsigned metadata_ratio;
@ -1155,6 +1236,10 @@ struct btrfs_fs_info {
int scrub_workers_refcnt;
struct btrfs_workers scrub_workers;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
#endif
/* filesystem state */
u64 fs_state;
@ -1383,6 +1468,8 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* string items are for debugging. They just store a short string of
* data in the FS
@ -1413,6 +1500,9 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
#define BTRFS_MOUNT_RECOVERY (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@ -2077,8 +2167,86 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
num_devices, 64);
/* struct btrfs_super_block */
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
static inline void btrfs_balance_data(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_set_balance_data(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, data, ba);
}
static inline void btrfs_balance_meta(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_set_balance_meta(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba);
}
static inline void btrfs_balance_sys(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void btrfs_set_balance_sys(struct extent_buffer *eb,
struct btrfs_balance_item *bi,
struct btrfs_disk_balance_args *ba)
{
write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba);
}
static inline void
btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
struct btrfs_disk_balance_args *disk)
{
memset(cpu, 0, sizeof(*cpu));
cpu->profiles = le64_to_cpu(disk->profiles);
cpu->usage = le64_to_cpu(disk->usage);
cpu->devid = le64_to_cpu(disk->devid);
cpu->pstart = le64_to_cpu(disk->pstart);
cpu->pend = le64_to_cpu(disk->pend);
cpu->vstart = le64_to_cpu(disk->vstart);
cpu->vend = le64_to_cpu(disk->vend);
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
}
static inline void
btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
struct btrfs_balance_args *cpu)
{
memset(disk, 0, sizeof(*disk));
disk->profiles = cpu_to_le64(cpu->profiles);
disk->usage = cpu_to_le64(cpu->usage);
disk->devid = cpu_to_le64(cpu->devid);
disk->pstart = cpu_to_le64(cpu->pstart);
disk->pend = cpu_to_le64(cpu->pend);
disk->vstart = cpu_to_le64(cpu->vstart);
disk->vend = cpu_to_le64(cpu->vend);
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
}
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
@ -2277,11 +2445,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
u64 hint, u64 empty_size, int for_cow);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
u64 parent, int last_ref, int for_cow);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@ -2301,17 +2469,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 search_end, struct btrfs_key *ins,
u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@ -2323,7 +2491,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset);
u64 root_objectid, u64 owner, u64 offset, int for_cow);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@ -2482,10 +2650,18 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
}
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
++p->slots[0];
if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
return btrfs_next_leaf(root, p);
return 0;
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
void btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref);
struct btrfs_block_rsv *block_rsv, int update_ref,
int for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
@ -2500,6 +2676,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{
kfree(fs_info->balance_ctl);
kfree(fs_info->delayed_root);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
@ -2510,6 +2687,24 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
/**
* profile_is_valid - tests whether a given profile is valid and reduced
* @flags: profile to validate
* @extended: if true @flags is treated as an extended profile
*/
static inline int profile_is_valid(u64 flags, int extended)
{
u64 mask = ~BTRFS_BLOCK_GROUP_PROFILE_MASK;
flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
if (extended)
mask &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (flags & mask)
return 0;
/* true if zero or exactly one bit set */
return (flags & (~flags + 1)) == flags;
}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,

View file

@ -595,8 +595,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
num_bytes = btrfs_calc_trans_metadata_size(root, 1);
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
if (!ret)
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid,
num_bytes, 1);
item->bytes_reserved = num_bytes;
}
return ret;
}
@ -610,6 +614,9 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
return;
rsv = &root->fs_info->delayed_block_rsv;
trace_btrfs_space_reservation(root->fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
btrfs_block_rsv_release(root, rsv,
item->bytes_reserved);
}
@ -624,7 +631,7 @@ static int btrfs_delayed_inode_reserve_metadata(
struct btrfs_block_rsv *dst_rsv;
u64 num_bytes;
int ret;
int release = false;
bool release = false;
src_rsv = trans->block_rsv;
dst_rsv = &root->fs_info->delayed_block_rsv;
@ -651,8 +658,13 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (ret == -EAGAIN)
ret = -ENOSPC;
if (!ret)
if (!ret) {
node->bytes_reserved = num_bytes;
trace_btrfs_space_reservation(root->fs_info,
"delayed_inode",
btrfs_ino(inode),
num_bytes, 1);
}
return ret;
} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
spin_lock(&BTRFS_I(inode)->lock);
@ -707,11 +719,17 @@ static int btrfs_delayed_inode_reserve_metadata(
* reservation here. I think it may be time for a documentation page on
* how block rsvs. work.
*/
if (!ret)
if (!ret) {
trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
btrfs_ino(inode), num_bytes, 1);
node->bytes_reserved = num_bytes;
}
if (release)
if (release) {
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(root, src_rsv, num_bytes);
}
return ret;
}
@ -725,6 +743,8 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
return;
rsv = &root->fs_info->delayed_block_rsv;
trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
node->inode_id, node->bytes_reserved, 0);
btrfs_block_rsv_release(root, rsv,
node->bytes_reserved);
node->bytes_reserved = 0;
@ -1372,13 +1392,6 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
goto release_node;
}
ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible
*/
BUG_ON(ret);
delayed_item->key.objectid = btrfs_ino(dir);
btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
delayed_item->key.offset = index;
@ -1391,6 +1404,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
dir_item->type = type;
memcpy((char *)(dir_item + 1), name, name_len);
ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible
*/
BUG_ON(ret);
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {

View file

@ -101,6 +101,11 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1;
if (ref1->type > ref2->type)
return 1;
/* merging of sequenced refs is not allowed */
if (ref1->seq < ref2->seq)
return -1;
if (ref1->seq > ref2->seq)
return 1;
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
@ -150,16 +155,22 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
/*
* find an head entry based on bytenr. This returns the delayed ref
* head if it was able to find one, or NULL if nothing was in that spot
* head if it was able to find one, or NULL if nothing was in that spot.
* If return_bigger is given, the next bigger entry is returned if no exact
* match is found.
*/
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
u64 bytenr,
struct btrfs_delayed_ref_node **last)
struct btrfs_delayed_ref_node **last,
int return_bigger)
{
struct rb_node *n = root->rb_node;
struct rb_node *n;
struct btrfs_delayed_ref_node *entry;
int cmp;
int cmp = 0;
again:
n = root->rb_node;
entry = NULL;
while (n) {
entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
WARN_ON(!entry->in_tree);
@ -182,6 +193,19 @@ static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
else
return entry;
}
if (entry && return_bigger) {
if (cmp > 0) {
n = rb_next(&entry->rb_node);
if (!n)
n = rb_first(root);
entry = rb_entry(n, struct btrfs_delayed_ref_node,
rb_node);
bytenr = entry->bytenr;
return_bigger = 0;
goto again;
}
return entry;
}
return NULL;
}
@ -209,6 +233,24 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
assert_spin_locked(&delayed_refs->lock);
if (list_empty(&delayed_refs->seq_head))
return 0;
elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
seq, elem->seq, delayed_refs);
return 1;
}
return 0;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 start)
{
@ -223,20 +265,8 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
node = rb_first(&delayed_refs->root);
} else {
ref = NULL;
find_ref_head(&delayed_refs->root, start, &ref);
find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
if (ref) {
struct btrfs_delayed_ref_node *tmp;
node = rb_prev(&ref->rb_node);
while (node) {
tmp = rb_entry(node,
struct btrfs_delayed_ref_node,
rb_node);
if (tmp->bytenr < start)
break;
ref = tmp;
node = rb_prev(&ref->rb_node);
}
node = &ref->rb_node;
} else
node = rb_first(&delayed_refs->root);
@ -390,7 +420,8 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
*/
static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
static noinline int add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes,
int action, int is_data)
@ -437,6 +468,7 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
ref->action = 0;
ref->is_head = 1;
ref->in_tree = 1;
ref->seq = 0;
head_ref = btrfs_delayed_node_to_head(ref);
head_ref->must_insert_reserved = must_insert_reserved;
@ -468,14 +500,17 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
/*
* helper to insert a delayed tree ref into the rbtree.
*/
static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
static noinline int add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action)
u64 ref_root, int level, int action,
int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
@ -491,14 +526,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
if (parent) {
full_ref->parent = parent;
full_ref->parent = parent;
full_ref->root = ref_root;
if (parent)
ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
} else {
full_ref->root = ref_root;
else
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
}
full_ref->level = level;
trace_btrfs_delayed_tree_ref(ref, full_ref, action);
@ -522,15 +560,17 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
/*
* helper to insert a delayed data ref into the rbtree.
*/
static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
static noinline int add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, u64 owner, u64 offset,
int action)
int action, int for_cow)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
struct btrfs_delayed_ref_root *delayed_refs;
u64 seq = 0;
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
@ -546,14 +586,18 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
if (parent) {
full_ref->parent = parent;
full_ref->parent = parent;
full_ref->root = ref_root;
if (parent)
ref->type = BTRFS_SHARED_DATA_REF_KEY;
} else {
full_ref->root = ref_root;
else
ref->type = BTRFS_EXTENT_DATA_REF_KEY;
}
full_ref->objectid = owner;
full_ref->offset = offset;
@ -580,10 +624,12 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
* to make sure the delayed ref is eventually processed before this
* transaction commits.
*/
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op)
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
@ -610,13 +656,17 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
action, 0);
ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, action, 0);
BUG_ON(ret);
ret = add_delayed_tree_ref(trans, &ref->node, bytenr, num_bytes,
parent, ref_root, level, action);
ret = add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
BUG_ON(ret);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
@ -624,11 +674,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
/*
* add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
*/
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op)
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
@ -655,18 +707,23 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
ret = add_delayed_ref_head(trans, &head_ref->node, bytenr, num_bytes,
action, 1);
ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, action, 1);
BUG_ON(ret);
ret = add_delayed_data_ref(trans, &ref->node, bytenr, num_bytes,
parent, ref_root, owner, offset, action);
ret = add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
BUG_ON(ret);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op)
{
@ -683,11 +740,13 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
ret = add_delayed_ref_head(trans, &head_ref->node, bytenr,
ret = add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
BUG_ON(ret);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
@ -704,7 +763,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
struct btrfs_delayed_ref_root *delayed_refs;
delayed_refs = &trans->transaction->delayed_refs;
ref = find_ref_head(&delayed_refs->root, bytenr, NULL);
ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
if (ref)
return btrfs_delayed_node_to_head(ref);
return NULL;

View file

@ -33,6 +33,9 @@ struct btrfs_delayed_ref_node {
/* the size of the extent */
u64 num_bytes;
/* seq number to keep track of insertion order */
u64 seq;
/* ref count on this data structure */
atomic_t refs;
@ -98,19 +101,15 @@ struct btrfs_delayed_ref_head {
struct btrfs_delayed_tree_ref {
struct btrfs_delayed_ref_node node;
union {
u64 root;
u64 parent;
};
u64 root;
u64 parent;
int level;
};
struct btrfs_delayed_data_ref {
struct btrfs_delayed_ref_node node;
union {
u64 root;
u64 parent;
};
u64 root;
u64 parent;
u64 objectid;
u64 offset;
};
@ -140,6 +139,26 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
/*
* seq number of delayed refs. We need to know if a backref was being
* added before the currently processed ref or afterwards.
*/
u64 seq;
/*
* seq_list holds a list of all seq numbers that are currently being
* added to the list. While walking backrefs (btrfs_find_all_roots,
* qgroups), which might take some time, no newer ref must be processed,
* as it might influence the outcome of the walk.
*/
struct list_head seq_head;
/*
* when the only refs we have in the list must not be processed, we want
* to wait for more refs to show up or for the end of backref walking.
*/
wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@ -151,16 +170,21 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
}
}
int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
struct btrfs_delayed_extent_op *extent_op);
@ -170,6 +194,60 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
struct seq_list {
struct list_head list;
u64 seq;
};
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
++delayed_refs->seq;
return delayed_refs->seq;
}
static inline void
btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
assert_spin_locked(&delayed_refs->lock);
elem->seq = delayed_refs->seq;
list_add_tail(&elem->list, &delayed_refs->seq_head);
}
static inline void
btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
spin_lock(&delayed_refs->lock);
list_del(&elem->list);
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
if (for_cow)
return 0;
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.

View file

@ -43,6 +43,7 @@
#include "tree-log.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "check-integrity.h"
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
@ -1244,7 +1245,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->ref_cows = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
BTRFS_TREE_LOG_OBJECTID, NULL,
0, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
@ -1998,6 +2000,17 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->scrub_pause_wait);
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
fs_info->check_integrity_print_mask = 0;
#endif
spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex);
atomic_set(&fs_info->balance_running, 0);
atomic_set(&fs_info->balance_pause_req, 0);
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
init_waitqueue_head(&fs_info->balance_wait_q);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@ -2267,9 +2280,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
(unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
BTRFS_UUID_SIZE);
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_chunk_tree(chunk_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
sb->s_id);
@ -2318,9 +2329,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->data_alloc_profile = (u64)-1;
fs_info->metadata_alloc_profile = (u64)-1;
fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
ret = btrfs_init_space_info(fs_info);
if (ret) {
@ -2353,6 +2361,19 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_set_opt(fs_info->mount_opt, SSD);
}
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
ret = btrfsic_mount(tree_root, fs_devices,
btrfs_test_opt(tree_root,
CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
1 : 0,
fs_info->check_integrity_print_mask);
if (ret)
printk(KERN_WARNING "btrfs: failed to initialize"
" integrity check module %s\n", sb->s_id);
}
#endif
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0 &&
!(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
@ -2423,6 +2444,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!err)
err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);
if (!err)
err = btrfs_recover_balance(fs_info->tree_root);
if (err) {
close_ctree(tree_root);
return ERR_PTR(err);
@ -2631,7 +2656,7 @@ static int write_dev_supers(struct btrfs_device *device,
* we fua the first super. The others we allow
* to go down lazy.
*/
ret = submit_bh(WRITE_FUA, bh);
ret = btrfsic_submit_bh(WRITE_FUA, bh);
if (ret)
errors++;
}
@ -2708,7 +2733,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
device->flush_bio = bio;
bio_get(bio);
submit_bio(WRITE_FLUSH, bio);
btrfsic_submit_bio(WRITE_FLUSH, bio);
return 0;
}
@ -2972,6 +2997,9 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(root->fs_info);
btrfs_scrub_cancel(root);
/* wait for any defraggers to finish */
@ -3054,6 +3082,11 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->caching_workers);
btrfs_stop_workers(&fs_info->readahead_workers);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
btrfsic_unmount(root, fs_info->fs_devices);
#endif
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);

View file

@ -618,8 +618,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA;
flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
@ -1872,20 +1871,24 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset)
u64 root_objectid, u64 owner, u64 offset, int for_cow)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
root_objectid == BTRFS_TREE_LOG_OBJECTID);
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
BTRFS_ADD_DELAYED_REF, NULL);
BTRFS_ADD_DELAYED_REF, NULL, for_cow);
} else {
ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner, offset,
BTRFS_ADD_DELAYED_REF, NULL);
BTRFS_ADD_DELAYED_REF, NULL, for_cow);
}
return ret;
}
@ -2232,6 +2235,28 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
}
/*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
/*
* there are still refs with lower seq numbers in the
* process of being added. Don't run this ref yet.
*/
list_del_init(&locked_ref->cluster);
mutex_unlock(&locked_ref->mutex);
locked_ref = NULL;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
cond_resched();
spin_lock(&delayed_refs->lock);
continue;
}
/*
* record the must insert reserved flag before we
* drop the spin lock.
@ -2242,11 +2267,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;
/*
* locked_ref is the head node, so we have to go one
* node back for any delayed ref updates
*/
ref = select_delayed_ref(locked_ref);
if (!ref) {
/* All delayed refs have been processed, Go ahead
* and send the head node to run_one_delayed_ref,
@ -2267,9 +2287,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
BUG_ON(ret);
kfree(extent_op);
cond_resched();
spin_lock(&delayed_refs->lock);
continue;
goto next;
}
list_del_init(&locked_ref->cluster);
@ -2279,7 +2297,12 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
/*
* we modified num_entries, but as we're currently running
* delayed refs, skip
* wake_up(&delayed_refs->seq_wait);
* here.
*/
spin_unlock(&delayed_refs->lock);
ret = run_one_delayed_ref(trans, root, ref, extent_op,
@ -2289,13 +2312,34 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(ref);
kfree(extent_op);
count++;
next:
do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
cond_resched();
spin_lock(&delayed_refs->lock);
}
return count;
}
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs)
{
struct list_head *first_seq = delayed_refs->seq_head.next;
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
wait_event(delayed_refs->seq_wait,
num_refs != delayed_refs->num_entries ||
delayed_refs->seq_head.next != first_seq);
pr_debug("done waiting for more refs (num %ld, first %p)\n",
delayed_refs->num_entries, delayed_refs->seq_head.next);
spin_lock(&delayed_refs->lock);
}
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@ -2311,15 +2355,23 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct list_head cluster;
int ret;
u64 delayed_start;
int run_all = count == (unsigned long)-1;
int run_most = 0;
unsigned long num_refs = 0;
int consider_waiting;
if (root == root->fs_info->extent_root)
root = root->fs_info->tree_root;
do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
consider_waiting = 0;
spin_lock(&delayed_refs->lock);
if (count == 0) {
count = delayed_refs->num_entries * 2;
@ -2336,11 +2388,35 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
* of refs to process starting at the first one we are able to
* lock
*/
delayed_start = delayed_refs->run_delayed_start;
ret = btrfs_find_ref_cluster(trans, &cluster,
delayed_refs->run_delayed_start);
if (ret)
break;
if (delayed_start >= delayed_refs->run_delayed_start) {
if (consider_waiting == 0) {
/*
* btrfs_find_ref_cluster looped. let's do one
* more cycle. if we don't run any delayed ref
* during that cycle (because we can't because
* all of them are blocked) and if the number of
* refs doesn't change, we avoid busy waiting.
*/
consider_waiting = 1;
num_refs = delayed_refs->num_entries;
} else {
wait_for_more_refs(delayed_refs, num_refs);
/*
* after waiting, things have changed. we
* dropped the lock and someone else might have
* run some refs, built new clusters and so on.
* therefore, we restart staleness detection.
*/
consider_waiting = 0;
}
}
ret = run_clustered_refs(trans, root, &cluster);
BUG_ON(ret < 0);
@ -2348,6 +2424,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (count == 0)
break;
if (ret || delayed_refs->run_delayed_start == 0) {
/* refs were run, let's reset staleness detection */
consider_waiting = 0;
}
}
if (run_all) {
@ -2405,7 +2486,8 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->update_key = 0;
extent_op->is_data = is_data ? 1 : 0;
ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
num_bytes, extent_op);
if (ret)
kfree(extent_op);
return ret;
@ -2590,7 +2672,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
int full_backref, int inc)
int full_backref, int inc, int for_cow)
{
u64 bytenr;
u64 num_bytes;
@ -2603,7 +2685,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
int level;
int ret = 0;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
u64, u64, u64, u64, u64, u64);
u64, u64, u64, u64, u64, u64, int);
ref_root = btrfs_header_owner(buf);
nritems = btrfs_header_nritems(buf);
@ -2640,14 +2722,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(buf, fi);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, key.objectid,
key.offset);
key.offset, for_cow);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, i);
num_bytes = btrfs_level_size(root, level - 1);
ret = process_func(trans, root, bytenr, num_bytes,
parent, ref_root, level - 1, 0);
parent, ref_root, level - 1, 0,
for_cow);
if (ret)
goto fail;
}
@ -2659,15 +2742,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref)
struct extent_buffer *buf, int full_backref, int for_cow)
{
return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref)
struct extent_buffer *buf, int full_backref, int for_cow)
{
return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
}
static int write_one_cache_group(struct btrfs_trans_handle *trans,
@ -2993,9 +3076,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
INIT_LIST_HEAD(&found->block_groups[i]);
init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
BTRFS_BLOCK_GROUP_SYSTEM |
BTRFS_BLOCK_GROUP_METADATA);
found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
found->total_bytes = total_bytes;
found->disk_total = total_bytes * factor;
found->bytes_used = bytes_used;
@ -3016,20 +3097,27 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_DUP);
if (extra_flags) {
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits |= extra_flags;
}
u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
/* chunk -> extended profile */
if (extra_flags == 0)
extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits |= extra_flags;
}
/*
* @flags: available profiles in extended format (see ctree.h)
*
* Returns reduced profile in chunk format. If profile changing is in
* progress (either running or paused) picks the target profile (if it's
* already available), otherwise falls back to plain reducing.
*/
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
/*
@ -3040,6 +3128,34 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
u64 num_devices = root->fs_info->fs_devices->rw_devices +
root->fs_info->fs_devices->missing_devices;
/* pick restriper's target profile if it's available */
spin_lock(&root->fs_info->balance_lock);
if (root->fs_info->balance_ctl) {
struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
u64 tgt = 0;
if ((flags & BTRFS_BLOCK_GROUP_DATA) &&
(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(flags & bctl->data.target)) {
tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
} else if ((flags & BTRFS_BLOCK_GROUP_SYSTEM) &&
(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(flags & bctl->sys.target)) {
tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
} else if ((flags & BTRFS_BLOCK_GROUP_METADATA) &&
(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(flags & bctl->meta.target)) {
tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
}
if (tgt) {
spin_unlock(&root->fs_info->balance_lock);
flags = tgt;
goto out;
}
}
spin_unlock(&root->fs_info->balance_lock);
if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
if (num_devices < 4)
@ -3059,22 +3175,25 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
((flags & BTRFS_BLOCK_GROUP_RAID1) |
(flags & BTRFS_BLOCK_GROUP_RAID10) |
(flags & BTRFS_BLOCK_GROUP_DUP)))
(flags & BTRFS_BLOCK_GROUP_DUP))) {
flags &= ~BTRFS_BLOCK_GROUP_RAID0;
}
out:
/* extended -> chunk profile */
flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
return flags;
}
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_DATA)
flags |= root->fs_info->avail_data_alloc_bits &
root->fs_info->data_alloc_profile;
flags |= root->fs_info->avail_data_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
flags |= root->fs_info->avail_system_alloc_bits &
root->fs_info->system_alloc_profile;
flags |= root->fs_info->avail_system_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
flags |= root->fs_info->avail_metadata_alloc_bits &
root->fs_info->metadata_alloc_profile;
flags |= root->fs_info->avail_metadata_alloc_bits;
return btrfs_reduce_alloc_profile(root, flags);
}
@ -3191,6 +3310,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
(u64)data_sinfo, bytes, 1);
spin_unlock(&data_sinfo->lock);
return 0;
@ -3210,6 +3331,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
data_sinfo = BTRFS_I(inode)->space_info;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
(u64)data_sinfo, bytes, 0);
spin_unlock(&data_sinfo->lock);
}
@ -3257,27 +3380,15 @@ static int should_alloc_chunk(struct btrfs_root *root,
if (num_bytes - num_allocated < thresh)
return 1;
}
/*
* we have two similar checks here, one based on percentage
* and once based on a hard number of 256MB. The idea
* is that if we have a good amount of free
* room, don't allocate a chunk. A good mount is
* less than 80% utilized of the chunks we have allocated,
* or more than 256MB free
*/
if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
return 0;
if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
return 0;
thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
/* 256MB or 5% of the FS */
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
/* 256MB or 2% of the FS */
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
/* system chunks need a much small threshold */
if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM)
thresh = 32 * 1024 * 1024;
if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
return 0;
return 1;
}
@ -3291,7 +3402,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int wait_for_alloc = 0;
int ret = 0;
flags = btrfs_reduce_alloc_profile(extent_root, flags);
BUG_ON(!profile_is_valid(flags, 0));
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
@ -3582,6 +3693,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
if (used <= space_info->total_bytes) {
if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
"space_info",
(u64)space_info,
orig_bytes, 1);
ret = 0;
} else {
/*
@ -3649,6 +3764,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
if (used + num_bytes < space_info->total_bytes + avail) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
"space_info",
(u64)space_info,
orig_bytes, 1);
ret = 0;
} else {
wait_ordered = true;
@ -3755,7 +3874,8 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
spin_unlock(&block_rsv->lock);
}
static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
@ -3791,6 +3911,9 @@ static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
(u64)space_info,
num_bytes, 0);
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
@ -3947,7 +4070,8 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
if (global_rsv->full || global_rsv == block_rsv ||
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
block_rsv_release_bytes(root->fs_info, block_rsv, global_rsv,
num_bytes);
}
/*
@ -4006,11 +4130,15 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = sinfo->total_bytes - num_bytes;
block_rsv->reserved += num_bytes;
sinfo->bytes_may_use += num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
(u64)sinfo, num_bytes, 1);
}
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
(u64)sinfo, num_bytes, 0);
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
@ -4045,7 +4173,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
(u64)-1);
WARN_ON(fs_info->delalloc_block_rsv.size > 0);
WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
WARN_ON(fs_info->trans_block_rsv.size > 0);
@ -4062,6 +4191,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
if (!trans->bytes_reserved)
return;
trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans,
trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
@ -4079,6 +4210,8 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
* when we are truly done with the orphan item.
*/
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
trace_btrfs_space_reservation(root->fs_info, "orphan",
btrfs_ino(inode), num_bytes, 1);
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
@ -4086,6 +4219,8 @@ void btrfs_orphan_release_metadata(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
trace_btrfs_space_reservation(root->fs_info, "orphan",
btrfs_ino(inode), num_bytes, 0);
btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
@ -4213,12 +4348,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
/* Need to be holding the i_mutex here if we aren't free space cache */
if (btrfs_is_free_space_inode(root, inode))
flush = 0;
else
WARN_ON(!mutex_is_locked(&inode->i_mutex));
if (flush && btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
mutex_lock(&BTRFS_I(inode)->delalloc_mutex);
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
@ -4266,8 +4400,14 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
if (dropped)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
if (to_free)
if (to_free) {
btrfs_block_rsv_release(root, block_rsv, to_free);
trace_btrfs_space_reservation(root->fs_info,
"delalloc",
btrfs_ino(inode),
to_free, 0);
}
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
return ret;
}
@ -4278,7 +4418,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
}
BTRFS_I(inode)->reserved_extents += nr_extents;
spin_unlock(&BTRFS_I(inode)->lock);
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
if (to_reserve)
trace_btrfs_space_reservation(root->fs_info,"delalloc",
btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
return 0;
@ -4308,6 +4452,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0);
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
}
@ -4562,7 +4708,10 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
if (reserve == RESERVE_ALLOC) {
BUG_ON(space_info->bytes_may_use < num_bytes);
trace_btrfs_space_reservation(cache->fs_info,
"space_info",
(u64)space_info,
num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
}
@ -4928,6 +5077,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
@ -4955,16 +5106,17 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref)
u64 parent, int last_ref, int for_cow)
{
struct btrfs_block_group_cache *cache = NULL;
int ret;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL);
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
}
@ -4999,12 +5151,12 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_put_block_group(cache);
}
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset)
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow)
{
int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
/*
* tree log blocks never actually go into the extent allocation
@ -5016,14 +5168,17 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, (int)owner,
BTRFS_DROP_DELAYED_REF, NULL);
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BUG_ON(ret);
} else {
ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF, NULL);
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
num_bytes,
parent, root_objectid, owner,
offset, BTRFS_DROP_DELAYED_REF,
NULL, for_cow);
BUG_ON(ret);
}
return ret;
@ -5146,6 +5301,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
ins->objectid = 0;
ins->offset = 0;
trace_find_free_extent(orig_root, num_bytes, empty_size, data);
space_info = __find_space_info(root->fs_info, data);
if (!space_info) {
printk(KERN_ERR "No space info for %llu\n", data);
@ -5295,15 +5452,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (unlikely(block_group->ro))
goto loop;
spin_lock(&block_group->free_space_ctl->tree_lock);
if (cached &&
block_group->free_space_ctl->free_space <
num_bytes + empty_cluster + empty_size) {
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
/*
* Ok we want to try and use the cluster allocator, so
* lets look there
@ -5331,6 +5479,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
trace_btrfs_reserve_extent_cluster(root,
block_group, search_start, num_bytes);
goto checks;
}
@ -5349,8 +5499,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
* plenty of times and not have found
* anything, so we are likely way too
* fragmented for the clustering stuff to find
* anything. */
if (loop >= LOOP_NO_EMPTY_SIZE) {
* anything.
*
* However, if the cluster is taken from the
* current block group, release the cluster
* first, so that we stand a better chance of
* succeeding in the unclustered
* allocation. */
if (loop >= LOOP_NO_EMPTY_SIZE &&
last_ptr->block_group != block_group) {
spin_unlock(&last_ptr->refill_lock);
goto unclustered_alloc;
}
@ -5361,6 +5518,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
if (loop >= LOOP_NO_EMPTY_SIZE) {
spin_unlock(&last_ptr->refill_lock);
goto unclustered_alloc;
}
/* allocate a cluster in this block group */
ret = btrfs_find_space_cluster(trans, root,
block_group, last_ptr,
@ -5377,6 +5539,9 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
trace_btrfs_reserve_extent_cluster(root,
block_group, search_start,
num_bytes);
goto checks;
}
} else if (!cached && loop > LOOP_CACHING_NOWAIT
@ -5401,6 +5566,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
}
unclustered_alloc:
spin_lock(&block_group->free_space_ctl->tree_lock);
if (cached &&
block_group->free_space_ctl->free_space <
num_bytes + empty_cluster + empty_size) {
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size);
/*
@ -5438,9 +5612,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
goto loop;
}
ins->objectid = search_start;
ins->offset = num_bytes;
if (offset < search_start)
btrfs_add_free_space(used_block_group, offset,
search_start - offset);
@ -5457,6 +5628,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
ins->objectid = search_start;
ins->offset = num_bytes;
trace_btrfs_reserve_extent(orig_root, block_group,
search_start, num_bytes);
if (offset < search_start)
btrfs_add_free_space(used_block_group, offset,
search_start - offset);
@ -5842,9 +6015,10 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
0, root_objectid, owner, offset,
BTRFS_ADD_DELAYED_EXTENT, NULL);
ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
ins->offset, 0,
root_objectid, owner, offset,
BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
return ret;
}
@ -5997,10 +6171,11 @@ use_block_rsv(struct btrfs_trans_handle *trans,
return ERR_PTR(-ENOSPC);
}
static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
block_rsv_add_bytes(block_rsv, blocksize, 0);
block_rsv_release_bytes(block_rsv, NULL, 0);
block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
}
/*
@ -6014,7 +6189,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size)
u64 hint, u64 empty_size, int for_cow)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
@ -6030,7 +6205,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
empty_size, hint, (u64)-1, &ins, 0);
if (ret) {
unuse_block_rsv(block_rsv, blocksize);
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
return ERR_PTR(ret);
}
@ -6058,10 +6233,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
extent_op->update_flags = 1;
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
extent_op, for_cow);
BUG_ON(ret);
}
return buf;
@ -6078,6 +6254,7 @@ struct walk_control {
int keep_locks;
int reada_slot;
int reada_count;
int for_reloc;
};
#define DROP_REFERENCE 1
@ -6216,9 +6393,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
/* wc->stage == UPDATE_BACKREF */
if (!(wc->flags[level] & flag)) {
BUG_ON(!path->locks[level]);
ret = btrfs_inc_ref(trans, root, eb, 1);
ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
BUG_ON(ret);
ret = btrfs_dec_ref(trans, root, eb, 0);
ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
BUG_ON(ret);
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
eb->len, flag, 0);
@ -6362,7 +6539,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0);
root->root_key.objectid, level - 1, 0, 0);
BUG_ON(ret);
}
btrfs_tree_unlock(next);
@ -6436,9 +6613,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (wc->refs[level] == 1) {
if (level == 0) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
ret = btrfs_dec_ref(trans, root, eb, 1);
ret = btrfs_dec_ref(trans, root, eb, 1,
wc->for_reloc);
else
ret = btrfs_dec_ref(trans, root, eb, 0);
ret = btrfs_dec_ref(trans, root, eb, 0,
wc->for_reloc);
BUG_ON(ret);
}
/* make block locked assertion in clean_tree_block happy */
@ -6465,7 +6644,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;
@ -6549,7 +6728,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* blocks are properly updated.
*/
void btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref)
struct btrfs_block_rsv *block_rsv, int update_ref,
int for_reloc)
{
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@ -6637,6 +6817,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
wc->for_reloc = for_reloc;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
@ -6721,6 +6902,7 @@ void btrfs_drop_snapshot(struct btrfs_root *root,
* drop subtree rooted at tree block 'node'.
*
* NOTE: this function will unlock and release tree block 'node'
* only used by relocation code
*/
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@ -6765,6 +6947,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
wc->for_reloc = 1;
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
@ -6792,6 +6975,29 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
if (root->fs_info->balance_ctl) {
struct btrfs_balance_control *bctl = root->fs_info->balance_ctl;
u64 tgt = 0;
/* pick restriper's target profile and return */
if (flags & BTRFS_BLOCK_GROUP_DATA &&
bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) {
tgt = BTRFS_BLOCK_GROUP_DATA | bctl->data.target;
} else if (flags & BTRFS_BLOCK_GROUP_SYSTEM &&
bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
tgt = BTRFS_BLOCK_GROUP_SYSTEM | bctl->sys.target;
} else if (flags & BTRFS_BLOCK_GROUP_METADATA &&
bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) {
tgt = BTRFS_BLOCK_GROUP_METADATA | bctl->meta.target;
}
if (tgt) {
/* extended -> chunk profile */
tgt &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
return tgt;
}
}
/*
* we add in the count of missing devices because we want
* to make sure that any RAID levels on a degraded FS
@ -7085,7 +7291,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
* space to fit our block group in.
*/
if (device->total_bytes > device->bytes_used + min_free) {
ret = find_free_dev_extent(NULL, device, min_free,
ret = find_free_dev_extent(device, min_free,
&dev_offset, NULL);
if (!ret)
dev_nr++;
@ -7447,6 +7653,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
@ -7466,6 +7673,22 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return 0;
}
static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & BTRFS_BLOCK_GROUP_PROFILE_MASK;
/* chunk -> extended profile */
if (extra_flags == 0)
extra_flags = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits &= ~extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits &= ~extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits &= ~extra_flags;
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start)
{
@ -7476,6 +7699,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct inode *inode;
int ret;
int index;
int factor;
root = root->fs_info->extent_root;
@ -7491,6 +7715,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents(root, block_group);
memcpy(&key, &block_group->key, sizeof(key));
index = get_block_group_index(block_group);
if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
@ -7565,6 +7790,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* are still on the list after taking the semaphore
*/
list_del_init(&block_group->list);
if (list_empty(&block_group->space_info->block_groups[index]))
clear_avail_alloc_bits(root->fs_info, block_group->flags);
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)

View file

@ -18,6 +18,7 @@
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@ -1895,7 +1896,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start-page_offset(page));
submit_bio(WRITE_SYNC, bio);
btrfsic_submit_bio(WRITE_SYNC, bio);
wait_for_completion(&compl);
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
@ -2393,7 +2394,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num, bio_flags, start);
else
submit_bio(rw, bio);
btrfsic_submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
ret = -EOPNOTSUPP;
@ -3579,6 +3580,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
atomic_set(&eb->blocking_writers, 0);
atomic_set(&eb->spinning_readers, 0);
atomic_set(&eb->spinning_writers, 0);
eb->lock_nested = 0;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);

View file

@ -129,6 +129,7 @@ struct extent_buffer {
struct list_head leak_list;
struct rcu_head rcu_head;
atomic_t refs;
pid_t lock_owner;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
@ -137,6 +138,7 @@ struct extent_buffer {
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;
int lock_nested;
/* protects write locks */
rwlock_t lock;

View file

@ -678,7 +678,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
start - extent_offset);
start - extent_offset, 0);
BUG_ON(ret);
*hint_byte = disk_bytenr;
}
@ -753,7 +753,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
key.objectid, key.offset -
extent_offset);
extent_offset, 0);
BUG_ON(ret);
inode_sub_bytes(inode,
extent_end - key.offset);
@ -962,7 +962,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
ino, orig_offset);
ino, orig_offset, 0);
BUG_ON(ret);
if (split == start) {
@ -989,7 +989,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
ino, orig_offset, 0);
BUG_ON(ret);
}
other_start = 0;
@ -1006,7 +1006,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
del_nr++;
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
0, root->root_key.objectid,
ino, orig_offset);
ino, orig_offset, 0);
BUG_ON(ret);
}
if (del_nr == 0) {
@ -1274,7 +1274,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
dirty_pages);
if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
btrfs_throttle(root);
pos += copied;
num_written += copied;

View file

@ -319,9 +319,11 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
io_ctl_unmap_page(io_ctl);
for (i = 0; i < io_ctl->num_pages; i++) {
ClearPageChecked(io_ctl->pages[i]);
unlock_page(io_ctl->pages[i]);
page_cache_release(io_ctl->pages[i]);
if (io_ctl->pages[i]) {
ClearPageChecked(io_ctl->pages[i]);
unlock_page(io_ctl->pages[i]);
page_cache_release(io_ctl->pages[i]);
}
}
}
@ -635,7 +637,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (!num_entries)
return 0;
io_ctl_init(&io_ctl, inode, root);
ret = io_ctl_init(&io_ctl, inode, root);
if (ret)
return ret;
ret = readahead_cache(inode);
if (ret)
goto out;
@ -838,7 +843,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct io_ctl io_ctl;
struct list_head bitmap_list;
struct btrfs_key key;
u64 start, end, len;
u64 start, extent_start, extent_end, len;
int entries = 0;
int bitmaps = 0;
int ret;
@ -849,7 +854,9 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (!i_size_read(inode))
return -1;
io_ctl_init(&io_ctl, inode, root);
ret = io_ctl_init(&io_ctl, inode, root);
if (ret)
return -1;
/* Get the cluster for this block_group if it exists */
if (block_group && !list_empty(&block_group->cluster_list))
@ -857,25 +864,12 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_cluster,
block_group_list);
/*
* We shouldn't have switched the pinned extents yet so this is the
* right one
*/
unpin = root->fs_info->pinned_extents;
/* Lock all pages first so we can lock the extent safely. */
io_ctl_prepare_pages(&io_ctl, inode, 0);
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
0, &cached_state, GFP_NOFS);
/*
* When searching for pinned extents, we need to start at our start
* offset.
*/
if (block_group)
start = block_group->key.objectid;
node = rb_first(&ctl->free_space_offset);
if (!node && cluster) {
node = rb_first(&cluster->root);
@ -918,9 +912,20 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* We want to add any pinned extents to our free space cache
* so we don't leak the space
*/
/*
* We shouldn't have switched the pinned extents yet so this is the
* right one
*/
unpin = root->fs_info->pinned_extents;
if (block_group)
start = block_group->key.objectid;
while (block_group && (start < block_group->key.objectid +
block_group->key.offset)) {
ret = find_first_extent_bit(unpin, start, &start, &end,
ret = find_first_extent_bit(unpin, start,
&extent_start, &extent_end,
EXTENT_DIRTY);
if (ret) {
ret = 0;
@ -928,20 +933,21 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
}
/* This pinned extent is out of our range */
if (start >= block_group->key.objectid +
if (extent_start >= block_group->key.objectid +
block_group->key.offset)
break;
len = block_group->key.objectid +
block_group->key.offset - start;
len = min(len, end + 1 - start);
extent_start = max(extent_start, start);
extent_end = min(block_group->key.objectid +
block_group->key.offset, extent_end + 1);
len = extent_end - extent_start;
entries++;
ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
if (ret)
goto out_nospc;
start = end + 1;
start = extent_end;
}
/* Write out the bitmaps */
@ -2283,23 +2289,23 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *entry,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 min_bytes)
u64 offset, u64 bytes,
u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
unsigned long next_zero;
unsigned long i;
unsigned long search_bits;
unsigned long total_bits;
unsigned long want_bits;
unsigned long min_bits;
unsigned long found_bits;
unsigned long start = 0;
unsigned long total_found = 0;
int ret;
bool found = false;
i = offset_to_bit(entry->offset, block_group->sectorsize,
max_t(u64, offset, entry->offset));
search_bits = bytes_to_bits(bytes, block_group->sectorsize);
total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
want_bits = bytes_to_bits(bytes, block_group->sectorsize);
min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
again:
found_bits = 0;
@ -2308,7 +2314,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
next_zero = find_next_zero_bit(entry->bitmap,
BITS_PER_BITMAP, i);
if (next_zero - i >= search_bits) {
if (next_zero - i >= min_bits) {
found_bits = next_zero - i;
break;
}
@ -2318,10 +2324,9 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
if (!found_bits)
return -ENOSPC;
if (!found) {
if (!total_found) {
start = i;
cluster->max_size = 0;
found = true;
}
total_found += found_bits;
@ -2329,13 +2334,8 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
if (cluster->max_size < found_bits * block_group->sectorsize)
cluster->max_size = found_bits * block_group->sectorsize;
if (total_found < total_bits) {
i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
if (i - start > total_bits * 2) {
total_found = 0;
cluster->max_size = 0;
found = false;
}
if (total_found < want_bits || cluster->max_size < cont1_bytes) {
i = next_zero + 1;
goto again;
}
@ -2346,28 +2346,31 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
&entry->offset_index, 1);
BUG_ON(ret);
trace_btrfs_setup_cluster(block_group, cluster,
total_found * block_group->sectorsize, 1);
return 0;
}
/*
* This searches the block group for just extents to fill the cluster with.
* Try to find a cluster with at least bytes total bytes, at least one
* extent of cont1_bytes, and other clusters of at least min_bytes.
*/
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct list_head *bitmaps, u64 offset, u64 bytes,
u64 min_bytes)
u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *first = NULL;
struct btrfs_free_space *entry = NULL;
struct btrfs_free_space *prev = NULL;
struct btrfs_free_space *last;
struct rb_node *node;
u64 window_start;
u64 window_free;
u64 max_extent;
u64 max_gap = 128 * 1024;
u64 total_size = 0;
entry = tree_search_offset(ctl, offset, 0, 1);
if (!entry)
@ -2377,8 +2380,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* We don't want bitmaps, so just move along until we find a normal
* extent entry.
*/
while (entry->bitmap) {
if (list_empty(&entry->list))
while (entry->bitmap || entry->bytes < min_bytes) {
if (entry->bitmap && list_empty(&entry->list))
list_add_tail(&entry->list, bitmaps);
node = rb_next(&entry->offset_index);
if (!node)
@ -2391,12 +2394,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
max_extent = entry->bytes;
first = entry;
last = entry;
prev = entry;
while (window_free <= min_bytes) {
node = rb_next(&entry->offset_index);
if (!node)
return -ENOSPC;
for (node = rb_next(&entry->offset_index); node;
node = rb_next(&entry->offset_index)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
if (entry->bitmap) {
@ -2405,26 +2405,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
continue;
}
/*
* we haven't filled the empty size and the window is
* very large. reset and try again
*/
if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
entry->offset - window_start > (min_bytes * 2)) {
first = entry;
window_start = entry->offset;
window_free = entry->bytes;
last = entry;
if (entry->bytes < min_bytes)
continue;
last = entry;
window_free += entry->bytes;
if (entry->bytes > max_extent)
max_extent = entry->bytes;
} else {
last = entry;
window_free += entry->bytes;
if (entry->bytes > max_extent)
max_extent = entry->bytes;
}
prev = entry;
}
if (window_free < bytes || max_extent < cont1_bytes)
return -ENOSPC;
cluster->window_start = first->offset;
node = &first->offset_index;
@ -2438,17 +2430,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
if (entry->bitmap)
if (entry->bitmap || entry->bytes < min_bytes)
continue;
rb_erase(&entry->offset_index, &ctl->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 0);
total_size += entry->bytes;
BUG_ON(ret);
} while (node && entry != last);
cluster->max_size = max_extent;
trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
return 0;
}
@ -2460,7 +2453,7 @@ static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct list_head *bitmaps, u64 offset, u64 bytes,
u64 min_bytes)
u64 cont1_bytes, u64 min_bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
@ -2485,7 +2478,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
if (entry->bytes < min_bytes)
continue;
ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
bytes, min_bytes);
bytes, cont1_bytes, min_bytes);
if (!ret)
return 0;
}
@ -2499,7 +2492,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
/*
* here we try to find a cluster of blocks in a block group. The goal
* is to find at least bytes free and up to empty_size + bytes free.
* is to find at least bytes+empty_size.
* We might not find them all in one contiguous area.
*
* returns zero and sets up cluster if things worked out, otherwise
@ -2515,23 +2508,24 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
struct btrfs_free_space *entry, *tmp;
LIST_HEAD(bitmaps);
u64 min_bytes;
u64 cont1_bytes;
int ret;
/* for metadata, allow allocates with more holes */
/*
* Choose the minimum extent size we'll require for this
* cluster. For SSD_SPREAD, don't allow any fragmentation.
* For metadata, allow allocates with smaller extents. For
* data, keep it dense.
*/
if (btrfs_test_opt(root, SSD_SPREAD)) {
min_bytes = bytes + empty_size;
cont1_bytes = min_bytes = bytes + empty_size;
} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
/*
* we want to do larger allocations when we are
* flushing out the delayed refs, it helps prevent
* making more work as we go along.
*/
if (trans->transaction->delayed_refs.flushing)
min_bytes = max(bytes, (bytes + empty_size) >> 1);
else
min_bytes = max(bytes, (bytes + empty_size) >> 4);
} else
min_bytes = max(bytes, (bytes + empty_size) >> 2);
cont1_bytes = bytes;
min_bytes = block_group->sectorsize;
} else {
cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
min_bytes = block_group->sectorsize;
}
spin_lock(&ctl->tree_lock);
@ -2539,7 +2533,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
* If we know we don't have enough space to make a cluster don't even
* bother doing all the work to try and find one.
*/
if (ctl->free_space < min_bytes) {
if (ctl->free_space < bytes) {
spin_unlock(&ctl->tree_lock);
return -ENOSPC;
}
@ -2552,11 +2546,17 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
goto out;
}
trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
min_bytes);
INIT_LIST_HEAD(&bitmaps);
ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
bytes, min_bytes);
bytes + empty_size,
cont1_bytes, min_bytes);
if (ret)
ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
offset, bytes, min_bytes);
offset, bytes + empty_size,
cont1_bytes, min_bytes);
/* Clear our temporary list */
list_for_each_entry_safe(entry, tmp, &bitmaps, list)
@ -2567,6 +2567,8 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
list_add_tail(&cluster->block_group_list,
&block_group->cluster_list);
cluster->block_group = block_group;
} else {
trace_btrfs_failed_cluster_setup(block_group);
}
out:
spin_unlock(&cluster->lock);
@ -2588,17 +2590,57 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
cluster->block_group = NULL;
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
static int do_trimming(struct btrfs_block_group_cache *block_group,
u64 *total_trimmed, u64 start, u64 bytes,
u64 reserved_start, u64 reserved_bytes)
{
struct btrfs_space_info *space_info = block_group->space_info;
struct btrfs_fs_info *fs_info = block_group->fs_info;
int ret;
int update = 0;
u64 trimmed = 0;
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (!block_group->ro) {
block_group->reserved += reserved_bytes;
space_info->bytes_reserved += reserved_bytes;
update = 1;
}
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
ret = btrfs_error_discard_extent(fs_info->extent_root,
start, bytes, &trimmed);
if (!ret)
*total_trimmed += trimmed;
btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
if (update) {
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (block_group->ro)
space_info->bytes_readonly += reserved_bytes;
block_group->reserved -= reserved_bytes;
space_info->bytes_reserved -= reserved_bytes;
spin_unlock(&space_info->lock);
spin_unlock(&block_group->lock);
}
return ret;
}
static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
struct btrfs_fs_info *fs_info = block_group->fs_info;
u64 bytes = 0;
u64 actually_trimmed;
struct btrfs_free_space *entry;
struct rb_node *node;
int ret = 0;
*trimmed = 0;
u64 extent_start;
u64 extent_bytes;
u64 bytes;
while (start < end) {
spin_lock(&ctl->tree_lock);
@ -2609,81 +2651,47 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
}
entry = tree_search_offset(ctl, start, 0, 1);
if (!entry)
entry = tree_search_offset(ctl,
offset_to_bitmap(ctl, start),
1, 1);
if (!entry || entry->offset >= end) {
if (!entry) {
spin_unlock(&ctl->tree_lock);
break;
}
if (entry->bitmap) {
ret = search_bitmap(ctl, entry, &start, &bytes);
if (!ret) {
if (start >= end) {
spin_unlock(&ctl->tree_lock);
break;
}
bytes = min(bytes, end - start);
bitmap_clear_bits(ctl, entry, start, bytes);
if (entry->bytes == 0)
free_bitmap(ctl, entry);
} else {
start = entry->offset + BITS_PER_BITMAP *
block_group->sectorsize;
/* skip bitmaps */
while (entry->bitmap) {
node = rb_next(&entry->offset_index);
if (!node) {
spin_unlock(&ctl->tree_lock);
ret = 0;
continue;
goto out;
}
} else {
start = entry->offset;
bytes = min(entry->bytes, end - start);
unlink_free_space(ctl, entry);
kmem_cache_free(btrfs_free_space_cachep, entry);
entry = rb_entry(node, struct btrfs_free_space,
offset_index);
}
if (entry->offset >= end) {
spin_unlock(&ctl->tree_lock);
break;
}
extent_start = entry->offset;
extent_bytes = entry->bytes;
start = max(start, extent_start);
bytes = min(extent_start + extent_bytes, end) - start;
if (bytes < minlen) {
spin_unlock(&ctl->tree_lock);
goto next;
}
unlink_free_space(ctl, entry);
kmem_cache_free(btrfs_free_space_cachep, entry);
spin_unlock(&ctl->tree_lock);
if (bytes >= minlen) {
struct btrfs_space_info *space_info;
int update = 0;
space_info = block_group->space_info;
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (!block_group->ro) {
block_group->reserved += bytes;
space_info->bytes_reserved += bytes;
update = 1;
}
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
ret = btrfs_error_discard_extent(fs_info->extent_root,
start,
bytes,
&actually_trimmed);
btrfs_add_free_space(block_group, start, bytes);
if (update) {
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (block_group->ro)
space_info->bytes_readonly += bytes;
block_group->reserved -= bytes;
space_info->bytes_reserved -= bytes;
spin_unlock(&space_info->lock);
spin_unlock(&block_group->lock);
}
if (ret)
break;
*trimmed += actually_trimmed;
}
ret = do_trimming(block_group, total_trimmed, start, bytes,
extent_start, extent_bytes);
if (ret)
break;
next:
start += bytes;
bytes = 0;
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
@ -2692,6 +2700,93 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
cond_resched();
}
out:
return ret;
}
static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
int ret = 0;
int ret2;
u64 bytes;
u64 offset = offset_to_bitmap(ctl, start);
while (offset < end) {
bool next_bitmap = false;
spin_lock(&ctl->tree_lock);
if (ctl->free_space < minlen) {
spin_unlock(&ctl->tree_lock);
break;
}
entry = tree_search_offset(ctl, offset, 1, 0);
if (!entry) {
spin_unlock(&ctl->tree_lock);
next_bitmap = true;
goto next;
}
bytes = minlen;
ret2 = search_bitmap(ctl, entry, &start, &bytes);
if (ret2 || start >= end) {
spin_unlock(&ctl->tree_lock);
next_bitmap = true;
goto next;
}
bytes = min(bytes, end - start);
if (bytes < minlen) {
spin_unlock(&ctl->tree_lock);
goto next;
}
bitmap_clear_bits(ctl, entry, start, bytes);
if (entry->bytes == 0)
free_bitmap(ctl, entry);
spin_unlock(&ctl->tree_lock);
ret = do_trimming(block_group, total_trimmed, start, bytes,
start, bytes);
if (ret)
break;
next:
if (next_bitmap) {
offset += BITS_PER_BITMAP * ctl->unit;
} else {
start += bytes;
if (start >= offset + BITS_PER_BITMAP * ctl->unit)
offset += BITS_PER_BITMAP * ctl->unit;
}
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
cond_resched();
}
return ret;
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
int ret;
*trimmed = 0;
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
return ret;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
return ret;
}

View file

@ -438,6 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
trans->bytes_reserved);
if (ret)
goto out;
trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
trans->bytes_reserved, 1);
again:
inode = lookup_free_ino_inode(root, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@ -498,6 +500,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
out_put:
iput(inode);
out_release:
trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
out:
trans->block_rsv = rsv;

View file

@ -1951,12 +1951,28 @@ enum btrfs_orphan_cleanup_state {
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
int ret;
if (!list_empty(&root->orphan_list) ||
root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
return;
spin_lock(&root->orphan_lock);
if (!list_empty(&root->orphan_list)) {
spin_unlock(&root->orphan_lock);
return;
}
if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
spin_unlock(&root->orphan_lock);
return;
}
block_rsv = root->orphan_block_rsv;
root->orphan_block_rsv = NULL;
spin_unlock(&root->orphan_lock);
if (root->orphan_item_inserted &&
btrfs_root_refs(&root->root_item) > 0) {
ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
@ -1965,10 +1981,9 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
root->orphan_item_inserted = 0;
}
if (root->orphan_block_rsv) {
WARN_ON(root->orphan_block_rsv->size > 0);
btrfs_free_block_rsv(root, root->orphan_block_rsv);
root->orphan_block_rsv = NULL;
if (block_rsv) {
WARN_ON(block_rsv->size > 0);
btrfs_free_block_rsv(root, block_rsv);
}
}
@ -2224,14 +2239,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
/*
* Need to hold the imutex for reservation purposes, not
* a huge deal here but I have a WARN_ON in
* btrfs_delalloc_reserve_space to catch offenders.
*/
mutex_lock(&inode->i_mutex);
ret = btrfs_truncate(inode);
mutex_unlock(&inode->i_mutex);
} else {
nr_unlink++;
}
@ -2845,7 +2853,7 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans,
BUG_ON(!root->fs_info->enospc_unlink);
root->fs_info->enospc_unlink = 0;
}
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
}
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@ -3009,7 +3017,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
int encoding;
int ret;
int err = 0;
u64 ino = btrfs_ino(inode);
@ -3059,7 +3066,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
encoding = 0;
if (found_key.objectid != ino)
break;
@ -3072,10 +3078,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
encoding = btrfs_file_extent_compression(leaf, fi);
encoding |= btrfs_file_extent_encryption(leaf, fi);
encoding |= btrfs_file_extent_other_encoding(leaf, fi);
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
item_end +=
btrfs_file_extent_num_bytes(leaf, fi);
@ -3103,7 +3105,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
if (!del_item && !encoding) {
if (!del_item) {
u64 orig_num_bytes =
btrfs_file_extent_num_bytes(leaf, fi);
extent_num_bytes = new_size -
@ -3179,7 +3181,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
btrfs_header_owner(leaf),
ino, extent_offset);
ino, extent_offset, 0);
BUG_ON(ret);
}
@ -3434,7 +3436,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
i_size_write(inode, newsize);
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
ret = btrfs_update_inode(trans, root, inode);
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
} else {
/*
@ -4655,7 +4657,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
}
out_unlock:
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
if (drop_inode) {
inode_dec_link_count(inode);
@ -4723,7 +4725,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
}
out_unlock:
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
@ -4782,7 +4784,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
}
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
@ -4848,7 +4850,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
out_fail:
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
if (drop_on_err)
iput(inode);
btrfs_btree_balance_dirty(root, nr);
@ -5121,7 +5123,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
}
flush_dcache_page(page);
} else if (create && PageUptodate(page)) {
WARN_ON(1);
BUG();
if (!trans) {
kunmap(page);
free_extent_map(em);
@ -6402,10 +6404,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
u64 page_start;
u64 page_end;
/* Need this to keep space reservations serialized */
mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
mutex_unlock(&inode->i_mutex);
if (!ret)
ret = btrfs_update_time(vma->vm_file);
if (ret) {
@ -6494,8 +6493,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (!ret)
return VM_FAULT_LOCKED;
unlock_page(page);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
out:
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
return ret;
}
@ -6668,7 +6667,7 @@ static int btrfs_truncate(struct inode *inode)
err = ret;
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
ret = btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
}
@ -6749,6 +6748,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_io_tree_init(&ei->io_tree, &inode->i_data);
extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->i_orphan);
INIT_LIST_HEAD(&ei->delalloc_inodes);
@ -7074,7 +7074,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
btrfs_end_log_trans(root);
}
out_fail:
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&root->fs_info->subvol_sem);
@ -7246,7 +7246,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (!err)
d_instantiate(dentry, inode);
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);

View file

@ -176,6 +176,8 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
struct btrfs_trans_handle *trans;
unsigned int flags, oldflags;
int ret;
u64 ip_oldflags;
unsigned int i_oldflags;
if (btrfs_root_readonly(root))
return -EROFS;
@ -192,6 +194,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
mutex_lock(&inode->i_mutex);
ip_oldflags = ip->flags;
i_oldflags = inode->i_flags;
flags = btrfs_mask_flags(inode->i_mode, flags);
oldflags = btrfs_flags_to_ioctl(ip->flags);
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
@ -249,19 +254,24 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
}
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_drop;
}
btrfs_update_iflags(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
out_drop:
if (ret) {
ip->flags = ip_oldflags;
inode->i_flags = i_oldflags;
}
mnt_drop_write_file(file);
ret = 0;
out_unlock:
mutex_unlock(&inode->i_mutex);
return ret;
@ -358,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root,
return PTR_ERR(trans);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
0, objectid, NULL, 0, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
@ -858,10 +868,8 @@ static int cluster_pages_for_defrag(struct inode *inode,
return 0;
file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode,
num_pages << PAGE_CACHE_SHIFT);
mutex_unlock(&inode->i_mutex);
if (ret)
return ret;
again:
@ -1203,13 +1211,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
devstr = strchr(sizestr, ':');
if (devstr) {
@ -1226,7 +1242,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
if (!strcmp(sizestr, "max"))
new_size = device->bdev->bd_inode->i_size;
@ -1241,7 +1257,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
new_size = memparse(sizestr, NULL);
if (new_size == 0) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
}
@ -1250,7 +1266,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (mod < 0) {
if (new_size > old_size) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
new_size = old_size - new_size;
} else if (mod > 0) {
@ -1259,11 +1275,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (new_size < 256 * 1024 * 1024) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
if (new_size > device->bdev->bd_inode->i_size) {
ret = -EFBIG;
goto out_unlock;
goto out_free;
}
do_div(new_size, root->sectorsize);
@ -1276,7 +1292,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_unlock;
goto out_free;
}
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
@ -1284,9 +1300,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
ret = btrfs_shrink_device(device, new_size);
}
out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
out_free:
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
@ -2052,14 +2069,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
@ -2074,14 +2102,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}
@ -2427,7 +2466,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
disko, diskl, 0,
root->root_key.objectid,
btrfs_ino(inode),
new_key.offset - datao);
new_key.offset - datao,
0);
BUG_ON(ret);
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
@ -2977,7 +3017,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
{
int ret = 0;
int size;
u64 extent_offset;
u64 extent_item_pos;
struct btrfs_ioctl_logical_ino_args *loi;
struct btrfs_data_container *inodes = NULL;
struct btrfs_path *path = NULL;
@ -3008,15 +3048,17 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
}
ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
ret = -ENOENT;
if (ret < 0)
goto out;
extent_offset = loi->logical - key.objectid;
extent_item_pos = loi->logical - key.objectid;
ret = iterate_extent_inodes(root->fs_info, path, key.objectid,
extent_offset, build_ino_list, inodes);
extent_item_pos, build_ino_list,
inodes);
if (ret < 0)
goto out;
@ -3034,6 +3076,163 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
return ret;
}
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
struct btrfs_ioctl_balance_args *bargs)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;
bargs->flags = bctl->flags;
if (atomic_read(&fs_info->balance_running))
bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
if (atomic_read(&fs_info->balance_pause_req))
bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
if (atomic_read(&fs_info->balance_cancel_req))
bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
if (lock) {
spin_lock(&fs_info->balance_lock);
memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
spin_unlock(&fs_info->balance_lock);
} else {
memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
}
}
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
if (IS_ERR(bargs)) {
ret = PTR_ERR(bargs);
goto out;
}
if (bargs->flags & BTRFS_BALANCE_RESUME) {
if (!fs_info->balance_ctl) {
ret = -ENOTCONN;
goto out_bargs;
}
bctl = fs_info->balance_ctl;
spin_lock(&fs_info->balance_lock);
bctl->flags |= BTRFS_BALANCE_RESUME;
spin_unlock(&fs_info->balance_lock);
goto do_balance;
}
} else {
bargs = NULL;
}
if (fs_info->balance_ctl) {
ret = -EINPROGRESS;
goto out_bargs;
}
bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) {
ret = -ENOMEM;
goto out_bargs;
}
bctl->fs_info = fs_info;
if (arg) {
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
bctl->flags = bargs->flags;
} else {
/* balance everything - no filters */
bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
}
do_balance:
ret = btrfs_balance(bctl, bargs);
/*
* bctl is freed in __cancel_balance or in free_fs_info if
* restriper was paused all the way until unmount
*/
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
}
out_bargs:
kfree(bargs);
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
return ret;
}
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (cmd) {
case BTRFS_BALANCE_CTL_PAUSE:
return btrfs_pause_balance(root->fs_info);
case BTRFS_BALANCE_CTL_CANCEL:
return btrfs_cancel_balance(root->fs_info);
}
return -EINVAL;
}
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
void __user *arg)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
ret = -ENOTCONN;
goto out;
}
bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
if (!bargs) {
ret = -ENOMEM;
goto out;
}
update_ioctl_balance_args(fs_info, 1, bargs);
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
kfree(bargs);
out:
mutex_unlock(&fs_info->balance_mutex);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@ -3078,7 +3277,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_balance(root->fs_info->dev_root);
return btrfs_ioctl_balance(root, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
@ -3110,6 +3309,12 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_scrub_cancel(root, argp);
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp);
case BTRFS_IOC_BALANCE_CTL:
return btrfs_ioctl_balance_ctl(root, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
return btrfs_ioctl_balance_progress(root, argp);
}
return -ENOTTY;

View file

@ -109,6 +109,55 @@ struct btrfs_ioctl_fs_info_args {
__u64 reserved[124]; /* pad to 1k */
};
/* balance control ioctl modes */
#define BTRFS_BALANCE_CTL_PAUSE 1
#define BTRFS_BALANCE_CTL_CANCEL 2
/*
* this is packed, because it should be exactly the same as its disk
* byte order counterpart (struct btrfs_disk_balance_args)
*/
struct btrfs_balance_args {
__u64 profiles;
__u64 usage;
__u64 devid;
__u64 pstart;
__u64 pend;
__u64 vstart;
__u64 vend;
__u64 target;
__u64 flags;
__u64 unused[8];
} __attribute__ ((__packed__));
/* report balance progress to userspace */
struct btrfs_balance_progress {
__u64 expected; /* estimated # of chunks that will be
* relocated to fulfill the request */
__u64 considered; /* # of chunks we have considered so far */
__u64 completed; /* # of chunks relocated so far */
};
#define BTRFS_BALANCE_STATE_RUNNING (1ULL << 0)
#define BTRFS_BALANCE_STATE_PAUSE_REQ (1ULL << 1)
#define BTRFS_BALANCE_STATE_CANCEL_REQ (1ULL << 2)
struct btrfs_ioctl_balance_args {
__u64 flags; /* in/out */
__u64 state; /* out */
struct btrfs_balance_args data; /* in/out */
struct btrfs_balance_args meta; /* in/out */
struct btrfs_balance_args sys; /* in/out */
struct btrfs_balance_progress stat; /* out */
__u64 unused[72]; /* pad to 1k */
};
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
@ -272,6 +321,11 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_BALANCE_CTL _IOW(BTRFS_IOCTL_MAGIC, 33, int)
#define BTRFS_IOC_BALANCE_PROGRESS _IOR(BTRFS_IOCTL_MAGIC, 34, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \

View file

@ -33,6 +33,14 @@ void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
*/
void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (eb->lock_nested && current->pid == eb->lock_owner) {
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
if (rw == BTRFS_WRITE_LOCK) {
if (atomic_read(&eb->blocking_writers) == 0) {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
@ -57,6 +65,14 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw)
*/
void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (&eb->lock_nested && current->pid == eb->lock_owner) {
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
if (rw == BTRFS_WRITE_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
write_lock(&eb->lock);
@ -81,12 +97,25 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
again:
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) &&
current->pid == eb->lock_owner) {
/*
* This extent is already write-locked by our thread. We allow
* an additional read lock to be added because it's for the same
* thread. btrfs_find_all_roots() depends on this as it may be
* called on a partly (write-)locked tree.
*/
BUG_ON(eb->lock_nested);
eb->lock_nested = 1;
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
atomic_inc(&eb->read_locks);
@ -129,6 +158,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
}
atomic_inc(&eb->write_locks);
atomic_inc(&eb->spinning_writers);
eb->lock_owner = current->pid;
return 1;
}
@ -137,6 +167,15 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
@ -149,6 +188,15 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
if (eb->lock_nested) {
read_lock(&eb->lock);
if (eb->lock_nested && current->pid == eb->lock_owner) {
eb->lock_nested = 0;
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers))
@ -181,6 +229,7 @@ int btrfs_tree_lock(struct extent_buffer *eb)
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
atomic_inc(&eb->write_locks);
eb->lock_owner = current->pid;
return 0;
}

View file

@ -1604,12 +1604,12 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
num_bytes, parent,
btrfs_header_owner(leaf),
key.objectid, key.offset);
key.objectid, key.offset, 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset);
key.objectid, key.offset, 1);
BUG_ON(ret);
}
if (dirty)
@ -1778,21 +1778,23 @@ int replace_path(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
path->nodes[level]->start,
src->root_key.objectid, level - 1, 0);
src->root_key.objectid, level - 1, 0,
1);
BUG_ON(ret);
ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
0);
0, 1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
path->nodes[level]->start,
src->root_key.objectid, level - 1, 0);
src->root_key.objectid, level - 1, 0,
1);
BUG_ON(ret);
ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
0, dest->root_key.objectid, level - 1,
0);
0, 1);
BUG_ON(ret);
btrfs_unlock_up_safe(path, 0);
@ -2244,7 +2246,7 @@ int merge_reloc_roots(struct reloc_control *rc)
} else {
list_del_init(&reloc_root->root_list);
}
btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
}
if (found) {
@ -2558,7 +2560,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
node->eb->start, blocksize,
upper->eb->start,
btrfs_header_owner(upper->eb),
node->level, 0);
node->level, 0, 1);
BUG_ON(ret);
ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
@ -2947,9 +2949,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
while (index <= last_index) {
mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
mutex_unlock(&inode->i_mutex);
if (ret)
goto out;

View file

@ -25,6 +25,7 @@
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "check-integrity.h"
/*
* This is only the first step towards a full-features scrub. It reads all
@ -309,7 +310,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
u8 ref_level;
unsigned long ptr = 0;
const int bufsize = 4096;
u64 extent_offset;
u64 extent_item_pos;
path = btrfs_alloc_path();
@ -329,12 +330,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
if (ret < 0)
goto out;
extent_offset = swarn.logical - found_key.objectid;
extent_item_pos = swarn.logical - found_key.objectid;
swarn.extent_item_size = found_key.offset;
eb = path->nodes[0];
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, path->slots[0]);
btrfs_release_path(path);
if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
@ -351,7 +353,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
} else {
swarn.path = path;
iterate_extent_inodes(fs_info, path, found_key.objectid,
extent_offset,
extent_item_pos,
scrub_print_warning_inode, &swarn);
}
@ -732,7 +734,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
bio_add_page(bio, page, PAGE_SIZE, 0);
bio->bi_end_io = scrub_fixup_end_io;
bio->bi_private = &complete;
submit_bio(rw, bio);
btrfsic_submit_bio(rw, bio);
/* this will also unplug the queue */
wait_for_completion(&complete);
@ -958,7 +960,7 @@ static int scrub_submit(struct scrub_dev *sdev)
sdev->curr = -1;
atomic_inc(&sdev->in_flight);
submit_bio(READ, sbio->bio);
btrfsic_submit_bio(READ, sbio->bio);
return 0;
}

View file

@ -163,8 +163,11 @@ enum {
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err,
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
Opt_check_integrity, Opt_check_integrity_including_extent_data,
Opt_check_integrity_print_mask,
Opt_err,
};
static match_table_t tokens = {
@ -199,6 +202,10 @@ static match_table_t tokens = {
{Opt_inode_cache, "inode_cache"},
{Opt_no_space_cache, "nospace_cache"},
{Opt_recovery, "recovery"},
{Opt_skip_balance, "skip_balance"},
{Opt_check_integrity, "check_int"},
{Opt_check_integrity_including_extent_data, "check_int_data"},
{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
{Opt_err, NULL},
};
@ -397,6 +404,40 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
printk(KERN_INFO "btrfs: enabling auto recovery");
btrfs_set_opt(info->mount_opt, RECOVERY);
break;
case Opt_skip_balance:
btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
break;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
case Opt_check_integrity_including_extent_data:
printk(KERN_INFO "btrfs: enabling check integrity"
" including extent data\n");
btrfs_set_opt(info->mount_opt,
CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
case Opt_check_integrity:
printk(KERN_INFO "btrfs: enabling check integrity\n");
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
case Opt_check_integrity_print_mask:
intarg = 0;
match_int(&args[0], &intarg);
if (intarg) {
info->check_integrity_print_mask = intarg;
printk(KERN_INFO "btrfs:"
" check_integrity_print_mask 0x%x\n",
info->check_integrity_print_mask);
}
break;
#else
case Opt_check_integrity_including_extent_data:
case Opt_check_integrity:
case Opt_check_integrity_print_mask:
printk(KERN_ERR "btrfs: support for check_integrity*"
" not compiled in!\n");
ret = -EINVAL;
goto out;
#endif
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@ -722,6 +763,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",autodefrag");
if (btrfs_test_opt(root, INODE_MAP_CACHE))
seq_puts(seq, ",inode_cache");
if (btrfs_test_opt(root, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
return 0;
}

View file

@ -36,6 +36,8 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
@ -108,8 +110,11 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
@ -321,6 +326,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
}
if (num_bytes) {
trace_btrfs_space_reservation(root->fs_info, "transaction",
(u64)h, num_bytes, 1);
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
}
@ -467,19 +474,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
while (count < 4) {
while (count < 2) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (cur &&
trans->transaction->delayed_refs.num_heads_ready > 64) {
trans->delayed_ref_updates = 0;
/*
* do a full flush if the transaction is trying
* to close
*/
if (trans->transaction->delayed_refs.flushing)
cur = 0;
btrfs_run_delayed_refs(trans, root, cur);
} else {
break;
@ -1393,9 +1393,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
btrfs_drop_snapshot(root, NULL, 0);
btrfs_drop_snapshot(root, NULL, 0, 0);
else
btrfs_drop_snapshot(root, NULL, 1);
btrfs_drop_snapshot(root, NULL, 1, 0);
}
return 0;
}

View file

@ -589,7 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root,
ins.objectid, ins.offset,
0, root->root_key.objectid,
key->objectid, offset);
key->objectid, offset, 0);
BUG_ON(ret);
} else {
/*

220
fs/btrfs/ulist.c Normal file
View file

@ -0,0 +1,220 @@
/*
* Copyright (C) 2011 STRATO AG
* written by Arne Jansen <sensille@gmx.net>
* Distributed under the GNU GPL license version 2.
*/
#include <linux/slab.h>
#include <linux/module.h>
#include "ulist.h"
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
* enumerating it.
* It is possible to store an auxiliary value along with the key.
*
* The implementation is preliminary and can probably be sped up
* significantly. A first step would be to store the values in an rbtree
* as soon as ULIST_SIZE is exceeded.
*
* A sample usage for ulists is the enumeration of directed graphs without
* visiting a node twice. The pseudo-code could look like this:
*
* ulist = ulist_alloc();
* ulist_add(ulist, root);
* elem = NULL;
*
* while ((elem = ulist_next(ulist, elem)) {
* for (all child nodes n in elem)
* ulist_add(ulist, n);
* do something useful with the node;
* }
* ulist_free(ulist);
*
* This assumes the graph nodes are adressable by u64. This stems from the
* usage for tree enumeration in btrfs, where the logical addresses are
* 64 bit.
*
* It is also useful for tree enumeration which could be done elegantly
* recursively, but is not possible due to kernel stack limitations. The
* loop would be similar to the above.
*/
/**
* ulist_init - freshly initialize a ulist
* @ulist: the ulist to initialize
*
* Note: don't use this function to init an already used ulist, use
* ulist_reinit instead.
*/
void ulist_init(struct ulist *ulist)
{
ulist->nnodes = 0;
ulist->nodes = ulist->int_nodes;
ulist->nodes_alloced = ULIST_SIZE;
}
EXPORT_SYMBOL(ulist_init);
/**
* ulist_fini - free up additionally allocated memory for the ulist
* @ulist: the ulist from which to free the additional memory
*
* This is useful in cases where the base 'struct ulist' has been statically
* allocated.
*/
void ulist_fini(struct ulist *ulist)
{
/*
* The first ULIST_SIZE elements are stored inline in struct ulist.
* Only if more elements are alocated they need to be freed.
*/
if (ulist->nodes_alloced > ULIST_SIZE)
kfree(ulist->nodes);
ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
}
EXPORT_SYMBOL(ulist_fini);
/**
* ulist_reinit - prepare a ulist for reuse
* @ulist: ulist to be reused
*
* Free up all additional memory allocated for the list elements and reinit
* the ulist.
*/
void ulist_reinit(struct ulist *ulist)
{
ulist_fini(ulist);
ulist_init(ulist);
}
EXPORT_SYMBOL(ulist_reinit);
/**
* ulist_alloc - dynamically allocate a ulist
* @gfp_mask: allocation flags to for base allocation
*
* The allocated ulist will be returned in an initialized state.
*/
struct ulist *ulist_alloc(unsigned long gfp_mask)
{
struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
if (!ulist)
return NULL;
ulist_init(ulist);
return ulist;
}
EXPORT_SYMBOL(ulist_alloc);
/**
* ulist_free - free dynamically allocated ulist
* @ulist: ulist to free
*
* It is not necessary to call ulist_fini before.
*/
void ulist_free(struct ulist *ulist)
{
if (!ulist)
return;
ulist_fini(ulist);
kfree(ulist);
}
EXPORT_SYMBOL(ulist_free);
/**
* ulist_add - add an element to the ulist
* @ulist: ulist to add the element to
* @val: value to add to ulist
* @aux: auxiliary value to store along with val
* @gfp_mask: flags to use for allocation
*
* Note: locking must be provided by the caller. In case of rwlocks write
* locking is needed
*
* Add an element to a ulist. The @val will only be added if it doesn't
* already exist. If it is added, the auxiliary value @aux is stored along with
* it. In case @val already exists in the ulist, @aux is ignored, even if
* it differs from the already stored value.
*
* ulist_add returns 0 if @val already exists in ulist and 1 if @val has been
* inserted.
* In case of allocation failure -ENOMEM is returned and the ulist stays
* unaltered.
*/
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask)
{
int i;
for (i = 0; i < ulist->nnodes; ++i) {
if (ulist->nodes[i].val == val)
return 0;
}
if (ulist->nnodes >= ulist->nodes_alloced) {
u64 new_alloced = ulist->nodes_alloced + 128;
struct ulist_node *new_nodes;
void *old = NULL;
/*
* if nodes_alloced == ULIST_SIZE no memory has been allocated
* yet, so pass NULL to krealloc
*/
if (ulist->nodes_alloced > ULIST_SIZE)
old = ulist->nodes;
new_nodes = krealloc(old, sizeof(*new_nodes) * new_alloced,
gfp_mask);
if (!new_nodes)
return -ENOMEM;
if (!old)
memcpy(new_nodes, ulist->int_nodes,
sizeof(ulist->int_nodes));
ulist->nodes = new_nodes;
ulist->nodes_alloced = new_alloced;
}
ulist->nodes[ulist->nnodes].val = val;
ulist->nodes[ulist->nnodes].aux = aux;
++ulist->nnodes;
return 1;
}
EXPORT_SYMBOL(ulist_add);
/**
* ulist_next - iterate ulist
* @ulist: ulist to iterate
* @prev: previously returned element or %NULL to start iteration
*
* Note: locking must be provided by the caller. In case of rwlocks only read
* locking is needed
*
* This function is used to iterate an ulist. The iteration is started with
* @prev = %NULL. It returns the next element from the ulist or %NULL when the
* end is reached. No guarantee is made with respect to the order in which
* the elements are returned. They might neither be returned in order of
* addition nor in ascending order.
* It is allowed to call ulist_add during an enumeration. Newly added items
* are guaranteed to show up in the running enumeration.
*/
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev)
{
int next;
if (ulist->nnodes == 0)
return NULL;
if (!prev)
return &ulist->nodes[0];
next = (prev - ulist->nodes) + 1;
if (next < 0 || next >= ulist->nnodes)
return NULL;
return &ulist->nodes[next];
}
EXPORT_SYMBOL(ulist_next);

68
fs/btrfs/ulist.h Normal file
View file

@ -0,0 +1,68 @@
/*
* Copyright (C) 2011 STRATO AG
* written by Arne Jansen <sensille@gmx.net>
* Distributed under the GNU GPL license version 2.
*
*/
#ifndef __ULIST__
#define __ULIST__
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
* enumerating it.
* It is possible to store an auxiliary value along with the key.
*
* The implementation is preliminary and can probably be sped up
* significantly. A first step would be to store the values in an rbtree
* as soon as ULIST_SIZE is exceeded.
*/
/*
* number of elements statically allocated inside struct ulist
*/
#define ULIST_SIZE 16
/*
* element of the list
*/
struct ulist_node {
u64 val; /* value to store */
unsigned long aux; /* auxiliary value saved along with the val */
};
struct ulist {
/*
* number of elements stored in list
*/
unsigned long nnodes;
/*
* number of nodes we already have room for
*/
unsigned long nodes_alloced;
/*
* pointer to the array storing the elements. The first ULIST_SIZE
* elements are stored inline. In this case the it points to int_nodes.
* After exceeding ULIST_SIZE, dynamic memory is allocated.
*/
struct ulist_node *nodes;
/*
* inline storage space for the first ULIST_SIZE entries
*/
struct ulist_node int_nodes[ULIST_SIZE];
};
void ulist_init(struct ulist *ulist);
void ulist_fini(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(unsigned long gfp_mask);
void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask);
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
#endif

File diff suppressed because it is too large Load diff

View file

@ -186,6 +186,51 @@ struct map_lookup {
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
/*
* Restriper's general type filter
*/
#define BTRFS_BALANCE_DATA (1ULL << 0)
#define BTRFS_BALANCE_SYSTEM (1ULL << 1)
#define BTRFS_BALANCE_METADATA (1ULL << 2)
#define BTRFS_BALANCE_TYPE_MASK (BTRFS_BALANCE_DATA | \
BTRFS_BALANCE_SYSTEM | \
BTRFS_BALANCE_METADATA)
#define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)
/*
* Balance filters
*/
#define BTRFS_BALANCE_ARGS_PROFILES (1ULL << 0)
#define BTRFS_BALANCE_ARGS_USAGE (1ULL << 1)
#define BTRFS_BALANCE_ARGS_DEVID (1ULL << 2)
#define BTRFS_BALANCE_ARGS_DRANGE (1ULL << 3)
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be
* half-filled).
*/
#define BTRFS_BALANCE_ARGS_CONVERT (1ULL << 8)
#define BTRFS_BALANCE_ARGS_SOFT (1ULL << 9)
struct btrfs_balance_args;
struct btrfs_balance_progress;
struct btrfs_balance_control {
struct btrfs_fs_info *fs_info;
struct btrfs_balance_args data;
struct btrfs_balance_args meta;
struct btrfs_balance_args sys;
u64 flags;
struct btrfs_balance_progress stat;
};
int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
u64 end, u64 *length);
@ -228,9 +273,12 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_root *dev_root);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_recover_balance(struct btrfs_root *tree_root);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
#endif

View file

@ -200,7 +200,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
btrfs_end_transaction_throttle(trans, root);
btrfs_end_transaction(trans, root);
return ret;
}

View file

@ -16,6 +16,8 @@ struct btrfs_delayed_ref_node;
struct btrfs_delayed_tree_ref;
struct btrfs_delayed_data_ref;
struct btrfs_delayed_ref_head;
struct btrfs_block_group_cache;
struct btrfs_free_cluster;
struct map_lookup;
struct extent_buffer;
@ -44,6 +46,17 @@ struct extent_buffer;
obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \
(obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-"
#define BTRFS_GROUP_FLAGS \
{ BTRFS_BLOCK_GROUP_DATA, "DATA"}, \
{ BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \
{ BTRFS_BLOCK_GROUP_METADATA, "METADATA"}, \
{ BTRFS_BLOCK_GROUP_RAID0, "RAID0"}, \
{ BTRFS_BLOCK_GROUP_RAID1, "RAID1"}, \
{ BTRFS_BLOCK_GROUP_DUP, "DUP"}, \
{ BTRFS_BLOCK_GROUP_RAID10, "RAID10"}
#define BTRFS_UUID_SIZE 16
TRACE_EVENT(btrfs_transaction_commit,
TP_PROTO(struct btrfs_root *root),
@ -621,6 +634,34 @@ TRACE_EVENT(btrfs_cow_block,
__entry->cow_level)
);
TRACE_EVENT(btrfs_space_reservation,
TP_PROTO(struct btrfs_fs_info *fs_info, char *type, u64 val,
u64 bytes, int reserve),
TP_ARGS(fs_info, type, val, bytes, reserve),
TP_STRUCT__entry(
__array( u8, fsid, BTRFS_UUID_SIZE )
__string( type, type )
__field( u64, val )
__field( u64, bytes )
__field( int, reserve )
),
TP_fast_assign(
memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
__assign_str(type, type);
__entry->val = val;
__entry->bytes = bytes;
__entry->reserve = reserve;
),
TP_printk("%pU: %s: %Lu %s %Lu", __entry->fsid, __get_str(type),
__entry->val, __entry->reserve ? "reserve" : "release",
__entry->bytes)
);
DECLARE_EVENT_CLASS(btrfs__reserved_extent,
TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
@ -659,6 +700,168 @@ DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free,
TP_ARGS(root, start, len)
);
TRACE_EVENT(find_free_extent,
TP_PROTO(struct btrfs_root *root, u64 num_bytes, u64 empty_size,
u64 data),
TP_ARGS(root, num_bytes, empty_size, data),
TP_STRUCT__entry(
__field( u64, root_objectid )
__field( u64, num_bytes )
__field( u64, empty_size )
__field( u64, data )
),
TP_fast_assign(
__entry->root_objectid = root->root_key.objectid;
__entry->num_bytes = num_bytes;
__entry->empty_size = empty_size;
__entry->data = data;
),
TP_printk("root = %Lu(%s), len = %Lu, empty_size = %Lu, "
"flags = %Lu(%s)", show_root_type(__entry->root_objectid),
__entry->num_bytes, __entry->empty_size, __entry->data,
__print_flags((unsigned long)__entry->data, "|",
BTRFS_GROUP_FLAGS))
);
DECLARE_EVENT_CLASS(btrfs__reserve_extent,
TP_PROTO(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group, u64 start,
u64 len),
TP_ARGS(root, block_group, start, len),
TP_STRUCT__entry(
__field( u64, root_objectid )
__field( u64, bg_objectid )
__field( u64, flags )
__field( u64, start )
__field( u64, len )
),
TP_fast_assign(
__entry->root_objectid = root->root_key.objectid;
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->start = start;
__entry->len = len;
),
TP_printk("root = %Lu(%s), block_group = %Lu, flags = %Lu(%s), "
"start = %Lu, len = %Lu",
show_root_type(__entry->root_objectid), __entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS),
__entry->start, __entry->len)
);
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
TP_PROTO(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group, u64 start,
u64 len),
TP_ARGS(root, block_group, start, len)
);
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
TP_PROTO(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group, u64 start,
u64 len),
TP_ARGS(root, block_group, start, len)
);
TRACE_EVENT(btrfs_find_cluster,
TP_PROTO(struct btrfs_block_group_cache *block_group, u64 start,
u64 bytes, u64 empty_size, u64 min_bytes),
TP_ARGS(block_group, start, bytes, empty_size, min_bytes),
TP_STRUCT__entry(
__field( u64, bg_objectid )
__field( u64, flags )
__field( u64, start )
__field( u64, bytes )
__field( u64, empty_size )
__field( u64, min_bytes )
),
TP_fast_assign(
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->start = start;
__entry->bytes = bytes;
__entry->empty_size = empty_size;
__entry->min_bytes = min_bytes;
),
TP_printk("block_group = %Lu, flags = %Lu(%s), start = %Lu, len = %Lu,"
" empty_size = %Lu, min_bytes = %Lu", __entry->bg_objectid,
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS), __entry->start,
__entry->bytes, __entry->empty_size, __entry->min_bytes)
);
TRACE_EVENT(btrfs_failed_cluster_setup,
TP_PROTO(struct btrfs_block_group_cache *block_group),
TP_ARGS(block_group),
TP_STRUCT__entry(
__field( u64, bg_objectid )
),
TP_fast_assign(
__entry->bg_objectid = block_group->key.objectid;
),
TP_printk("block_group = %Lu", __entry->bg_objectid)
);
TRACE_EVENT(btrfs_setup_cluster,
TP_PROTO(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 size, int bitmap),
TP_ARGS(block_group, cluster, size, bitmap),
TP_STRUCT__entry(
__field( u64, bg_objectid )
__field( u64, flags )
__field( u64, start )
__field( u64, max_size )
__field( u64, size )
__field( int, bitmap )
),
TP_fast_assign(
__entry->bg_objectid = block_group->key.objectid;
__entry->flags = block_group->flags;
__entry->start = cluster->window_start;
__entry->max_size = cluster->max_size;
__entry->size = size;
__entry->bitmap = bitmap;
),
TP_printk("block_group = %Lu, flags = %Lu(%s), window_start = %Lu, "
"size = %Lu, max_size = %Lu, bitmap = %d",
__entry->bg_objectid,
__entry->flags,
__print_flags((unsigned long)__entry->flags, "|",
BTRFS_GROUP_FLAGS), __entry->start,
__entry->size, __entry->max_size, __entry->bitmap)
);
#endif /* _TRACE_BTRFS_H */
/* This part must be outside protection */