Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull large btrfs update from Chris Mason:
 "This pull request is very large, and the two main features in here
  have been under testing/devel for quite a while.

  We have subvolume quotas from the strato developers.  This enables
  full tracking of how many blocks are allocated to each subvolume (and
  all snapshots) and you can set limits on a per-subvolume basis.  You
  can also create quota groups and toss multiple subvolumes into a big
  group.  It's everything you need to be a web hosting company and give
  each user their own subvolume.

  The userland side of the quotas is being refreshed, they'll send out
  details on where to grab it soon.

  Next is the kernel side of btrfs send/receive from Alexander Block.
  This leverages the same infrastructure as the quota code to figure out
  relationships between blocks and their owners.  It can then compute
  the difference between two snapshots and sends the diffs in a neutral
  format into userland.

  The basic model:

        create a snapshot
        send that snapshot as the initial backup
        make changes
        create a second snapshot
        send the incremental as a backup
        delete the first snapshot
        (use the second snapshot for the next incremental)

  The receive portion is all in userland, and in the 'next' branch of my
  btrfs-progs repo.

  There's still some work to do in terms of optimizing the send side
  from kernel to userland.  The really important part is figuring out
  how two snapshots are different, and this is where we are
  concentrating right now.  The initial send of a dataset is a little
  slower than tar, but the incremental sends are dramatically faster
  than what rsync can do.

  On top of all of that, we have a nice queue of fixes, cleanups and
  optimizations."

Fix up trivial modify/del conflict in fs/btrfs/ioctl.c

Also fix up semantic conflict in fs/btrfs/send.c: the interface to
dentry_open() changed in commit 765927b2d5 ("switch dentry_open() to
struct path, make it grab references itself"), and since it now grabs
whatever references it needs, we should no longer do the mntget() on the
mnt (and we need to dput() the dentry reference we took).

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (65 commits)
  Btrfs: uninit variable fixes in send/receive
  Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive
  Btrfs: add btrfs_compare_trees function
  Btrfs: introduce subvol uuids and times
  Btrfs: make iref_to_path non static
  Btrfs: add a barrier before a waitqueue_active check
  Btrfs: call the ordered free operation without any locks held
  Btrfs: Check INCOMPAT flags on remount and add helper function
  Btrfs: add helper for tree enumeration
  btrfs: allow cross-subvolume file clone
  Btrfs: improve multi-thread buffer read
  Btrfs: make btrfs's allocation smoothly with preallocation
  Btrfs: lock the transition from dirty to writeback for an eb
  Btrfs: fix potential race in extent buffer freeing
  Btrfs: don't return true in releasepage unless we actually freed the eb
  Btrfs: suppress printk() if all device I/O stats are zero
  Btrfs: remove unwanted printk() for btrfs device I/O stats
  Btrfs: rewrite BTRFS_SETGET_FUNCS
  Btrfs: zero unused bytes in inode item
  Btrfs: kill free_space pointer from inode structure
  ...

Conflicts:
	fs/btrfs/ioctl.c
This commit is contained in:
Linus Torvalds 2012-07-26 14:48:55 -07:00
commit e2aed8dfa5
35 changed files with 8691 additions and 633 deletions

View file

@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o
reada.o backref.o ulist.o qgroup.o send.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

View file

@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers,
work->ordered_func(work);
/* now take the lock again and call the freeing code */
/* now take the lock again and drop our item from the list */
spin_lock(&workers->order_lock);
list_del(&work->order_list);
spin_unlock(&workers->order_lock);
/*
* we don't want to call the ordered free functions
* with the lock held though
*/
work->ordered_free(work);
spin_lock(&workers->order_lock);
}
spin_unlock(&workers->order_lock);

View file

@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist *refs, struct ulist *roots,
const u64 *extent_item_pos)
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos)
{
struct btrfs_key key;
struct btrfs_path *path;
@ -837,7 +836,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node);
goto again;
}
ret = __add_delayed_refs(head, delayed_ref_seq,
ret = __add_delayed_refs(head, time_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
if (ret) {
@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
*/
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **leafs,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
{
struct ulist *tmp;
@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, *leafs, tmp, extent_item_pos);
ulist_free(tmp);
@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots)
u64 time_seq, struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, tmp, *roots, NULL);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
@ -1125,10 +1122,10 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
* required for the path to fit into the buffer. in that case, the returned
* value will be smaller than dest. callers must check this!
*/
static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref,
struct extent_buffer *eb_in, u64 parent,
char *dest, u32 size)
char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref,
struct extent_buffer *eb_in, u64 parent,
char *dest, u32 size)
{
u32 len;
int slot;
@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
struct seq_list seq_elem = {};
struct seq_list tree_mod_seq_elem = {};
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
struct btrfs_delayed_ref_root *delayed_refs = NULL;
pr_debug("resolving all inodes for extent %llu\n",
extent_item_objectid);
@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
btrfs_get_delayed_seq(delayed_refs, &seq_elem);
spin_unlock(&delayed_refs->lock);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
seq_elem.seq, tree_mod_seq_elem.seq, &refs,
tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
if (ret)
goto out;
@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
seq_elem.seq,
tree_mod_seq_elem.seq, &roots);
tree_mod_seq_elem.seq, &roots);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_put_delayed_seq(delayed_refs, &seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
}
@ -1543,7 +1531,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
ipath->fspath->bytes_left - s_ptr : 0;
fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
fspath = iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb,
fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb,
inum, fspath_min, bytes_left);
if (IS_ERR(fspath))
return PTR_ERR(fspath);

View file

@ -21,6 +21,7 @@
#include "ioctl.h"
#include "ulist.h"
#include "extent_io.h"
#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
@ -58,8 +59,10 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots);
u64 time_seq, struct ulist **roots);
char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref, struct extent_buffer *eb,
u64 parent, char *dest, u32 size);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,

View file

@ -87,9 +87,6 @@ struct btrfs_inode {
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
@ -191,11 +188,14 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
BTRFS_I(inode)->disk_i_size = size;
}
static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
struct inode *inode)
static inline bool btrfs_is_free_space_inode(struct inode *inode)
{
if (root == root->fs_info->tree_root ||
BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (root == root->fs_info->tree_root &&
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
return true;
if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}

View file

@ -1032,6 +1032,7 @@ static int btrfsic_process_metablock(
struct btrfs_disk_key *disk_key;
u8 type;
u32 item_offset;
u32 item_size;
if (disk_item_offset + sizeof(struct btrfs_item) >
sf->block_ctx->len) {
@ -1047,6 +1048,7 @@ static int btrfsic_process_metablock(
disk_item_offset,
sizeof(struct btrfs_item));
item_offset = le32_to_cpu(disk_item.offset);
item_size = le32_to_cpu(disk_item.size);
disk_key = &disk_item.key;
type = disk_key->type;
@ -1057,14 +1059,13 @@ static int btrfsic_process_metablock(
root_item_offset = item_offset +
offsetof(struct btrfs_leaf, items);
if (root_item_offset +
sizeof(struct btrfs_root_item) >
if (root_item_offset + item_size >
sf->block_ctx->len)
goto leaf_item_out_of_bounce_error;
btrfsic_read_from_block_data(
sf->block_ctx, &root_item,
root_item_offset,
sizeof(struct btrfs_root_item));
item_size);
next_bytenr = le64_to_cpu(root_item.bytenr);
sf->error =

File diff suppressed because it is too large Load diff

View file

@ -91,6 +91,9 @@ struct btrfs_ordered_sum;
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
/* holds quota configuration and tracking */
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
/* orphan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
@ -709,6 +712,36 @@ struct btrfs_root_item {
struct btrfs_disk_key drop_progress;
u8 drop_level;
u8 level;
/*
* The following fields appear after subvol_uuids+subvol_times
* were introduced.
*/
/*
* This generation number is used to test if the new fields are valid
* and up to date while reading the root item. Every time the root item
* is written out, the "generation" field is copied into this field. If
* anyone ever mounted the fs with an older kernel, we will have
* mismatching generation values here and thus must invalidate the
* new fields. See btrfs_update_root and btrfs_find_last_root for
* details.
* the offset of generation_v2 is also used as the start for the memset
* when invalidating the fields.
*/
__le64 generation_v2;
u8 uuid[BTRFS_UUID_SIZE];
u8 parent_uuid[BTRFS_UUID_SIZE];
u8 received_uuid[BTRFS_UUID_SIZE];
__le64 ctransid; /* updated when an inode changes */
__le64 otransid; /* trans when created */
__le64 stransid; /* trans when sent. non-zero for received subvol */
__le64 rtransid; /* trans when received. non-zero for received subvol */
struct btrfs_timespec ctime;
struct btrfs_timespec otime;
struct btrfs_timespec stime;
struct btrfs_timespec rtime;
__le64 reserved[8]; /* for future */
} __attribute__ ((__packed__));
/*
@ -883,6 +916,72 @@ struct btrfs_block_group_item {
__le64 flags;
} __attribute__ ((__packed__));
/*
* is subvolume quota turned on?
*/
#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
/*
* SCANNING is set during the initialization phase
*/
#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1)
/*
* Some qgroup entries are known to be out of date,
* either because the configuration has changed in a way that
* makes a rescan necessary, or because the fs has been mounted
* with a non-qgroup-aware version.
* Turning quota off and on again makes it inconsistent, too.
*/
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
#define BTRFS_QGROUP_STATUS_VERSION 1
/*
 * Packed record of little-endian 64-bit fields describing the overall
 * qgroup state.
 * NOTE(review): presumably the payload stored under
 * BTRFS_QGROUP_STATUS_KEY -- confirm in qgroup.c.
 */
struct btrfs_qgroup_status_item {
/*
 * format version of this item.
 * NOTE(review): presumably set to BTRFS_QGROUP_STATUS_VERSION -- confirm
 * against the code that writes the item.
 */
__le64 version;
/*
 * The generation is updated during every commit. As older
 * versions of btrfs are not aware of qgroups, it will be
 * possible to detect inconsistencies by checking the
 * generation at mount time.
 */
__le64 generation;
/* BTRFS_QGROUP_STATUS_FLAG_* bits (ON, SCANNING, INCONSISTENT) */
__le64 flags;
/*
 * Only used during scanning to record the progress
 * of the scan. It contains a logical address.
 */
__le64 scan;
} __attribute__ ((__packed__));
/*
 * Packed record of five little-endian 64-bit counters for one qgroup.
 * NOTE(review): presumably the payload stored under BTRFS_QGROUP_INFO_KEY
 * ("currently used space of the qgroup") -- confirm in qgroup.c.
 */
struct btrfs_qgroup_info_item {
/* NOTE(review): presumably the transaction generation of the last update */
__le64 generation;
/*
 * NOTE(review): rfer/excl look like "referenced" and "exclusive" usage,
 * with the *_cmpr variants being the compressed counterparts -- verify
 * against the accounting code before relying on this.
 */
__le64 rfer;
__le64 rfer_cmpr;
__le64 excl;
__le64 excl_cmpr;
} __attribute__ ((__packed__));
/* flags definition for qgroup limits */
#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
/*
 * Packed record of little-endian 64-bit limit values for one qgroup.
 * NOTE(review): presumably the payload stored under BTRFS_QGROUP_LIMIT_KEY
 * ("user configured limits for the qgroup") -- confirm in qgroup.c.
 */
struct btrfs_qgroup_limit_item {
/*
 * BTRFS_QGROUP_LIMIT_* bits; only updated when any of the other
 * values change
 */
__le64 flags;
/*
 * NOTE(review): max_* / rsv_* presumably pair with the
 * BTRFS_QGROUP_LIMIT_MAX_* / BTRFS_QGROUP_LIMIT_RSV_* flag bits for the
 * rfer and excl counters -- confirm the exact semantics in qgroup.c.
 */
__le64 max_rfer;
__le64 max_excl;
__le64 rsv_rfer;
__le64 rsv_excl;
} __attribute__ ((__packed__));
struct btrfs_space_info {
u64 flags;
@ -1030,6 +1129,13 @@ struct btrfs_block_group_cache {
struct list_head cluster_list;
};
/*
 * Sequence-number list element. Instances are handed to
 * btrfs_get_tree_mod_seq()/btrfs_put_tree_mod_seq() and are looked up on
 * fs_info->tree_mod_seq_list, where the first entry's seq is compared
 * against a delayed ref's seq.
 */
struct seq_list {
/* linkage into a list such as fs_info->tree_mod_seq_list */
struct list_head list;
/* sequence number recorded for this element */
u64 seq;
};
/* fs_info */
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
@ -1044,6 +1150,7 @@ struct btrfs_fs_info {
struct btrfs_root *dev_root;
struct btrfs_root *fs_root;
struct btrfs_root *csum_root;
struct btrfs_root *quota_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@ -1144,6 +1251,8 @@ struct btrfs_fs_info {
spinlock_t tree_mod_seq_lock;
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
wait_queue_head_t tree_mod_seq_wait;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
@ -1240,6 +1349,8 @@ struct btrfs_fs_info {
*/
struct list_head space_info;
struct btrfs_space_info *data_sinfo;
struct reloc_control *reloc_ctl;
spinlock_t delalloc_lock;
@ -1296,6 +1407,29 @@ struct btrfs_fs_info {
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
#endif
/*
* quota information
*/
unsigned int quota_enabled:1;
/*
* quota_enabled only changes state after a commit. This holds the
* next state.
*/
unsigned int pending_quota_state:1;
/* is qgroup tracking in a consistent state? */
u64 qgroup_flags;
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
spinlock_t qgroup_lock;
/* list of dirty qgroups to be written at next commit */
struct list_head dirty_qgroups;
/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
u64 qgroup_seq;
/* filesystem state */
u64 fs_state;
@ -1416,6 +1550,8 @@ struct btrfs_root {
dev_t anon_dev;
int force_cow;
spinlock_t root_times_lock;
};
struct btrfs_ioctl_defrag_range_args {
@ -1525,6 +1661,30 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_ITEM_KEY 216
#define BTRFS_CHUNK_ITEM_KEY 228
/*
* Records the overall state of the qgroups.
* There's only one instance of this key present,
* (0, BTRFS_QGROUP_STATUS_KEY, 0)
*/
#define BTRFS_QGROUP_STATUS_KEY 240
/*
* Records the currently used space of the qgroup.
* One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
*/
#define BTRFS_QGROUP_INFO_KEY 242
/*
* Contains the user configured limits for the qgroup.
* One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
*/
#define BTRFS_QGROUP_LIMIT_KEY 244
/*
* Records the child-parent relationship of qgroups. For
* each relation, 2 keys are present:
* (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
* (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
*/
#define BTRFS_QGROUP_RELATION_KEY 246
#define BTRFS_BALANCE_ITEM_KEY 248
/*
@ -1621,13 +1781,54 @@ static inline void btrfs_init_map_token (struct btrfs_map_token *token)
offsetof(type, member), \
sizeof(((type *)0)->member)))
#ifndef BTRFS_SETGET_FUNCS
#define DECLARE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off, \
struct btrfs_map_token *token); \
void btrfs_set_token_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val, \
struct btrfs_map_token *token); \
static inline u##bits btrfs_get_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off) \
{ \
return btrfs_get_token_##bits(eb, ptr, off, NULL); \
} \
static inline void btrfs_set_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val) \
{ \
btrfs_set_token_##bits(eb, ptr, off, val, NULL); \
}
DECLARE_BTRFS_SETGET_BITS(8)
DECLARE_BTRFS_SETGET_BITS(16)
DECLARE_BTRFS_SETGET_BITS(32)
DECLARE_BTRFS_SETGET_BITS(64)
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, struct btrfs_map_token *token); \
void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token);\
void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val);
#endif
static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
} \
static inline void btrfs_set_##name(struct extent_buffer *eb, type *s, \
u##bits val) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
static inline u##bits btrfs_token_##name(struct extent_buffer *eb, type *s, \
struct btrfs_map_token *token) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
return btrfs_get_token_##bits(eb, s, offsetof(type, member), token); \
} \
static inline void btrfs_set_token_##name(struct extent_buffer *eb, \
type *s, u##bits val, \
struct btrfs_map_token *token) \
{ \
BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
btrfs_set_token_##bits(eb, s, offsetof(type, member), val, token); \
}
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
static inline u##bits btrfs_##name(struct extent_buffer *eb) \
@ -2189,6 +2390,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
last_snapshot, 64);
BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
generation_v2, 64);
BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
ctransid, 64);
BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
otransid, 64);
BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
stransid, 64);
BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
rtransid, 64);
static inline bool btrfs_root_readonly(struct btrfs_root *root)
{
@ -2465,6 +2676,49 @@ static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
sizeof(val));
}
/* btrfs_qgroup_status_item */
BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
generation, 64);
BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
version, 64);
BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
flags, 64);
BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
scan, 64);
/* btrfs_qgroup_info_item */
BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
generation, 64);
BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item,
rfer_cmpr, 64);
BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64);
BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item,
excl_cmpr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
struct btrfs_qgroup_info_item, generation, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item,
rfer, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr,
struct btrfs_qgroup_info_item, rfer_cmpr, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item,
excl, 64);
BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr,
struct btrfs_qgroup_info_item, excl_cmpr, 64);
/* btrfs_qgroup_limit_item */
BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
flags, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item,
max_rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item,
max_excl, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item,
rsv_rfer, 64);
BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item,
rsv_excl, 64);
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
@ -2607,7 +2861,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
@ -2661,6 +2914,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@ -2680,6 +2935,21 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_key *max_key,
struct btrfs_path *path, int cache_only,
u64 min_trans);
/*
 * Kind of difference reported for a key; passed as the "result" argument
 * of a btrfs_changed_cb_t callback by btrfs_compare_trees().
 * NOTE(review): which of the left/right roots NEW and DELETED are relative
 * to is not visible here -- confirm in ctree.c.
 */
enum btrfs_compare_tree_result {
BTRFS_COMPARE_TREE_NEW,
BTRFS_COMPARE_TREE_DELETED,
BTRFS_COMPARE_TREE_CHANGED,
};
typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
struct btrfs_root *right_root,
struct btrfs_path *left_path,
struct btrfs_path *right_path,
struct btrfs_key *key,
enum btrfs_compare_tree_result result,
void *ctx);
int btrfs_compare_trees(struct btrfs_root *left_root,
struct btrfs_root *right_root,
btrfs_changed_cb_t cb, void *ctx);
int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
@ -2711,6 +2981,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
ins_len, int cow);
int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq);
int btrfs_search_slot_for_read(struct btrfs_root *root,
struct btrfs_key *key, struct btrfs_path *p,
int find_higher, int return_any);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
@ -2793,11 +3066,22 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
kfree(fs_info->quota_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kfree(fs_info);
}
/* tree mod log functions from ctree.c */
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
/*
 * Atomically increment fs_info->tree_mod_seq and return the value after the
 * increment.
 * NOTE(review): tree_mod_seq is declared atomic_t while this returns u64, so
 * the result is an int widened by the implicit conversion -- confirm callers
 * do not rely on more than the atomic_t range.
 */
static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic_inc_return(&fs_info->tree_mod_seq);
}
/* root-item.c */
int btrfs_find_root_ref(struct btrfs_root *tree_root,
struct btrfs_path *path,
@ -2819,6 +3103,9 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *key,
struct btrfs_root_item *item);
void btrfs_read_root_item(struct btrfs_root *root,
struct extent_buffer *eb, int slot,
struct btrfs_root_item *item);
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@ -2826,6 +3113,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
void btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
/* dir-item.c */
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
@ -3061,6 +3350,23 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno);
#define btrfs_set_fs_incompat(__fs_info, opt) \
__btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
/*
 * Set an incompat feature flag in the in-memory super block copy.
 *
 * Reads the current incompat flags from fs_info->super_copy and, when none
 * of the bits in @flag are set yet, stores the flags back with @flag OR-ed
 * in. If any bit of @flag is already present the super block copy is left
 * untouched.
 */
static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
					   u64 flag)
{
	struct btrfs_super_block *sb_copy = fs_info->super_copy;
	u64 incompat = btrfs_super_incompat_flags(sb_copy);

	/* already advertised: nothing to write */
	if (incompat & flag)
		return;

	btrfs_set_super_incompat_flags(sb_copy, incompat | flag);
}
#define btrfs_abort_transaction(trans, root, errno) \
do { \
__btrfs_abort_transaction(trans, root, __func__, \
@ -3156,17 +3462,49 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
/* delayed seq elem */
struct seq_list {
/* qgroup.c */
struct qgroup_update {
struct list_head list;
u64 seq;
u32 flags;
struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op;
};
void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
static inline int is_fstree(u64 rootid)
{

View file

@ -62,6 +62,7 @@ static inline void btrfs_init_delayed_node(
INIT_LIST_HEAD(&delayed_node->n_list);
INIT_LIST_HEAD(&delayed_node->p_list);
delayed_node->bytes_reserved = 0;
memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
}
static inline int btrfs_is_continuous_delayed_item(
@ -1113,8 +1114,8 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
* Returns < 0 on error and returns with an aborted transaction with any
* outstanding delayed items cleaned up.
*/
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr)
{
struct btrfs_root *curr_root = root;
struct btrfs_delayed_root *delayed_root;
@ -1122,6 +1123,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
bool count = (nr > 0);
if (trans->aborted)
return -EIO;
@ -1137,7 +1139,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
while (curr_node) {
while (curr_node && (!count || (count && nr--))) {
curr_root = curr_node->root;
ret = btrfs_insert_delayed_items(trans, path, curr_root,
curr_node);
@ -1149,6 +1151,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
path, curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
btrfs_abort_transaction(trans, root, ret);
break;
}
@ -1158,12 +1161,26 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
btrfs_release_delayed_node(prev_node);
}
if (curr_node)
btrfs_release_delayed_node(curr_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
/*
 * Run delayed items without a node limit.
 *
 * Forwards to __btrfs_run_delayed_items() with nr = -1; that helper only
 * counts nodes when nr > 0, so -1 means "keep going until no delayed node
 * is left". Returns whatever the helper returns.
 */
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
return __btrfs_run_delayed_items(trans, root, -1);
}
/*
 * Run delayed items for at most @nr delayed nodes.
 *
 * Forwards to __btrfs_run_delayed_items(). The helper only enforces the
 * limit when nr > 0; a zero or negative @nr therefore behaves like the
 * unlimited btrfs_run_delayed_items(). Returns whatever the helper returns.
 */
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr)
{
return __btrfs_run_delayed_items(trans, root, nr);
}
static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{

View file

@ -107,6 +107,8 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr);
void btrfs_balance_delayed_items(struct btrfs_root *root);

View file

@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
int ret = 0;
assert_spin_locked(&delayed_refs->lock);
if (list_empty(&delayed_refs->seq_head))
return 0;
elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
seq, elem->seq, delayed_refs);
return 1;
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is "
"%llu (%p)\n", seq, elem->seq, delayed_refs);
ret = 1;
}
}
return 0;
spin_unlock(&fs_info->tree_mod_seq_lock);
return ret;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}

View file

@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
/*
* seq number of delayed refs. We need to know if a backref was being
* added before the currently processed ref or afterwards.
*/
u64 seq;
/*
* seq_list holds a list of all seq numbers that are currently being
* added to the list. While walking backrefs (btrfs_find_all_roots,
* qgroups), which might take some time, no newer ref must be processed,
* as it might influence the outcome of the walk.
*/
struct list_head seq_head;
/*
* when the only refs we have in the list must not be processed, we want
* to wait for more refs to show up or for the end of backref walking.
*/
wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
@ -195,35 +175,29 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
++delayed_refs->seq;
return delayed_refs->seq;
}
static inline void
btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
assert_spin_locked(&delayed_refs->lock);
elem->seq = delayed_refs->seq;
list_add_tail(&elem->list, &delayed_refs->seq_head);
}
static inline void
btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
spin_lock(&delayed_refs->lock);
list_del(&elem->list);
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
 * Decide whether a delayed ref has to be tagged with a sequence number.
 *
 * Refs carrying a sequence number are held back while backrefs are being
 * walked.  That is only required for refs that belong to one of the fs trees,
 * and never for refs created on behalf of COW (@for_cow), which therefore
 * stay without a sequence number.
 *
 * Returns 1 if the ref needs a sequence number, 0 otherwise.
 */
static inline int need_ref_seq(int for_cow, u64 rootid)
{
	int in_fs_tree;

	/*
	 * The comparison is done on signed values, so a rootid with the top
	 * bit set is treated as negative here.
	 */
	in_fs_tree = rootid == BTRFS_FS_TREE_OBJECTID ||
		     (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID;

	return !for_cow && in_fs_tree;
}
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.

View file

@ -407,7 +407,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
break;
}
if (failed && !ret)
if (failed && !ret && failed_mirror)
repair_eb_io_failure(root, eb, failed_mirror);
return ret;
@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->defrag_running = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
spin_lock_init(&root->root_times_lock);
}
static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
@ -1225,6 +1227,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
/*
 * btrfs_create_tree - create a new, empty tree and register it in the tree root
 * @trans:    transaction handle the allocation and insertion run under
 * @fs_info:  filesystem the new tree belongs to
 * @objectid: objectid used for the root key and as owner of the first leaf
 *
 * Allocates an in-memory root, gives it a single freshly allocated leaf as
 * its node and inserts the matching root item into the tree root.
 *
 * Returns the new root on success or an ERR_PTR() value on failure.  On
 * failure everything set up here is undone in memory: the leaf is unlocked,
 * the buffer references taken here are dropped and the root is freed.
 */
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     u64 objectid)
{
	struct extent_buffer *leaf;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root;
	struct btrfs_key key;
	int ret = 0;

	root = btrfs_alloc_root(fs_info);
	if (!root)
		return ERR_PTR(-ENOMEM);

	__setup_root(tree_root->nodesize, tree_root->leafsize,
		     tree_root->sectorsize, tree_root->stripesize,
		     root, fs_info, objectid);
	root->root_key.objectid = objectid;
	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
	root->root_key.offset = 0;

	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
				      0, objectid, NULL, 0, 0, 0);
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		/* no buffer to release yet, only the root itself */
		goto fail_root;
	}

	/* start from a zeroed header and fill in the identifying fields */
	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(leaf, leaf->start);
	btrfs_set_header_generation(leaf, trans->transid);
	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(leaf, objectid);
	root->node = leaf;

	write_extent_buffer(leaf, fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(leaf),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
			    BTRFS_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	root->commit_root = btrfs_root_node(root);
	root->track_dirty = 1;

	/* describe the single-leaf tree in the root item */
	root->root_item.flags = 0;
	root->root_item.byte_limit = 0;
	btrfs_set_root_bytenr(&root->root_item, leaf->start);
	btrfs_set_root_generation(&root->root_item, trans->transid);
	btrfs_set_root_level(&root->root_item, 0);
	btrfs_set_root_refs(&root->root_item, 1);
	btrfs_set_root_used(&root->root_item, leaf->len);
	btrfs_set_root_last_snapshot(&root->root_item, 0);
	btrfs_set_root_dirid(&root->root_item, 0);
	root->root_item.drop_level = 0;

	key.objectid = objectid;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
	if (ret)
		goto fail_leaf;

	btrfs_tree_unlock(leaf);

	return root;

fail_leaf:
	/*
	 * The leaf is still locked at this point (the success path above is
	 * what unlocks it).  root->node and root->commit_root are released
	 * separately, mirroring how the teardown paths in this file free both
	 * pointers of a root.
	 * NOTE(review): assumes btrfs_root_node() returned an extra reference
	 * for commit_root - confirm.
	 */
	btrfs_tree_unlock(leaf);
	free_extent_buffer(root->commit_root);
	free_extent_buffer(leaf);
fail_root:
	kfree(root);
	return ERR_PTR(ret);
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
@ -1326,6 +1404,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
u64 generation;
u32 blocksize;
int ret = 0;
int slot;
root = btrfs_alloc_root(fs_info);
if (!root)
@ -1352,9 +1431,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
if (ret == 0) {
l = path->nodes[0];
read_extent_buffer(l, &root->root_item,
btrfs_item_ptr_offset(l, path->slots[0]),
sizeof(root->root_item));
slot = path->slots[0];
btrfs_read_root_item(tree_root, l, slot, &root->root_item);
memcpy(&root->root_key, location, sizeof(*location));
}
btrfs_free_path(path);
@ -1396,6 +1474,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
return fs_info->dev_root;
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
return fs_info->csum_root;
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
return fs_info->quota_root ? fs_info->quota_root :
ERR_PTR(-ENOENT);
again:
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
@ -1823,6 +1904,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_extent_buffer(info->extent_root->commit_root);
free_extent_buffer(info->csum_root->node);
free_extent_buffer(info->csum_root->commit_root);
if (info->quota_root) {
free_extent_buffer(info->quota_root->node);
free_extent_buffer(info->quota_root->commit_root);
}
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
@ -1832,6 +1917,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
info->extent_root->commit_root = NULL;
info->csum_root->node = NULL;
info->csum_root->commit_root = NULL;
if (info->quota_root) {
info->quota_root->node = NULL;
info->quota_root->commit_root = NULL;
}
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
@ -1862,6 +1951,7 @@ int open_ctree(struct super_block *sb,
struct btrfs_root *csum_root;
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *quota_root;
struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
@ -1873,9 +1963,10 @@ int open_ctree(struct super_block *sb,
csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
if (!tree_root || !extent_root || !csum_root ||
!chunk_root || !dev_root) {
!chunk_root || !dev_root || !quota_root) {
err = -ENOMEM;
goto fail;
}
@ -1944,6 +2035,8 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
init_waitqueue_head(&fs_info->tree_mod_seq_wait);
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
@ -2032,6 +2125,13 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
spin_lock_init(&fs_info->qgroup_lock);
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@ -2244,7 +2344,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
if (ret) {
ret = -ENOMEM;
err = -ENOMEM;
goto fail_sb_buffer;
}
@ -2356,6 +2456,17 @@ int open_ctree(struct super_block *sb,
goto recovery_tree_root;
csum_root->track_dirty = 1;
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_QUOTA_TREE_OBJECTID, quota_root);
if (ret) {
kfree(quota_root);
quota_root = fs_info->quota_root = NULL;
} else {
quota_root->track_dirty = 1;
fs_info->quota_enabled = 1;
fs_info->pending_quota_state = 1;
}
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
@ -2415,6 +2526,9 @@ int open_ctree(struct super_block *sb,
" integrity check module %s\n", sb->s_id);
}
#endif
ret = btrfs_read_qgroup_config(fs_info);
if (ret)
goto fail_trans_kthread;
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0 &&
@ -2425,7 +2539,7 @@ int open_ctree(struct super_block *sb,
printk(KERN_WARNING "Btrfs log replay required "
"on RO media\n");
err = -EIO;
goto fail_trans_kthread;
goto fail_qgroup;
}
blocksize =
btrfs_level_size(tree_root,
@ -2434,7 +2548,7 @@ int open_ctree(struct super_block *sb,
log_tree_root = btrfs_alloc_root(fs_info);
if (!log_tree_root) {
err = -ENOMEM;
goto fail_trans_kthread;
goto fail_qgroup;
}
__setup_root(nodesize, leafsize, sectorsize, stripesize,
@ -2466,15 +2580,15 @@ int open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret) {
}
if (ret)
goto fail_trans_kthread;
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
printk(KERN_WARNING
"btrfs: failed to recover relocation\n");
err = -EINVAL;
goto fail_trans_kthread;
goto fail_qgroup;
}
}
@ -2484,10 +2598,10 @@ int open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
goto fail_qgroup;
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
goto fail_trans_kthread;
goto fail_qgroup;
}
if (sb->s_flags & MS_RDONLY)
@ -2511,6 +2625,8 @@ int open_ctree(struct super_block *sb,
return 0;
fail_qgroup:
btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
fail_cleaner:
@ -3109,6 +3225,8 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 2;
smp_mb();
btrfs_free_qgroup_config(root->fs_info);
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
@ -3128,6 +3246,10 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(fs_info->dev_root->commit_root);
free_extent_buffer(fs_info->csum_root->node);
free_extent_buffer(fs_info->csum_root->commit_root);
if (fs_info->quota_root) {
free_extent_buffer(fs_info->quota_root->node);
free_extent_buffer(fs_info->quota_root->commit_root);
}
btrfs_free_block_groups(fs_info);
@ -3258,7 +3380,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
static int btree_lock_page_hook(struct page *page, void *data,
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *))
{
struct inode *inode = page->mapping->host;

View file

@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
void btrfs_abort_devices(struct btrfs_root *root);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);

View file

@ -34,6 +34,8 @@
#include "locking.h"
#include "free-space-cache.h"
#undef SCRAMBLE_DELAYED_REFS
/*
* control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
@ -2217,6 +2219,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_head *locked_ref = NULL;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int count = 0;
int must_insert_reserved = 0;
@ -2255,7 +2258,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref = select_delayed_ref(locked_ref);
if (ref && ref->seq &&
btrfs_check_delayed_seq(delayed_refs, ref->seq)) {
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
/*
* there are still refs with lower seq numbers in the
* process of being added. Don't run this ref yet.
@ -2337,7 +2340,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
}
next:
do_chunk_alloc(trans, root->fs_info->extent_root,
do_chunk_alloc(trans, fs_info->extent_root,
2 * 1024 * 1024,
btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
@ -2347,21 +2350,99 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
return count;
}
static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
static void wait_for_more_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
unsigned long num_refs,
struct list_head *first_seq)
{
spin_unlock(&delayed_refs->lock);
pr_debug("waiting for more refs (num %ld, first %p)\n",
num_refs, first_seq);
wait_event(delayed_refs->seq_wait,
wait_event(fs_info->tree_mod_seq_wait,
num_refs != delayed_refs->num_entries ||
delayed_refs->seq_head.next != first_seq);
fs_info->tree_mod_seq_list.next != first_seq);
pr_debug("done waiting for more refs (num %ld, first %p)\n",
delayed_refs->num_entries, delayed_refs->seq_head.next);
delayed_refs->num_entries, fs_info->tree_mod_seq_list.next);
spin_lock(&delayed_refs->lock);
}
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree somewhere inside instead of at the
 * beginning.
 *
 * Descends from the root of @root, alternating between the left and the right
 * child (left first), and returns the bytenr of the last node visited.
 * Returns 0 when the tree is empty.
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	/* defined result for an empty tree instead of an indeterminate value */
	u64 middle = 0;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		if (alt)
			n = n->rb_left;
		else
			n = n->rb_right;

		/* flip direction for the next level */
		alt = 1 - alt;
	}
	return middle;
}
#endif
/*
 * Account every qgroup update queued on this transaction handle.
 *
 * The handle must either have a non-empty qgroup_ref_list together with a
 * non-zero delayed_ref_elem.seq, or neither of the two; any other combination
 * is logged and hits BUG().
 *
 * Every queued update is unlinked and freed.  Once one accounting call has
 * failed no further accounting is attempted, but the remaining entries are
 * still drained.  Afterwards the handle's tree mod seq element is released.
 *
 * Returns 0 on success or when there was nothing to do, otherwise the first
 * error returned by btrfs_qgroup_account_ref().
 */
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
					 struct btrfs_fs_info *fs_info)
{
	struct list_head *pending = &trans->qgroup_ref_list;
	struct qgroup_update *upd;
	int have_seq = trans->delayed_ref_elem.seq != 0;
	int err = 0;

	if (!list_empty(pending) != have_seq) {
		/* list without seq or seq without list */
		printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
			list_empty(pending) ? "" : " not",
			trans->delayed_ref_elem.seq);
		BUG();
	}

	if (!have_seq)
		return 0;

	while (!list_empty(pending)) {
		upd = list_first_entry(pending, struct qgroup_update, list);
		list_del(&upd->list);
		/* keep draining after an error, but stop accounting */
		if (err == 0)
			err = btrfs_qgroup_account_ref(trans, fs_info,
						       upd->node,
						       upd->extent_op);
		kfree(upd);
	}

	btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);

	return err;
}
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@ -2398,11 +2479,18 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2 * 1024 * 1024, btrfs_get_alloc_profile(root, 0),
CHUNK_ALLOC_NO_FORCE);
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
again:
consider_waiting = 0;
spin_lock(&delayed_refs->lock);
#ifdef SCRAMBLE_DELAYED_REFS
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
@ -2437,7 +2525,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
num_refs = delayed_refs->num_entries;
first_seq = root->fs_info->tree_mod_seq_list.next;
} else {
wait_for_more_refs(delayed_refs,
wait_for_more_refs(root->fs_info, delayed_refs,
num_refs, first_seq);
/*
* after waiting, things have changed. we
@ -2502,6 +2590,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
out:
spin_unlock(&delayed_refs->lock);
assert_qgroups_uptodate(trans);
return 0;
}
@ -2581,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
node = rb_prev(node);
if (node) {
int seq = ref->seq;
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
if (ref->bytenr == bytenr)
if (ref->bytenr == bytenr && ref->seq == seq)
goto out_unlock;
}
@ -2903,8 +2994,13 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
}
spin_lock(&block_group->lock);
if (block_group->cached != BTRFS_CACHE_FINISHED) {
/* We're not cached, don't bother trying to write stuff out */
if (block_group->cached != BTRFS_CACHE_FINISHED ||
!btrfs_test_opt(root, SPACE_CACHE)) {
/*
* don't bother trying to write stuff out _if_
* a) we're not cached,
* b) we're with nospace_cache mount option.
*/
dcs = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
goto out_put;
@ -3134,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
info->data_sinfo = found;
return 0;
}
@ -3263,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return get_alloc_profile(root, flags);
}
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
BTRFS_BLOCK_GROUP_DATA);
}
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
@ -3277,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 used;
int ret = 0, committed = 0, alloc_chunk = 1;
@ -3289,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
committed = 1;
}
data_sinfo = BTRFS_I(inode)->space_info;
data_sinfo = fs_info->data_sinfo;
if (!data_sinfo)
goto alloc;
@ -3330,10 +3423,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
goto commit_trans;
}
if (!data_sinfo) {
btrfs_set_inode_space_info(root, inode);
data_sinfo = BTRFS_I(inode)->space_info;
}
if (!data_sinfo)
data_sinfo = fs_info->data_sinfo;
goto again;
}
@ -3380,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
data_sinfo = BTRFS_I(inode)->space_info;
data_sinfo = root->fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
@ -3586,89 +3678,58 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
/*
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
bool wait_ordered)
static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 reserved;
u64 delalloc_bytes;
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
unsigned long progress;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_may_use;
progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
smp_mb();
if (root->fs_info->delalloc_bytes == 0) {
delalloc_bytes = root->fs_info->delalloc_bytes;
if (delalloc_bytes == 0) {
if (trans)
return 0;
return;
btrfs_wait_ordered_extents(root, 0, 0);
return 0;
return;
}
max_reclaim = min(reserved, to_reclaim);
nr_pages = max_t(unsigned long, nr_pages,
max_reclaim >> PAGE_CACHE_SHIFT);
while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
nr_pages = min_t(unsigned long, nr_pages,
root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
WB_REASON_FS_FREE_SPACE);
WB_REASON_FS_FREE_SPACE);
spin_lock(&space_info->lock);
if (reserved > space_info->bytes_may_use)
reclaimed += reserved - space_info->bytes_may_use;
reserved = space_info->bytes_may_use;
if (space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use + orig <=
space_info->total_bytes) {
spin_unlock(&space_info->lock);
break;
}
spin_unlock(&space_info->lock);
loops++;
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
if (wait_ordered && !trans) {
btrfs_wait_ordered_extents(root, 0, 0);
} else {
time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
* when we really need to commit the transaction, or there are
* just too many writers without enough free space
*/
if (loops > 3) {
smp_mb();
if (progress != space_info->reservation_progress)
break;
}
smp_mb();
delalloc_bytes = root->fs_info->delalloc_bytes;
}
return reclaimed >= to_reclaim;
}
/**
@ -3728,6 +3789,58 @@ static int may_commit_transaction(struct btrfs_root *root,
return btrfs_commit_transaction(trans, root);
}
/*
 * Steps flush_space() can be asked to perform.
 *
 * The values are consecutive and start at 1: reserve_metadata_bytes() begins
 * with FLUSH_DELALLOC and advances with ++ until it has gone past
 * COMMIT_TRANS, so the numeric order is the order in which the steps are
 * tried.
 */
enum flush_state {
	FLUSH_DELALLOC = 1,	/* shrink delalloc without waiting on ordered extents */
	FLUSH_DELALLOC_WAIT,	/* shrink delalloc and wait on ordered extents */
	FLUSH_DELAYED_ITEMS_NR,	/* run a bounded number of delayed items */
	FLUSH_DELAYED_ITEMS,	/* run delayed items without a bound */
	COMMIT_TRANS,		/* last resort: try to commit the transaction */
};
/*
 * Perform one flushing step selected by @state.
 *
 * @root:       root the work is done for
 * @space_info: space info handed to may_commit_transaction()
 * @num_bytes:  amount to reclaim; passed to shrink_delalloc() and used to
 *              size the delayed item batch
 * @orig_bytes: size of the original reservation request
 * @state:      one of enum flush_state
 *
 * Returns 0 for the delalloc states, the result of joining the transaction or
 * running the delayed items for the delayed item states, the result of
 * may_commit_transaction() for COMMIT_TRANS, and -ENOSPC for any other state.
 */
static int flush_space(struct btrfs_root *root,
		       struct btrfs_space_info *space_info, u64 num_bytes,
		       u64 orig_bytes, int state)
{
	struct btrfs_trans_handle *trans;
	int limit = -1;		/* negative: no bound on delayed items */
	int err;

	switch (state) {
	case FLUSH_DELALLOC:
	case FLUSH_DELALLOC_WAIT:
		shrink_delalloc(root, num_bytes, orig_bytes,
				state == FLUSH_DELALLOC_WAIT);
		return 0;

	case FLUSH_DELAYED_ITEMS_NR:
		/*
		 * Twice the number of single-item metadata reservations that
		 * fit into num_bytes, and never fewer than two.
		 */
		limit = (int)div64_u64(num_bytes,
				btrfs_calc_trans_metadata_size(root, 1));
		if (limit == 0)
			limit = 1;
		limit *= 2;
		/* fall through */
	case FLUSH_DELAYED_ITEMS:
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		err = btrfs_run_delayed_items_nr(trans, root, limit);
		btrfs_end_transaction(trans, root);
		return err;

	case COMMIT_TRANS:
		return may_commit_transaction(root, space_info, orig_bytes, 0);

	default:
		return -ENOSPC;
	}
}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@ -3749,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 used;
u64 num_bytes = orig_bytes;
int retries = 0;
int flush_state = FLUSH_DELALLOC;
int ret = 0;
bool committed = false;
bool flushing = false;
bool wait_ordered = false;
bool committed = false;
again:
ret = 0;
@ -3812,9 +3924,8 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
* amount plus the amount of bytes that we need for this
* reservation.
*/
wait_ordered = true;
num_bytes = used - space_info->total_bytes +
(orig_bytes * (retries + 1));
(orig_bytes * 2);
}
if (ret) {
@ -3867,8 +3978,6 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
trace_btrfs_space_reservation(root->fs_info,
"space_info", space_info->flags, orig_bytes, 1);
ret = 0;
} else {
wait_ordered = true;
}
}
@ -3887,36 +3996,13 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
if (!ret || !flush)
goto out;
/*
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
ret = shrink_delalloc(root, num_bytes, wait_ordered);
if (ret < 0)
goto out;
ret = 0;
/*
* So if we were overcommitted it's possible that somebody else flushed
* out enough space and we simply didn't have enough space to reclaim,
* so go back around and try again.
*/
if (retries < 2) {
wait_ordered = true;
retries++;
ret = flush_space(root, space_info, num_bytes, orig_bytes,
flush_state);
flush_state++;
if (!ret)
goto again;
}
ret = -ENOSPC;
if (committed)
goto out;
ret = may_commit_transaction(root, space_info, orig_bytes, 0);
if (!ret) {
committed = true;
else if (flush_state <= COMMIT_TRANS)
goto again;
}
out:
if (flushing) {
@ -3934,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv(
{
struct btrfs_block_rsv *block_rsv = NULL;
if (root->ref_cows || root == root->fs_info->csum_root)
if (root->ref_cows)
block_rsv = trans->block_rsv;
if (root == root->fs_info->csum_root && trans->adding_csums)
block_rsv = trans->block_rsv;
if (!block_rsv)
@ -4286,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (!trans->block_rsv)
return;
if (!trans->bytes_reserved)
return;
@ -4444,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
int ret;
/* Need to be holding the i_mutex here if we aren't free space cache */
if (btrfs_is_free_space_inode(root, inode))
if (btrfs_is_free_space_inode(inode))
flush = 0;
if (flush && btrfs_transaction_in_commit(root->fs_info))
@ -4476,6 +4568,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
if (root->fs_info->quota_enabled) {
ret = btrfs_qgroup_reserve(root, num_bytes +
nr_extents * root->leafsize);
if (ret)
return ret;
}
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (ret) {
u64 to_free = 0;
@ -4554,6 +4653,11 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0);
if (root->fs_info->quota_enabled) {
btrfs_qgroup_free(root, num_bytes +
dropped * root->leafsize);
}
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
}
@ -5190,8 +5294,9 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
rb_erase(&head->node.rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
smp_mb();
if (waitqueue_active(&root->fs_info->tree_mod_seq_wait))
wake_up(&root->fs_info->tree_mod_seq_wait);
/*
* we don't take a ref on the node because we're removing it from the
@ -5748,7 +5853,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
ret = do_chunk_alloc(trans, root, num_bytes +
2 * 1024 * 1024, data,
CHUNK_ALLOC_LIMITED);
if (ret < 0) {
/*
* Do not bail out on ENOSPC since we
* can do more things.
*/
if (ret < 0 && ret != -ENOSPC) {
btrfs_abort_transaction(trans,
root, ret);
goto out;
@ -5816,13 +5925,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
spin_lock(&cache->lock);
printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
"%llu pinned %llu reserved\n",
printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
(unsigned long long)cache->key.objectid,
(unsigned long long)cache->key.offset,
(unsigned long long)btrfs_block_group_used(&cache->item),
(unsigned long long)cache->pinned,
(unsigned long long)cache->reserved);
(unsigned long long)cache->reserved,
cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
}
@ -7610,8 +7719,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
if (need_clear)
if (need_clear) {
/*
* When we mount with old space cache, we need to
* set BTRFS_DC_CLEAR and set dirty flag.
*
* a) Setting 'BTRFS_DC_CLEAR' makes sure that we
* truncate the old free space cache inode and
* setup a new one.
* b) Setting 'dirty flag' makes sure that we flush
* the new space cache info onto disk.
*/
cache->disk_cache_state = BTRFS_DC_CLEAR;
if (btrfs_test_opt(root, SPACE_CACHE))
cache->dirty = 1;
}
read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]),

View file

@ -1919,7 +1919,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
return -EIO;
}
printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
"(dev %s sector %llu)\n", page->mapping->host->i_ino,
start, rcu_str_deref(dev->name), sector);
@ -3078,8 +3078,15 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
}
}
/*
* We need to do this to prevent races in people who check if the eb is
* under IO since we can end up having no IO bits set for a short period
* of time.
*/
spin_lock(&eb->refs_lock);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
spin_lock(&fs_info->delalloc_lock);
if (fs_info->dirty_metadata_bytes >= eb->len)
@ -3088,6 +3095,8 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
WARN_ON(1);
spin_unlock(&fs_info->delalloc_lock);
ret = 1;
} else {
spin_unlock(&eb->refs_lock);
}
btrfs_tree_unlock(eb);
@ -3558,19 +3567,38 @@ int extent_readpages(struct extent_io_tree *tree,
struct bio *bio = NULL;
unsigned page_idx;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct page *page;
int i = 0;
int nr = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru);
page = list_entry(pages->prev, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
if (!add_to_page_cache_lru(page, mapping,
if (add_to_page_cache_lru(page, mapping,
page->index, GFP_NOFS)) {
__extent_read_full_page(tree, page, get_extent,
&bio, 0, &bio_flags);
page_cache_release(page);
continue;
}
page_cache_release(page);
pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool))
continue;
for (i = 0; i < nr; i++) {
__extent_read_full_page(tree, pagepool[i], get_extent,
&bio, 0, &bio_flags);
page_cache_release(pagepool[i]);
}
nr = 0;
}
for (i = 0; i < nr; i++) {
__extent_read_full_page(tree, pagepool[i], get_extent,
&bio, 0, &bio_flags);
page_cache_release(pagepool[i]);
}
BUG_ON(!list_empty(pages));
if (bio)
return submit_one_bio(READ, bio, 0, bio_flags);
@ -4124,11 +4152,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* So bump the ref count first, then set the bit. If someone
* beat us to it, drop the ref we added.
*/
if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
spin_lock(&eb->refs_lock);
if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_inc(&eb->refs);
if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_dec(&eb->refs);
}
spin_unlock(&eb->refs_lock);
}
static void mark_extent_buffer_accessed(struct extent_buffer *eb)
@ -4240,9 +4267,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
goto free_eb;
}
/* add one reference for the tree */
spin_lock(&eb->refs_lock);
check_buffer_tree_ref(eb);
spin_unlock(&eb->refs_lock);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
@ -4301,7 +4326,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
}
/* Expects to have eb->eb_lock already held */
static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
@ -4322,9 +4347,11 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
btrfs_release_extent_buffer_page(eb, 0);
call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return;
return 1;
}
spin_unlock(&eb->refs_lock);
return 0;
}
void free_extent_buffer(struct extent_buffer *eb)
@ -4963,7 +4990,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
spin_unlock(&eb->refs_lock);
return 0;
}
release_extent_buffer(eb, mask);
return 1;
return release_extent_buffer(eb, mask);
}

View file

@ -183,7 +183,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
if (btrfs_is_free_space_inode(root, inode)) {
if (btrfs_is_free_space_inode(inode)) {
path->search_commit_root = 1;
path->skip_locking = 1;
}
@ -690,6 +690,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
return -ENOMEM;
sector_sum = sums->sums;
trans->adding_csums = 1;
again:
next_offset = (u64)-1;
found_next = 0;
@ -853,6 +854,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto again;
}
out:
trans->adding_csums = 0;
btrfs_free_path(path);
return ret;

View file

@ -1968,7 +1968,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes)
if (info->bytes >= bytes && !block_group->ro)
count++;
printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
(unsigned long long)info->offset,

View file

@ -825,7 +825,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
BUG_ON(btrfs_is_free_space_inode(root, inode));
BUG_ON(btrfs_is_free_space_inode(inode));
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
extent_clear_unlock_delalloc(inode,
@ -1010,7 +1010,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
if (atomic_read(&root->fs_info->async_delalloc_pages) <
5 * 1042 * 1024 &&
5 * 1024 * 1024 &&
waitqueue_active(&root->fs_info->async_submit_wait))
wake_up(&root->fs_info->async_submit_wait);
@ -1035,7 +1035,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
u64 cur_end;
int limit = 10 * 1024 * 1042;
int limit = 10 * 1024 * 1024;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
@ -1153,7 +1153,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
return -ENOMEM;
}
nolock = btrfs_is_free_space_inode(root, inode);
nolock = btrfs_is_free_space_inode(inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
@ -1466,7 +1466,7 @@ static void btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
bool do_list = !btrfs_is_free_space_inode(root, inode);
bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
@ -1501,7 +1501,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
bool do_list = !btrfs_is_free_space_inode(root, inode);
bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
@ -1612,7 +1612,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (btrfs_is_free_space_inode(root, inode))
if (btrfs_is_free_space_inode(inode))
metadata = 2;
if (!(rw & REQ_WRITE)) {
@ -1869,7 +1869,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
int ret;
bool nolock;
nolock = btrfs_is_free_space_inode(root, inode);
nolock = btrfs_is_free_space_inode(inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
@ -2007,7 +2007,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
ordered_extent->work.func = finish_ordered_fn;
ordered_extent->work.flags = 0;
if (btrfs_is_free_space_inode(root, inode))
if (btrfs_is_free_space_inode(inode))
workers = &root->fs_info->endio_freespace_worker;
else
workers = &root->fs_info->endio_write_workers;
@ -2732,8 +2732,10 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
if (!btrfs_is_free_space_inode(root, inode)
if (!btrfs_is_free_space_inode(inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
btrfs_set_inode_last_trans(trans, inode);
@ -2833,7 +2835,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
inode_inc_iversion(inode);
inode_inc_iversion(dir);
inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
btrfs_update_inode(trans, root, dir);
ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
}
@ -3743,7 +3745,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
btrfs_is_free_space_inode(root, inode)))
btrfs_is_free_space_inode(inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@ -4082,7 +4084,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
struct btrfs_iget_args *args = p;
inode->i_ino = args->ino;
BTRFS_I(inode)->root = args->root;
btrfs_set_inode_space_info(args->root, inode);
return 0;
}
@ -4457,7 +4458,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
@ -4518,6 +4519,11 @@ int btrfs_dirty_inode(struct inode *inode)
static int btrfs_update_time(struct inode *inode, struct timespec *now,
int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_readonly(root))
return -EROFS;
if (flags & S_VERSION)
inode_inc_iversion(inode);
if (flags & S_CTIME)
@ -4662,7 +4668,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
if (S_ISDIR(mode))
owner = 0;
@ -4690,6 +4695,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
sizeof(*inode_item));
fill_inode_item(trans, path->nodes[0], inode_item, inode);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
@ -4723,6 +4730,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
trace_btrfs_inode_new(inode);
btrfs_set_inode_last_trans(trans, inode);
btrfs_update_root_times(trans, root);
return inode;
fail:
if (dir)
@ -6939,7 +6948,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return NULL;
ei->root = NULL;
ei->space_info = NULL;
ei->generation = 0;
ei->last_trans = 0;
ei->last_sub_trans = 0;
@ -7046,7 +7054,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
!btrfs_is_free_space_inode(root, inode))
!btrfs_is_free_space_inode(inode))
return 1;
else
return generic_drop_inode(inode);

View file

@ -41,6 +41,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@ -53,6 +54,7 @@
#include "inode-map.h"
#include "backref.h"
#include "rcu-string.h"
#include "send.h"
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@ -336,7 +338,8 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
char *name, int namelen,
u64 *async_transid)
u64 *async_transid,
struct btrfs_qgroup_inherit **inherit)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@ -346,11 +349,13 @@ static noinline int create_subvol(struct btrfs_root *root,
struct btrfs_root *new_root;
struct dentry *parent = dentry->d_parent;
struct inode *dir;
struct timespec cur_time = CURRENT_TIME;
int ret;
int err;
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
uuid_le new_uuid;
ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
if (ret)
@ -368,6 +373,11 @@ static noinline int create_subvol(struct btrfs_root *root,
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
inherit ? *inherit : NULL);
if (ret)
goto fail;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
@ -389,8 +399,9 @@ static noinline int create_subvol(struct btrfs_root *root,
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
memset(&root_item, 0, sizeof(root_item));
inode_item = &root_item.inode;
memset(inode_item, 0, sizeof(*inode_item));
inode_item->generation = cpu_to_le64(1);
inode_item->size = cpu_to_le64(3);
inode_item->nlink = cpu_to_le32(1);
@ -408,8 +419,15 @@ static noinline int create_subvol(struct btrfs_root *root,
btrfs_set_root_used(&root_item, leaf->len);
btrfs_set_root_last_snapshot(&root_item, 0);
memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
root_item.drop_level = 0;
btrfs_set_root_generation_v2(&root_item,
btrfs_root_generation(&root_item));
uuid_le_gen(&new_uuid);
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
root_item.otime.nsec = cpu_to_le64(cur_time.tv_nsec);
root_item.ctime = root_item.otime;
btrfs_set_root_ctransid(&root_item, trans->transid);
btrfs_set_root_otransid(&root_item, trans->transid);
btrfs_tree_unlock(leaf);
free_extent_buffer(leaf);
@ -484,7 +502,7 @@ static noinline int create_subvol(struct btrfs_root *root,
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen, u64 *async_transid,
bool readonly)
bool readonly, struct btrfs_qgroup_inherit **inherit)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
@ -502,6 +520,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
pending_snapshot->readonly = readonly;
if (inherit) {
pending_snapshot->inherit = *inherit;
*inherit = NULL; /* take responsibility to free it */
}
trans = btrfs_start_transaction(root->fs_info->extent_root, 5);
if (IS_ERR(trans)) {
@ -635,7 +657,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly)
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@ -652,13 +675,9 @@ static noinline int btrfs_mksubvol(struct path *parent,
if (dentry->d_inode)
goto out_dput;
error = mnt_want_write(parent->mnt);
if (error)
goto out_dput;
error = btrfs_may_create(dir, dentry);
if (error)
goto out_drop_write;
goto out_dput;
down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
@ -666,18 +685,16 @@ static noinline int btrfs_mksubvol(struct path *parent,
goto out_up_read;
if (snap_src) {
error = create_snapshot(snap_src, dentry,
name, namelen, async_transid, readonly);
error = create_snapshot(snap_src, dentry, name, namelen,
async_transid, readonly, inherit);
} else {
error = create_subvol(BTRFS_I(dir)->root, dentry,
name, namelen, async_transid);
name, namelen, async_transid, inherit);
}
if (!error)
fsnotify_mkdir(dir, dentry);
out_up_read:
up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
out_drop_write:
mnt_drop_write(parent->mnt);
out_dput:
dput(dentry);
out_unlock:
@ -832,7 +849,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
}
static int should_defrag_range(struct inode *inode, u64 start, int thresh,
u64 *last_len, u64 *skip, u64 *defrag_end)
u64 *last_len, u64 *skip, u64 *defrag_end,
int compress)
{
struct extent_map *em;
int ret = 1;
@ -863,7 +881,7 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh,
* we hit a real extent, if it is big or the next extent is not a
* real extent, don't bother defragging it
*/
if ((*last_len == 0 || *last_len >= thresh) &&
if (!compress && (*last_len == 0 || *last_len >= thresh) &&
(em->len >= thresh || !next_mergeable))
ret = 0;
out:
@ -1047,11 +1065,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
u64 newer_than, unsigned long max_to_defrag)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_super_block *disk_super;
struct file_ra_state *ra = NULL;
unsigned long last_index;
u64 isize = i_size_read(inode);
u64 features;
u64 last_len = 0;
u64 skip = 0;
u64 defrag_end = 0;
@ -1145,7 +1161,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
extent_thresh, &last_len, &skip,
&defrag_end)) {
&defrag_end, range->flags &
BTRFS_DEFRAG_RANGE_COMPRESS)) {
unsigned long next;
/*
* the should_defrag function tells us how much to skip
@ -1237,11 +1254,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
mutex_unlock(&inode->i_mutex);
}
disk_super = root->fs_info->super_copy;
features = btrfs_super_incompat_flags(disk_super);
if (range->compress_type == BTRFS_COMPRESS_LZO) {
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
btrfs_set_super_incompat_flags(disk_super, features);
btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
}
ret = defrag_count;
@ -1379,41 +1393,39 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
}
static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
char *name,
unsigned long fd,
int subvol,
u64 *transid,
bool readonly)
char *name, unsigned long fd, int subvol,
u64 *transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct file *src_file;
int namelen;
int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = mnt_want_write_file(file);
if (ret)
goto out;
namelen = strlen(name);
if (strchr(name, '/')) {
ret = -EINVAL;
goto out;
goto out_drop_write;
}
if (name[0] == '.' &&
(namelen == 1 || (name[1] == '.' && namelen == 2))) {
ret = -EEXIST;
goto out;
goto out_drop_write;
}
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
NULL, transid, readonly);
NULL, transid, readonly, inherit);
} else {
struct inode *src_inode;
src_file = fget(fd);
if (!src_file) {
ret = -EINVAL;
goto out;
goto out_drop_write;
}
src_inode = src_file->f_path.dentry->d_inode;
@ -1422,13 +1434,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
"another FS\n");
ret = -EINVAL;
fput(src_file);
goto out;
goto out_drop_write;
}
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
transid, readonly);
transid, readonly, inherit);
fput(src_file);
}
out_drop_write:
mnt_drop_write_file(file);
out:
return ret;
}
@ -1446,7 +1460,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol,
NULL, false);
NULL, false, NULL);
kfree(vol_args);
return ret;
@ -1460,6 +1474,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
u64 transid = 0;
u64 *ptr = NULL;
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
@ -1467,7 +1482,8 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
if (vol_args->flags &
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY)) {
~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
BTRFS_SUBVOL_QGROUP_INHERIT)) {
ret = -EOPNOTSUPP;
goto out;
}
@ -1476,10 +1492,21 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ptr = &transid;
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
if (vol_args->size > PAGE_CACHE_SIZE) {
ret = -EINVAL;
goto out;
}
inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
if (IS_ERR(inherit)) {
ret = PTR_ERR(inherit);
goto out;
}
}
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol,
ptr, readonly);
vol_args->fd, subvol, ptr,
readonly, &inherit);
if (ret == 0 && ptr &&
copy_to_user(arg +
@ -1488,6 +1515,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ret = -EFAULT;
out:
kfree(vol_args);
kfree(inherit);
return ret;
}
@ -1523,29 +1551,40 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
u64 flags;
int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = mnt_want_write_file(file);
if (ret)
goto out;
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID)
return -EINVAL;
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
goto out_drop_write;
}
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
if (copy_from_user(&flags, arg, sizeof(flags))) {
ret = -EFAULT;
goto out_drop_write;
}
if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
ret = -EINVAL;
goto out_drop_write;
}
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
if (flags & ~BTRFS_SUBVOL_RDONLY) {
ret = -EOPNOTSUPP;
goto out_drop_write;
}
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!inode_owner_or_capable(inode)) {
ret = -EACCES;
goto out_drop_write;
}
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
goto out;
goto out_drop_sem;
root_flags = btrfs_root_flags(&root->root_item);
if (flags & BTRFS_SUBVOL_RDONLY)
@ -1568,8 +1607,11 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
out_reset:
if (ret)
btrfs_set_root_flags(&root->root_item, root_flags);
out:
out_drop_sem:
up_write(&root->fs_info->subvol_sem);
out_drop_write:
mnt_drop_write_file(file);
out:
return ret;
}
@ -2340,6 +2382,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
goto out_drop_write;
}
ret = -EXDEV;
if (src_file->f_path.mnt != file->f_path.mnt)
goto out_fput;
src = src_file->f_dentry->d_inode;
ret = -EINVAL;
@ -2360,7 +2406,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
goto out_fput;
ret = -EXDEV;
if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
if (src->i_sb != inode->i_sb)
goto out_fput;
ret = -ENOMEM;
@ -2434,13 +2480,14 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
* note the key will change type as we walk through the
* tree.
*/
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
0, 0);
if (ret < 0)
goto out;
nritems = btrfs_header_nritems(path->nodes[0]);
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
if (ret < 0)
goto out;
if (ret > 0)
@ -2749,8 +2796,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
struct btrfs_path *path;
struct btrfs_key location;
struct btrfs_disk_key disk_key;
struct btrfs_super_block *disk_super;
u64 features;
u64 objectid = 0;
u64 dir_id;
@ -2801,12 +2846,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
disk_super = root->fs_info->super_copy;
features = btrfs_super_incompat_flags(disk_super);
if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
btrfs_set_super_incompat_flags(disk_super, features);
}
btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL);
btrfs_end_transaction(trans, root);
return 0;
@ -3063,19 +3103,21 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
}
static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
void __user *arg, int reset_after_read)
void __user *arg)
{
struct btrfs_ioctl_get_dev_stats *sa;
int ret;
if (reset_after_read && !capable(CAP_SYS_ADMIN))
return -EPERM;
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa))
return PTR_ERR(sa);
ret = btrfs_get_dev_stats(root, sa, reset_after_read);
if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
kfree(sa);
return -EPERM;
}
ret = btrfs_get_dev_stats(root, sa);
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
@ -3265,9 +3307,6 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = mnt_want_write_file(file);
if (ret)
return ret;
@ -3390,6 +3429,264 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
return ret;
}
/*
 * Handle BTRFS_IOC_QUOTA_CTL: enable, disable or rescan quotas.
 *
 * @root: btrfs root handed in by btrfs_ioctl()
 * @arg:  user pointer to a struct btrfs_ioctl_quota_ctl_args
 *
 * The caller needs CAP_SYS_ADMIN and the superblock must not be flagged
 * MS_RDONLY.  Enable and disable run inside a transaction that is committed
 * before returning; rescan and unknown commands do not touch a transaction.
 * The argument structure is copied back to user space; this function itself
 * does not modify it.
 *
 * Returns 0 on success, -EPERM or -EROFS if the checks above fail, -EINVAL
 * for an unknown command, -EFAULT if the copy back to user space fails (this
 * overrides any earlier result), otherwise the error from memdup_user(),
 * the transaction start, the quota operation or, if everything else
 * succeeded, the commit.
 */
static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_quota_ctl_args *sa;
	struct btrfs_trans_handle *trans = NULL;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	/*
	 * Only enable and disable are handed a transaction below.  Starting
	 * one for anything else (in particular an unknown command) would
	 * reserve space and force a commit for nothing, and a failed
	 * reservation would hide the -EINVAL the caller should see.
	 */
	if (sa->cmd == BTRFS_QUOTA_CTL_ENABLE ||
	    sa->cmd == BTRFS_QUOTA_CTL_DISABLE) {
		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
	}

	switch (sa->cmd) {
	case BTRFS_QUOTA_CTL_ENABLE:
		ret = btrfs_quota_enable(trans, root->fs_info);
		break;
	case BTRFS_QUOTA_CTL_DISABLE:
		ret = btrfs_quota_disable(trans, root->fs_info);
		break;
	case BTRFS_QUOTA_CTL_RESCAN:
		ret = btrfs_quota_rescan(root->fs_info);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	/* trans is non-NULL only for enable/disable; keep the first error. */
	if (trans) {
		err = btrfs_commit_transaction(trans, root);
		if (err && !ret)
			ret = err;
	}
out:
	kfree(sa);
	return ret;
}
/*
 * Handle BTRFS_IOC_QGROUP_ASSIGN: add or delete the relation between the
 * qgroups named by sa->src and sa->dst.
 *
 * @root: btrfs root handed in by btrfs_ioctl()
 * @arg:  user pointer to a struct btrfs_ioctl_qgroup_assign_args
 *
 * A non-zero sa->assign adds the relation, zero deletes it.  The work is
 * done in a joined transaction which is ended (not committed) afterwards.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EROFS if the
 * superblock is flagged MS_RDONLY, otherwise the first error reported by
 * memdup_user(), the transaction join, the relation update or the
 * transaction end.
 */
static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_qgroup_assign_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int end_err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		kfree(sa);
		return ret;
	}

	/* FIXME: check if the IDs really exist */
	ret = sa->assign ?
		btrfs_add_qgroup_relation(trans, root->fs_info,
					  sa->src, sa->dst) :
		btrfs_del_qgroup_relation(trans, root->fs_info,
					  sa->src, sa->dst);

	/* Always end the transaction; its error only matters if ret is 0. */
	end_err = btrfs_end_transaction(trans, root);
	if (!ret)
		ret = end_err;

	kfree(sa);
	return ret;
}
/*
 * Handle BTRFS_IOC_QGROUP_CREATE: create or remove the qgroup identified
 * by sa->qgroupid.
 *
 * @root: btrfs root handed in by btrfs_ioctl()
 * @arg:  user pointer to a struct btrfs_ioctl_qgroup_create_args
 *
 * A non-zero sa->create creates the qgroup (with a NULL last argument to
 * btrfs_create_qgroup()), zero removes it.  The work is done in a joined
 * transaction which is ended afterwards.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EROFS if the
 * superblock is flagged MS_RDONLY, otherwise the first error reported by
 * memdup_user(), the transaction join, the qgroup operation or the
 * transaction end.
 */
static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_qgroup_create_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int end_err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto free_args;
	}

	/* FIXME: check if the IDs really exist */
	if (!sa->create)
		ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
	else
		ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
					  NULL);

	/* The transaction is ended on both paths; keep the first error. */
	end_err = btrfs_end_transaction(trans, root);
	if (ret == 0 && end_err != 0)
		ret = end_err;

free_args:
	kfree(sa);
	return ret;
}
/*
 * Handle BTRFS_IOC_QGROUP_LIMIT: apply the limits in sa->lim to a qgroup.
 *
 * @root: btrfs root handed in by btrfs_ioctl()
 * @arg:  user pointer to a struct btrfs_ioctl_qgroup_limit_args
 *
 * A sa->qgroupid of 0 selects root->root_key.objectid, i.e. the qgroup of
 * the root the ioctl was dispatched with.  The work is done in a joined
 * transaction which is ended afterwards.
 *
 * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EROFS if the
 * superblock is flagged MS_RDONLY, otherwise the first error reported by
 * memdup_user(), the transaction join, btrfs_limit_qgroup() or the
 * transaction end.
 */
static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_qgroup_limit_args *sa;
	struct btrfs_trans_handle *trans;
	u64 target;
	int ret;
	int end_err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (root->fs_info->sb->s_flags & MS_RDONLY)
		return -EROFS;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto free_args;
	}

	/* take the current subvol as qgroup when no id was given */
	target = sa->qgroupid;
	if (target == 0)
		target = root->root_key.objectid;

	/* FIXME: check if the IDs really exist */
	ret = btrfs_limit_qgroup(trans, root->fs_info, target, &sa->lim);

	/* The transaction is ended regardless; keep the first error. */
	end_err = btrfs_end_transaction(trans, root);
	if (!ret)
		ret = end_err;

free_args:
	kfree(sa);
	return ret;
}
/*
 * Handle BTRFS_IOC_SET_RECEIVED_SUBVOL: record "received" metadata in the
 * root item of the subvolume that owns the file's inode.
 *
 * @file: open file the ioctl was issued on
 * @arg:  user pointer to a struct btrfs_ioctl_received_subvol_args
 *
 * From the caller's arguments the uuid, stransid and stime are stored.
 * rtransid and rtime are filled in here (current transaction id and the
 * time sampled at function entry), stored as well, and copied back to the
 * caller together with the rest of the structure.  The root item is written
 * with btrfs_update_root() and the transaction is committed.
 *
 * Returns 0 on success, -EINVAL if the inode number is not
 * BTRFS_FIRST_FREE_OBJECTID, -EROFS if the root is read-only, -EACCES if
 * the caller is neither owner nor capable, -EFAULT if the copy back fails,
 * otherwise the error from mnt_want_write_file(), memdup_user(), the
 * transaction start, btrfs_update_root() or the commit.
 */
static long btrfs_ioctl_set_received_subvol(struct file *file,
void __user *arg)
{
/* sa starts as NULL so the kfree() at "out" is safe on early exits */
struct btrfs_ioctl_received_subvol_args *sa = NULL;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root_item *root_item = &root->root_item;
struct btrfs_trans_handle *trans;
struct timespec ct = CURRENT_TIME;
int ret = 0;
/* write access and subvol_sem are held until the common exit at "out" */
ret = mnt_want_write_file(file);
if (ret < 0)
return ret;
down_write(&root->fs_info->subvol_sem);
/*
 * NOTE(review): presumably this restricts the ioctl to the subvolume's
 * top-level directory inode - confirm.
 */
if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
ret = -EINVAL;
goto out;
}
if (btrfs_root_readonly(root)) {
ret = -EROFS;
goto out;
}
if (!inode_owner_or_capable(inode)) {
ret = -EACCES;
goto out;
}
sa = memdup_user(arg, sizeof(*sa));
if (IS_ERR(sa)) {
ret = PTR_ERR(sa);
/* do not hand an error pointer to kfree() at "out" */
sa = NULL;
goto out;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}
/* receive-side fields are generated here, not taken from the caller */
sa->rtransid = trans->transid;
sa->rtime.sec = ct.tv_sec;
sa->rtime.nsec = ct.tv_nsec;
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
btrfs_set_root_stransid(root_item, sa->stransid);
btrfs_set_root_rtransid(root_item, sa->rtransid);
root_item->stime.sec = cpu_to_le64(sa->stime.sec);
root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
/* on update failure the transaction is only ended, not committed */
if (ret < 0) {
btrfs_end_transaction(trans, root);
trans = NULL;
goto out;
} else {
ret = btrfs_commit_transaction(trans, root);
if (ret < 0)
goto out;
}
/* copy_to_user() returns the number of bytes left; map that to -EFAULT */
ret = copy_to_user(arg, sa, sizeof(*sa));
if (ret)
ret = -EFAULT;
out:
kfree(sa);
up_write(&root->fs_info->subvol_sem);
mnt_drop_write_file(file);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@ -3411,6 +3708,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_snap_create_v2(file, argp, 0);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
case BTRFS_IOC_SUBVOL_CREATE_V2:
return btrfs_ioctl_snap_create_v2(file, argp, 1);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp);
case BTRFS_IOC_SUBVOL_GETFLAGS:
@ -3472,10 +3771,20 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_balance_ctl(root, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
return btrfs_ioctl_balance_progress(root, argp);
case BTRFS_IOC_SET_RECEIVED_SUBVOL:
return btrfs_ioctl_set_received_subvol(file, argp);
case BTRFS_IOC_SEND:
return btrfs_ioctl_send(file, argp);
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 0);
case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 1);
return btrfs_ioctl_get_dev_stats(root, argp);
case BTRFS_IOC_QUOTA_CTL:
return btrfs_ioctl_quota_ctl(root, argp);
case BTRFS_IOC_QGROUP_ASSIGN:
return btrfs_ioctl_qgroup_assign(root, argp);
case BTRFS_IOC_QGROUP_CREATE:
return btrfs_ioctl_qgroup_create(root, argp);
case BTRFS_IOC_QGROUP_LIMIT:
return btrfs_ioctl_qgroup_limit(root, argp);
}
return -ENOTTY;

View file

@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args {
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_FSID_SIZE 16
#define BTRFS_UUID_SIZE 16
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
/*
 * Limit values for a single quota group.  Embedded by value in
 * struct btrfs_qgroup_inherit and struct btrfs_ioctl_qgroup_limit_args, so
 * the field order and sizes are part of the ioctl layout.
 * NOTE(review): "rfer"/"excl" presumably stand for referenced/exclusive and
 * "rsv" for a reservation -- confirm against the qgroup implementation.
 */
struct btrfs_qgroup_limit {
__u64 flags;
__u64 max_rfer;
__u64 max_excl;
__u64 rsv_rfer;
__u64 rsv_excl;
};
/*
 * Quota group inheritance request.  Userspace passes a pointer to one of
 * these through btrfs_ioctl_vol_args_v2.qgroup_inherit.
 * NOTE(review): presumably flags accepts BTRFS_QGROUP_INHERIT_SET_LIMITS and
 * the three num_* counters describe how many ids follow in qgroups[] --
 * confirm against the consumer of this structure.
 */
struct btrfs_qgroup_inherit {
__u64 flags;
__u64 num_qgroups;
__u64 num_ref_copies;
__u64 num_excl_copies;
struct btrfs_qgroup_limit lim;
/* zero-length trailing array: adds nothing to sizeof(), data follows */
__u64 qgroups[0];
};
/*
 * Argument of BTRFS_IOC_QGROUP_LIMIT: a quota group id together with the
 * limit values in struct btrfs_qgroup_limit.
 */
struct btrfs_ioctl_qgroup_limit_args {
__u64 qgroupid;
struct btrfs_qgroup_limit lim;
};
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;
__u64 flags;
__u64 unused[4];
union {
struct {
__u64 size;
struct btrfs_qgroup_inherit __user *qgroup_inherit;
};
__u64 unused[4];
};
char name[BTRFS_SUBVOL_NAME_MAX + 1];
};
@ -285,9 +316,13 @@ enum btrfs_dev_stat_values {
BTRFS_DEV_STAT_VALUES_MAX
};
/* Reset statistics after reading; needs SYS_ADMIN capability */
#define BTRFS_DEV_STATS_RESET (1ULL << 0)
struct btrfs_ioctl_get_dev_stats {
__u64 devid; /* in */
__u64 nr_items; /* in/out */
__u64 flags; /* in/out */
/* out values: */
__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
@ -295,6 +330,48 @@ struct btrfs_ioctl_get_dev_stats {
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
#define BTRFS_QUOTA_CTL_ENABLE 1
#define BTRFS_QUOTA_CTL_DISABLE 2
#define BTRFS_QUOTA_CTL_RESCAN 3
/*
 * Argument of BTRFS_IOC_QUOTA_CTL.
 * NOTE(review): cmd is presumably one of the BTRFS_QUOTA_CTL_* values
 * defined above; the meaning of status is not visible here -- confirm.
 */
struct btrfs_ioctl_quota_ctl_args {
__u64 cmd;
__u64 status;
};
/*
 * Argument of BTRFS_IOC_QGROUP_ASSIGN.
 * NOTE(review): assign presumably selects between adding and removing the
 * src -> dst relation -- confirm against the ioctl handler.
 */
struct btrfs_ioctl_qgroup_assign_args {
__u64 assign;
__u64 src;
__u64 dst;
};
/*
 * Argument of BTRFS_IOC_QGROUP_CREATE.
 * NOTE(review): create presumably selects between creating and removing the
 * quota group named by qgroupid -- confirm against the ioctl handler.
 */
struct btrfs_ioctl_qgroup_create_args {
__u64 create;
__u64 qgroupid;
};
/*
 * Timestamp as exchanged with userspace (seconds + nanoseconds); used for
 * the stime/rtime members of struct btrfs_ioctl_received_subvol_args.
 * NOTE(review): the struct is not packed, so on ABIs that align __u64 to
 * 8 bytes there are 4 bytes of tail padding while other ABIs have none --
 * check whether 32-bit callers on 64-bit kernels need compat handling.
 */
struct btrfs_ioctl_timespec {
__u64 sec;
__u32 nsec;
};
/*
 * Argument of BTRFS_IOC_SET_RECEIVED_SUBVOL.
 *
 * In the handler in ioctl.c, uuid is copied into the root item's
 * received_uuid and stransid/stime are stored in the root item.  rtransid
 * is filled from the id of the transaction doing the update and rtime from
 * the current time; the structure is then copied back to userspace.
 * NOTE(review): the visible part of the handler does not examine flags or
 * reserved -- confirm whether they are validated elsewhere.
 */
struct btrfs_ioctl_received_subvol_args {
char uuid[BTRFS_UUID_SIZE]; /* in */
__u64 stransid; /* in */
__u64 rtransid; /* out */
struct btrfs_ioctl_timespec stime; /* in */
struct btrfs_ioctl_timespec rtime; /* out */
__u64 flags; /* in */
__u64 reserved[16]; /* in */
};
/*
 * Argument of BTRFS_IOC_SEND; every member is an input.
 * NOTE(review): presumably send_fd is the descriptor the stream is written
 * to and clone_sources points at clone_sources_count root ids -- confirm
 * against btrfs_ioctl_send().
 * NOTE(review): clone_sources is a raw user pointer, so its size follows
 * the caller's pointer width -- check 32-bit userspace on 64-bit kernels.
 */
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
__u64 __user *clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@ -339,6 +416,8 @@ struct btrfs_ioctl_get_dev_stats {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
@ -359,9 +438,19 @@ struct btrfs_ioctl_get_dev_stats {
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
struct btrfs_ioctl_received_subvol_args)
#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
struct btrfs_ioctl_quota_ctl_args)
#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
struct btrfs_ioctl_qgroup_assign_args)
#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
struct btrfs_ioctl_qgroup_create_args)
#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_get_dev_stats)
#endif

View file

@ -78,13 +78,15 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
if (atomic_dec_and_test(&eb->blocking_writers))
if (atomic_dec_and_test(&eb->blocking_writers) &&
waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
if (atomic_dec_and_test(&eb->blocking_readers))
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
}
return;
@ -199,7 +201,8 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers))
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
@ -247,8 +250,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
smp_wmb();
wake_up(&eb->write_lock_wq);
smp_mb();
if (waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);

1571
fs/btrfs/qgroup.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -1239,10 +1239,11 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node) {
kfree(node);
btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
"for start=%llu while inserting into relocation "
"tree\n");
kfree(node);
return -EEXIST;
}
list_add_tail(&root->root_list, &rc->reloc_roots);

View file

@ -16,11 +16,54 @@
* Boston, MA 021110-1307, USA.
*/
#include <linux/uuid.h>
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
/*
 * Copy the root item stored in slot 'slot' of 'eb' into 'item'.
 *
 * A root item smaller than sizeof(*item) was written in the old, shorter
 * format.  A root item whose generation and generation_v2 disagree was last
 * updated by a kernel that did not know about the new fields.  In both cases
 * everything from generation_v2 to the end of the structure is zeroed and a
 * freshly generated uuid is stored in the item.
 */
void btrfs_read_root_item(struct btrfs_root *root,
			 struct extent_buffer *eb, int slot,
			 struct btrfs_root_item *item)
{
	uuid_le uuid;
	int len = btrfs_item_size_nr(eb, slot);
	int stale;

	/* never copy more than the in-memory structure can hold */
	read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot),
			   min_t(int, len, (int)sizeof(*item)));

	stale = len < sizeof(*item);

	if (!stale &&
	    btrfs_root_generation(item) != btrfs_root_generation_v2(item)) {
		/* generation_v2 == 0 means the fields were never set: no warning */
		if (btrfs_root_generation_v2(item) != 0) {
			printk(KERN_WARNING "btrfs: mismatching "
					"generation and generation_v2 "
					"found in root item. This root "
					"was probably mounted with an "
					"older kernel. Resetting all "
					"new fields.\n");
		}
		stale = 1;
	}

	if (!stale)
		return;

	memset(&item->generation_v2, 0,
	       sizeof(*item) - offsetof(struct btrfs_root_item,
					generation_v2));

	uuid_le_gen(&uuid);
	memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE);
}
/*
* lookup the root with the highest offset for a given objectid. The key we do
* find is copied into 'key'. If we find something return 0, otherwise 1, < 0
@ -61,10 +104,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
goto out;
}
if (item)
read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
sizeof(*item));
btrfs_read_root_item(root, l, slot, item);
if (key)
memcpy(key, &found_key, sizeof(found_key));
ret = 0;
out:
btrfs_free_path(path);
@ -91,16 +134,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
int ret;
int slot;
unsigned long ptr;
int old_len;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
if (ret < 0)
goto out_abort;
if (ret != 0) {
btrfs_print_leaf(root, path->nodes[0]);
@ -113,16 +155,56 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr_offset(l, slot);
old_len = btrfs_item_size_nr(l, slot);
/*
* If this is the first time we update the root item which originated
* from an older kernel, we need to enlarge the item size to make room
* for the added fields.
*/
if (old_len < sizeof(*item)) {
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
if (ret < 0)
goto out_abort;
ret = btrfs_del_item(trans, root, path);
if (ret < 0)
goto out_abort;
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
if (ret < 0)
goto out_abort;
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr_offset(l, slot);
}
/*
* Update generation_v2 so at the next mount we know the new root
* fields are valid.
*/
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
write_extent_buffer(l, item, ptr, sizeof(*item));
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_free_path(path);
return ret;
out_abort:
btrfs_abort_transaction(trans, root, ret);
goto out;
}
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key, struct btrfs_root_item *item)
{
/*
* Make sure generation v1 and v2 match. See update_root for details.
*/
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
return btrfs_insert_item(trans, root, key, item, sizeof(*item));
}
@ -454,3 +536,16 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
root_item->byte_limit = 0;
}
}
/*
 * Record the current transaction id and the current time as the root's
 * ctransid/ctime in the in-memory root item.
 *
 * The root item is an on-disk structure, so every store is converted to
 * little endian.  The nanosecond field is 32 bits wide; it must use
 * cpu_to_le32(), as the other nsec stores in ioctl.c do.  Converting it with
 * cpu_to_le64() and truncating loses the value on big-endian hosts.
 *
 * Updates are serialized with root->root_times_lock.
 */
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	struct btrfs_root_item *item = &root->root_item;
	struct timespec ct = CURRENT_TIME;

	spin_lock(&root->root_times_lock);
	item->ctransid = cpu_to_le64(trans->transid);
	item->ctime.sec = cpu_to_le64(ct.tv_sec);
	item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
	spin_unlock(&root->root_times_lock);
}

4571
fs/btrfs/send.c Normal file

File diff suppressed because it is too large Load diff

133
fs/btrfs/send.h Normal file
View file

@ -0,0 +1,133 @@
/*
* Copyright (C) 2012 Alexander Block. All rights reserved.
* Copyright (C) 2012 STRATO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
#define BTRFS_SEND_STREAM_VERSION 1
#define BTRFS_SEND_BUF_SIZE (1024 * 64)
#define BTRFS_SEND_READ_SIZE (1024 * 48)
/*
 * Kinds of TLV payload.  Values are implicit and consecutive starting at 0,
 * so the order of the enumerators fixes their numeric values.
 * NOTE(review): how (and whether) these are used by the send stream code is
 * not visible here -- confirm before reordering or extending.
 */
enum btrfs_tlv_type {
BTRFS_TLV_U8,
BTRFS_TLV_U16,
BTRFS_TLV_U32,
BTRFS_TLV_U64,
BTRFS_TLV_BINARY,
BTRFS_TLV_STRING,
BTRFS_TLV_UUID,
BTRFS_TLV_TIMESPEC,
};
/*
 * Header of a send stream.  The structure is packed, so there is no padding
 * between or after the members.
 */
struct btrfs_stream_header {
/* sizeof() of the string literal, i.e. including its terminating NUL */
char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)];
/* little endian; presumably BTRFS_SEND_STREAM_VERSION -- TODO confirm */
__le32 version;
} __attribute__ ((__packed__));
/*
 * Header of one command in the send stream.  Packed; all fields are little
 * endian.
 */
struct btrfs_cmd_header {
/* len excluding the header */
__le32 len;
/* presumably a value of enum btrfs_send_cmd -- TODO confirm */
__le16 cmd;
/* crc including the header with zero crc field */
__le32 crc;
} __attribute__ ((__packed__));
/*
 * Header of one type-length-value element.  Packed; both fields are little
 * endian 16-bit values, so tlv_len cannot describe more than 65535 bytes.
 */
struct btrfs_tlv_header {
/* presumably one of the BTRFS_SEND_A_* attribute ids -- TODO confirm */
__le16 tlv_type;
/* len excluding the header */
__le16 tlv_len;
} __attribute__ ((__packed__));
/*
 * Send stream commands.  Values are implicit and consecutive starting at 0
 * (BTRFS_SEND_C_UNSPEC), so inserting or reordering enumerators changes the
 * numeric value of every later command.
 * NOTE(review): presumably these are the values stored in
 * btrfs_cmd_header.cmd, which would make the order part of the stream
 * format -- confirm before touching.
 */
enum btrfs_send_cmd {
BTRFS_SEND_C_UNSPEC,
BTRFS_SEND_C_SUBVOL,
BTRFS_SEND_C_SNAPSHOT,
BTRFS_SEND_C_MKFILE,
BTRFS_SEND_C_MKDIR,
BTRFS_SEND_C_MKNOD,
BTRFS_SEND_C_MKFIFO,
BTRFS_SEND_C_MKSOCK,
BTRFS_SEND_C_SYMLINK,
BTRFS_SEND_C_RENAME,
BTRFS_SEND_C_LINK,
BTRFS_SEND_C_UNLINK,
BTRFS_SEND_C_RMDIR,
BTRFS_SEND_C_SET_XATTR,
BTRFS_SEND_C_REMOVE_XATTR,
BTRFS_SEND_C_WRITE,
BTRFS_SEND_C_CLONE,
BTRFS_SEND_C_TRUNCATE,
BTRFS_SEND_C_CHMOD,
BTRFS_SEND_C_CHOWN,
BTRFS_SEND_C_UTIMES,
BTRFS_SEND_C_END,
/* sentinel: one past the last real command, keep it last */
__BTRFS_SEND_C_MAX,
};
/* highest valid command value (currently BTRFS_SEND_C_END) */
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
/*
 * Attributes in the send stream.  Values are implicit and consecutive
 * starting at 0 (BTRFS_SEND_A_UNSPEC), so inserting or reordering
 * enumerators changes the numeric value of every later attribute.
 * NOTE(review): presumably these are the values stored in
 * btrfs_tlv_header.tlv_type -- confirm before touching.
 */
enum {
BTRFS_SEND_A_UNSPEC,
BTRFS_SEND_A_UUID,
BTRFS_SEND_A_CTRANSID,
BTRFS_SEND_A_INO,
BTRFS_SEND_A_SIZE,
BTRFS_SEND_A_MODE,
BTRFS_SEND_A_UID,
BTRFS_SEND_A_GID,
BTRFS_SEND_A_RDEV,
BTRFS_SEND_A_CTIME,
BTRFS_SEND_A_MTIME,
BTRFS_SEND_A_ATIME,
BTRFS_SEND_A_OTIME,
BTRFS_SEND_A_XATTR_NAME,
BTRFS_SEND_A_XATTR_DATA,
BTRFS_SEND_A_PATH,
BTRFS_SEND_A_PATH_TO,
BTRFS_SEND_A_PATH_LINK,
BTRFS_SEND_A_FILE_OFFSET,
BTRFS_SEND_A_DATA,
BTRFS_SEND_A_CLONE_UUID,
BTRFS_SEND_A_CLONE_CTRANSID,
BTRFS_SEND_A_CLONE_PATH,
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
/* sentinel: one past the last real attribute, keep it last */
__BTRFS_SEND_A_MAX,
};
/* highest valid attribute value (currently BTRFS_SEND_A_CLONE_LEN) */
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
#ifdef __KERNEL__
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
#endif

View file

@ -17,15 +17,27 @@
*/
#include <linux/highmem.h>
#include <asm/unaligned.h>
/* this is some deeply nasty code. ctree.h has a different
* definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
#include "ctree.h"
/*
 * Read the single byte at 'p'.  Provides the 8-bit counterpart of the
 * get_unaligned_le##bits helpers so DEFINE_BTRFS_SETGET_BITS(8) can expand.
 */
static inline u8 get_unaligned_le8(const void *p)
{
	u8 *byte = (u8 *)p;

	return *byte;
}
/*
 * Store the single byte 'val' at 'p'.  Provides the 8-bit counterpart of the
 * put_unaligned_le##bits helpers so DEFINE_BTRFS_SETGET_BITS(8) can expand.
 */
static inline void put_unaligned_le8(u8 val, void *p)
{
	u8 *byte = p;

	*byte = val;
}
/*
* this is some deeply nasty code.
*
* The end result is that anyone who #includes ctree.h gets a
* declaration for the btrfs_set_foo functions and btrfs_foo functions
*
* This file declares the macros and then #includes ctree.h, which results
* in cpp creating the function here based on the template below.
* declaration for the btrfs_set_foo functions and btrfs_foo functions,
* which are wrappers of btrfs_set_token_#bits functions and
* btrfs_get_token_#bits functions, which are defined in this file.
*
* These setget functions do all the extent_buffer related mapping
* required to efficiently read and write specific fields in the extent
@ -33,103 +45,93 @@
* an unsigned long offset into the extent buffer which has been
* cast to a specific type. This gives us all the gcc type checking.
*
* The extent buffer api is used to do all the kmapping and page
* spanning work required to get extent buffers in highmem and have
* a metadata blocksize different from the page size.
*
* The macro starts with a simple function prototype declaration so that
* sparse won't complain about it being static.
* The extent buffer api is used to do the page spanning work required to
* have a metadata blocksize different from the page size.
*/
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \
u##bits btrfs_token_##name(struct extent_buffer *eb, \
type *s, struct btrfs_map_token *token) \
#define DEFINE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
unsigned long mem_len = sizeof(((type *)0)->member); \
u##bits res; \
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
kaddr = token->kaddr; \
p = (type *)(kaddr + part_offset - token->offset); \
res = le##bits##_to_cpu(p->member); \
return res; \
} \
err = map_private_extent_buffer(eb, offset, \
mem_len, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
read_eb_member(eb, s, type, member, &leres); \
return le##bits##_to_cpu(leres); \
} \
p = (type *)(kaddr + part_offset - map_start); \
res = le##bits##_to_cpu(p->member); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
return res; \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
u##bits res; \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
res = get_unaligned_le##bits(p + off); \
return res; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
\
read_extent_buffer(eb, &leres, offset, size); \
return le##bits##_to_cpu(leres); \
} \
p = kaddr + part_offset - map_start; \
res = get_unaligned_le##bits(p + off); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
return res; \
} \
void btrfs_set_token_##name(struct extent_buffer *eb, \
type *s, u##bits val, struct btrfs_map_token *token) \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
void *ptr, unsigned long off, u##bits val, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
unsigned long mem_len = sizeof(((type *)0)->member); \
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
kaddr = token->kaddr; \
p = (type *)(kaddr + part_offset - token->offset); \
p->member = cpu_to_le##bits(val); \
return; \
} \
err = map_private_extent_buffer(eb, offset, \
mem_len, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
val2 = cpu_to_le##bits(val); \
write_eb_member(eb, s, type, member, &val2); \
return; \
} \
p = (type *)(kaddr + part_offset - map_start); \
p->member = cpu_to_le##bits(val); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
} \
void btrfs_set_##name(struct extent_buffer *eb, \
type *s, u##bits val) \
{ \
btrfs_set_token_##name(eb, s, val, NULL); \
} \
u##bits btrfs_##name(struct extent_buffer *eb, \
type *s) \
{ \
return btrfs_token_##name(eb, s, NULL); \
} \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
put_unaligned_le##bits(val, p + off); \
return; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
\
val2 = cpu_to_le##bits(val); \
write_extent_buffer(eb, &val2, offset, size); \
return; \
} \
p = kaddr + part_offset - map_start; \
put_unaligned_le##bits(val, p + off); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
}
#include "ctree.h"
DEFINE_BTRFS_SETGET_BITS(8)
DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)
void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)

View file

@ -396,15 +396,23 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
strcmp(args[0].from, "zlib") == 0) {
compress_type = "zlib";
info->compress_type = BTRFS_COMPRESS_ZLIB;
btrfs_set_opt(info->mount_opt, COMPRESS);
} else if (strcmp(args[0].from, "lzo") == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
} else if (strncmp(args[0].from, "no", 2) == 0) {
compress_type = "no";
info->compress_type = BTRFS_COMPRESS_NONE;
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
compress_force = false;
} else {
ret = -EINVAL;
goto out;
}
btrfs_set_opt(info->mount_opt, COMPRESS);
if (compress_force) {
btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
pr_info("btrfs: force %s compression\n",
@ -1455,6 +1463,13 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
break;
case BTRFS_IOC_DEVICES_READY:
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
if (ret)
break;
ret = !(fs_devices->num_devices == fs_devices->total_devices);
break;
}
kfree(vol);
@ -1477,16 +1492,6 @@ static int btrfs_unfreeze(struct super_block *sb)
return 0;
}
/*
 * Mark 'inode' dirty through btrfs_dirty_inode().  A failure cannot be
 * returned to the caller, so it is reported with a rate-limited error
 * message instead.  'flags' is not used.
 */
static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
{
	int err = btrfs_dirty_inode(inode);

	if (!err)
		return;

	printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
			   "error %d\n", btrfs_ino(inode), err);
}
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@ -1526,7 +1531,6 @@ static const struct super_operations btrfs_super_ops = {
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
.write_inode = btrfs_write_inode,
.dirty_inode = btrfs_fs_dirty_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,

View file

@ -22,6 +22,7 @@
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@ -38,7 +39,6 @@ void put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
@ -100,8 +100,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
cur_trans = fs_info->running_transaction;
goto loop;
} else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&root->fs_info->trans_lock);
} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&fs_info->trans_lock);
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
return -EROFS;
}
@ -126,7 +126,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
/*
* although the tree mod log is per file system and not per transaction,
@ -145,10 +144,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
}
atomic_set(&fs_info->tree_mod_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
@ -299,6 +296,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
int ret;
u64 qgroup_reserved = 0;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
return ERR_PTR(-EROFS);
@ -317,6 +315,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
if (root->fs_info->quota_enabled &&
is_fstree(root->root_key.objectid)) {
qgroup_reserved = num_items * root->leafsize;
ret = btrfs_qgroup_reserve(root, qgroup_reserved);
if (ret)
return ERR_PTR(ret);
}
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
@ -349,11 +355,16 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->transaction = cur_trans;
h->blocks_used = 0;
h->bytes_reserved = 0;
h->root = root;
h->delayed_ref_updates = 0;
h->use_count = 1;
h->adding_csums = 0;
h->block_rsv = NULL;
h->orig_rsv = NULL;
h->aborted = 0;
h->qgroup_reserved = qgroup_reserved;
h->delayed_ref_elem.seq = 0;
INIT_LIST_HEAD(&h->qgroup_ref_list);
smp_mb();
if (cur_trans->blocked && may_wait_transaction(root, type)) {
@ -473,7 +484,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_block_rsv *rsv = trans->block_rsv;
int updates;
int err;
@ -481,12 +491,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
return 1;
/*
* We need to do this in case we're deleting csums so the global block
* rsv get's used instead of the csum block rsv.
*/
trans->block_rsv = NULL;
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates) {
@ -495,8 +499,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
return err;
}
trans->block_rsv = rsv;
return should_end_transaction(trans, root);
}
@ -513,8 +515,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0;
}
/*
* do the qgroup accounting as early as possible
*/
err = btrfs_delayed_refs_qgroup_accounting(trans, info);
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
/*
* the same root has to be passed to start_transaction and
* end_transaction. Subvolume quota depends on this.
*/
WARN_ON(trans->root != root);
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
while (count < 2) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
@ -527,6 +545,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
count++;
}
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root)) {
@ -567,6 +587,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
err = -EIO;
}
assert_qgroups_uptodate(trans);
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@ -785,6 +806,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
ret = btrfs_run_qgroups(trans, root->fs_info);
BUG_ON(ret);
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
@ -926,11 +954,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
struct timespec cur_time = CURRENT_TIME;
int ret;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
u64 root_flags;
uuid_le new_uuid;
rsv = trans->block_rsv;
@ -957,6 +987,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
}
ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
objectid, pending->inherit);
kfree(pending->inherit);
if (ret) {
pending->error = ret;
goto fail;
}
key.objectid = objectid;
key.offset = (u64)-1;
key.type = BTRFS_ROOT_ITEM_KEY;
@ -1016,6 +1054,20 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
btrfs_set_root_flags(new_root_item, root_flags);
btrfs_set_root_generation_v2(new_root_item,
trans->transid);
uuid_le_gen(&new_uuid);
memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
BTRFS_UUID_SIZE);
new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
btrfs_set_root_otransid(new_root_item, trans->transid);
memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
btrfs_set_root_stransid(new_root_item, 0);
btrfs_set_root_rtransid(new_root_item, 0);
old = btrfs_lock_root_node(root);
ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
if (ret) {
@ -1269,9 +1321,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_run_ordered_operations(root, 0);
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (cur_trans->aborted)
goto cleanup_transaction;
@ -1282,6 +1331,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
cur_trans = trans->transaction;
/*
@ -1330,7 +1382,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
spin_unlock(&root->fs_info->trans_lock);
}
if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
if (!btrfs_test_opt(root, SSD) &&
(now < cur_trans->start_time || now - cur_trans->start_time < 1))
should_grow = 1;
do {
@ -1351,6 +1404,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
/*
* running the delayed items may have added new refs. account
* them now so that they hinder processing of more delayed refs
* as little as possible.
*/
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
/*
* rename don't use btrfs_join_transaction, so, once we
* set the transaction to blocked above, we aren't going
@ -1463,6 +1523,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->chunk_root->node);
switch_commit_root(root->fs_info->chunk_root);
assert_qgroups_uptodate(trans);
update_super_roots(root);
if (!root->fs_info->log_root_recovering) {
@ -1532,6 +1593,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
return ret;
cleanup_transaction:
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
// WARN_ON(1);
if (current->journal_info == trans)

View file

@ -20,6 +20,7 @@
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
#include "delayed-ref.h"
#include "ctree.h"
struct btrfs_transaction {
u64 transid;
@ -49,6 +50,7 @@ struct btrfs_transaction {
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
u64 qgroup_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
unsigned long blocks_used;
@ -57,12 +59,22 @@ struct btrfs_trans_handle {
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv;
int aborted;
int adding_csums;
/*
* this root is only needed to validate that the root passed to
* start_transaction is the same as the one passed to end_transaction.
* Subvolume quota depends on this
*/
struct btrfs_root *root;
struct seq_list delayed_ref_elem;
struct list_head qgroup_ref_list;
};
struct btrfs_pending_snapshot {
struct dentry *dentry;
struct btrfs_root *root;
struct btrfs_root *snap;
struct btrfs_qgroup_inherit *inherit;
/* block reservation for the operation */
struct btrfs_block_rsv block_rsv;
/* extra metadata reservation for relocation */

View file

@ -637,7 +637,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
}
inode_set_bytes(inode, saved_nbytes);
btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, inode);
out:
if (inode)
iput(inode);
@ -1133,7 +1133,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (ret == 0) {
btrfs_inc_nlink(inode);
btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
} else {

View file

@ -429,6 +429,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
mutex_init(&fs_devices->device_list_mutex);
fs_devices->latest_devid = orig->latest_devid;
fs_devices->latest_trans = orig->latest_trans;
fs_devices->total_devices = orig->total_devices;
memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
/* We have held the volume lock, it is safe to get the devices. */
@ -739,6 +740,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
int ret;
u64 devid;
u64 transid;
u64 total_devices;
flags |= FMODE_EXCL;
bdev = blkdev_get_by_path(path, flags, holder);
@ -760,6 +762,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
disk_super = (struct btrfs_super_block *)bh->b_data;
devid = btrfs_stack_device_id(&disk_super->dev_item);
transid = btrfs_super_generation(disk_super);
total_devices = btrfs_super_num_devices(disk_super);
if (disk_super->label[0])
printk(KERN_INFO "device label %s ", disk_super->label);
else
@ -767,7 +770,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
printk(KERN_CONT "devid %llu transid %llu %s\n",
(unsigned long long)devid, (unsigned long long)transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
if (!ret && fs_devices_ret)
(*fs_devices_ret)->total_devices = total_devices;
brelse(bh);
error_close:
mutex_unlock(&uuid_mutex);
@ -1433,6 +1437,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
list_del_rcu(&device->dev_list);
device->fs_devices->num_devices--;
device->fs_devices->total_devices--;
if (device->missing)
root->fs_info->fs_devices->missing_devices--;
@ -1550,6 +1555,7 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
fs_devices->seeding = 0;
fs_devices->num_devices = 0;
fs_devices->open_devices = 0;
fs_devices->total_devices = 0;
fs_devices->seed = seed_devices;
generate_random_uuid(fs_devices->fsid);
@ -1749,6 +1755,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
root->fs_info->fs_devices->num_devices++;
root->fs_info->fs_devices->open_devices++;
root->fs_info->fs_devices->rw_devices++;
root->fs_info->fs_devices->total_devices++;
if (device->can_discard)
root->fs_info->fs_devices->num_can_discard++;
root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
@ -4736,9 +4743,6 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
rcu_str_deref(device->name),
(unsigned long long)device->devid);
__btrfs_reset_dev_stats(device);
device->dev_stats_valid = 1;
btrfs_release_path(path);
@ -4880,6 +4884,14 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
int i;
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
if (btrfs_dev_stat_read(dev, i) != 0)
break;
if (i == BTRFS_DEV_STAT_VALUES_MAX)
return; /* all values == 0, suppress message */
printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
rcu_str_deref(dev->name),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
@ -4890,8 +4902,7 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
}
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats,
int reset_after_read)
struct btrfs_ioctl_get_dev_stats *stats)
{
struct btrfs_device *dev;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
@ -4909,7 +4920,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
printk(KERN_WARNING
"btrfs: get dev_stats failed, not yet valid\n");
return -ENODEV;
} else if (reset_after_read) {
} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
if (stats->nr_items > i)
stats->values[i] =

View file

@ -126,6 +126,7 @@ struct btrfs_fs_devices {
u64 missing_devices;
u64 total_rw_bytes;
u64 num_can_discard;
u64 total_devices;
struct block_device *latest_bdev;
/* all of the devices in the FS, protected by a mutex
@ -293,8 +294,7 @@ struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats,
int reset_after_read);
struct btrfs_ioctl_get_dev_stats *stats);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);

View file

@ -1551,6 +1551,8 @@ void touch_atime(struct path *path)
* Btrfs), but since we touch atime while walking down the path we
* really don't care if we failed to update the atime of the file,
* so just ignore the return value.
* We may also fail on filesystems that have the ability to make parts
* of the fs read only, e.g. subvolumes in Btrfs.
*/
update_time(inode, &now, S_ATIME);
mnt_drop_write(mnt);