Btrfs: tree logging checksum fixes
This patch contains the following changes. 1) Limit the maximum size of the btrfs_ordered_sum structure to PAGE_SIZE. This struct is allocated with kmalloc, so we want to keep its size reasonable. 2) Replace copy_extent_csums with btrfs_lookup_csums_range. The replaced function was duplicated code in tree-log.c. 3) Remove replay_one_csum. Csum items are now replayed at the same time as file extents, which guarantees that only useful csums are replayed. 4) Fix nbytes accounting. Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
This commit is contained in:
parent
1ba12553f3
commit
07d400a6df
4 changed files with 130 additions and 232 deletions
|
@ -5579,7 +5579,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
|
|||
BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
|
||||
|
||||
disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
|
||||
ret = btrfs_lookup_csums_range(root, disk_bytenr,
|
||||
ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
|
||||
disk_bytenr + len - 1, &list);
|
||||
|
||||
while (!list_empty(&list)) {
|
||||
|
|
|
@ -27,6 +27,12 @@
|
|||
#define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
|
||||
sizeof(struct btrfs_item) * 2) / \
|
||||
size) - 1))
|
||||
|
||||
#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
|
||||
sizeof(struct btrfs_ordered_sum)) / \
|
||||
sizeof(struct btrfs_sector_sum) * \
|
||||
(r)->sectorsize - (r)->sectorsize)
|
||||
|
||||
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 objectid, u64 pos,
|
||||
|
@ -259,8 +265,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
key.offset = start;
|
||||
key.type = BTRFS_EXTENT_CSUM_KEY;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
|
||||
&key, path, 0, 0);
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
if (ret > 0 && path->slots[0] > 0) {
|
||||
|
@ -279,7 +284,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
while (start <= end) {
|
||||
leaf = path->nodes[0];
|
||||
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
|
||||
ret = btrfs_next_leaf(root->fs_info->csum_root, path);
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
if (ret > 0)
|
||||
|
@ -306,33 +311,38 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
|||
continue;
|
||||
}
|
||||
|
||||
size = min(csum_end, end + 1) - start;
|
||||
sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS);
|
||||
BUG_ON(!sums);
|
||||
|
||||
sector_sum = sums->sums;
|
||||
sums->bytenr = start;
|
||||
sums->len = size;
|
||||
|
||||
offset = (start - key.offset) >>
|
||||
root->fs_info->sb->s_blocksize_bits;
|
||||
offset *= csum_size;
|
||||
|
||||
csum_end = min(csum_end, end + 1);
|
||||
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_csum_item);
|
||||
while (size > 0) {
|
||||
read_extent_buffer(path->nodes[0], &sector_sum->sum,
|
||||
((unsigned long)item) + offset,
|
||||
csum_size);
|
||||
sector_sum->bytenr = start;
|
||||
while (start < csum_end) {
|
||||
size = min_t(size_t, csum_end - start,
|
||||
MAX_ORDERED_SUM_BYTES(root));
|
||||
sums = kzalloc(btrfs_ordered_sum_size(root, size),
|
||||
GFP_NOFS);
|
||||
BUG_ON(!sums);
|
||||
|
||||
size -= root->sectorsize;
|
||||
start += root->sectorsize;
|
||||
offset += csum_size;
|
||||
sector_sum++;
|
||||
sector_sum = sums->sums;
|
||||
sums->bytenr = start;
|
||||
sums->len = size;
|
||||
|
||||
offset = (start - key.offset) >>
|
||||
root->fs_info->sb->s_blocksize_bits;
|
||||
offset *= csum_size;
|
||||
|
||||
while (size > 0) {
|
||||
read_extent_buffer(path->nodes[0],
|
||||
&sector_sum->sum,
|
||||
((unsigned long)item) +
|
||||
offset, csum_size);
|
||||
sector_sum->bytenr = start;
|
||||
|
||||
size -= root->sectorsize;
|
||||
start += root->sectorsize;
|
||||
offset += csum_size;
|
||||
sector_sum++;
|
||||
}
|
||||
list_add_tail(&sums->list, list);
|
||||
}
|
||||
list_add_tail(&sums->list, list);
|
||||
|
||||
path->slots[0]++;
|
||||
}
|
||||
ret = 0;
|
||||
|
|
|
@ -157,7 +157,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
|
|||
key.objectid = inode->i_ino;
|
||||
key.offset = start;
|
||||
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
|
||||
inode_add_bytes(inode, size);
|
||||
datasize = btrfs_file_extent_calc_inline_size(cur_size);
|
||||
|
||||
inode_add_bytes(inode, size);
|
||||
|
@ -920,8 +919,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
|
|||
struct btrfs_ordered_sum *sums;
|
||||
LIST_HEAD(list);
|
||||
|
||||
ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1,
|
||||
&list);
|
||||
ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
|
||||
bytenr + num_bytes - 1, &list);
|
||||
if (ret == 0 && list_empty(&list))
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -433,49 +433,6 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
|
|||
trans->transid);
|
||||
}
|
||||
}
|
||||
|
||||
if (overwrite_root &&
|
||||
key->type == BTRFS_EXTENT_DATA_KEY) {
|
||||
int extent_type;
|
||||
struct btrfs_file_extent_item *fi;
|
||||
|
||||
fi = (struct btrfs_file_extent_item *)dst_ptr;
|
||||
extent_type = btrfs_file_extent_type(path->nodes[0], fi);
|
||||
if (extent_type == BTRFS_FILE_EXTENT_REG ||
|
||||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
struct btrfs_key ins;
|
||||
ins.objectid = btrfs_file_extent_disk_bytenr(
|
||||
path->nodes[0], fi);
|
||||
ins.offset = btrfs_file_extent_disk_num_bytes(
|
||||
path->nodes[0], fi);
|
||||
ins.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
|
||||
/*
|
||||
* is this extent already allocated in the extent
|
||||
* allocation tree? If so, just add a reference
|
||||
*/
|
||||
ret = btrfs_lookup_extent(root, ins.objectid,
|
||||
ins.offset);
|
||||
if (ret == 0) {
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
ins.objectid, ins.offset,
|
||||
path->nodes[0]->start,
|
||||
root->root_key.objectid,
|
||||
trans->transid, key->objectid);
|
||||
} else {
|
||||
/*
|
||||
* insert the extent pointer in the extent
|
||||
* allocation tree
|
||||
*/
|
||||
ret = btrfs_alloc_logged_extent(trans, root,
|
||||
path->nodes[0]->start,
|
||||
root->root_key.objectid,
|
||||
trans->transid, key->objectid,
|
||||
&ins);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
no_copy:
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
btrfs_release_path(root, path);
|
||||
|
@ -530,6 +487,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
|||
u64 extent_end;
|
||||
u64 alloc_hint;
|
||||
u64 start = key->offset;
|
||||
u64 saved_nbytes;
|
||||
struct btrfs_file_extent_item *item;
|
||||
struct inode *inode = NULL;
|
||||
unsigned long size;
|
||||
|
@ -591,17 +549,95 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
btrfs_release_path(root, path);
|
||||
|
||||
saved_nbytes = inode_get_bytes(inode);
|
||||
/* drop any overlapping extents */
|
||||
ret = btrfs_drop_extents(trans, root, inode,
|
||||
start, extent_end, start, &alloc_hint);
|
||||
BUG_ON(ret);
|
||||
|
||||
/* insert the extent */
|
||||
ret = overwrite_item(trans, root, path, eb, slot, key);
|
||||
BUG_ON(ret);
|
||||
if (found_type == BTRFS_FILE_EXTENT_REG ||
|
||||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
unsigned long dest_offset;
|
||||
struct btrfs_key ins;
|
||||
|
||||
/* btrfs_drop_extents changes i_bytes & i_blocks, update it here */
|
||||
inode_add_bytes(inode, extent_end - start);
|
||||
ret = btrfs_insert_empty_item(trans, root, path, key,
|
||||
sizeof(*item));
|
||||
BUG_ON(ret);
|
||||
dest_offset = btrfs_item_ptr_offset(path->nodes[0],
|
||||
path->slots[0]);
|
||||
copy_extent_buffer(path->nodes[0], eb, dest_offset,
|
||||
(unsigned long)item, sizeof(*item));
|
||||
|
||||
ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
|
||||
ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
|
||||
ins.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
|
||||
if (ins.objectid > 0) {
|
||||
u64 csum_start;
|
||||
u64 csum_end;
|
||||
LIST_HEAD(ordered_sums);
|
||||
/*
|
||||
* is this extent already allocated in the extent
|
||||
* allocation tree? If so, just add a reference
|
||||
*/
|
||||
ret = btrfs_lookup_extent(root, ins.objectid,
|
||||
ins.offset);
|
||||
if (ret == 0) {
|
||||
ret = btrfs_inc_extent_ref(trans, root,
|
||||
ins.objectid, ins.offset,
|
||||
path->nodes[0]->start,
|
||||
root->root_key.objectid,
|
||||
trans->transid, key->objectid);
|
||||
} else {
|
||||
/*
|
||||
* insert the extent pointer in the extent
|
||||
* allocation tree
|
||||
*/
|
||||
ret = btrfs_alloc_logged_extent(trans, root,
|
||||
path->nodes[0]->start,
|
||||
root->root_key.objectid,
|
||||
trans->transid, key->objectid,
|
||||
&ins);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
btrfs_release_path(root, path);
|
||||
|
||||
if (btrfs_file_extent_compression(eb, item)) {
|
||||
csum_start = ins.objectid;
|
||||
csum_end = csum_start + ins.offset;
|
||||
} else {
|
||||
csum_start = ins.objectid +
|
||||
btrfs_file_extent_offset(eb, item);
|
||||
csum_end = csum_start +
|
||||
btrfs_file_extent_num_bytes(eb, item);
|
||||
}
|
||||
|
||||
ret = btrfs_lookup_csums_range(root->log_root,
|
||||
csum_start, csum_end - 1,
|
||||
&ordered_sums);
|
||||
BUG_ON(ret);
|
||||
while (!list_empty(&ordered_sums)) {
|
||||
struct btrfs_ordered_sum *sums;
|
||||
sums = list_entry(ordered_sums.next,
|
||||
struct btrfs_ordered_sum,
|
||||
list);
|
||||
ret = btrfs_csum_file_blocks(trans,
|
||||
root->fs_info->csum_root,
|
||||
sums);
|
||||
BUG_ON(ret);
|
||||
list_del(&sums->list);
|
||||
kfree(sums);
|
||||
}
|
||||
} else {
|
||||
btrfs_release_path(root, path);
|
||||
}
|
||||
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
/* inline extents are easy, we just overwrite them */
|
||||
ret = overwrite_item(trans, root, path, eb, slot, key);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
inode_set_bytes(inode, saved_nbytes);
|
||||
btrfs_update_inode(trans, root, inode);
|
||||
out:
|
||||
if (inode)
|
||||
|
@ -902,70 +938,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* replay one csum item from the log tree into the subvolume 'root'
|
||||
* eb, slot and key all refer to the log tree
|
||||
* path is for temp use by this function and should be released on return
|
||||
*
|
||||
* This copies the checksums out of the log tree and inserts them into
|
||||
* the subvolume. Any existing checksums for this range in the file
|
||||
* are overwritten, and new items are added where required.
|
||||
*
|
||||
* We keep this simple by reusing the btrfs_ordered_sum code from
|
||||
* the data=ordered mode. This basically means making a copy
|
||||
* of all the checksums in ram, which we have to do anyway for kmap
|
||||
* rules.
|
||||
*
|
||||
* The copy is then sent down to btrfs_csum_file_blocks, which
|
||||
* does all the hard work of finding existing items in the file
|
||||
* or adding new ones.
|
||||
*/
|
||||
static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct extent_buffer *eb, int slot,
|
||||
struct btrfs_key *key)
|
||||
{
|
||||
int ret;
|
||||
u32 item_size = btrfs_item_size_nr(eb, slot);
|
||||
u64 cur_offset;
|
||||
u16 csum_size =
|
||||
btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
unsigned long file_bytes;
|
||||
struct btrfs_ordered_sum *sums;
|
||||
struct btrfs_sector_sum *sector_sum;
|
||||
unsigned long ptr;
|
||||
|
||||
file_bytes = (item_size / csum_size) * root->sectorsize;
|
||||
sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
|
||||
if (!sums)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&sums->list);
|
||||
sums->len = file_bytes;
|
||||
sums->bytenr = key->offset;
|
||||
|
||||
/*
|
||||
* copy all the sums into the ordered sum struct
|
||||
*/
|
||||
sector_sum = sums->sums;
|
||||
cur_offset = key->offset;
|
||||
ptr = btrfs_item_ptr_offset(eb, slot);
|
||||
while (item_size > 0) {
|
||||
sector_sum->bytenr = cur_offset;
|
||||
read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
|
||||
sector_sum++;
|
||||
item_size -= csum_size;
|
||||
ptr += csum_size;
|
||||
cur_offset += root->sectorsize;
|
||||
}
|
||||
|
||||
/* let btrfs_csum_file_blocks add them into the file */
|
||||
ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
|
||||
BUG_ON(ret);
|
||||
kfree(sums);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* There are a few corners where the link count of the file can't
|
||||
* be properly maintained during replay. So, instead of adding
|
||||
|
@ -1659,10 +1631,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
|
|||
ret = replay_one_extent(wc->trans, root, path,
|
||||
eb, i, &key);
|
||||
BUG_ON(ret);
|
||||
} else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
|
||||
ret = replay_one_csum(wc->trans, root, path,
|
||||
eb, i, &key);
|
||||
BUG_ON(ret);
|
||||
} else if (key.type == BTRFS_DIR_ITEM_KEY ||
|
||||
key.type == BTRFS_DIR_INDEX_KEY) {
|
||||
ret = replay_one_dir_item(wc->trans, root, path,
|
||||
|
@ -2021,7 +1989,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
|
|||
.process_func = process_one_buffer
|
||||
};
|
||||
|
||||
if (!root->log_root)
|
||||
if (!root->log_root || root->fs_info->log_root_recovering)
|
||||
return 0;
|
||||
|
||||
log = root->log_root;
|
||||
|
@ -2453,86 +2421,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
|
||||
struct list_head *list,
|
||||
struct btrfs_root *root,
|
||||
u64 disk_bytenr, u64 len)
|
||||
{
|
||||
struct btrfs_ordered_sum *sums;
|
||||
struct btrfs_sector_sum *sector_sum;
|
||||
int ret;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_csum_item *item = NULL;
|
||||
u64 end = disk_bytenr + len;
|
||||
u64 item_start_offset = 0;
|
||||
u64 item_last_offset = 0;
|
||||
u32 diff;
|
||||
u32 sum;
|
||||
u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
|
||||
|
||||
sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
|
||||
|
||||
sector_sum = sums->sums;
|
||||
sums->bytenr = disk_bytenr;
|
||||
sums->len = len;
|
||||
list_add_tail(&sums->list, list);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
while (disk_bytenr < end) {
|
||||
if (!item || disk_bytenr < item_start_offset ||
|
||||
disk_bytenr >= item_last_offset) {
|
||||
struct btrfs_key found_key;
|
||||
u32 item_size;
|
||||
|
||||
if (item)
|
||||
btrfs_release_path(root, path);
|
||||
item = btrfs_lookup_csum(NULL, root, path,
|
||||
disk_bytenr, 0);
|
||||
if (IS_ERR(item)) {
|
||||
ret = PTR_ERR(item);
|
||||
if (ret == -ENOENT || ret == -EFBIG)
|
||||
ret = 0;
|
||||
sum = 0;
|
||||
printk(KERN_INFO "log no csum found for "
|
||||
"byte %llu\n",
|
||||
(unsigned long long)disk_bytenr);
|
||||
item = NULL;
|
||||
btrfs_release_path(root, path);
|
||||
goto found;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
|
||||
path->slots[0]);
|
||||
|
||||
item_start_offset = found_key.offset;
|
||||
item_size = btrfs_item_size_nr(path->nodes[0],
|
||||
path->slots[0]);
|
||||
item_last_offset = item_start_offset +
|
||||
(item_size / csum_size) *
|
||||
root->sectorsize;
|
||||
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_csum_item);
|
||||
}
|
||||
/*
|
||||
* this byte range must be able to fit inside
|
||||
* a single leaf so it will also fit inside a u32
|
||||
*/
|
||||
diff = disk_bytenr - item_start_offset;
|
||||
diff = diff / root->sectorsize;
|
||||
diff = diff * csum_size;
|
||||
|
||||
read_extent_buffer(path->nodes[0], &sum,
|
||||
((unsigned long)item) + diff,
|
||||
csum_size);
|
||||
found:
|
||||
sector_sum->bytenr = disk_bytenr;
|
||||
sector_sum->sum = sum;
|
||||
disk_bytenr += root->sectorsize;
|
||||
sector_sum++;
|
||||
}
|
||||
btrfs_free_path(path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *log,
|
||||
struct btrfs_path *dst_path,
|
||||
|
@ -2622,10 +2510,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||
trans->transid,
|
||||
ins_keys[i].objectid);
|
||||
BUG_ON(ret);
|
||||
ret = copy_extent_csums(trans,
|
||||
&ordered_sums,
|
||||
log->fs_info->csum_root,
|
||||
ds + cs, cl);
|
||||
ret = btrfs_lookup_csums_range(
|
||||
log->fs_info->csum_root,
|
||||
ds + cs, ds + cs + cl - 1,
|
||||
&ordered_sums);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
}
|
||||
|
@ -2942,9 +2830,9 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
|
|||
tmp_key.offset = (u64)-1;
|
||||
|
||||
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
|
||||
|
||||
BUG_ON(!wc.replay_dest);
|
||||
|
||||
wc.replay_dest->log_root = log;
|
||||
btrfs_record_root_in_trans(wc.replay_dest);
|
||||
ret = walk_log_tree(trans, log, &wc);
|
||||
BUG_ON(ret);
|
||||
|
@ -2961,6 +2849,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
|
|||
}
|
||||
|
||||
key.offset = found_key.offset - 1;
|
||||
wc.replay_dest->log_root = NULL;
|
||||
free_extent_buffer(log->node);
|
||||
kfree(log);
|
||||
|
||||
|
|
Loading…
Reference in a new issue