Btrfs: add hole punching
This patch adds hole punching via fallocate. Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
This commit is contained in:
parent
2671485d39
commit
2aaa665581
5 changed files with 355 additions and 13 deletions
|
@ -3250,6 +3250,8 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
|
|||
struct btrfs_root *root,
|
||||
struct inode *dir, u64 objectid,
|
||||
const char *name, int name_len);
|
||||
int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
|
||||
int front);
|
||||
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct inode *inode, u64 new_size,
|
||||
|
@ -3323,7 +3325,7 @@ extern const struct file_operations btrfs_file_operations;
|
|||
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct inode *inode,
|
||||
struct btrfs_path *path, u64 start, u64 end,
|
||||
int drop_cache);
|
||||
u64 *drop_end, int drop_cache);
|
||||
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct inode *inode, u64 start,
|
||||
u64 end, int drop_cache);
|
||||
|
|
|
@ -4132,6 +4132,8 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
|
|||
void btrfs_free_block_rsv(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *rsv)
|
||||
{
|
||||
if (!rsv)
|
||||
return;
|
||||
btrfs_block_rsv_release(root, rsv, (u64)-1);
|
||||
kfree(rsv);
|
||||
}
|
||||
|
|
332
fs/btrfs/file.c
332
fs/btrfs/file.c
|
@ -39,6 +39,7 @@
|
|||
#include "tree-log.h"
|
||||
#include "locking.h"
|
||||
#include "compat.h"
|
||||
#include "volumes.h"
|
||||
|
||||
/*
|
||||
* when auto defrag is enabled we
|
||||
|
@ -584,7 +585,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
|
|||
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct inode *inode,
|
||||
struct btrfs_path *path, u64 start, u64 end,
|
||||
int drop_cache)
|
||||
u64 *drop_end, int drop_cache)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_file_extent_item *fi;
|
||||
|
@ -822,6 +823,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
|||
btrfs_abort_transaction(trans, root, ret);
|
||||
}
|
||||
|
||||
if (drop_end)
|
||||
*drop_end = min(end, extent_end);
|
||||
btrfs_release_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
@ -836,7 +839,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
|||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
|
||||
ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
|
||||
drop_cache);
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
|
@ -1645,6 +1648,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Decide whether the item at @slot of @leaf is a hole extent belonging to
 * @inode that directly borders the range [@start, @end).
 *
 * Returns 1 when the item is a BTRFS_FILE_EXTENT_REG file extent with a zero
 * disk bytenr that either begins exactly at @end or finishes exactly at
 * @start.  Returns 0 in every other case, including when @slot lies outside
 * the leaf.
 */
static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_key found;
	u64 extent_end;

	/* The caller may probe one slot before or after the leaf contents. */
	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &found, slot);
	if (found.objectid != btrfs_ino(inode))
		return 0;
	if (found.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, extent) != BTRFS_FILE_EXTENT_REG)
		return 0;

	/* A non-zero disk bytenr means real data, not a hole. */
	if (btrfs_file_extent_disk_bytenr(leaf, extent) != 0)
		return 0;

	/* Hole starts right where our range stops. */
	if (found.offset == end)
		return 1;

	/* Hole stops right where our range starts. */
	extent_end = found.offset + btrfs_file_extent_num_bytes(leaf, extent);
	return extent_end == start ? 1 : 0;
}
|
||||
|
||||
/*
 * Record [@offset, @end) of @inode as a hole, both as a file extent item in
 * the inode's root and as an extent map in the inode's extent map tree.
 *
 * The on-disk part first tries to grow a neighbouring hole extent (the item
 * just before or just after the search position) instead of inserting a new
 * item; only if neither neighbour is mergeable is a fresh hole extent
 * inserted.
 *
 * @trans:  running transaction
 * @inode:  inode being modified
 * @path:   caller-owned path; it is released before returning, except when
 *          btrfs_search_slot() itself fails
 * @offset: first byte of the hole
 * @end:    first byte after the hole
 *
 * Returns 0 on success or a negative errno.  -EINVAL is returned if an
 * extent item keyed exactly at @offset is still present.  Failure to set up
 * the in-memory extent map is not reported as an error; instead
 * BTRFS_INODE_NEEDS_FULL_SYNC is set on the inode.
 */
static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
		      struct btrfs_path *path, u64 offset, u64 end)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct extent_map *hole_em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct btrfs_key key;
	int ret;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		return ret;
	if (ret == 0) {
		/*
		 * The caller is expected to have dropped this range already,
		 * so finding an item keyed at @offset means something went
		 * wrong.  Fail the operation instead of crashing the machine,
		 * and drop the path so the caller's cleanup does not run with
		 * the leaf still held.
		 */
		btrfs_release_path(path);
		return -EINVAL;
	}

	/*
	 * ret > 0: the key was not found and path->slots[0] is the position
	 * where it would be inserted.  The preceding item therefore sits at
	 * slots[0] - 1 and the following item at slots[0].
	 */
	leaf = path->nodes[0];
	if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
		u64 num_bytes;

		/* Extend the previous hole forward over our range. */
		path->slots[0]--;
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
			end - offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}

	if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
		u64 num_bytes;

		/*
		 * Extend the following hole backward: move its key down to
		 * @offset and grow its length by the size of our range.
		 */
		key.offset = offset;
		btrfs_set_item_key_safe(trans, root, path, &key);
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
			offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}
	btrfs_release_path(path);

	/* No mergeable neighbour: insert a standalone hole extent. */
	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
				       0, 0, end - offset, 0, end - offset,
				       0, 0, 0);
	if (ret)
		return ret;

out:
	btrfs_release_path(path);

	hole_em = alloc_extent_map();
	if (!hole_em) {
		/*
		 * Could not allocate the in-memory mapping: drop whatever is
		 * cached for the range and flag the inode as needing a full
		 * sync.
		 */
		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);
	} else {
		hole_em->start = offset;
		hole_em->len = end - offset;
		hole_em->orig_start = offset;

		hole_em->block_start = EXTENT_MAP_HOLE;
		hole_em->block_len = 0;
		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
		hole_em->compress_type = BTRFS_COMPRESS_NONE;
		hole_em->generation = trans->transid;

		/*
		 * Retry for as long as add_extent_mapping() reports -EEXIST,
		 * dropping the cached range again before each attempt.
		 */
		do {
			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, hole_em);
			if (!ret)
				list_move(&hole_em->list,
					  &em_tree->modified_extents);
			write_unlock(&em_tree->lock);
		} while (ret == -EEXIST);
		free_extent_map(hole_em);
		if (ret)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
	}

	return 0;
}
|
||||
|
||||
/*
 * Punch a hole of @len bytes starting at @offset in @inode.
 *
 * The partial pages at either edge of the range are zeroed with
 * btrfs_truncate_page().  The sector-aligned interior
 * [lockstart, lockend] is then locked in the inode's io_tree, its extents
 * are dropped with __btrfs_drop_extents() and the dropped ranges are
 * recorded as holes with fill_holes().
 *
 * Runs with inode->i_mutex held for the whole operation.
 *
 * Returns 0 on success (including when @offset is at or beyond i_size, or
 * when the range contains no whole sector), otherwise the first error
 * reported by a helper.
 */
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_block_rsv *rsv;
	struct btrfs_trans_handle *trans;
	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
	/* Round the start up and the end down to sector boundaries. */
	u64 lockstart = (offset + mask) & ~mask;
	u64 lockend = ((offset + len) & ~mask) - 1;
	u64 cur_offset = lockstart;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
	u64 drop_end;
	unsigned long nr;
	int ret = 0;
	int err = 0;
	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
		((offset + len) >> PAGE_CACHE_SHIFT);

	btrfs_wait_ordered_range(inode, offset, len);

	mutex_lock(&inode->i_mutex);
	if (offset >= inode->i_size) {
		mutex_unlock(&inode->i_mutex);
		return 0;
	}

	/*
	 * Only do this if we are in the same page and we aren't doing the
	 * entire page.
	 */
	if (same_page && len < PAGE_CACHE_SIZE) {
		ret = btrfs_truncate_page(inode, offset, len, 0);
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	/* zero back part of the first page */
	ret = btrfs_truncate_page(inode, offset, 0, 0);
	if (ret) {
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	/* zero the front end of the last page */
	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
	if (ret) {
		mutex_unlock(&inode->i_mutex);
		return ret;
	}

	/* Nothing sector-aligned is left between the two partial edges. */
	if (lockend < lockstart) {
		mutex_unlock(&inode->i_mutex);
		return 0;
	}

	while (1) {
		struct btrfs_ordered_extent *ordered;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 0, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure we have no ordered extents in this range
		 * and nobody raced in and read a page in this range, if we did
		 * we need to try again.
		 *
		 * An ordered extent covers [file_offset, file_offset + len), so
		 * one that ends exactly at lockstart does not overlap the
		 * locked range; hence "<=" rather than "<".
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, EXTENT_UPTODATE, 0,
				     cached_state)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state, GFP_NOFS);
		btrfs_wait_ordered_range(inode, lockstart,
					 lockend - lockstart + 1);
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	rsv = btrfs_alloc_block_rsv(root);
	if (!rsv) {
		ret = -ENOMEM;
		goto out_free;
	}
	rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
	rsv->failfast = 1;

	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
				      min_size);
	BUG_ON(ret);
	trans->block_rsv = rsv;

	/*
	 * Drop extents piecewise.  Each pass runs against the private
	 * reservation; when __btrfs_drop_extents() reports -ENOSPC the part
	 * dropped so far ([cur_offset, drop_end)) is recorded as a hole, the
	 * transaction is ended and a new one is started for the remainder.
	 * Any other return value (including 0) ends the loop.
	 */
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1);
		if (ret != -ENOSPC)
			break;

		/* fill_holes() and the inode update use the transaction rsv. */
		trans->block_rsv = &root->fs_info->trans_block_rsv;

		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
		if (ret) {
			err = ret;
			break;
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root, nr);

		trans = btrfs_start_transaction(root, 3);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* out_trans skips the commit work when trans is NULL */
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size);
		BUG_ON(ret);	/* shouldn't happen */
		trans->block_rsv = rsv;
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	/* Record the final dropped piece as a hole. */
	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = fill_holes(trans, inode, path, cur_offset, drop_end);
	if (ret) {
		err = ret;
		goto out_trans;
	}

out_trans:
	if (!trans)
		goto out_free;

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	nr = trans->blocks_used;
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root, nr);
out_free:
	btrfs_free_path(path);
	/* rsv may be NULL here; btrfs_free_block_rsv() is called regardless */
	btrfs_free_block_rsv(root, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	mutex_unlock(&inode->i_mutex);
	/* Prefer the first recorded error over a later one held in ret. */
	if (ret && !err)
		err = ret;
	return err;
}
|
||||
|
||||
static long btrfs_fallocate(struct file *file, int mode,
|
||||
loff_t offset, loff_t len)
|
||||
{
|
||||
|
@ -1663,10 +1984,13 @@ static long btrfs_fallocate(struct file *file, int mode,
|
|||
alloc_start = offset & ~mask;
|
||||
alloc_end = (offset + len + mask) & ~mask;
|
||||
|
||||
/* We only support the FALLOC_FL_KEEP_SIZE mode */
|
||||
if (mode & ~FALLOC_FL_KEEP_SIZE)
|
||||
/* Make sure we aren't being given some crap mode */
|
||||
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (mode & FALLOC_FL_PUNCH_HOLE)
|
||||
return btrfs_punch_hole(inode, offset, len);
|
||||
|
||||
/*
|
||||
* Make sure we have enough space before we do the
|
||||
* allocation.
|
||||
|
|
|
@ -3475,12 +3475,20 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
|
|||
}
|
||||
|
||||
/*
|
||||
* taken from block_truncate_page, but does cow as it zeros out
|
||||
* any bytes left in the last page in the file.
|
||||
* btrfs_truncate_page - read, zero a chunk and write a page
|
||||
* @inode - inode that we're zeroing
|
||||
* @from - the offset to start zeroing
|
||||
* @len - the length to zero, 0 to zero the entire range respective to the
|
||||
* offset
|
||||
* @front - zero up to the offset instead of from the offset on
|
||||
*
|
||||
* This will find the page for the "from" offset and cow the page and zero the
|
||||
* part we want to zero. This is used with truncate and hole punching.
|
||||
*/
|
||||
static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
|
||||
int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
|
||||
int front)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
@ -3495,7 +3503,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
|
|||
u64 page_start;
|
||||
u64 page_end;
|
||||
|
||||
if ((offset & (blocksize - 1)) == 0)
|
||||
if ((offset & (blocksize - 1)) == 0 &&
|
||||
(!len || ((len & (blocksize - 1)) == 0)))
|
||||
goto out;
|
||||
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
|
||||
if (ret)
|
||||
|
@ -3555,8 +3564,13 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
|
|||
|
||||
ret = 0;
|
||||
if (offset != PAGE_CACHE_SIZE) {
|
||||
if (!len)
|
||||
len = PAGE_CACHE_SIZE - offset;
|
||||
kaddr = kmap(page);
|
||||
memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
|
||||
if (front)
|
||||
memset(kaddr, 0, offset);
|
||||
else
|
||||
memset(kaddr + offset, 0, len);
|
||||
flush_dcache_page(page);
|
||||
kunmap(page);
|
||||
}
|
||||
|
@ -6796,7 +6810,7 @@ static int btrfs_truncate(struct inode *inode)
|
|||
u64 mask = root->sectorsize - 1;
|
||||
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
|
||||
|
||||
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
|
||||
ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
|
@ -2842,7 +2842,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
|
|||
|
||||
if (BTRFS_I(inode)->logged_trans == trans->transid) {
|
||||
ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
|
||||
start + len, 0);
|
||||
start + len, NULL, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue