Merge tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "The major work includes fixing and enhancing the existing extent_cache
  feature, which has been well settling down so far and now it becomes a
  default mount option accordingly.

  Also, this version newly registers a f2fs memory shrinker to reclaim
  several objects consumed by a couple of data structures in order to
  avoid memory pressures.

  Another new feature is to add ioctl(F2FS_GARBAGE_COLLECT) which
  triggers a cleaning job explicitly by users.

  Most of the other patches are to fix bugs occurred in the corner cases
  across the whole code area"

* tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (85 commits)
  f2fs: upset segment_info repair
  f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent
  f2fs: update extent tree in batches
  f2fs: fix to release inode correctly
  f2fs: handle f2fs_truncate error correctly
  f2fs: avoid unneeded initializing when converting inline dentry
  f2fs: atomically set inode->i_flags
  f2fs: fix wrong pointer access during try_to_free_nids
  f2fs: use __GFP_NOFAIL to avoid infinite loop
  f2fs: lookup neighbor extent nodes for merging later
  f2fs: split __insert_extent_tree_ret for readability
  f2fs: kill dead code in __insert_extent_tree
  f2fs: adjust showing of extent cache stat
  f2fs: add largest/cached stat in extent cache
  f2fs: fix incorrect mapping for bmap
  f2fs: add annotation for space utilization of regular/inline dentry
  f2fs: fix to update cached_en of extent tree properly
  f2fs: fix typo
  f2fs: check the node block address of newly allocated nid
  f2fs: go out for insert_inode_locked failure
  ...
This commit is contained in:
Linus Torvalds 2015-09-03 13:10:22 -07:00
commit 4c12ab7e5e
26 changed files with 1904 additions and 1027 deletions

View file

@ -143,7 +143,9 @@ fastboot This option is used when a system wants to reduce mount
extent_cache Enable an extent cache based on rb-tree, it can cache extent_cache Enable an extent cache based on rb-tree, it can cache
as many as extent which map between contiguous logical as many as extent which map between contiguous logical
address and physical address per inode, resulting in address and physical address per inode, resulting in
increasing the cache hit ratio. increasing the cache hit ratio. Set by default.
noextent_cache Diable an extent cache based on rb-tree explicitly, see
the above extent_cache mount option.
noinline_data Disable the inline data feature, inline data feature is noinline_data Disable the inline data feature, inline data feature is
enabled by default. enabled by default.

View file

@ -4416,6 +4416,7 @@ F: include/linux/fscache*.h
F2FS FILE SYSTEM F2FS FILE SYSTEM
M: Jaegeuk Kim <jaegeuk@kernel.org> M: Jaegeuk Kim <jaegeuk@kernel.org>
M: Changman Lee <cm224.lee@samsung.com> M: Changman Lee <cm224.lee@samsung.com>
R: Chao Yu <chao2.yu@samsung.com>
L: linux-f2fs-devel@lists.sourceforge.net L: linux-f2fs-devel@lists.sourceforge.net
W: http://en.wikipedia.org/wiki/F2FS W: http://en.wikipedia.org/wiki/F2FS
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
@ -4424,6 +4425,7 @@ F: Documentation/filesystems/f2fs.txt
F: Documentation/ABI/testing/sysfs-fs-f2fs F: Documentation/ABI/testing/sysfs-fs-f2fs
F: fs/f2fs/ F: fs/f2fs/
F: include/linux/f2fs_fs.h F: include/linux/f2fs_fs.h
F: include/trace/events/f2fs.h
FUJITSU FR-V (FRV) PORT FUJITSU FR-V (FRV) PORT
M: David Howells <dhowells@redhat.com> M: David Howells <dhowells@redhat.com>

View file

@ -45,7 +45,7 @@ config F2FS_FS_POSIX_ACL
default y default y
help help
Posix Access Control Lists (ACLs) support permissions for users and Posix Access Control Lists (ACLs) support permissions for users and
gourps beyond the owner/group/world scheme. groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>. Linux website <http://acl.bestbits.at/>.

View file

@ -2,6 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-y += shrinker.o extent_cache.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o

View file

@ -69,14 +69,24 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
fio.page = page; fio.page = page;
if (f2fs_submit_page_bio(&fio)) if (f2fs_submit_page_bio(&fio)) {
f2fs_put_page(page, 1);
goto repeat; goto repeat;
}
lock_page(page); lock_page(page);
if (unlikely(page->mapping != mapping)) { if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1); f2fs_put_page(page, 1);
goto repeat; goto repeat;
} }
/*
* if there is any IO error when accessing device, make our filesystem
* readonly and make sure do not write checkpoint with non-uptodate
* meta page.
*/
if (unlikely(!PageUptodate(page)))
f2fs_stop_checkpoint(sbi);
out: out:
return page; return page;
} }
@ -326,26 +336,18 @@ const struct address_space_operations f2fs_meta_aops = {
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{ {
struct inode_management *im = &sbi->im[type]; struct inode_management *im = &sbi->im[type];
struct ino_entry *e; struct ino_entry *e, *tmp;
tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
retry: retry:
if (radix_tree_preload(GFP_NOFS)) { radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
cond_resched();
goto retry;
}
spin_lock(&im->ino_lock); spin_lock(&im->ino_lock);
e = radix_tree_lookup(&im->ino_root, ino); e = radix_tree_lookup(&im->ino_root, ino);
if (!e) { if (!e) {
e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); e = tmp;
if (!e) {
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
goto retry;
}
if (radix_tree_insert(&im->ino_root, ino, e)) { if (radix_tree_insert(&im->ino_root, ino, e)) {
spin_unlock(&im->ino_lock); spin_unlock(&im->ino_lock);
kmem_cache_free(ino_entry_slab, e);
radix_tree_preload_end(); radix_tree_preload_end();
goto retry; goto retry;
} }
@ -358,6 +360,9 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
} }
spin_unlock(&im->ino_lock); spin_unlock(&im->ino_lock);
radix_tree_preload_end(); radix_tree_preload_end();
if (e != tmp)
kmem_cache_free(ino_entry_slab, tmp);
} }
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@ -458,24 +463,34 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
__remove_ino_entry(sbi, ino, ORPHAN_INO); __remove_ino_entry(sbi, ino, ORPHAN_INO);
} }
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{ {
struct inode *inode = f2fs_iget(sbi->sb, ino); struct inode *inode;
f2fs_bug_on(sbi, IS_ERR(inode));
inode = f2fs_iget(sbi->sb, ino);
if (IS_ERR(inode)) {
/*
* there should be a bug that we can't find the entry
* to orphan inode.
*/
f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
return PTR_ERR(inode);
}
clear_nlink(inode); clear_nlink(inode);
/* truncate all the data during iput */ /* truncate all the data during iput */
iput(inode); iput(inode);
return 0;
} }
void recover_orphan_inodes(struct f2fs_sb_info *sbi) int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{ {
block_t start_blk, orphan_blocks, i, j; block_t start_blk, orphan_blocks, i, j;
int err;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
return; return 0;
set_sbi_flag(sbi, SBI_POR_DOING);
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi); orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
@ -489,14 +504,17 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
orphan_blk = (struct f2fs_orphan_block *)page_address(page); orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]); nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
recover_orphan_inode(sbi, ino); err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
return err;
}
} }
f2fs_put_page(page, 1); f2fs_put_page(page, 1);
} }
/* clear Orphan Flag */ /* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
clear_sbi_flag(sbi, SBI_POR_DOING); return 0;
return;
} }
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
@ -504,7 +522,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
struct list_head *head; struct list_head *head;
struct f2fs_orphan_block *orphan_blk = NULL; struct f2fs_orphan_block *orphan_blk = NULL;
unsigned int nentries = 0; unsigned int nentries = 0;
unsigned short index; unsigned short index = 1;
unsigned short orphan_blocks; unsigned short orphan_blocks;
struct page *page = NULL; struct page *page = NULL;
struct ino_entry *orphan = NULL; struct ino_entry *orphan = NULL;
@ -512,11 +530,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
/* /*
* we don't need to do spin_lock(&im->ino_lock) here, since all the * we don't need to do spin_lock(&im->ino_lock) here, since all the
* orphan inode operations are covered under f2fs_lock_op(). * orphan inode operations are covered under f2fs_lock_op().
@ -527,12 +540,10 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
/* loop for each orphan inode entry and write them in Jornal block */ /* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) { list_for_each_entry(orphan, head, list) {
if (!page) { if (!page) {
page = find_get_page(META_MAPPING(sbi), start_blk++); page = grab_meta_page(sbi, start_blk++);
f2fs_bug_on(sbi, !page);
orphan_blk = orphan_blk =
(struct f2fs_orphan_block *)page_address(page); (struct f2fs_orphan_block *)page_address(page);
memset(orphan_blk, 0, sizeof(*orphan_blk)); memset(orphan_blk, 0, sizeof(*orphan_blk));
f2fs_put_page(page, 0);
} }
orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino); orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
@ -704,7 +715,8 @@ void update_dirty_page(struct inode *inode, struct page *page)
struct inode_entry *new; struct inode_entry *new;
int ret = 0; int ret = 0;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return; return;
if (!S_ISDIR(inode->i_mode)) { if (!S_ISDIR(inode->i_mode)) {
@ -892,12 +904,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0; __u32 crc32 = 0;
int i; int i;
int cp_payload_blks = __cp_payload(sbi); int cp_payload_blks = __cp_payload(sbi);
block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
bool invalidate = false;
/* /*
* This avoids to conduct wrong roll-forward operations and uses * This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below. * metapages, so should be called prior to sync_meta_pages below.
*/ */
discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); if (discard_next_dnode(sbi, discard_blk))
invalidate = true;
/* Flush all the NAT/SIT pages */ /* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) { while (get_pages(sbi, F2FS_DIRTY_META)) {
@ -1026,6 +1041,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* wait for previous submitted meta pages writeback */ /* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi); wait_on_all_pages_writeback(sbi);
/*
* invalidate meta page which is used temporarily for zeroing out
* block at the end of warm node chain.
*/
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
release_dirty_inode(sbi); release_dirty_inode(sbi);
if (unlikely(f2fs_cp_error(sbi))) if (unlikely(f2fs_cp_error(sbi)))

View file

@ -92,8 +92,7 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
if (!ci) if (!ci)
return; return;
if (ci->ci_keyring_key) key_put(ci->ci_keyring_key);
key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm); crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(f2fs_crypt_info_cachep, ci); kmem_cache_free(f2fs_crypt_info_cachep, ci);
} }

File diff suppressed because it is too large Load diff

View file

@ -33,8 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi)
int i; int i;
/* validation check of the segment numbers */ /* validation check of the segment numbers */
si->hit_ext = sbi->read_hit_ext; si->hit_largest = atomic_read(&sbi->read_hit_largest);
si->total_ext = sbi->total_hit_ext; si->hit_cached = atomic_read(&sbi->read_hit_cached);
si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic_read(&sbi->total_hit_ext);
si->ext_tree = sbi->total_ext_tree; si->ext_tree = sbi->total_ext_tree;
si->ext_node = atomic_read(&sbi->total_ext_node); si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@ -49,6 +52,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->valid_count = valid_user_blocks(sbi); si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi); si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi); si->valid_inode_count = valid_inode_count(sbi);
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir); si->inline_dir = atomic_read(&sbi->inline_dir);
si->utilization = utilization(sbi); si->utilization = utilization(sbi);
@ -226,6 +230,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "Other: %u)\n - Data: %u\n", seq_printf(s, "Other: %u)\n - Data: %u\n",
si->valid_node_count - si->valid_inode_count, si->valid_node_count - si->valid_inode_count,
si->valid_count - si->valid_node_count); si->valid_count - si->valid_node_count);
seq_printf(s, " - Inline_xattr Inode: %u\n",
si->inline_xattr);
seq_printf(s, " - Inline_data Inode: %u\n", seq_printf(s, " - Inline_data Inode: %u\n",
si->inline_inode); si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n", seq_printf(s, " - Inline_dentry Inode: %u\n",
@ -276,10 +282,16 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_data_blks); si->bg_data_blks);
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks); si->bg_node_blks);
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", seq_puts(s, "\nExtent Cache:\n");
si->hit_ext, si->total_ext); seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n",
seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); si->hit_largest, si->hit_cached,
seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); si->hit_rbtree);
seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n",
!si->total_ext ? 0 :
(si->hit_total * 100) / si->total_ext,
si->hit_total, si->total_ext);
seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
si->ext_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n"); seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n", seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages); si->inmem_pages, si->wb_pages);
@ -366,6 +378,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi; si->sbi = sbi;
sbi->stat_info = si; sbi->stat_info = si;
atomic_set(&sbi->total_hit_ext, 0);
atomic_set(&sbi->read_hit_rbtree, 0);
atomic_set(&sbi->read_hit_largest, 0);
atomic_set(&sbi->read_hit_cached, 0);
atomic_set(&sbi->inline_xattr, 0);
atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0); atomic_set(&sbi->inplace_count, 0);

View file

@ -718,8 +718,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (inode) if (inode)
f2fs_drop_nlink(dir, inode, NULL); f2fs_drop_nlink(dir, inode, NULL);
if (bit_pos == NR_DENTRY_IN_BLOCK) { if (bit_pos == NR_DENTRY_IN_BLOCK &&
truncate_hole(dir, page->index, page->index + 1); !truncate_hole(dir, page->index, page->index + 1)) {
clear_page_dirty_for_io(page); clear_page_dirty_for_io(page);
ClearPagePrivate(page); ClearPagePrivate(page);
ClearPageUptodate(page); ClearPageUptodate(page);

791
fs/f2fs/extent_cache.c Normal file
View file

@ -0,0 +1,791 @@
/*
* f2fs extent cache support
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Samsung Electronics
* Authors: Jaegeuk Kim <jaegeuk@kernel.org>
* Chao Yu <chao2.yu@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node *parent, struct rb_node **p)
{
struct extent_node *en;
en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
if (!en)
return NULL;
en->ei = *ei;
INIT_LIST_HEAD(&en->list);
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
et->count++;
atomic_inc(&sbi->total_ext_node);
return en;
}
static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
et->count--;
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
et->cached_en = NULL;
}
static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
memset(et, 0, sizeof(struct extent_tree));
et->ino = ino;
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
atomic_set(&et->refcount, 0);
et->count = 0;
sbi->total_ext_tree++;
}
atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
return et;
}
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en = et->cached_en;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
stat_inc_cached_node_hit(sbi);
return en;
}
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
stat_inc_rbtree_node_hit(sbi);
return en;
}
}
return NULL;
}
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
struct rb_node **p = &et->root.rb_node;
struct extent_node *en;
en = __attach_extent_node(sbi, et, ei, NULL, p);
if (!en)
return NULL;
et->largest = en->ei;
et->cached_en = en;
return en;
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, bool free_all)
{
struct rb_node *node, *next;
struct extent_node *en;
unsigned int count = et->count;
node = rb_first(&et->root);
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
if (free_all) {
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
}
if (free_all || list_empty(&en->list)) {
__detach_extent_node(sbi, et, en);
kmem_cache_free(extent_node_slab, en);
}
node = next;
}
return count - et->count;
}
static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
largest->len = 0;
}
void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
if (!f2fs_may_extent_tree(inode))
return;
__drop_largest_extent(inode, fofs);
}
void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
if (!f2fs_may_extent_tree(inode))
return;
et = __grab_extent_tree(inode);
if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
return;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
write_lock(&et->lock);
if (et->count)
goto out;
en = __init_extent_tree(sbi, et, &ei);
if (en) {
spin_lock(&sbi->extent_lock);
list_add_tail(&en->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
}
out:
write_unlock(&et->lock);
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en;
bool ret = false;
f2fs_bug_on(sbi, !et);
trace_f2fs_lookup_extent_tree_start(inode, pgofs);
read_lock(&et->lock);
if (et->largest.fofs <= pgofs &&
et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
goto out;
}
en = __lookup_extent_tree(sbi, et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
spin_unlock(&sbi->extent_lock);
ret = true;
}
out:
stat_inc_total_hit(sbi);
read_unlock(&et->lock);
trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
return ret;
}
/*
* lookup extent at @fofs, if hit, return the extent
* if not, return NULL and
* @prev_ex: extent before fofs
* @next_ex: extent after fofs
* @insert_p: insert point for new extent at fofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
unsigned int fofs,
struct extent_node **prev_ex,
struct extent_node **next_ex,
struct rb_node ***insert_p,
struct rb_node **insert_parent)
{
struct rb_node **pnode = &et->root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct extent_node *en = et->cached_en;
*insert_p = NULL;
*insert_parent = NULL;
*prev_ex = NULL;
*next_ex = NULL;
if (RB_EMPTY_ROOT(&et->root))
return NULL;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
en = rb_entry(*pnode, struct extent_node, rb_node);
if (fofs < en->ei.fofs)
pnode = &(*pnode)->rb_left;
else if (fofs >= en->ei.fofs + en->ei.len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
return en;
}
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct extent_node **den,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
struct extent_node *en = NULL;
if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
prev_ex->ei.len += ei->len;
ei = &prev_ex->ei;
en = prev_ex;
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
if (en) {
__detach_extent_node(sbi, et, prev_ex);
*den = prev_ex;
}
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
en = next_ex;
}
if (en) {
if (en->ei.len > et->largest.len)
et->largest = en->ei;
et->cached_en = en;
}
return en;
}
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node **insert_p,
struct rb_node *insert_parent)
{
struct rb_node **p = &et->root.rb_node;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
if (insert_p && insert_parent) {
parent = insert_parent;
p = insert_p;
goto do_insert;
}
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs)
p = &(*p)->rb_left;
else if (ei->fofs >= en->ei.fofs + en->ei.len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
return NULL;
if (en->ei.len > et->largest.len)
et->largest = en->ei;
et->cached_en = en;
return en;
}
unsigned int f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
struct extent_node *prev_en = NULL, *next_en = NULL;
struct extent_info ei, dei, prev;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
unsigned int end = fofs + len;
unsigned int pos = (unsigned int)fofs;
if (!et)
return false;
write_lock(&et->lock);
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
}
prev = et->largest;
dei.len = 0;
/* we do not guarantee that the largest extent is cached all the time */
__drop_largest_extent(inode, fofs);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
if (!en) {
if (next_en) {
en = next_en;
f2fs_bug_on(sbi, en->ei.fofs <= pos);
pos = en->ei.fofs;
} else {
/*
* skip searching in the tree since there is no
* larger extent node in the cache.
*/
goto update_extent;
}
}
/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
while (en) {
struct rb_node *node;
if (pos >= end)
break;
dei = en->ei;
en1 = en2 = NULL;
node = rb_next(&en->rb_node);
/*
* 2.1 there are four cases when we invalidate blkaddr in extent
* node, |V: valid address, X: will be invalidated|
*/
/* case#1, invalidate right part of extent node |VVVVVXXXXX| */
if (pos > dei.fofs && end >= dei.fofs + dei.len) {
en->ei.len = pos - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
}
/* case#2, invalidate left part of extent node |XXXXXVVVVV| */
if (pos <= dei.fofs && end < dei.fofs + dei.len) {
en->ei.fofs = end;
en->ei.blk += end - dei.fofs;
en->ei.len -= end - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
}
__detach_extent_node(sbi, et, en);
/*
* if we remove node in rb-tree, our parent node pointer may
* point the wrong place, discard them.
*/
insert_p = NULL;
insert_parent = NULL;
/* case#3, invalidate entire extent node |XXXXXXXXXX| */
if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
if (__is_extent_same(&dei, &et->largest))
et->largest.len = 0;
goto update;
}
/*
* case#4, invalidate data in the middle of extent node
* |VVVXXXXVVV|
*/
if (dei.len > F2FS_MIN_EXTENT_LEN) {
unsigned int endofs;
/* insert left part of split extent into cache */
if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, dei.fofs, dei.blk,
pos - dei.fofs);
en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
/* insert right part of split extent into cache */
endofs = dei.fofs + dei.len;
if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
endofs - end);
en2 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
}
update:
/* 2.2 update in global extent list */
spin_lock(&sbi->extent_lock);
if (en && !list_empty(&en->list))
list_del(&en->list);
if (en1)
list_add_tail(&en1->list, &sbi->extent_list);
if (en2)
list_add_tail(&en2->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
/* 2.3 release extent node */
if (en)
kmem_cache_free(extent_node_slab, en);
next:
en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
next_en = en;
if (en)
pos = en->ei.fofs;
}
update_extent:
/* 3. update extent in extent cache */
if (blkaddr) {
struct extent_node *den = NULL;
set_extent_info(&ei, fofs, blkaddr, len);
en3 = __try_merge_extent_node(sbi, et, &ei, &den,
prev_en, next_en);
if (!en3)
en3 = __insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
if (dei.len >= 1 &&
prev.len < F2FS_MIN_EXTENT_LEN &&
et->largest.len < F2FS_MIN_EXTENT_LEN) {
et->largest.len = 0;
set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
}
spin_lock(&sbi->extent_lock);
if (en3) {
if (list_empty(&en3->list))
list_add_tail(&en3->list, &sbi->extent_list);
else
list_move_tail(&en3->list, &sbi->extent_list);
}
if (den && !list_empty(&den->list))
list_del(&den->list);
spin_unlock(&sbi->extent_lock);
if (den)
kmem_cache_free(extent_node_slab, den);
}
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
__free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
if (!atomic_read(&et->refcount)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
radix_tree_delete(root, et->ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
}
}
}
up_write(&sbi->extent_tree_lock);
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
spin_lock(&sbi->extent_lock);
list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
if (!remained--)
break;
list_del_init(&en->list);
}
spin_unlock(&sbi->extent_lock);
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, false);
write_unlock(&et->lock);
if (node_cnt + tree_cnt >= nr_shrink)
break;
}
}
unlock_out:
up_write(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
return node_cnt + tree_cnt;
}
unsigned int f2fs_destroy_extent_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
return 0;
write_lock(&et->lock);
node_cnt = __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
return node_cnt;
}
void f2fs_destroy_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
return;
if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(&et->refcount);
return;
}
/* free all extent info belong to this extent tree */
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
atomic_dec(&et->refcount);
f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
trace_f2fs_destroy_extent_tree(inode, node_cnt);
}
bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
if (!f2fs_may_extent_tree(inode))
return false;
return f2fs_lookup_extent_tree(inode, pgofs, ei);
}
void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs;
if (!f2fs_may_extent_tree(dn->inode))
return;
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
sync_inode_page(dn);
}
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
if (!f2fs_may_extent_tree(dn->inode))
return;
if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
sync_inode_page(dn);
}
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
sbi->total_ext_tree = 0;
atomic_set(&sbi->total_ext_node, 0);
}
int __init create_extent_cache(void)
{
extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
sizeof(struct extent_tree));
if (!extent_tree_slab)
return -ENOMEM;
extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
sizeof(struct extent_node));
if (!extent_node_slab) {
kmem_cache_destroy(extent_tree_slab);
return -ENOMEM;
}
return 0;
}
void destroy_extent_cache(void)
{
kmem_cache_destroy(extent_node_slab);
kmem_cache_destroy(extent_tree_slab);
}

View file

@ -19,6 +19,7 @@
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/bio.h>
#ifdef CONFIG_F2FS_CHECK_FS #ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition) #define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@ -228,6 +229,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_SET_ENCRYPTION_POLICY \ #define F2FS_IOC_SET_ENCRYPTION_POLICY \
_IOR('f', 19, struct f2fs_encryption_policy) _IOR('f', 19, struct f2fs_encryption_policy)
@ -320,7 +322,7 @@ enum {
*/ */
}; };
#define F2FS_LINK_MAX 32000 /* maximum link count per file */ #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
@ -349,6 +351,7 @@ struct extent_tree {
nid_t ino; /* inode number */ nid_t ino; /* inode number */
struct rb_root root; /* root of extent info rb-tree */ struct rb_root root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */ struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
rwlock_t lock; /* protect extent info rb-tree */ rwlock_t lock; /* protect extent info rb-tree */
atomic_t refcount; /* reference count of rb-tree */ atomic_t refcount; /* reference count of rb-tree */
unsigned int count; /* # of extent node in rb-tree*/ unsigned int count; /* # of extent node in rb-tree*/
@ -372,6 +375,12 @@ struct f2fs_map_blocks {
unsigned int m_flags; unsigned int m_flags;
}; };
/* for flag in get_data_block */
#define F2FS_GET_BLOCK_READ 0
#define F2FS_GET_BLOCK_DIO 1
#define F2FS_GET_BLOCK_FIEMAP 2
#define F2FS_GET_BLOCK_BMAP 3
/* /*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later. * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
*/ */
@ -420,14 +429,13 @@ struct f2fs_inode_info {
unsigned int clevel; /* maximum level of given file name */ unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */ nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */ unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
rwlock_t ext_lock; /* rwlock for single extent cache */
struct inode_entry *dirty_dir; /* the pointer of dirty dir */ struct inode_entry *dirty_dir; /* the pointer of dirty dir */
struct radix_tree_root inmem_root; /* radix tree for inmem pages */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */ struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
#ifdef CONFIG_F2FS_FS_ENCRYPTION #ifdef CONFIG_F2FS_FS_ENCRYPTION
/* Encryption params */ /* Encryption params */
struct f2fs_crypt_info *i_crypt_info; struct f2fs_crypt_info *i_crypt_info;
@ -779,7 +787,11 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */ unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */ unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */ atomic_t inplace_count; /* # of inplace update */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ atomic_t total_hit_ext; /* # of lookup extent cache */
atomic_t read_hit_rbtree; /* # of hit rbtree extent node */
atomic_t read_hit_largest; /* # of hit largest extent node */
atomic_t read_hit_cached; /* # of hit cached extent node */
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */ int bg_gc; /* background gc calls */
@ -791,6 +803,11 @@ struct f2fs_sb_info {
/* For sysfs suppport */ /* For sysfs suppport */
struct kobject s_kobj; struct kobject s_kobj;
struct completion s_kobj_unregister; struct completion s_kobj_unregister;
/* For shrinker support */
struct list_head s_list;
struct mutex umount_mutex;
unsigned int shrinker_run_no;
}; };
/* /*
@ -1039,7 +1056,8 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_dec_dirty_pages(struct inode *inode) static inline void inode_dec_dirty_pages(struct inode *inode)
{ {
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return; return;
atomic_dec(&F2FS_I(inode)->dirty_pages); atomic_dec(&F2FS_I(inode)->dirty_pages);
@ -1234,16 +1252,24 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
gfp_t flags) gfp_t flags)
{ {
void *entry; void *entry;
retry:
entry = kmem_cache_alloc(cachep, flags);
if (!entry) {
cond_resched();
goto retry;
}
entry = kmem_cache_alloc(cachep, flags);
if (!entry)
entry = kmem_cache_alloc(cachep, flags | __GFP_NOFAIL);
return entry; return entry;
} }
static inline struct bio *f2fs_bio_alloc(int npages)
{
struct bio *bio;
/* No failure on bio allocation */
bio = bio_alloc(GFP_NOIO, npages);
if (!bio)
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item) unsigned long index, void *item)
{ {
@ -1342,6 +1368,7 @@ enum {
FI_INC_LINK, /* need to increment i_nlink */ FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */ FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */ FI_NO_ALLOC, /* should not allocate any blocks */
FI_FREE_NID, /* free allocated nide */
FI_UPDATE_DIR, /* should update inode block for consistency */ FI_UPDATE_DIR, /* should update inode block for consistency */
FI_DELAY_IPUT, /* used for the recovery */ FI_DELAY_IPUT, /* used for the recovery */
FI_NO_EXTENT, /* not to use the extent cache */ FI_NO_EXTENT, /* not to use the extent cache */
@ -1541,6 +1568,17 @@ static inline bool is_dot_dotdot(const struct qstr *str)
return false; return false;
} }
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
mode_t mode = inode->i_mode;
if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
return false;
return S_ISREG(mode);
}
#define get_inode_mode(i) \ #define get_inode_mode(i) \
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@ -1557,7 +1595,7 @@ static inline bool is_dot_dotdot(const struct qstr *str)
int f2fs_sync_file(struct file *, loff_t, loff_t, int); int f2fs_sync_file(struct file *, loff_t, loff_t, int);
void truncate_data_blocks(struct dnode_of_data *); void truncate_data_blocks(struct dnode_of_data *);
int truncate_blocks(struct inode *, u64, bool); int truncate_blocks(struct inode *, u64, bool);
void f2fs_truncate(struct inode *); int f2fs_truncate(struct inode *, bool);
int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int f2fs_setattr(struct dentry *, struct iattr *); int f2fs_setattr(struct dentry *, struct iattr *);
int truncate_hole(struct inode *, pgoff_t, pgoff_t); int truncate_hole(struct inode *, pgoff_t, pgoff_t);
@ -1649,7 +1687,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t); int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *); int truncate_xattr_node(struct inode *, struct page *);
int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
void remove_inode_page(struct inode *); int remove_inode_page(struct inode *);
struct page *new_inode_page(struct inode *); struct page *new_inode_page(struct inode *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t); void ra_node_page(struct f2fs_sb_info *, nid_t);
@ -1660,6 +1698,7 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
bool alloc_nid(struct f2fs_sb_info *, nid_t *); bool alloc_nid(struct f2fs_sb_info *, nid_t *);
void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
int try_to_free_nids(struct f2fs_sb_info *, int);
void recover_inline_xattr(struct inode *, struct page *); void recover_inline_xattr(struct inode *, struct page *);
void recover_xattr_data(struct inode *, struct page *, block_t); void recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *); int recover_inode_page(struct f2fs_sb_info *, struct page *);
@ -1675,7 +1714,7 @@ void destroy_node_manager_caches(void);
* segment.c * segment.c
*/ */
void register_inmem_page(struct inode *, struct page *); void register_inmem_page(struct inode *, struct page *);
void commit_inmem_pages(struct inode *, bool); int commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *); int f2fs_issue_flush(struct f2fs_sb_info *);
@ -1685,7 +1724,7 @@ void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *); void release_discard_addrs(struct f2fs_sb_info *);
void discard_next_dnode(struct f2fs_sb_info *, block_t); bool discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *, bool); int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
@ -1727,7 +1766,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t); void add_orphan_inode(struct f2fs_sb_info *, nid_t);
void remove_orphan_inode(struct f2fs_sb_info *, nid_t); void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
void recover_orphan_inodes(struct f2fs_sb_info *); int recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *);
void update_dirty_page(struct inode *, struct page *); void update_dirty_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *); void add_dirty_dir_inode(struct inode *);
@ -1746,21 +1785,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_io_info *); void f2fs_submit_page_mbio(struct f2fs_io_info *);
void set_data_blkaddr(struct dnode_of_data *); void set_data_blkaddr(struct dnode_of_data *);
int reserve_new_block(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
void f2fs_destroy_extent_tree(struct inode *);
void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
void f2fs_update_extent_cache(struct dnode_of_data *);
void f2fs_preserve_extent_tree(struct inode *);
struct page *get_read_data_page(struct inode *, pgoff_t, int); struct page *get_read_data_page(struct inode *, pgoff_t, int);
struct page *find_data_page(struct inode *, pgoff_t); struct page *find_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct f2fs_io_info *); int do_write_data_page(struct f2fs_io_info *);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void init_extent_cache_info(struct f2fs_sb_info *);
int __init create_extent_cache(void);
void destroy_extent_cache(void);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t); int f2fs_release_page(struct page *, gfp_t);
@ -1788,11 +1820,13 @@ struct f2fs_stat_info {
struct f2fs_sb_info *sbi; struct f2fs_sb_info *sbi;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones; int main_area_segs, main_area_sections, main_area_zones;
int hit_ext, total_ext, ext_tree, ext_node; int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext;
int ext_tree, ext_node;
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, dirty_nats, sits, dirty_sits, fnids; int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization; int total_count, utilization;
int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; int bg_gc, inmem_pages, wb_pages;
int inline_xattr, inline_inode, inline_dir;
unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks; unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid; int util_free, util_valid, util_invalid;
@ -1823,8 +1857,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) #define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext))
#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) #define stat_inc_rbtree_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest))
#define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached))
#define stat_inc_inline_xattr(inode) \
do { \
if (f2fs_has_inline_xattr(inode)) \
(atomic_inc(&F2FS_I_SB(inode)->inline_xattr)); \
} while (0)
#define stat_dec_inline_xattr(inode) \
do { \
if (f2fs_has_inline_xattr(inode)) \
(atomic_dec(&F2FS_I_SB(inode)->inline_xattr)); \
} while (0)
#define stat_inc_inline_inode(inode) \ #define stat_inc_inline_inode(inode) \
do { \ do { \
if (f2fs_has_inline_data(inode)) \ if (f2fs_has_inline_data(inode)) \
@ -1894,7 +1940,11 @@ void f2fs_destroy_root_stats(void);
#define stat_inc_dirty_dir(sbi) #define stat_inc_dirty_dir(sbi)
#define stat_dec_dirty_dir(sbi) #define stat_dec_dirty_dir(sbi)
#define stat_inc_total_hit(sb) #define stat_inc_total_hit(sb)
#define stat_inc_read_hit(sb) #define stat_inc_rbtree_node_hit(sb)
#define stat_inc_largest_node_hit(sbi)
#define stat_inc_cached_node_hit(sbi)
#define stat_inc_inline_xattr(inode)
#define stat_dec_inline_xattr(inode)
#define stat_inc_inline_inode(inode) #define stat_inc_inline_inode(inode)
#define stat_dec_inline_inode(inode) #define stat_dec_inline_inode(inode)
#define stat_inc_inline_dir(inode) #define stat_inc_inline_dir(inode)
@ -1949,6 +1999,30 @@ bool f2fs_empty_inline_dir(struct inode *);
int f2fs_read_inline_dir(struct file *, struct dir_context *, int f2fs_read_inline_dir(struct file *, struct dir_context *,
struct f2fs_str *); struct f2fs_str *);
/*
* shrinker.c
*/
unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *);
unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *);
void f2fs_join_shrinker(struct f2fs_sb_info *);
void f2fs_leave_shrinker(struct f2fs_sb_info *);
/*
* extent_cache.c
*/
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
void f2fs_drop_largest_extent(struct inode *, pgoff_t);
void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
unsigned int f2fs_destroy_extent_node(struct inode *);
void f2fs_destroy_extent_tree(struct inode *);
bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
void f2fs_update_extent_cache(struct dnode_of_data *);
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t, block_t, unsigned int);
void init_extent_cache_info(struct f2fs_sb_info *);
int __init create_extent_cache(void);
void destroy_extent_cache(void);
/* /*
* crypto support * crypto support
*/ */

View file

@ -27,6 +27,7 @@
#include "segment.h" #include "segment.h"
#include "xattr.h" #include "xattr.h"
#include "acl.h" #include "acl.h"
#include "gc.h"
#include "trace.h" #include "trace.h"
#include <trace/events/f2fs.h> #include <trace/events/f2fs.h>
@ -85,6 +86,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
mapped: mapped:
/* fill the page */ /* fill the page */
f2fs_wait_on_page_writeback(page, DATA); f2fs_wait_on_page_writeback(page, DATA);
/* if gced page is attached, don't write to cold segment */
clear_cold_data(page);
out: out:
sb_end_pagefault(inode->i_sb); sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(err); return block_page_mkwrite_return(err);
@ -203,8 +206,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
} }
/* if the inode is dirty, let's recover all the time */ /* if the inode is dirty, let's recover all the time */
if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { if (!datasync) {
update_inode_page(inode); f2fs_write_inode(inode, NULL);
goto go_write; goto go_write;
} }
@ -442,9 +445,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
int truncate_data_blocks_range(struct dnode_of_data *dn, int count) int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{ {
int nr_free = 0, ofs = dn->ofs_in_node;
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_node *raw_node; struct f2fs_node *raw_node;
int nr_free = 0, ofs = dn->ofs_in_node, len = count;
__le32 *addr; __le32 *addr;
raw_node = F2FS_NODE(dn->node_page); raw_node = F2FS_NODE(dn->node_page);
@ -457,14 +460,22 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR; dn->data_blkaddr = NULL_ADDR;
set_data_blkaddr(dn); set_data_blkaddr(dn);
f2fs_update_extent_cache(dn);
invalidate_blocks(sbi, blkaddr); invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(F2FS_I(dn->inode), clear_inode_flag(F2FS_I(dn->inode),
FI_FIRST_BLOCK_WRITTEN); FI_FIRST_BLOCK_WRITTEN);
nr_free++; nr_free++;
} }
if (nr_free) { if (nr_free) {
pgoff_t fofs;
/*
* once we invalidate valid blkaddr in range [ofs, ofs + count],
* we will invalidate all blkaddr in the whole range.
*/
fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
F2FS_I(dn->inode)) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free); dec_valid_block_count(sbi, dn->inode, nr_free);
set_page_dirty(dn->node_page); set_page_dirty(dn->node_page);
sync_inode_page(dn); sync_inode_page(dn);
@ -576,24 +587,30 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
return err; return err;
} }
void f2fs_truncate(struct inode *inode) int f2fs_truncate(struct inode *inode, bool lock)
{ {
int err;
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode))) S_ISLNK(inode->i_mode)))
return; return 0;
trace_f2fs_truncate(inode); trace_f2fs_truncate(inode);
/* we should check inline_data size */ /* we should check inline_data size */
if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
if (f2fs_convert_inline_inode(inode)) err = f2fs_convert_inline_inode(inode);
return; if (err)
return err;
} }
if (!truncate_blocks(inode, i_size_read(inode), true)) { err = truncate_blocks(inode, i_size_read(inode), lock);
inode->i_mtime = inode->i_ctime = CURRENT_TIME; if (err)
mark_inode_dirty(inode); return err;
}
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
return 0;
} }
int f2fs_getattr(struct vfsmount *mnt, int f2fs_getattr(struct vfsmount *mnt,
@ -653,7 +670,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size <= i_size_read(inode)) { if (attr->ia_size <= i_size_read(inode)) {
truncate_setsize(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode); err = f2fs_truncate(inode, true);
if (err)
return err;
f2fs_balance_fs(F2FS_I_SB(inode)); f2fs_balance_fs(F2FS_I_SB(inode));
} else { } else {
/* /*
@ -692,14 +711,14 @@ const struct inode_operations f2fs_file_inode_operations = {
.fiemap = f2fs_fiemap, .fiemap = f2fs_fiemap,
}; };
static void fill_zero(struct inode *inode, pgoff_t index, static int fill_zero(struct inode *inode, pgoff_t index,
loff_t start, loff_t len) loff_t start, loff_t len)
{ {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page; struct page *page;
if (!len) if (!len)
return; return 0;
f2fs_balance_fs(sbi); f2fs_balance_fs(sbi);
@ -707,12 +726,14 @@ static void fill_zero(struct inode *inode, pgoff_t index,
page = get_new_data_page(inode, NULL, index, false); page = get_new_data_page(inode, NULL, index, false);
f2fs_unlock_op(sbi); f2fs_unlock_op(sbi);
if (!IS_ERR(page)) { if (IS_ERR(page))
f2fs_wait_on_page_writeback(page, DATA); return PTR_ERR(page);
zero_user(page, start, len);
set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA);
f2fs_put_page(page, 1); zero_user(page, start, len);
} set_page_dirty(page);
f2fs_put_page(page, 1);
return 0;
} }
int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
@ -760,14 +781,22 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
if (pg_start == pg_end) { if (pg_start == pg_end) {
fill_zero(inode, pg_start, off_start, ret = fill_zero(inode, pg_start, off_start,
off_end - off_start); off_end - off_start);
if (ret)
return ret;
} else { } else {
if (off_start) if (off_start) {
fill_zero(inode, pg_start++, off_start, ret = fill_zero(inode, pg_start++, off_start,
PAGE_CACHE_SIZE - off_start); PAGE_CACHE_SIZE - off_start);
if (off_end) if (ret)
fill_zero(inode, pg_end, 0, off_end); return ret;
}
if (off_end) {
ret = fill_zero(inode, pg_end, 0, off_end);
if (ret)
return ret;
}
if (pg_start < pg_end) { if (pg_start < pg_end) {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
@ -797,11 +826,11 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
int ret = 0; int ret = 0;
f2fs_lock_op(sbi);
for (; end < nrpages; start++, end++) { for (; end < nrpages; start++, end++) {
block_t new_addr, old_addr; block_t new_addr, old_addr;
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0); set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA);
if (ret && ret != -ENOENT) { if (ret && ret != -ENOENT) {
@ -817,13 +846,16 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
if (new_addr == NULL_ADDR) { if (new_addr == NULL_ADDR) {
set_new_dnode(&dn, inode, NULL, NULL, 0); set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA);
if (ret && ret != -ENOENT) if (ret && ret != -ENOENT) {
goto out; goto out;
else if (ret == -ENOENT) } else if (ret == -ENOENT) {
f2fs_unlock_op(sbi);
continue; continue;
}
if (dn.data_blkaddr == NULL_ADDR) { if (dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn); f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
continue; continue;
} else { } else {
truncate_data_blocks_range(&dn, 1); truncate_data_blocks_range(&dn, 1);
@ -862,8 +894,9 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
f2fs_put_dnode(&dn); f2fs_put_dnode(&dn);
} }
f2fs_unlock_op(sbi);
} }
ret = 0; return 0;
out: out:
f2fs_unlock_op(sbi); f2fs_unlock_op(sbi);
return ret; return ret;
@ -885,6 +918,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL; return -EINVAL;
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
pg_start = offset >> PAGE_CACHE_SHIFT; pg_start = offset >> PAGE_CACHE_SHIFT;
pg_end = (offset + len) >> PAGE_CACHE_SHIFT; pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
@ -946,14 +987,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
if (pg_start == pg_end) { if (pg_start == pg_end) {
fill_zero(inode, pg_start, off_start, off_end - off_start); ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
return ret;
if (offset + len > new_size) if (offset + len > new_size)
new_size = offset + len; new_size = offset + len;
new_size = max_t(loff_t, new_size, offset + len); new_size = max_t(loff_t, new_size, offset + len);
} else { } else {
if (off_start) { if (off_start) {
fill_zero(inode, pg_start++, off_start, ret = fill_zero(inode, pg_start++, off_start,
PAGE_CACHE_SIZE - off_start); PAGE_CACHE_SIZE - off_start);
if (ret)
return ret;
new_size = max_t(loff_t, new_size, new_size = max_t(loff_t, new_size,
pg_start << PAGE_CACHE_SHIFT); pg_start << PAGE_CACHE_SHIFT);
} }
@ -995,7 +1043,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
} }
if (off_end) { if (off_end) {
fill_zero(inode, pg_end, 0, off_end); ret = fill_zero(inode, pg_end, 0, off_end);
if (ret)
goto out;
new_size = max_t(loff_t, new_size, offset + len); new_size = max_t(loff_t, new_size, offset + len);
} }
} }
@ -1033,6 +1084,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi); f2fs_balance_fs(sbi);
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
ret = truncate_blocks(inode, i_size_read(inode), true); ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret) if (ret)
return ret; return ret;
@ -1302,6 +1359,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp) static int f2fs_ioc_start_atomic_write(struct file *filp)
{ {
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(filp);
int ret;
if (!inode_owner_or_capable(inode)) if (!inode_owner_or_capable(inode))
return -EACCES; return -EACCES;
@ -1311,9 +1369,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) if (f2fs_is_atomic_file(inode))
return 0; return 0;
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
return f2fs_convert_inline_inode(inode); set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
return 0;
} }
static int f2fs_ioc_commit_atomic_write(struct file *filp) static int f2fs_ioc_commit_atomic_write(struct file *filp)
@ -1333,10 +1394,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) { if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
commit_inmem_pages(inode, false); ret = commit_inmem_pages(inode, false);
if (ret)
goto err_out;
} }
ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
err_out:
mnt_drop_write_file(filp); mnt_drop_write_file(filp);
return ret; return ret;
} }
@ -1344,6 +1408,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_start_volatile_write(struct file *filp) static int f2fs_ioc_start_volatile_write(struct file *filp)
{ {
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(filp);
int ret;
if (!inode_owner_or_capable(inode)) if (!inode_owner_or_capable(inode))
return -EACCES; return -EACCES;
@ -1351,9 +1416,12 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (f2fs_is_volatile_file(inode)) if (f2fs_is_volatile_file(inode))
return 0; return 0;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
return f2fs_convert_inline_inode(inode); set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
return 0;
} }
static int f2fs_ioc_release_volatile_write(struct file *filp) static int f2fs_ioc_release_volatile_write(struct file *filp)
@ -1389,7 +1457,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) { if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
commit_inmem_pages(inode, false); commit_inmem_pages(inode, true);
} }
if (f2fs_is_volatile_file(inode)) if (f2fs_is_volatile_file(inode))
@ -1544,6 +1612,35 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
return 0; return 0;
} }
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
__u32 i, count;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(count, (__u32 __user *)arg))
return -EFAULT;
if (!count || count > F2FS_BATCH_GC_MAX_NUM)
return -EINVAL;
for (i = 0; i < count; i++) {
if (!mutex_trylock(&sbi->gc_mutex))
break;
if (f2fs_gc(sbi))
break;
}
if (put_user(i, (__u32 __user *)arg))
return -EFAULT;
return 0;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ {
switch (cmd) { switch (cmd) {
@ -1573,6 +1670,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_get_encryption_policy(filp, arg); return f2fs_ioc_get_encryption_policy(filp, arg);
case F2FS_IOC_GET_ENCRYPTION_PWSALT: case F2FS_IOC_GET_ENCRYPTION_PWSALT:
return f2fs_ioc_get_encryption_pwsalt(filp, arg); return f2fs_ioc_get_encryption_pwsalt(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
default: default:
return -ENOTTY; return -ENOTTY;
} }

View file

@ -391,23 +391,27 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
* On validity, copy that node with cold status, otherwise (invalid node) * On validity, copy that node with cold status, otherwise (invalid node)
* ignore that. * ignore that.
*/ */
static void gc_node_segment(struct f2fs_sb_info *sbi, static int gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type) struct f2fs_summary *sum, unsigned int segno, int gc_type)
{ {
bool initial = true; bool initial = true;
struct f2fs_summary *entry; struct f2fs_summary *entry;
block_t start_addr;
int off; int off;
start_addr = START_BLOCK(sbi, segno);
next_step: next_step:
entry = sum; entry = sum;
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid); nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page; struct page *node_page;
struct node_info ni;
/* stop BG_GC if there is not enough free sections. */ /* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
return; return 0;
if (check_valid_map(sbi, segno, off) == 0) if (check_valid_map(sbi, segno, off) == 0)
continue; continue;
@ -426,6 +430,12 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
continue; continue;
} }
get_node_info(sbi, nid, &ni);
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
}
/* set page dirty and write it */ /* set page dirty and write it */
if (gc_type == FG_GC) { if (gc_type == FG_GC) {
f2fs_wait_on_page_writeback(node_page, NODE); f2fs_wait_on_page_writeback(node_page, NODE);
@ -451,13 +461,11 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
}; };
sync_node_pages(sbi, 0, &wbc); sync_node_pages(sbi, 0, &wbc);
/* /* return 1 only if FG_GC succefully reclaimed one */
* In the case of FG_GC, it'd be better to reclaim this victim if (get_valid_blocks(sbi, segno, 1) == 0)
* completely. return 1;
*/
if (get_valid_blocks(sbi, segno, 1) != 0)
goto next_step;
} }
return 0;
} }
/* /*
@ -487,7 +495,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
} }
static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct node_info *dni, block_t blkaddr, unsigned int *nofs) struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{ {
struct page *node_page; struct page *node_page;
@ -500,13 +508,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
node_page = get_node_page(sbi, nid); node_page = get_node_page(sbi, nid);
if (IS_ERR(node_page)) if (IS_ERR(node_page))
return 0; return false;
get_node_info(sbi, nid, dni); get_node_info(sbi, nid, dni);
if (sum->version != dni->version) { if (sum->version != dni->version) {
f2fs_put_page(node_page, 1); f2fs_put_page(node_page, 1);
return 0; return false;
} }
*nofs = ofs_of_node(node_page); *nofs = ofs_of_node(node_page);
@ -514,8 +522,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
f2fs_put_page(node_page, 1); f2fs_put_page(node_page, 1);
if (source_blkaddr != blkaddr) if (source_blkaddr != blkaddr)
return 0; return false;
return 1; return true;
} }
static void move_encrypted_block(struct inode *inode, block_t bidx) static void move_encrypted_block(struct inode *inode, block_t bidx)
@ -552,7 +560,10 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
fio.page = page; fio.page = page;
fio.blk_addr = dn.data_blkaddr; fio.blk_addr = dn.data_blkaddr;
fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi),
fio.blk_addr,
FGP_LOCK|FGP_CREAT,
GFP_NOFS);
if (!fio.encrypted_page) if (!fio.encrypted_page)
goto put_out; goto put_out;
@ -636,7 +647,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
* If the parent node is not valid or the data block address is different, * If the parent node is not valid or the data block address is different,
* the victim data block is ignored. * the victim data block is ignored.
*/ */
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct gc_inode_list *gc_list, unsigned int segno, int gc_type) struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{ {
struct super_block *sb = sbi->sb; struct super_block *sb = sbi->sb;
@ -659,7 +670,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
/* stop BG_GC if there is not enough free sections. */ /* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
return; return 0;
if (check_valid_map(sbi, segno, off) == 0) if (check_valid_map(sbi, segno, off) == 0)
continue; continue;
@ -670,7 +681,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
} }
/* Get an inode by ino with checking validity */ /* Get an inode by ino with checking validity */
if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
continue; continue;
if (phase == 1) { if (phase == 1) {
@ -724,15 +735,11 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (gc_type == FG_GC) { if (gc_type == FG_GC) {
f2fs_submit_merged_bio(sbi, DATA, WRITE); f2fs_submit_merged_bio(sbi, DATA, WRITE);
/* /* return 1 only if FG_GC succefully reclaimed one */
* In the case of FG_GC, it'd be better to reclaim this victim if (get_valid_blocks(sbi, segno, 1) == 0)
* completely. return 1;
*/
if (get_valid_blocks(sbi, segno, 1) != 0) {
phase = 2;
goto next_step;
}
} }
return 0;
} }
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@ -748,12 +755,13 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
return ret; return ret;
} }
static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
struct gc_inode_list *gc_list, int gc_type) struct gc_inode_list *gc_list, int gc_type)
{ {
struct page *sum_page; struct page *sum_page;
struct f2fs_summary_block *sum; struct f2fs_summary_block *sum;
struct blk_plug plug; struct blk_plug plug;
int nfree = 0;
/* read segment summary of victim */ /* read segment summary of victim */
sum_page = get_sum_page(sbi, segno); sum_page = get_sum_page(sbi, segno);
@ -773,10 +781,11 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
switch (GET_SUM_TYPE((&sum->footer))) { switch (GET_SUM_TYPE((&sum->footer))) {
case SUM_TYPE_NODE: case SUM_TYPE_NODE:
gc_node_segment(sbi, sum->entries, segno, gc_type); nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
break; break;
case SUM_TYPE_DATA: case SUM_TYPE_DATA:
gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); nfree = gc_data_segment(sbi, sum->entries, gc_list,
segno, gc_type);
break; break;
} }
blk_finish_plug(&plug); blk_finish_plug(&plug);
@ -785,11 +794,13 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
stat_inc_call_count(sbi->stat_info); stat_inc_call_count(sbi->stat_info);
f2fs_put_page(sum_page, 0); f2fs_put_page(sum_page, 0);
return nfree;
} }
int f2fs_gc(struct f2fs_sb_info *sbi) int f2fs_gc(struct f2fs_sb_info *sbi)
{ {
unsigned int segno, i; unsigned int segno = NULL_SEGNO;
unsigned int i;
int gc_type = BG_GC; int gc_type = BG_GC;
int nfree = 0; int nfree = 0;
int ret = -1; int ret = -1;
@ -808,10 +819,11 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
gc_type = FG_GC; gc_type = FG_GC;
write_checkpoint(sbi, &cpc); if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
write_checkpoint(sbi, &cpc);
} }
if (!__get_victim(sbi, &segno, gc_type)) if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
goto stop; goto stop;
ret = 0; ret = 0;
@ -821,13 +833,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
META_SSA); META_SSA);
for (i = 0; i < sbi->segs_per_sec; i++) for (i = 0; i < sbi->segs_per_sec; i++)
do_garbage_collect(sbi, segno + i, &gc_list, gc_type); nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
if (gc_type == FG_GC) { if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO; sbi->cur_victim_sec = NULL_SEGNO;
nfree++;
WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec));
}
if (has_not_enough_free_secs(sbi, nfree)) if (has_not_enough_free_secs(sbi, nfree))
goto gc_more; goto gc_more;

View file

@ -19,6 +19,12 @@
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/*
* with this macro, we can control the max time we do garbage collection,
* when user triggers batch mode gc by ioctl.
*/
#define F2FS_BATCH_GC_MAX_NUM 16
/* Search max. number of dirty segments to select a victim segment */ /* Search max. number of dirty segments to select a victim segment */
#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */

View file

@ -360,6 +360,10 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
return 0; return 0;
} }
/*
* NOTE: ipage is grabbed by caller, but if any error occurs, we should
* release ipage in this function.
*/
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry) struct f2fs_inline_dentry *inline_dentry)
{ {
@ -369,8 +373,10 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
int err; int err;
page = grab_cache_page(dir->i_mapping, 0); page = grab_cache_page(dir->i_mapping, 0);
if (!page) if (!page) {
f2fs_put_page(ipage, 1);
return -ENOMEM; return -ENOMEM;
}
set_new_dnode(&dn, dir, ipage, NULL, 0); set_new_dnode(&dn, dir, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, 0); err = f2fs_reserve_block(&dn, 0);
@ -378,13 +384,21 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
goto out; goto out;
f2fs_wait_on_page_writeback(page, DATA); f2fs_wait_on_page_writeback(page, DATA);
zero_user_segment(page, 0, PAGE_CACHE_SIZE); zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
dentry_blk = kmap_atomic(page); dentry_blk = kmap_atomic(page);
/* copy data from inline dentry block to new dentry block */ /* copy data from inline dentry block to new dentry block */
memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
INLINE_DENTRY_BITMAP_SIZE); INLINE_DENTRY_BITMAP_SIZE);
memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0,
SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE);
/*
* we do not need to zero out remainder part of dentry and filename
* field, since we have used bitmap for marking the usage status of
* them, besides, we can also ignore copying/zeroing reserved space
* of dentry block, because them haven't been used so far.
*/
memcpy(dentry_blk->dentry, inline_dentry->dentry, memcpy(dentry_blk->dentry, inline_dentry->dentry,
sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
memcpy(dentry_blk->filename, inline_dentry->filename, memcpy(dentry_blk->filename, inline_dentry->filename,
@ -434,8 +448,9 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
slots, NR_INLINE_DENTRY); slots, NR_INLINE_DENTRY);
if (bit_pos >= NR_INLINE_DENTRY) { if (bit_pos >= NR_INLINE_DENTRY) {
err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
if (!err) if (err)
err = -EAGAIN; return err;
err = -EAGAIN;
goto out; goto out;
} }

View file

@ -12,7 +12,6 @@
#include <linux/f2fs_fs.h> #include <linux/f2fs_fs.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/bitops.h>
#include "f2fs.h" #include "f2fs.h"
#include "node.h" #include "node.h"
@ -34,8 +33,8 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME; new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL) if (flags & FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC; new_fl |= S_DIRSYNC;
set_mask_bits(&inode->i_flags, inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
} }
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@ -139,7 +138,7 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino); fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level; fi->i_dir_level = ri->i_dir_level;
f2fs_init_extent_cache(inode, &ri->i_ext); f2fs_init_extent_tree(inode, &ri->i_ext);
get_inline_info(fi, ri); get_inline_info(fi, ri);
@ -155,6 +154,7 @@ static int do_read_inode(struct inode *inode)
f2fs_put_page(node_page, 1); f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode); stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode); stat_inc_inline_dir(inode);
@ -237,10 +237,11 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks); ri->i_blocks = cpu_to_le64(inode->i_blocks);
read_lock(&F2FS_I(inode)->ext_lock); if (F2FS_I(inode)->extent_tree)
set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); set_raw_extent(&F2FS_I(inode)->extent_tree->largest,
read_unlock(&F2FS_I(inode)->ext_lock); &ri->i_ext);
else
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
set_raw_inline(F2FS_I(inode), ri); set_raw_inline(F2FS_I(inode), ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@ -314,7 +315,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode) void f2fs_evict_inode(struct inode *inode)
{ {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t xnid = fi->i_xattr_nid;
int err = 0;
/* some remained atomic pages should discarded */ /* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode)) if (f2fs_is_atomic_file(inode))
@ -330,41 +333,62 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_bug_on(sbi, get_dirty_pages(inode));
remove_dirty_dir_inode(inode); remove_dirty_dir_inode(inode);
f2fs_destroy_extent_tree(inode);
if (inode->i_nlink || is_bad_inode(inode)) if (inode->i_nlink || is_bad_inode(inode))
goto no_delete; goto no_delete;
sb_start_intwrite(inode->i_sb); sb_start_intwrite(inode->i_sb);
set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); set_inode_flag(fi, FI_NO_ALLOC);
i_size_write(inode, 0); i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode)) if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode); err = f2fs_truncate(inode, true);
f2fs_lock_op(sbi); if (!err) {
remove_inode_page(inode); f2fs_lock_op(sbi);
f2fs_unlock_op(sbi); err = remove_inode_page(inode);
f2fs_unlock_op(sbi);
}
sb_end_intwrite(inode->i_sb); sb_end_intwrite(inode->i_sb);
no_delete: no_delete:
stat_dec_inline_xattr(inode);
stat_dec_inline_dir(inode); stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode); stat_dec_inline_inode(inode);
/* update extent info in inode */
if (inode->i_nlink)
f2fs_preserve_extent_tree(inode);
f2fs_destroy_extent_tree(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid) if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) if (is_inode_flag_set(fi, FI_APPEND_WRITE))
add_dirty_inode(sbi, inode->i_ino, APPEND_INO); add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(fi, FI_FREE_NID)) {
if (err && err != -ENOENT)
alloc_nid_done(sbi, inode->i_ino);
else
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(fi, FI_FREE_NID);
}
if (err && err != -ENOENT) {
if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) {
/*
* get here because we failed to release resource
* of inode previously, reminder our user to run fsck
* for fixing.
*/
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"inode (ino:%lu) resource leak, run fsck "
"to fix this issue!", inode->i_ino);
}
}
out_clear: out_clear:
#ifdef CONFIG_F2FS_FS_ENCRYPTION #ifdef CONFIG_F2FS_FS_ENCRYPTION
if (F2FS_I(inode)->i_crypt_info) if (fi->i_crypt_info)
f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); f2fs_free_encryption_info(inode, fi->i_crypt_info);
#endif #endif
clear_inode(inode); clear_inode(inode);
} }
@ -373,6 +397,7 @@ void f2fs_evict_inode(struct inode *inode)
void handle_failed_inode(struct inode *inode) void handle_failed_inode(struct inode *inode)
{ {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err = 0;
clear_nlink(inode); clear_nlink(inode);
make_bad_inode(inode); make_bad_inode(inode);
@ -380,13 +405,29 @@ void handle_failed_inode(struct inode *inode)
i_size_write(inode, 0); i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode)) if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode); err = f2fs_truncate(inode, false);
remove_inode_page(inode); if (!err)
err = remove_inode_page(inode);
clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); /*
clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); * if we skip truncate_node in remove_inode_page bacause we failed
alloc_nid_failed(sbi, inode->i_ino); * before, it's better to find another way to release resource of
* this inode (e.g. valid block count, node block or nid). Here we
* choose to add this inode to orphan list, so that we can call iput
* for releasing in orphan recovery flow.
*
* Note: we should add inode to orphan list before f2fs_unlock_op()
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
if (err && err != -ENOENT) {
err = acquire_orphan_inode(sbi);
if (!err)
add_orphan_inode(sbi, inode->i_ino);
}
set_inode_flag(F2FS_I(inode), FI_FREE_NID);
f2fs_unlock_op(sbi); f2fs_unlock_op(sbi);
/* iput will drop the inode object */ /* iput will drop the inode object */

View file

@ -53,7 +53,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (err) { if (err) {
err = -EINVAL; err = -EINVAL;
nid_free = true; nid_free = true;
goto out; goto fail;
} }
/* If the directory encrypted, then we should encrypt the inode. */ /* If the directory encrypted, then we should encrypt the inode. */
@ -65,6 +65,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_may_inline_dentry(inode)) if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
f2fs_init_extent_tree(inode, NULL);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode); stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode); stat_inc_inline_dir(inode);
@ -72,15 +75,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
mark_inode_dirty(inode); mark_inode_dirty(inode);
return inode; return inode;
out:
clear_nlink(inode);
unlock_new_inode(inode);
fail: fail:
trace_f2fs_new_inode(inode, err); trace_f2fs_new_inode(inode, err);
make_bad_inode(inode); make_bad_inode(inode);
iput(inode);
if (nid_free) if (nid_free)
alloc_nid_failed(sbi, ino); set_inode_flag(F2FS_I(inode), FI_FREE_NID);
iput(inode);
return ERR_PTR(err); return ERR_PTR(err);
} }
@ -89,7 +89,14 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
size_t slen = strlen(s); size_t slen = strlen(s);
size_t sublen = strlen(sub); size_t sublen = strlen(sub);
if (sublen > slen) /*
* filename format of multimedia file should be defined as:
* "filename + '.' + extension".
*/
if (slen < sublen + 2)
return 0;
if (s[slen - sublen - 1] != '.')
return 0; return 0;
return !strncasecmp(s + slen - sublen, sub, sublen); return !strncasecmp(s + slen - sublen, sub, sublen);

View file

@ -159,7 +159,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
head = radix_tree_lookup(&nm_i->nat_set_root, set); head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) { if (!head) {
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
INIT_LIST_HEAD(&head->entry_list); INIT_LIST_HEAD(&head->entry_list);
INIT_LIST_HEAD(&head->set_list); INIT_LIST_HEAD(&head->set_list);
@ -246,7 +246,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{ {
struct nat_entry *new; struct nat_entry *new;
new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
memset(new, 0, sizeof(struct nat_entry)); memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid); nat_set_nid(new, nid);
@ -306,6 +306,10 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
unsigned char version = nat_get_version(e); unsigned char version = nat_get_version(e);
nat_set_version(e, inc_node_version(version)); nat_set_version(e, inc_node_version(version));
/* in order to reuse the nid */
if (nm_i->next_scan_nid > ni->nid)
nm_i->next_scan_nid = ni->nid;
} }
/* change address */ /* change address */
@ -328,11 +332,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{ {
struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
if (available_free_memory(sbi, NAT_ENTRIES)) if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0; return 0;
down_write(&nm_i->nat_tree_lock);
while (nr_shrink && !list_empty(&nm_i->nat_entries)) { while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne; struct nat_entry *ne;
ne = list_first_entry(&nm_i->nat_entries, ne = list_first_entry(&nm_i->nat_entries,
@ -341,7 +345,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
nr_shrink--; nr_shrink--;
} }
up_write(&nm_i->nat_tree_lock); up_write(&nm_i->nat_tree_lock);
return nr_shrink; return nr - nr_shrink;
} }
/* /*
@ -898,17 +902,20 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
* Caller should grab and release a rwsem by calling f2fs_lock_op() and * Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op(). * f2fs_unlock_op().
*/ */
void remove_inode_page(struct inode *inode) int remove_inode_page(struct inode *inode)
{ {
struct dnode_of_data dn; struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
return; if (err)
return err;
if (truncate_xattr_node(inode, dn.inode_page)) { err = truncate_xattr_node(inode, dn.inode_page);
if (err) {
f2fs_put_dnode(&dn); f2fs_put_dnode(&dn);
return; return err;
} }
/* remove potential inline_data blocks */ /* remove potential inline_data blocks */
@ -922,6 +929,7 @@ void remove_inode_page(struct inode *inode)
/* will put inode & node pages */ /* will put inode & node pages */
truncate_node(&dn); truncate_node(&dn);
return 0;
} }
struct page *new_inode_page(struct inode *inode) struct page *new_inode_page(struct inode *inode)
@ -991,8 +999,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
/* /*
* Caller should do after getting the following values. * Caller should do after getting the following values.
* 0: f2fs_put_page(page, 0) * 0: f2fs_put_page(page, 0)
* LOCKED_PAGE: f2fs_put_page(page, 1) * LOCKED_PAGE or error: f2fs_put_page(page, 1)
* error: nothing
*/ */
static int read_node_page(struct page *page, int rw) static int read_node_page(struct page *page, int rw)
{ {
@ -1010,7 +1017,6 @@ static int read_node_page(struct page *page, int rw)
if (unlikely(ni.blk_addr == NULL_ADDR)) { if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page); ClearPageUptodate(page);
f2fs_put_page(page, 1);
return -ENOENT; return -ENOENT;
} }
@ -1041,10 +1047,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
return; return;
err = read_node_page(apage, READA); err = read_node_page(apage, READA);
if (err == 0) f2fs_put_page(apage, err ? 1 : 0);
f2fs_put_page(apage, 0);
else if (err == LOCKED_PAGE)
f2fs_put_page(apage, 1);
} }
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
@ -1057,10 +1060,12 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
err = read_node_page(page, READ_SYNC); err = read_node_page(page, READ_SYNC);
if (err < 0) if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err); return ERR_PTR(err);
else if (err != LOCKED_PAGE) } else if (err != LOCKED_PAGE) {
lock_page(page); lock_page(page);
}
if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
ClearPageUptodate(page); ClearPageUptodate(page);
@ -1096,10 +1101,12 @@ struct page *get_node_page_ra(struct page *parent, int start)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
err = read_node_page(page, READ_SYNC); err = read_node_page(page, READ_SYNC);
if (err < 0) if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err); return ERR_PTR(err);
else if (err == LOCKED_PAGE) } else if (err == LOCKED_PAGE) {
goto page_hit; goto page_hit;
}
blk_start_plug(&plug); blk_start_plug(&plug);
@ -1533,7 +1540,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
if (unlikely(nid >= nm_i->max_nid)) if (unlikely(nid >= nm_i->max_nid))
nid = 0; nid = 0;
if (i++ == FREE_NID_PAGES) if (++i >= FREE_NID_PAGES)
break; break;
} }
@ -1570,6 +1577,8 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
/* We should not use stale free nids created by build_free_nids */ /* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !on_build_free_nids(nm_i)) { if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
struct node_info ni;
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
list_for_each_entry(i, &nm_i->free_nid_list, list) list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW) if (i->state == NID_NEW)
@ -1580,6 +1589,13 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
i->state = NID_ALLOC; i->state = NID_ALLOC;
nm_i->fcnt--; nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock); spin_unlock(&nm_i->free_nid_list_lock);
/* check nid is allocated already */
get_node_info(sbi, *nid, &ni);
if (ni.blk_addr != NULL_ADDR) {
alloc_nid_done(sbi, *nid);
goto retry;
}
return true; return true;
} }
spin_unlock(&nm_i->free_nid_list_lock); spin_unlock(&nm_i->free_nid_list_lock);
@ -1636,6 +1652,32 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i); kmem_cache_free(free_nid_slab, i);
} }
int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next;
int nr = nr_shrink;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK)
break;
if (i->state == NID_ALLOC)
continue;
__del_from_free_nid_list(nm_i, i);
kmem_cache_free(free_nid_slab, i);
nm_i->fcnt--;
nr_shrink--;
}
spin_unlock(&nm_i->free_nid_list_lock);
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
}
void recover_inline_xattr(struct inode *inode, struct page *page) void recover_inline_xattr(struct inode *inode, struct page *page)
{ {
void *src_addr, *dst_addr; void *src_addr, *dst_addr;

View file

@ -399,14 +399,35 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
for (; start < end; start++) { for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest; block_t src, dest;
src = datablock_addr(dn.node_page, dn.ofs_in_node); src = datablock_addr(dn.node_page, dn.ofs_in_node);
dest = datablock_addr(page, dn.ofs_in_node); dest = datablock_addr(page, dn.ofs_in_node);
if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && /* skip recovering if dest is the same as src */
is_valid_blkaddr(sbi, dest, META_POR)) { if (src == dest)
continue;
/* dest is invalid, just invalidate src block */
if (dest == NULL_ADDR) {
truncate_data_blocks_range(&dn, 1);
continue;
}
/*
* dest is reserved block, invalidate src block
* and then reserve one new block in dnode page.
*/
if (dest == NEW_ADDR) {
truncate_data_blocks_range(&dn, 1);
err = reserve_new_block(&dn);
f2fs_bug_on(sbi, err);
continue;
}
/* dest is valid block, try to recover from src to dest */
if (is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) { if (src == NULL_ADDR) {
err = reserve_new_block(&dn); err = reserve_new_block(&dn);
@ -424,7 +445,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
ni.version, false); ni.version, false);
recovered++; recovered++;
} }
dn.ofs_in_node++;
} }
if (IS_INODE(dn.node_page)) if (IS_INODE(dn.node_page))
@ -525,14 +545,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&inode_list); INIT_LIST_HEAD(&inode_list);
/* step #1: find fsynced inode numbers */
set_sbi_flag(sbi, SBI_POR_DOING);
/* prevent checkpoint */ /* prevent checkpoint */
mutex_lock(&sbi->cp_mutex); mutex_lock(&sbi->cp_mutex);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list); err = find_fsync_dnodes(sbi, &inode_list);
if (err) if (err)
goto out; goto out;
@ -561,11 +579,20 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
clear_sbi_flag(sbi, SBI_POR_DOING); clear_sbi_flag(sbi, SBI_POR_DOING);
if (err) { if (err) {
discard_next_dnode(sbi, blkaddr); bool invalidate = false;
if (discard_next_dnode(sbi, blkaddr))
invalidate = true;
/* Flush all the NAT/SIT pages */ /* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) while (get_pages(sbi, F2FS_DIRTY_META))
sync_meta_pages(sbi, META, LONG_MAX); sync_meta_pages(sbi, META, LONG_MAX);
/* invalidate temporary meta page */
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi),
blkaddr, blkaddr);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex); mutex_unlock(&sbi->cp_mutex);
} else if (need_writecp) { } else if (need_writecp) {

View file

@ -197,28 +197,20 @@ void register_inmem_page(struct inode *inode, struct page *page)
{ {
struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new; struct inmem_pages *new;
int err;
SetPagePrivate(page);
f2fs_trace_pid(page); f2fs_trace_pid(page);
set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
SetPagePrivate(page);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
/* add atomic page indices to the list */ /* add atomic page indices to the list */
new->page = page; new->page = page;
INIT_LIST_HEAD(&new->list); INIT_LIST_HEAD(&new->list);
retry:
/* increase reference count with clean state */ /* increase reference count with clean state */
mutex_lock(&fi->inmem_lock); mutex_lock(&fi->inmem_lock);
err = radix_tree_insert(&fi->inmem_root, page->index, new);
if (err == -EEXIST) {
mutex_unlock(&fi->inmem_lock);
kmem_cache_free(inmem_entry_slab, new);
return;
} else if (err) {
mutex_unlock(&fi->inmem_lock);
goto retry;
}
get_page(page); get_page(page);
list_add_tail(&new->list, &fi->inmem_pages); list_add_tail(&new->list, &fi->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
@ -227,7 +219,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
trace_f2fs_register_inmem_page(page, INMEM); trace_f2fs_register_inmem_page(page, INMEM);
} }
void commit_inmem_pages(struct inode *inode, bool abort) int commit_inmem_pages(struct inode *inode, bool abort)
{ {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode_info *fi = F2FS_I(inode);
@ -239,6 +231,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
.rw = WRITE_SYNC | REQ_PRIO, .rw = WRITE_SYNC | REQ_PRIO,
.encrypted_page = NULL, .encrypted_page = NULL,
}; };
int err = 0;
/* /*
* The abort is true only when f2fs_evict_inode is called. * The abort is true only when f2fs_evict_inode is called.
@ -254,8 +247,8 @@ void commit_inmem_pages(struct inode *inode, bool abort)
mutex_lock(&fi->inmem_lock); mutex_lock(&fi->inmem_lock);
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
lock_page(cur->page);
if (!abort) { if (!abort) {
lock_page(cur->page);
if (cur->page->mapping == inode->i_mapping) { if (cur->page->mapping == inode->i_mapping) {
set_page_dirty(cur->page); set_page_dirty(cur->page);
f2fs_wait_on_page_writeback(cur->page, DATA); f2fs_wait_on_page_writeback(cur->page, DATA);
@ -263,15 +256,20 @@ void commit_inmem_pages(struct inode *inode, bool abort)
inode_dec_dirty_pages(inode); inode_dec_dirty_pages(inode);
trace_f2fs_commit_inmem_page(cur->page, INMEM); trace_f2fs_commit_inmem_page(cur->page, INMEM);
fio.page = cur->page; fio.page = cur->page;
do_write_data_page(&fio); err = do_write_data_page(&fio);
submit_bio = true; submit_bio = true;
if (err) {
unlock_page(cur->page);
break;
}
} }
f2fs_put_page(cur->page, 1);
} else { } else {
trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
put_page(cur->page);
} }
radix_tree_delete(&fi->inmem_root, cur->page->index); set_page_private(cur->page, 0);
ClearPagePrivate(cur->page);
f2fs_put_page(cur->page, 1);
list_del(&cur->list); list_del(&cur->list);
kmem_cache_free(inmem_entry_slab, cur); kmem_cache_free(inmem_entry_slab, cur);
dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
@ -283,6 +281,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
if (submit_bio) if (submit_bio)
f2fs_submit_merged_bio(sbi, DATA, WRITE); f2fs_submit_merged_bio(sbi, DATA, WRITE);
} }
return err;
} }
/* /*
@ -304,10 +303,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{ {
/* try to shrink extent cache when there is no enough memory */ /* try to shrink extent cache when there is no enough memory */
f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); if (!available_free_memory(sbi, EXTENT_CACHE))
f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
/* check the # of cached NAT entries and prefree segments */ /* check the # of cached NAT entries */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || if (!available_free_memory(sbi, NAT_ENTRIES))
try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES);
/* checkpoint is the only way to shrink partial cached entries */
if (!available_free_memory(sbi, NAT_ENTRIES) ||
excess_prefree_segs(sbi) || excess_prefree_segs(sbi) ||
!available_free_memory(sbi, INO_ENTRIES)) !available_free_memory(sbi, INO_ENTRIES))
f2fs_sync_fs(sbi->sb, true); f2fs_sync_fs(sbi->sb, true);
@ -323,10 +330,12 @@ static int issue_flush_thread(void *data)
return 0; return 0;
if (!llist_empty(&fcc->issue_list)) { if (!llist_empty(&fcc->issue_list)) {
struct bio *bio = bio_alloc(GFP_NOIO, 0); struct bio *bio;
struct flush_cmd *cmd, *next; struct flush_cmd *cmd, *next;
int ret; int ret;
bio = f2fs_bio_alloc(0);
fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
@ -358,8 +367,15 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
if (test_opt(sbi, NOBARRIER)) if (test_opt(sbi, NOBARRIER))
return 0; return 0;
if (!test_opt(sbi, FLUSH_MERGE)) if (!test_opt(sbi, FLUSH_MERGE)) {
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); struct bio *bio = f2fs_bio_alloc(0);
int ret;
bio->bi_bdev = sbi->sb->s_bdev;
ret = submit_bio_wait(WRITE_FLUSH, bio);
bio_put(bio);
return ret;
}
init_completion(&cmd.wait); init_completion(&cmd.wait);
@ -503,7 +519,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
} }
void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{ {
int err = -ENOTSUPP; int err = -ENOTSUPP;
@ -513,13 +529,16 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
if (f2fs_test_bit(offset, se->discard_map)) if (f2fs_test_bit(offset, se->discard_map))
return; return false;
err = f2fs_issue_discard(sbi, blkaddr, 1); err = f2fs_issue_discard(sbi, blkaddr, 1);
} }
if (err) if (err) {
update_meta_page(sbi, NULL, blkaddr); update_meta_page(sbi, NULL, blkaddr);
return true;
}
return false;
} }
static void __add_discard_entry(struct f2fs_sb_info *sbi, static void __add_discard_entry(struct f2fs_sb_info *sbi,
@ -1218,7 +1237,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_lock(&sit_i->sentry_lock); mutex_lock(&sit_i->sentry_lock);
/* direct_io'ed data is aligned to the segment for better performance */ /* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff) if (direct_io && curseg->next_blkoff &&
!has_not_enough_free_secs(sbi, 0))
__allocate_new_segments(sbi, type); __allocate_new_segments(sbi, type);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
@ -1733,7 +1753,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
static struct sit_entry_set *grab_sit_entry_set(void) static struct sit_entry_set *grab_sit_entry_set(void)
{ {
struct sit_entry_set *ses = struct sit_entry_set *ses =
f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
ses->entry_cnt = 0; ses->entry_cnt = 0;
INIT_LIST_HEAD(&ses->set_list); INIT_LIST_HEAD(&ses->set_list);

View file

@ -177,6 +177,15 @@ struct segment_allocation {
void (*allocate_segment)(struct f2fs_sb_info *, int, bool); void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
}; };
/*
* this value is set in page as a private data which indicate that
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE 0x0000ffff
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
struct inmem_pages { struct inmem_pages {
struct list_head list; struct list_head list;
struct page *page; struct page *page;
@ -555,16 +564,15 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
return curseg->next_blkoff; return curseg->next_blkoff;
} }
#ifdef CONFIG_F2FS_CHECK_FS
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{ {
BUG_ON(segno > TOTAL_SEGS(sbi) - 1); f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
} }
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{ {
BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi)
BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); || blk_addr >= MAX_BLKADDR(sbi));
} }
/* /*
@ -573,16 +581,11 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
static inline void check_block_count(struct f2fs_sb_info *sbi, static inline void check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit) int segno, struct f2fs_sit_entry *raw_sit)
{ {
#ifdef CONFIG_F2FS_CHECK_FS
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0; int valid_blocks = 0;
int cur_pos = 0, next_pos; int cur_pos = 0, next_pos;
/* check segment usage */
BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
/* check boundary of a given segment number */
BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
/* check bitmap with valid block count */ /* check bitmap with valid block count */
do { do {
if (is_valid) { if (is_valid) {
@ -598,35 +601,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
is_valid = !is_valid; is_valid = !is_valid;
} while (cur_pos < sbi->blocks_per_seg); } while (cur_pos < sbi->blocks_per_seg);
BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
}
#else
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
if (segno > TOTAL_SEGS(sbi) - 1)
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
/*
* Summary block is always treated as an invalid block
*/
static inline void check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
/* check segment usage */
if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
set_sbi_flag(sbi, SBI_NEED_FSCK);
/* check boundary of a given segment number */
if (segno > TOTAL_SEGS(sbi) - 1)
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
#endif #endif
/* check segment usage, and check boundary of a given segment number */
f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1);
}
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
unsigned int start) unsigned int start)

139
fs/f2fs/shrinker.c Normal file
View file

@ -0,0 +1,139 @@
/*
* f2fs shrinker support
* the basic infra was copied from fs/ubifs/shrinker.c
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
static LIST_HEAD(f2fs_list);
static DEFINE_SPINLOCK(f2fs_list_lock);
static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK)
return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK;
return 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
{
return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct f2fs_sb_info *sbi;
struct list_head *p;
unsigned long count = 0;
spin_lock(&f2fs_list_lock);
p = f2fs_list.next;
while (p != &f2fs_list) {
sbi = list_entry(p, struct f2fs_sb_info, s_list);
/* stop f2fs_put_super */
if (!mutex_trylock(&sbi->umount_mutex)) {
p = p->next;
continue;
}
spin_unlock(&f2fs_list_lock);
/* count extent cache entries */
count += __count_extent_cache(sbi);
/* shrink clean nat cache entries */
count += __count_nat_entries(sbi);
/* count free nids cache entries */
count += __count_free_nids(sbi);
spin_lock(&f2fs_list_lock);
p = p->next;
mutex_unlock(&sbi->umount_mutex);
}
spin_unlock(&f2fs_list_lock);
return count;
}
unsigned long f2fs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
unsigned long nr = sc->nr_to_scan;
struct f2fs_sb_info *sbi;
struct list_head *p;
unsigned int run_no;
unsigned long freed = 0;
spin_lock(&f2fs_list_lock);
do {
run_no = ++shrinker_run_no;
} while (run_no == 0);
p = f2fs_list.next;
while (p != &f2fs_list) {
sbi = list_entry(p, struct f2fs_sb_info, s_list);
if (sbi->shrinker_run_no == run_no)
break;
/* stop f2fs_put_super */
if (!mutex_trylock(&sbi->umount_mutex)) {
p = p->next;
continue;
}
spin_unlock(&f2fs_list_lock);
sbi->shrinker_run_no = run_no;
/* shrink extent cache entries */
freed += f2fs_shrink_extent_tree(sbi, nr >> 1);
/* shrink clean nat cache entries */
if (freed < nr)
freed += try_to_free_nats(sbi, nr - freed);
/* shrink free nids cache entries */
if (freed < nr)
freed += try_to_free_nids(sbi, nr - freed);
spin_lock(&f2fs_list_lock);
p = p->next;
list_move_tail(&sbi->s_list, &f2fs_list);
mutex_unlock(&sbi->umount_mutex);
if (freed >= nr)
break;
}
spin_unlock(&f2fs_list_lock);
return freed;
}
void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
{
spin_lock(&f2fs_list_lock);
list_add_tail(&sbi->s_list, &f2fs_list);
spin_unlock(&f2fs_list_lock);
}
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
{
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
list_del(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}

View file

@ -39,6 +39,13 @@ static struct proc_dir_entry *f2fs_proc_root;
static struct kmem_cache *f2fs_inode_cachep; static struct kmem_cache *f2fs_inode_cachep;
static struct kset *f2fs_kset; static struct kset *f2fs_kset;
/* f2fs-wide shrinker description */
static struct shrinker f2fs_shrinker_info = {
.scan_objects = f2fs_shrink_scan,
.count_objects = f2fs_shrink_count,
.seeks = DEFAULT_SEEKS,
};
enum { enum {
Opt_gc_background, Opt_gc_background,
Opt_disable_roll_forward, Opt_disable_roll_forward,
@ -58,6 +65,7 @@ enum {
Opt_nobarrier, Opt_nobarrier,
Opt_fastboot, Opt_fastboot,
Opt_extent_cache, Opt_extent_cache,
Opt_noextent_cache,
Opt_noinline_data, Opt_noinline_data,
Opt_err, Opt_err,
}; };
@ -81,6 +89,7 @@ static match_table_t f2fs_tokens = {
{Opt_nobarrier, "nobarrier"}, {Opt_nobarrier, "nobarrier"},
{Opt_fastboot, "fastboot"}, {Opt_fastboot, "fastboot"},
{Opt_extent_cache, "extent_cache"}, {Opt_extent_cache, "extent_cache"},
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"}, {Opt_noinline_data, "noinline_data"},
{Opt_err, NULL}, {Opt_err, NULL},
}; };
@ -382,6 +391,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_extent_cache: case Opt_extent_cache:
set_opt(sbi, EXTENT_CACHE); set_opt(sbi, EXTENT_CACHE);
break; break;
case Opt_noextent_cache:
clear_opt(sbi, EXTENT_CACHE);
break;
case Opt_noinline_data: case Opt_noinline_data:
clear_opt(sbi, INLINE_DATA); clear_opt(sbi, INLINE_DATA);
break; break;
@ -410,9 +422,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1; fi->i_current_depth = 1;
fi->i_advise = 0; fi->i_advise = 0;
rwlock_init(&fi->ext_lock);
init_rwsem(&fi->i_sem); init_rwsem(&fi->i_sem);
INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
INIT_LIST_HEAD(&fi->inmem_pages); INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock); mutex_init(&fi->inmem_lock);
@ -441,17 +451,22 @@ static int f2fs_drop_inode(struct inode *inode)
*/ */
if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { if (!inode_unhashed(inode) && inode->i_state & I_SYNC) {
if (!inode->i_nlink && !is_bad_inode(inode)) { if (!inode->i_nlink && !is_bad_inode(inode)) {
/* to avoid evict_inode call simultaneously */
atomic_inc(&inode->i_count);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
/* some remained atomic pages should discarded */ /* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode)) if (f2fs_is_atomic_file(inode))
commit_inmem_pages(inode, true); commit_inmem_pages(inode, true);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
sb_start_intwrite(inode->i_sb); sb_start_intwrite(inode->i_sb);
i_size_write(inode, 0); i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode)) if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode); f2fs_truncate(inode, true);
sb_end_intwrite(inode->i_sb); sb_end_intwrite(inode->i_sb);
@ -461,6 +476,7 @@ static int f2fs_drop_inode(struct inode *inode)
F2FS_I(inode)->i_crypt_info); F2FS_I(inode)->i_crypt_info);
#endif #endif
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
} }
return 0; return 0;
} }
@ -498,9 +514,11 @@ static void f2fs_put_super(struct super_block *sb)
} }
kobject_del(&sbi->s_kobj); kobject_del(&sbi->s_kobj);
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi); stop_gc_thread(sbi);
/* prevent remaining shrinker jobs */
mutex_lock(&sbi->umount_mutex);
/* /*
* We don't need to do checkpoint when superblock is clean. * We don't need to do checkpoint when superblock is clean.
* But, the previous checkpoint was not done by umount, it needs to do * But, the previous checkpoint was not done by umount, it needs to do
@ -514,6 +532,9 @@ static void f2fs_put_super(struct super_block *sb)
write_checkpoint(sbi, &cpc); write_checkpoint(sbi, &cpc);
} }
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
/* /*
* normally superblock is clean, so we need to release this. * normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well. * In addition, EIO will skip do checkpoint, we need this as well.
@ -521,6 +542,9 @@ static void f2fs_put_super(struct super_block *sb)
release_dirty_inode(sbi); release_dirty_inode(sbi);
release_discard_addrs(sbi); release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
iput(sbi->node_inode); iput(sbi->node_inode);
iput(sbi->meta_inode); iput(sbi->meta_inode);
@ -647,6 +671,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",fastboot"); seq_puts(seq, ",fastboot");
if (test_opt(sbi, EXTENT_CACHE)) if (test_opt(sbi, EXTENT_CACHE))
seq_puts(seq, ",extent_cache"); seq_puts(seq, ",extent_cache");
else
seq_puts(seq, ",noextent_cache");
seq_printf(seq, ",active_logs=%u", sbi->active_logs); seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0; return 0;
@ -667,7 +693,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
struct seg_entry *se = get_seg_entry(sbi, i); struct seg_entry *se = get_seg_entry(sbi, i);
if ((i % 10) == 0) if ((i % 10) == 0)
seq_printf(seq, "%-5d", i); seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u", se->type, seq_printf(seq, "%d|%-3u", se->type,
get_valid_blocks(sbi, i, 1)); get_valid_blocks(sbi, i, 1));
if ((i % 10) == 9 || i == (total_segs - 1)) if ((i % 10) == 9 || i == (total_segs - 1))
@ -699,6 +725,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, BG_GC); set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DATA);
set_opt(sbi, EXTENT_CACHE);
#ifdef CONFIG_F2FS_FS_XATTR #ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER); set_opt(sbi, XATTR_USER);
@ -970,6 +997,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->dir_level = DEF_DIR_LEVEL; sbi->dir_level = DEF_DIR_LEVEL;
clear_sbi_flag(sbi, SBI_NEED_FSCK); clear_sbi_flag(sbi, SBI_NEED_FSCK);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
} }
/* /*
@ -1135,7 +1165,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->writepages); mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex); mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_write);
clear_sbi_flag(sbi, SBI_POR_DOING);
/* disallow all the data/node/meta page writes */
set_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock); spin_lock_init(&sbi->stat_lock);
init_rwsem(&sbi->read_io.io_rwsem); init_rwsem(&sbi->read_io.io_rwsem);
@ -1212,8 +1244,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_nm; goto free_nm;
} }
f2fs_join_shrinker(sbi);
/* if there are nt orphan nodes free them */ /* if there are nt orphan nodes free them */
recover_orphan_inodes(sbi); err = recover_orphan_inodes(sbi);
if (err)
goto free_node_inode;
/* read root inode and dentry */ /* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
@ -1275,6 +1311,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_kobj; goto free_kobj;
} }
} }
/* recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
/* /*
* If filesystem is not mounted as read-only then * If filesystem is not mounted as read-only then
@ -1308,7 +1346,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
dput(sb->s_root); dput(sb->s_root);
sb->s_root = NULL; sb->s_root = NULL;
free_node_inode: free_node_inode:
mutex_lock(&sbi->umount_mutex);
f2fs_leave_shrinker(sbi);
iput(sbi->node_inode); iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
free_nm: free_nm:
destroy_node_manager(sbi); destroy_node_manager(sbi);
free_sm: free_sm:
@ -1404,13 +1445,20 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_crypto(); err = f2fs_init_crypto();
if (err) if (err)
goto free_kset; goto free_kset;
err = register_filesystem(&f2fs_fs_type);
err = register_shrinker(&f2fs_shrinker_info);
if (err) if (err)
goto free_crypto; goto free_crypto;
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
f2fs_create_root_stats(); f2fs_create_root_stats();
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0; return 0;
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_crypto: free_crypto:
f2fs_exit_crypto(); f2fs_exit_crypto();
free_kset: free_kset:
@ -1433,6 +1481,7 @@ static void __exit exit_f2fs_fs(void)
{ {
remove_proc_entry("fs/f2fs", NULL); remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats(); f2fs_destroy_root_stats();
unregister_shrinker(&f2fs_shrinker_info);
unregister_filesystem(&f2fs_fs_type); unregister_filesystem(&f2fs_fs_type);
f2fs_exit_crypto(); f2fs_exit_crypto();
destroy_extent_cache(); destroy_extent_cache();

View file

@ -499,9 +499,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
len = strlen(name); len = strlen(name);
if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) if (len > F2FS_NAME_LEN)
return -ERANGE; return -ERANGE;
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
base_addr = read_all_xattrs(inode, ipage); base_addr = read_all_xattrs(inode, ipage);
if (!base_addr) if (!base_addr)
goto exit; goto exit;

View file

@ -417,15 +417,25 @@ typedef __le32 f2fs_hash_t;
#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) #define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
/* the number of dentry in a block */
#define NR_DENTRY_IN_BLOCK 214
/* MAX level for dir lookup */ /* MAX level for dir lookup */
#define MAX_DIR_HASH_DEPTH 63 #define MAX_DIR_HASH_DEPTH 63
/* MAX buckets in one level of dir */ /* MAX buckets in one level of dir */
#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) #define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
/*
* space utilization of regular dentry and inline dentry
* regular dentry inline dentry
* bitmap 1 * 27 = 27 1 * 23 = 23
* reserved 1 * 3 = 3 1 * 7 = 7
* dentry 11 * 214 = 2354 11 * 182 = 2002
* filename 8 * 214 = 1712 8 * 182 = 1456
* total 4096 3488
*
* Note: there are more reserved space in inline dentry than in regular
* dentry, when converting inline dentry we should handle this carefully.
*/
#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */
#define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DIR_ENTRY 11 /* by byte */
#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
BITS_PER_BYTE) BITS_PER_BYTE)

View file

@ -1099,11 +1099,11 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start,
TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
TP_PROTO(struct inode *inode, unsigned int pgofs, TP_PROTO(struct inode *inode, unsigned int pgofs,
struct extent_node *en), struct extent_info *ei),
TP_ARGS(inode, pgofs, en), TP_ARGS(inode, pgofs, ei),
TP_CONDITION(en), TP_CONDITION(ei),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
@ -1118,9 +1118,9 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->pgofs = pgofs; __entry->pgofs = pgofs;
__entry->fofs = en->ei.fofs; __entry->fofs = ei->fofs;
__entry->blk = en->ei.blk; __entry->blk = ei->blk;
__entry->len = en->ei.len; __entry->len = ei->len;
), ),
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "