kernel-fxtec-pro1x/fs/sysv/inode.c
Nick Piggin fa0d7e3de6 fs: icache RCU free inodes
RCU free the struct inode. This will allow:

- Subsequent store-free path walking patch. The inode must be consulted for
  permissions when walking, so an RCU inode reference is a must.
- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want
  to take i_lock no longer need to take sb_inode_list_lock to walk the list in
  the first place. This will simplify and optimize locking.
- Could remove some nested trylock loops in dcache code
- Could potentially simplify things a bit in VM land. Do not need to take the
  page lock to follow page->mapping.

The downsides of this is the performance cost of using RCU. In a simple
creat/unlink microbenchmark, performance drops by about 10% due to inability to
reuse cache-hot slab objects. As iterations increase and RCU freeing starts
kicking over, this increases to about 20%.

In cases where inode lifetimes are longer (ie. many inodes may be allocated
during the average life span of a single inode), a lot of this cache reuse is
not applicable, so the regression caused by this patch is smaller.

The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU,
however this adds some complexity to list walking and store-free path walking,
so I prefer to implement this at a later date, if it is shown to be a win in
real situations. I haven't found a regression in any non-micro benchmark so I
doubt it will be a problem.

Signed-off-by: Nick Piggin <npiggin@kernel.dk>
2011-01-07 17:50:26 +11:00

381 lines
9.6 KiB
C

/*
* linux/fs/sysv/inode.c
*
* minix/inode.c
* Copyright (C) 1991, 1992 Linus Torvalds
*
* xenix/inode.c
* Copyright (C) 1992 Doug Evans
*
* coh/inode.c
* Copyright (C) 1993 Pascal Haible, Bruno Haible
*
* sysv/inode.c
* Copyright (C) 1993 Paul B. Monday
*
* sysv/inode.c
* Copyright (C) 1993 Bruno Haible
* Copyright (C) 1997, 1998 Krzysztof G. Baranowski
*
* This file contains code for allocating/freeing inodes and for read/writing
* the superblock.
*/
#include <linux/highuid.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
#include <linux/namei.h>
#include <asm/byteorder.h>
#include "sysv.h"
static int sysv_sync_fs(struct super_block *sb, int wait)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
unsigned long time = get_seconds(), old_time;
lock_super(sb);
/*
* If we are going to write out the super block,
* then attach current time stamp.
* But if the filesystem was marked clean, keep it clean.
*/
sb->s_dirt = 0;
old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
if (sbi->s_type == FSTYPE_SYSV4) {
if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
*sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38 - time);
*sbi->s_sb_time = cpu_to_fs32(sbi, time);
mark_buffer_dirty(sbi->s_bh2);
}
unlock_super(sb);
return 0;
}
static void sysv_write_super(struct super_block *sb)
{
if (!(sb->s_flags & MS_RDONLY))
sysv_sync_fs(sb, 1);
else
sb->s_dirt = 0;
}
static int sysv_remount(struct super_block *sb, int *flags, char *data)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
lock_super(sb);
if (sbi->s_forced_ro)
*flags |= MS_RDONLY;
if (*flags & MS_RDONLY)
sysv_write_super(sb);
unlock_super(sb);
return 0;
}
static void sysv_put_super(struct super_block *sb)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
if (sb->s_dirt)
sysv_write_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
/* XXX ext2 also updates the state here */
mark_buffer_dirty(sbi->s_bh1);
if (sbi->s_bh1 != sbi->s_bh2)
mark_buffer_dirty(sbi->s_bh2);
}
brelse(sbi->s_bh1);
if (sbi->s_bh1 != sbi->s_bh2)
brelse(sbi->s_bh2);
kfree(sbi);
}
static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct sysv_sb_info *sbi = SYSV_SB(sb);
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
buf->f_type = sb->s_magic;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = sbi->s_ndatazones;
buf->f_bavail = buf->f_bfree = sysv_count_free_blocks(sb);
buf->f_files = sbi->s_ninodes;
buf->f_ffree = sysv_count_free_inodes(sb);
buf->f_namelen = SYSV_NAMELEN;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
return 0;
}
/*
* NXI <-> N0XI for PDP, XIN <-> XIN0 for le32, NIX <-> 0NIX for be32
*/
static inline void read3byte(struct sysv_sb_info *sbi,
unsigned char * from, unsigned char * to)
{
if (sbi->s_bytesex == BYTESEX_PDP) {
to[0] = from[0];
to[1] = 0;
to[2] = from[1];
to[3] = from[2];
} else if (sbi->s_bytesex == BYTESEX_LE) {
to[0] = from[0];
to[1] = from[1];
to[2] = from[2];
to[3] = 0;
} else {
to[0] = 0;
to[1] = from[0];
to[2] = from[1];
to[3] = from[2];
}
}
static inline void write3byte(struct sysv_sb_info *sbi,
unsigned char * from, unsigned char * to)
{
if (sbi->s_bytesex == BYTESEX_PDP) {
to[0] = from[0];
to[1] = from[2];
to[2] = from[3];
} else if (sbi->s_bytesex == BYTESEX_LE) {
to[0] = from[0];
to[1] = from[1];
to[2] = from[2];
} else {
to[0] = from[1];
to[1] = from[2];
to[2] = from[3];
}
}
static const struct inode_operations sysv_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.getattr = sysv_getattr,
};
void sysv_set_inode(struct inode *inode, dev_t rdev)
{
if (S_ISREG(inode->i_mode)) {
inode->i_op = &sysv_file_inode_operations;
inode->i_fop = &sysv_file_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &sysv_dir_inode_operations;
inode->i_fop = &sysv_dir_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else if (S_ISLNK(inode->i_mode)) {
if (inode->i_blocks) {
inode->i_op = &sysv_symlink_inode_operations;
inode->i_mapping->a_ops = &sysv_aops;
} else {
inode->i_op = &sysv_fast_symlink_inode_operations;
nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size,
sizeof(SYSV_I(inode)->i_data) - 1);
}
} else
init_special_inode(inode, inode->i_mode, rdev);
}
struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
{
struct sysv_sb_info * sbi = SYSV_SB(sb);
struct buffer_head * bh;
struct sysv_inode * raw_inode;
struct sysv_inode_info * si;
struct inode *inode;
unsigned int block;
if (!ino || ino > sbi->s_ninodes) {
printk("Bad inode number on dev %s: %d is out of range\n",
sb->s_id, ino);
return ERR_PTR(-EIO);
}
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW))
return inode;
raw_inode = sysv_raw_inode(sb, ino, &bh);
if (!raw_inode) {
printk("Major problem: unable to read inode from dev %s\n",
inode->i_sb->s_id);
goto bad_inode;
}
/* SystemV FS: kludge permissions if ino==SYSV_ROOT_INO ?? */
inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
inode->i_nlink = fs16_to_cpu(sbi, raw_inode->i_nlink);
inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_ctime);
inode->i_ctime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_blocks = 0;
si = SYSV_I(inode);
for (block = 0; block < 10+1+1+1; block++)
read3byte(sbi, &raw_inode->i_data[3*block],
(u8 *)&si->i_data[block]);
brelse(bh);
si->i_dir_start_lookup = 0;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
sysv_set_inode(inode,
old_decode_dev(fs32_to_cpu(sbi, si->i_data[0])));
else
sysv_set_inode(inode, 0);
unlock_new_inode(inode);
return inode;
bad_inode:
iget_failed(inode);
return ERR_PTR(-EIO);
}
static int __sysv_write_inode(struct inode *inode, int wait)
{
struct super_block * sb = inode->i_sb;
struct sysv_sb_info * sbi = SYSV_SB(sb);
struct buffer_head * bh;
struct sysv_inode * raw_inode;
struct sysv_inode_info * si;
unsigned int ino, block;
int err = 0;
ino = inode->i_ino;
if (!ino || ino > sbi->s_ninodes) {
printk("Bad inode number on dev %s: %d is out of range\n",
inode->i_sb->s_id, ino);
return -EIO;
}
raw_inode = sysv_raw_inode(sb, ino, &bh);
if (!raw_inode) {
printk("unable to read i-node block\n");
return -EIO;
}
raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid));
raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid));
raw_inode->i_nlink = cpu_to_fs16(sbi, inode->i_nlink);
raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size);
raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec);
raw_inode->i_mtime = cpu_to_fs32(sbi, inode->i_mtime.tv_sec);
raw_inode->i_ctime = cpu_to_fs32(sbi, inode->i_ctime.tv_sec);
si = SYSV_I(inode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
si->i_data[0] = cpu_to_fs32(sbi, old_encode_dev(inode->i_rdev));
for (block = 0; block < 10+1+1+1; block++)
write3byte(sbi, (u8 *)&si->i_data[block],
&raw_inode->i_data[3*block]);
mark_buffer_dirty(bh);
if (wait) {
sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) {
printk ("IO error syncing sysv inode [%s:%08x]\n",
sb->s_id, ino);
err = -EIO;
}
}
brelse(bh);
return 0;
}
int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
{
return __sysv_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
int sysv_sync_inode(struct inode *inode)
{
return __sysv_write_inode(inode, 1);
}
static void sysv_evict_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
if (!inode->i_nlink) {
inode->i_size = 0;
sysv_truncate(inode);
}
invalidate_inode_buffers(inode);
end_writeback(inode);
if (!inode->i_nlink)
sysv_free_inode(inode);
}
static struct kmem_cache *sysv_inode_cachep;
static struct inode *sysv_alloc_inode(struct super_block *sb)
{
struct sysv_inode_info *si;
si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL);
if (!si)
return NULL;
return &si->vfs_inode;
}
static void sysv_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
}
static void sysv_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, sysv_i_callback);
}
static void init_once(void *p)
{
struct sysv_inode_info *si = (struct sysv_inode_info *)p;
inode_init_once(&si->vfs_inode);
}
const struct super_operations sysv_sops = {
.alloc_inode = sysv_alloc_inode,
.destroy_inode = sysv_destroy_inode,
.write_inode = sysv_write_inode,
.evict_inode = sysv_evict_inode,
.put_super = sysv_put_super,
.write_super = sysv_write_super,
.sync_fs = sysv_sync_fs,
.remount_fs = sysv_remount,
.statfs = sysv_statfs,
};
int __init sysv_init_icache(void)
{
sysv_inode_cachep = kmem_cache_create("sysv_inode_cache",
sizeof(struct sysv_inode_info), 0,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
init_once);
if (!sysv_inode_cachep)
return -ENOMEM;
return 0;
}
void sysv_destroy_icache(void)
{
kmem_cache_destroy(sysv_inode_cachep);
}