[GFS2] Use ->page_mkwrite() for mmap()
This cleans up the mmap() code path for GFS2 by implementing the page_mkwrite function for GFS2. We are thus able to use the generic filemap_fault function for our ->fault() implementation. This now means that shared writable mappings will be much more efficiently shared across the cluster if there is a reasonable proportion of read activity (the greater proportion, the better). As a side effect, it also reduces the size of the code, removes special cases from readpage and readpages, and makes the code path easier to follow. Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
This commit is contained in:
parent
51ff87bdd9
commit
3cc3f710ce
7 changed files with 131 additions and 251 deletions
|
@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o
|
|||
gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
|
||||
glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
|
||||
mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
|
||||
ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
|
||||
ops_fstype.o ops_inode.o ops_super.o quota.o \
|
||||
recovery.o rgrp.o super.o sys.o trans.o util.o
|
||||
|
||||
obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
|
||||
|
|
|
@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
|
|||
if (!ip || !S_ISREG(inode->i_mode))
|
||||
return;
|
||||
|
||||
if (!test_bit(GIF_PAGED, &ip->i_flags))
|
||||
return;
|
||||
|
||||
unmap_shared_mapping_range(inode->i_mapping, 0, 0);
|
||||
|
||||
if (test_bit(GIF_SW_PAGED, &ip->i_flags))
|
||||
set_bit(GLF_DIRTY, &gl->gl_flags);
|
||||
|
||||
clear_bit(GIF_SW_PAGED, &ip->i_flags);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -234,10 +229,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
|
|||
set_bit(GIF_INVALID, &ip->i_flags);
|
||||
}
|
||||
|
||||
if (ip && S_ISREG(ip->i_inode.i_mode)) {
|
||||
if (ip && S_ISREG(ip->i_inode.i_mode))
|
||||
truncate_inode_pages(ip->i_inode.i_mapping, 0);
|
||||
clear_bit(GIF_PAGED, &ip->i_flags);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -241,7 +241,6 @@ struct gfs2_alloc {
|
|||
enum {
|
||||
GIF_INVALID = 0,
|
||||
GIF_QD_LOCKED = 1,
|
||||
GIF_PAGED = 2,
|
||||
GIF_SW_PAGED = 3,
|
||||
};
|
||||
|
||||
|
@ -289,19 +288,12 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode)
|
|||
return container_of(inode, struct gfs2_inode, i_inode);
|
||||
}
|
||||
|
||||
/* To be removed? */
|
||||
/*
 * GFS2_SB - fetch the per-superblock GFS2 structure for an inode.
 * Returns the s_fs_info pointer of the superblock that @inode belongs to,
 * typed as struct gfs2_sbd.
 */
static inline struct gfs2_sbd *GFS2_SB(struct inode *inode)
|
||||
{
|
||||
return inode->i_sb->s_fs_info;
|
||||
}
|
||||
|
||||
enum {
|
||||
GFF_DID_DIRECT_ALLOC = 0,
|
||||
GFF_EXLOCK = 1,
|
||||
};
|
||||
|
||||
struct gfs2_file {
|
||||
unsigned long f_flags; /* GFF_... */
|
||||
struct mutex f_fl_mutex;
|
||||
struct gfs2_holder f_fl_gh;
|
||||
};
|
||||
|
|
|
@ -265,9 +265,7 @@ static int __gfs2_readpage(void *file, struct page *page)
|
|||
* @file: The file to read
|
||||
* @page: The page of the file
|
||||
*
|
||||
* This deals with the locking required. If the GFF_EXLOCK flags is set
|
||||
* then we already hold the glock (due to page fault) and thus we call
|
||||
* __gfs2_readpage() directly. Otherwise we use a trylock in order to
|
||||
* This deals with the locking required. We use a trylock in order to
|
||||
* avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE
|
||||
* in the event that we are unable to get the lock.
|
||||
*/
|
||||
|
@ -278,12 +276,6 @@ static int gfs2_readpage(struct file *file, struct page *page)
|
|||
struct gfs2_holder gh;
|
||||
int error;
|
||||
|
||||
if (file) {
|
||||
struct gfs2_file *gf = file->private_data;
|
||||
if (test_bit(GFF_EXLOCK, &gf->f_flags))
|
||||
return __gfs2_readpage(file, page);
|
||||
}
|
||||
|
||||
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
|
||||
error = gfs2_glock_nq_atime(&gh);
|
||||
if (unlikely(error)) {
|
||||
|
@ -354,9 +346,8 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
|
|||
* 2. We don't handle stuffed files here we let readpage do the honours.
|
||||
* 3. mpage_readpages() does most of the heavy lifting in the common case.
|
||||
* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
|
||||
* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
|
||||
* well as read-ahead.
|
||||
*/
|
||||
|
||||
static int gfs2_readpages(struct file *file, struct address_space *mapping,
|
||||
struct list_head *pages, unsigned nr_pages)
|
||||
{
|
||||
|
@ -364,40 +355,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
|
|||
struct gfs2_inode *ip = GFS2_I(inode);
|
||||
struct gfs2_sbd *sdp = GFS2_SB(inode);
|
||||
struct gfs2_holder gh;
|
||||
int ret = 0;
|
||||
int do_unlock = 0;
|
||||
int ret;
|
||||
|
||||
if (file) {
|
||||
struct gfs2_file *gf = file->private_data;
|
||||
if (test_bit(GFF_EXLOCK, &gf->f_flags))
|
||||
goto skip_lock;
|
||||
}
|
||||
gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
|
||||
LM_FLAG_TRY_1CB|GL_ATIME, &gh);
|
||||
do_unlock = 1;
|
||||
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
|
||||
ret = gfs2_glock_nq_atime(&gh);
|
||||
if (ret == GLR_TRYFAILED)
|
||||
goto out_noerror;
|
||||
if (unlikely(ret))
|
||||
goto out_unlock;
|
||||
skip_lock:
|
||||
goto out_uninit;
|
||||
if (!gfs2_is_stuffed(ip))
|
||||
ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
|
||||
|
||||
if (do_unlock) {
|
||||
gfs2_glock_dq_m(1, &gh);
|
||||
gfs2_holder_uninit(&gh);
|
||||
}
|
||||
out:
|
||||
gfs2_glock_dq(&gh);
|
||||
out_uninit:
|
||||
gfs2_holder_uninit(&gh);
|
||||
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
|
||||
ret = -EIO;
|
||||
return ret;
|
||||
out_noerror:
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
if (do_unlock)
|
||||
gfs2_holder_uninit(&gh);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -33,7 +33,6 @@
|
|||
#include "lm.h"
|
||||
#include "log.h"
|
||||
#include "meta_io.h"
|
||||
#include "ops_vm.h"
|
||||
#include "quota.h"
|
||||
#include "rgrp.h"
|
||||
#include "trans.h"
|
||||
|
@ -169,7 +168,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
|
|||
if (put_user(fsflags, ptr))
|
||||
error = -EFAULT;
|
||||
|
||||
gfs2_glock_dq_m(1, &gh);
|
||||
gfs2_glock_dq(&gh);
|
||||
gfs2_holder_uninit(&gh);
|
||||
return error;
|
||||
}
|
||||
|
@ -293,6 +292,125 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|||
return -ENOTTY;
|
||||
}
|
||||
|
||||
/**
|
||||
* gfs2_allocate_page_backing - Use bmap to allocate blocks
|
||||
* @page: The (locked) page to allocate backing for
|
||||
*
|
||||
* We try to allocate all the blocks required for the page in
|
||||
* one go. This might fail for various reasons, so we keep
|
||||
* trying until all the blocks to back this page are allocated.
|
||||
* If some of the blocks are already allocated, thats ok too.
|
||||
*/
|
||||
|
||||
static int gfs2_allocate_page_backing(struct page *page)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct buffer_head bh;
|
||||
unsigned long size = PAGE_CACHE_SIZE;
|
||||
u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||
|
||||
do {
|
||||
bh.b_state = 0;
|
||||
bh.b_size = size;
|
||||
gfs2_block_map(inode, lblock, 1, &bh);
|
||||
if (!buffer_mapped(&bh))
|
||||
return -EIO;
|
||||
size -= bh.b_size;
|
||||
lblock += (bh.b_size >> inode->i_blkbits);
|
||||
} while(size > 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* gfs2_page_mkwrite - Make a shared, mmap()ed, page writable
|
||||
* @vma: The virtual memory area
|
||||
* @page: The page which is about to become writable
|
||||
*
|
||||
* When the page becomes writable, we need to ensure that we have
|
||||
* blocks allocated on disk to back that page.
|
||||
*/
|
||||
|
||||
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
|
||||
{
|
||||
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
|
||||
struct gfs2_inode *ip = GFS2_I(inode);
|
||||
struct gfs2_sbd *sdp = GFS2_SB(inode);
|
||||
unsigned long last_index;
|
||||
u64 pos = page->index << (PAGE_CACHE_SIZE - inode->i_blkbits);
|
||||
unsigned int data_blocks, ind_blocks, rblocks;
|
||||
int alloc_required = 0;
|
||||
struct gfs2_holder gh;
|
||||
struct gfs2_alloc *al;
|
||||
int ret;
|
||||
|
||||
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh);
|
||||
ret = gfs2_glock_nq_atime(&gh);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
set_bit(GIF_SW_PAGED, &ip->i_flags);
|
||||
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
|
||||
ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
|
||||
if (ret || !alloc_required)
|
||||
goto out_unlock;
|
||||
|
||||
ip->i_alloc.al_requested = 0;
|
||||
al = gfs2_alloc_get(ip);
|
||||
ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
|
||||
if (ret)
|
||||
goto out_alloc_put;
|
||||
ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
|
||||
if (ret)
|
||||
goto out_quota_unlock;
|
||||
al->al_requested = data_blocks + ind_blocks;
|
||||
ret = gfs2_inplace_reserve(ip);
|
||||
if (ret)
|
||||
goto out_quota_unlock;
|
||||
|
||||
rblocks = RES_DINODE + ind_blocks;
|
||||
if (gfs2_is_jdata(ip))
|
||||
rblocks += data_blocks ? data_blocks : 1;
|
||||
if (ind_blocks || data_blocks)
|
||||
rblocks += RES_STATFS + RES_QUOTA;
|
||||
ret = gfs2_trans_begin(sdp, rblocks, 0);
|
||||
if (ret)
|
||||
goto out_trans_fail;
|
||||
|
||||
lock_page(page);
|
||||
ret = -EINVAL;
|
||||
last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
|
||||
if (page->index > last_index)
|
||||
goto out_unlock_page;
|
||||
if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
|
||||
goto out_unlock_page;
|
||||
if (gfs2_is_stuffed(ip)) {
|
||||
ret = gfs2_unstuff_dinode(ip, page);
|
||||
if (ret)
|
||||
goto out_unlock_page;
|
||||
}
|
||||
ret = gfs2_allocate_page_backing(page);
|
||||
|
||||
out_unlock_page:
|
||||
unlock_page(page);
|
||||
gfs2_trans_end(sdp);
|
||||
out_trans_fail:
|
||||
gfs2_inplace_release(ip);
|
||||
out_quota_unlock:
|
||||
gfs2_quota_unlock(ip);
|
||||
out_alloc_put:
|
||||
gfs2_alloc_put(ip);
|
||||
out_unlock:
|
||||
gfs2_glock_dq(&gh);
|
||||
out:
|
||||
gfs2_holder_uninit(&gh);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * VM operations used for GFS2 mappings: faults are served by the generic
 * filemap_fault(), and gfs2_page_mkwrite() is the page_mkwrite hook.
 */
static struct vm_operations_struct gfs2_vm_ops = {
|
||||
.fault = filemap_fault,
|
||||
.page_mkwrite = gfs2_page_mkwrite,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* gfs2_mmap -
|
||||
|
@ -315,14 +433,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
|
|||
return error;
|
||||
}
|
||||
|
||||
/* This is VM_MAYWRITE instead of VM_WRITE because a call
|
||||
to mprotect() can turn on VM_WRITE later. */
|
||||
|
||||
if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
|
||||
(VM_MAYSHARE | VM_MAYWRITE))
|
||||
vma->vm_ops = &gfs2_vm_ops_sharewrite;
|
||||
else
|
||||
vma->vm_ops = &gfs2_vm_ops_private;
|
||||
vma->vm_ops = &gfs2_vm_ops;
|
||||
|
||||
gfs2_glock_dq_uninit(&i_gh);
|
||||
|
||||
|
|
169
fs/gfs2/ops_vm.c
169
fs/gfs2/ops_vm.c
|
@ -1,169 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This copyrighted material is made available to anyone wishing to use,
|
||||
* modify, copy, or redistribute it subject to the terms and conditions
|
||||
* of the GNU General Public License version 2.
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/completion.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/gfs2_ondisk.h>
|
||||
#include <linux/lm_interface.h>
|
||||
|
||||
#include "gfs2.h"
|
||||
#include "incore.h"
|
||||
#include "bmap.h"
|
||||
#include "glock.h"
|
||||
#include "inode.h"
|
||||
#include "ops_vm.h"
|
||||
#include "quota.h"
|
||||
#include "rgrp.h"
|
||||
#include "trans.h"
|
||||
#include "util.h"
|
||||
|
||||
static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
|
||||
|
||||
set_bit(GIF_PAGED, &ip->i_flags);
|
||||
return filemap_fault(vma, vmf);
|
||||
}
|
||||
|
||||
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
|
||||
{
|
||||
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
|
||||
unsigned long index = page->index;
|
||||
u64 lblock = index << (PAGE_CACHE_SHIFT -
|
||||
sdp->sd_sb.sb_bsize_shift);
|
||||
unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
|
||||
struct gfs2_alloc *al;
|
||||
unsigned int data_blocks, ind_blocks;
|
||||
unsigned int x;
|
||||
int error;
|
||||
|
||||
al = gfs2_alloc_get(ip);
|
||||
|
||||
error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
|
||||
if (error)
|
||||
goto out_gunlock_q;
|
||||
|
||||
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
|
||||
|
||||
al->al_requested = data_blocks + ind_blocks;
|
||||
|
||||
error = gfs2_inplace_reserve(ip);
|
||||
if (error)
|
||||
goto out_gunlock_q;
|
||||
|
||||
error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
|
||||
ind_blocks + RES_DINODE +
|
||||
RES_STATFS + RES_QUOTA, 0);
|
||||
if (error)
|
||||
goto out_ipres;
|
||||
|
||||
if (gfs2_is_stuffed(ip)) {
|
||||
error = gfs2_unstuff_dinode(ip, NULL);
|
||||
if (error)
|
||||
goto out_trans;
|
||||
}
|
||||
|
||||
for (x = 0; x < blocks; ) {
|
||||
u64 dblock;
|
||||
unsigned int extlen;
|
||||
int new = 1;
|
||||
|
||||
error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
|
||||
if (error)
|
||||
goto out_trans;
|
||||
|
||||
lblock += extlen;
|
||||
x += extlen;
|
||||
}
|
||||
|
||||
gfs2_assert_warn(sdp, al->al_alloced);
|
||||
|
||||
out_trans:
|
||||
gfs2_trans_end(sdp);
|
||||
out_ipres:
|
||||
gfs2_inplace_release(ip);
|
||||
out_gunlock_q:
|
||||
gfs2_quota_unlock(ip);
|
||||
out:
|
||||
gfs2_alloc_put(ip);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int gfs2_sharewrite_fault(struct vm_area_struct *vma,
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
struct file *file = vma->vm_file;
|
||||
struct gfs2_file *gf = file->private_data;
|
||||
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
|
||||
struct gfs2_holder i_gh;
|
||||
int alloc_required;
|
||||
int error;
|
||||
int ret = 0;
|
||||
|
||||
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
set_bit(GIF_PAGED, &ip->i_flags);
|
||||
set_bit(GIF_SW_PAGED, &ip->i_flags);
|
||||
|
||||
error = gfs2_write_alloc_required(ip,
|
||||
(u64)vmf->pgoff << PAGE_CACHE_SHIFT,
|
||||
PAGE_CACHE_SIZE, &alloc_required);
|
||||
if (error) {
|
||||
ret = VM_FAULT_OOM; /* XXX: are these right? */
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
set_bit(GFF_EXLOCK, &gf->f_flags);
|
||||
ret = filemap_fault(vma, vmf);
|
||||
clear_bit(GFF_EXLOCK, &gf->f_flags);
|
||||
if (ret & VM_FAULT_ERROR)
|
||||
goto out_unlock;
|
||||
|
||||
if (alloc_required) {
|
||||
/* XXX: do we need to drop page lock around alloc_page_backing?*/
|
||||
error = alloc_page_backing(ip, vmf->page);
|
||||
if (error) {
|
||||
/*
|
||||
* VM_FAULT_LOCKED should always be the case for
|
||||
* filemap_fault, but it may not be in a future
|
||||
* implementation.
|
||||
*/
|
||||
if (ret & VM_FAULT_LOCKED)
|
||||
unlock_page(vmf->page);
|
||||
page_cache_release(vmf->page);
|
||||
ret = VM_FAULT_OOM;
|
||||
goto out_unlock;
|
||||
}
|
||||
set_page_dirty(vmf->page);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
gfs2_glock_dq_uninit(&i_gh);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct vm_operations_struct gfs2_vm_ops_private = {
|
||||
.fault = gfs2_private_fault,
|
||||
};
|
||||
|
||||
struct vm_operations_struct gfs2_vm_ops_sharewrite = {
|
||||
.fault = gfs2_sharewrite_fault,
|
||||
};
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
|
||||
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This copyrighted material is made available to anyone wishing to use,
|
||||
* modify, copy, or redistribute it subject to the terms and conditions
|
||||
* of the GNU General Public License version 2.
|
||||
*/
|
||||
|
||||
#ifndef __OPS_VM_DOT_H__
|
||||
#define __OPS_VM_DOT_H__
|
||||
|
||||
#include <linux/mm.h>
|
||||
|
||||
extern struct vm_operations_struct gfs2_vm_ops_private;
|
||||
extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
|
||||
|
||||
#endif /* __OPS_VM_DOT_H__ */
|
Loading…
Reference in a new issue