[XFS] Start writeout earlier (on last close) in the case where we have a
truncate down followed by delayed allocation (buffered writes) - worst case scenario for the notorious NULL files problem. This reduces the window where we are exposed to that problem significantly. SGI-PV: 917976 SGI-Modid: xfs-linux-melb:xfs-kern:26100a Signed-off-by: Nathan Scott <nathans@sgi.com>
This commit is contained in:
parent
59c1b082f5
commit
7d4fb40ad7
5 changed files with 114 additions and 94 deletions
|
@ -1157,6 +1157,18 @@ xfs_vm_writepage(
|
|||
return error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_vm_writepages(
|
||||
struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct vnode *vp = vn_from_inode(mapping->host);
|
||||
|
||||
if (VN_TRUNC(vp))
|
||||
VUNTRUNCATE(vp);
|
||||
return generic_writepages(mapping, wbc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to move a page into cleanable state - and from there
|
||||
* to be released. Possibly the page is already clean. We always
|
||||
|
@ -1451,6 +1463,7 @@ struct address_space_operations xfs_address_space_operations = {
|
|||
.readpage = xfs_vm_readpage,
|
||||
.readpages = xfs_vm_readpages,
|
||||
.writepage = xfs_vm_writepage,
|
||||
.writepages = xfs_vm_writepages,
|
||||
.sync_page = block_sync_page,
|
||||
.releasepage = xfs_vm_releasepage,
|
||||
.invalidatepage = xfs_vm_invalidatepage,
|
||||
|
|
|
@ -323,6 +323,17 @@ xfs_file_open(
|
|||
return -error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_file_close(
|
||||
struct file *filp)
|
||||
{
|
||||
vnode_t *vp = vn_from_inode(filp->f_dentry->d_inode);
|
||||
int error;
|
||||
|
||||
VOP_CLOSE(vp, 0, file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL, error);
|
||||
return -error;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_file_release(
|
||||
struct inode *inode,
|
||||
|
@ -349,6 +360,8 @@ xfs_file_fsync(
|
|||
|
||||
if (datasync)
|
||||
flags |= FSYNC_DATA;
|
||||
if (VN_TRUNC(vp))
|
||||
VUNTRUNCATE(vp);
|
||||
VOP_FSYNC(vp, flags, NULL, (xfs_off_t)0, (xfs_off_t)-1, error);
|
||||
return -error;
|
||||
}
|
||||
|
@ -578,6 +591,7 @@ const struct file_operations xfs_file_operations = {
|
|||
#endif
|
||||
.mmap = xfs_file_mmap,
|
||||
.open = xfs_file_open,
|
||||
.flush = xfs_file_close,
|
||||
.release = xfs_file_release,
|
||||
.fsync = xfs_file_fsync,
|
||||
#ifdef HAVE_FOP_OPEN_EXEC
|
||||
|
@ -602,6 +616,7 @@ const struct file_operations xfs_invis_file_operations = {
|
|||
#endif
|
||||
.mmap = xfs_file_mmap,
|
||||
.open = xfs_file_open,
|
||||
.flush = xfs_file_close,
|
||||
.release = xfs_file_release,
|
||||
.fsync = xfs_file_fsync,
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
||||
* Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
|
@ -15,40 +15,12 @@
|
|||
* along with this program; if not, write the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "xfs.h"
|
||||
|
||||
/*
|
||||
* Stub for no-op vnode operations that return error status.
|
||||
*/
|
||||
int
|
||||
fs_noerr(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
int fs_noerr(void) { return 0; }
|
||||
int fs_nosys(void) { return ENOSYS; }
|
||||
void fs_noval(void) { return; }
|
||||
|
||||
/*
|
||||
* Operation unsupported under this file system.
|
||||
*/
|
||||
int
|
||||
fs_nosys(void)
|
||||
{
|
||||
return ENOSYS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stub for inactive, strategy, and read/write lock/unlock. Does nothing.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
void
|
||||
fs_noval(void)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* vnode pcache layer for vnode_tosspages.
|
||||
* 'last' parameter unused but left in for IRIX compatibility
|
||||
*/
|
||||
void
|
||||
fs_tosspages(
|
||||
bhv_desc_t *bdp,
|
||||
|
@ -63,11 +35,6 @@ fs_tosspages(
|
|||
truncate_inode_pages(ip->i_mapping, first);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* vnode pcache layer for vnode_flushinval_pages.
|
||||
* 'last' parameter unused but left in for IRIX compatibility
|
||||
*/
|
||||
void
|
||||
fs_flushinval_pages(
|
||||
bhv_desc_t *bdp,
|
||||
|
@ -79,16 +46,13 @@ fs_flushinval_pages(
|
|||
struct inode *ip = vn_to_inode(vp);
|
||||
|
||||
if (VN_CACHED(vp)) {
|
||||
if (VN_TRUNC(vp))
|
||||
VUNTRUNCATE(vp);
|
||||
filemap_write_and_wait(ip->i_mapping);
|
||||
|
||||
truncate_inode_pages(ip->i_mapping, first);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* vnode pcache layer for vnode_flush_pages.
|
||||
* 'last' parameter unused but left in for IRIX compatibility
|
||||
*/
|
||||
int
|
||||
fs_flush_pages(
|
||||
bhv_desc_t *bdp,
|
||||
|
@ -100,12 +64,13 @@ fs_flush_pages(
|
|||
vnode_t *vp = BHV_TO_VNODE(bdp);
|
||||
struct inode *ip = vn_to_inode(vp);
|
||||
|
||||
if (VN_CACHED(vp)) {
|
||||
if (VN_DIRTY(vp)) {
|
||||
if (VN_TRUNC(vp))
|
||||
VUNTRUNCATE(vp);
|
||||
filemap_fdatawrite(ip->i_mapping);
|
||||
if (flags & XFS_B_ASYNC)
|
||||
return 0;
|
||||
filemap_fdatawait(ip->i_mapping);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -56,12 +56,18 @@ typedef xfs_ino_t vnumber_t;
|
|||
typedef struct dentry vname_t;
|
||||
typedef bhv_head_t vn_bhv_head_t;
|
||||
|
||||
typedef enum vflags {
|
||||
VMODIFIED = 0x08, /* XFS inode state possibly differs */
|
||||
/* to the Linux inode state. */
|
||||
VTRUNCATED = 0x40, /* truncated down so flush-on-close */
|
||||
} vflags_t;
|
||||
|
||||
/*
|
||||
* MP locking protocols:
|
||||
* v_flag, v_vfsp VN_LOCK/VN_UNLOCK
|
||||
*/
|
||||
typedef struct vnode {
|
||||
__u32 v_flag; /* vnode flags (see below) */
|
||||
vflags_t v_flag; /* vnode flags (see above) */
|
||||
struct vfs *v_vfsp; /* ptr to containing VFS */
|
||||
vnumber_t v_number; /* in-core vnode number */
|
||||
vn_bhv_head_t v_bh; /* behavior head */
|
||||
|
@ -125,12 +131,6 @@ static inline struct inode *vn_to_inode(struct vnode *vnode)
|
|||
return &vnode->v_inode;
|
||||
}
|
||||
|
||||
/*
|
||||
* Vnode flags.
|
||||
*/
|
||||
#define VMODIFIED 0x8 /* XFS inode state possibly differs */
|
||||
/* to the Linux inode state. */
|
||||
|
||||
/*
|
||||
* Values for the VOP_RWLOCK and VOP_RWUNLOCK flags parameter.
|
||||
*/
|
||||
|
@ -162,8 +162,10 @@ typedef enum vchange {
|
|||
VCHANGE_FLAGS_IOEXCL_COUNT = 4
|
||||
} vchange_t;
|
||||
|
||||
typedef enum { L_FALSE, L_TRUE } lastclose_t;
|
||||
|
||||
typedef int (*vop_open_t)(bhv_desc_t *, struct cred *);
|
||||
typedef int (*vop_close_t)(bhv_desc_t *, int, lastclose_t, struct cred *);
|
||||
typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
|
||||
const struct iovec *, unsigned int,
|
||||
loff_t *, int, struct cred *);
|
||||
|
@ -234,6 +236,7 @@ typedef int (*vop_iflush_t)(bhv_desc_t *, int);
|
|||
typedef struct vnodeops {
|
||||
bhv_position_t vn_position; /* position within behavior chain */
|
||||
vop_open_t vop_open;
|
||||
vop_close_t vop_close;
|
||||
vop_read_t vop_read;
|
||||
vop_write_t vop_write;
|
||||
vop_sendfile_t vop_sendfile;
|
||||
|
@ -278,6 +281,10 @@ typedef struct vnodeops {
|
|||
*/
|
||||
#define _VOP_(op, vp) (*((vnodeops_t *)(vp)->v_fops)->op)
|
||||
|
||||
#define VOP_OPEN(vp, cr, rv) \
|
||||
rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
|
||||
#define VOP_CLOSE(vp, f, last, cr, rv) \
|
||||
rv = _VOP_(vop_close, vp)((vp)->v_fbhv, f, last, cr)
|
||||
#define VOP_READ(vp,file,iov,segs,offset,ioflags,cr,rv) \
|
||||
rv = _VOP_(vop_read, vp)((vp)->v_fbhv,file,iov,segs,offset,ioflags,cr)
|
||||
#define VOP_WRITE(vp,file,iov,segs,offset,ioflags,cr,rv) \
|
||||
|
@ -290,8 +297,6 @@ typedef struct vnodeops {
|
|||
rv = _VOP_(vop_splice_write, vp)((vp)->v_fbhv,f,o,pipe,cnt,fl,iofl,cr)
|
||||
#define VOP_BMAP(vp,of,sz,rw,b,n,rv) \
|
||||
rv = _VOP_(vop_bmap, vp)((vp)->v_fbhv,of,sz,rw,b,n)
|
||||
#define VOP_OPEN(vp, cr, rv) \
|
||||
rv = _VOP_(vop_open, vp)((vp)->v_fbhv, cr)
|
||||
#define VOP_GETATTR(vp, vap, f, cr, rv) \
|
||||
rv = _VOP_(vop_getattr, vp)((vp)->v_fbhv, vap, f, cr)
|
||||
#define VOP_SETATTR(vp, vap, f, cr, rv) \
|
||||
|
@ -556,8 +561,6 @@ static inline struct vnode *vn_grab(struct vnode *vp)
|
|||
*/
|
||||
#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
|
||||
#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
|
||||
#define VN_FLAGSET(vp,b) vn_flagset(vp,b)
|
||||
#define VN_FLAGCLR(vp,b) vn_flagclr(vp,b)
|
||||
|
||||
static __inline__ void vn_flagset(struct vnode *vp, uint flag)
|
||||
{
|
||||
|
@ -566,13 +569,22 @@ static __inline__ void vn_flagset(struct vnode *vp, uint flag)
|
|||
spin_unlock(&vp->v_lock);
|
||||
}
|
||||
|
||||
static __inline__ void vn_flagclr(struct vnode *vp, uint flag)
|
||||
static __inline__ uint vn_flagclr(struct vnode *vp, uint flag)
|
||||
{
|
||||
uint cleared;
|
||||
|
||||
spin_lock(&vp->v_lock);
|
||||
cleared = (vp->v_flag & flag);
|
||||
vp->v_flag &= ~flag;
|
||||
spin_unlock(&vp->v_lock);
|
||||
return cleared;
|
||||
}
|
||||
|
||||
#define VMODIFY(vp) vn_flagset(vp, VMODIFIED)
|
||||
#define VUNMODIFY(vp) vn_flagclr(vp, VMODIFIED)
|
||||
#define VTRUNCATE(vp) vn_flagset(vp, VTRUNCATED)
|
||||
#define VUNTRUNCATE(vp) vn_flagclr(vp, VTRUNCATED)
|
||||
|
||||
/*
|
||||
* Dealing with bad inodes
|
||||
*/
|
||||
|
@ -612,8 +624,7 @@ static inline void vn_atime_to_time_t(struct vnode *vp, time_t *tt)
|
|||
#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages)
|
||||
#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \
|
||||
PAGECACHE_TAG_DIRTY)
|
||||
#define VMODIFY(vp) VN_FLAGSET(vp, VMODIFIED)
|
||||
#define VUNMODIFY(vp) VN_FLAGCLR(vp, VMODIFIED)
|
||||
#define VN_TRUNC(vp) ((vp)->v_flag & VTRUNCATED)
|
||||
|
||||
/*
|
||||
* Flags to VOP_SETATTR/VOP_GETATTR.
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <linux/capability.h>
|
||||
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_types.h"
|
||||
|
@ -58,32 +56,14 @@
|
|||
#include "xfs_log_priv.h"
|
||||
#include "xfs_mac.h"
|
||||
|
||||
|
||||
/*
|
||||
* The maximum pathlen is 1024 bytes. Since the minimum file system
|
||||
* blocksize is 512 bytes, we can get a max of 2 extents back from
|
||||
* bmapi.
|
||||
*/
|
||||
#define SYMLINK_MAPS 2
|
||||
|
||||
/*
|
||||
* For xfs, we check that the file isn't too big to be opened by this kernel.
|
||||
* No other open action is required for regular files. Devices are handled
|
||||
* through the specfs file system, pipes through fifofs. Device and
|
||||
* fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
|
||||
* when a new vnode is first looked up or created.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_open(
|
||||
bhv_desc_t *bdp,
|
||||
cred_t *credp)
|
||||
{
|
||||
int mode;
|
||||
vnode_t *vp;
|
||||
xfs_inode_t *ip;
|
||||
|
||||
vp = BHV_TO_VNODE(bdp);
|
||||
ip = XFS_BHVTOI(bdp);
|
||||
vnode_t *vp = BHV_TO_VNODE(bdp);
|
||||
xfs_inode_t *ip = XFS_BHVTOI(bdp);
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return XFS_ERROR(EIO);
|
||||
|
@ -101,6 +81,36 @@ xfs_open(
|
|||
return 0;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_close(
|
||||
bhv_desc_t *bdp,
|
||||
int flags,
|
||||
lastclose_t lastclose,
|
||||
cred_t *credp)
|
||||
{
|
||||
vnode_t *vp = BHV_TO_VNODE(bdp);
|
||||
xfs_inode_t *ip = XFS_BHVTOI(bdp);
|
||||
int error = 0;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
|
||||
return XFS_ERROR(EIO);
|
||||
|
||||
if (lastclose != L_TRUE || !VN_ISREG(vp))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If we previously truncated this file and removed old data in
|
||||
* the process, we want to initiate "early" writeout on the last
|
||||
* close. This is an attempt to combat the notorious NULL files
|
||||
* problem which is particularly noticeable from a truncate down,
|
||||
* buffered (re-)write (delalloc), followed by a crash. What we
|
||||
* are effectively doing here is significantly reducing the time
|
||||
* window where we'd otherwise be exposed to that problem.
|
||||
*/
|
||||
if (VUNTRUNCATE(vp) && VN_DIRTY(vp) && ip->i_delayed_blks > 0)
|
||||
VOP_FLUSH_PAGES(vp, 0, -1, XFS_B_ASYNC, FI_NONE, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* xfs_getattr
|
||||
|
@ -665,9 +675,17 @@ xfs_setattr(
|
|||
((ip->i_d.di_nlink != 0 ||
|
||||
!(mp->m_flags & XFS_MOUNT_WSYNC))
|
||||
? 1 : 0));
|
||||
if (code) {
|
||||
if (code)
|
||||
goto abort_return;
|
||||
}
|
||||
/*
|
||||
* Truncated "down", so we're removing references
|
||||
* to old data here - if we now delay flushing for
|
||||
* a long time, we expose ourselves unduly to the
|
||||
* notorious NULL files problem. So, we mark this
|
||||
* vnode and flush it when the file is closed, and
|
||||
* do not wait the usual (long) time for writeout.
|
||||
*/
|
||||
VTRUNCATE(vp);
|
||||
}
|
||||
/*
|
||||
* Have to do this even if the file's size doesn't change.
|
||||
|
@ -936,6 +954,13 @@ xfs_access(
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* The maximum pathlen is 1024 bytes. Since the minimum file system
|
||||
* blocksize is 512 bytes, we can get a max of 2 extents back from
|
||||
* bmapi.
|
||||
*/
|
||||
#define SYMLINK_MAPS 2
|
||||
|
||||
/*
|
||||
* xfs_readlink
|
||||
*
|
||||
|
@ -1470,9 +1495,6 @@ xfs_inactive_symlink_local(
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
STATIC int
|
||||
xfs_inactive_attrs(
|
||||
xfs_inode_t *ip,
|
||||
|
@ -1531,10 +1553,10 @@ xfs_release(
|
|||
|
||||
vp = BHV_TO_VNODE(bdp);
|
||||
ip = XFS_BHVTOI(bdp);
|
||||
mp = ip->i_mount;
|
||||
|
||||
if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
|
||||
if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If this is a read-only mount, don't do this (would generate I/O) */
|
||||
if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
|
||||
|
@ -1546,8 +1568,6 @@ xfs_release(
|
|||
return 0;
|
||||
#endif
|
||||
|
||||
mp = ip->i_mount;
|
||||
|
||||
if (ip->i_d.di_nlink != 0) {
|
||||
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
|
||||
((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0 ||
|
||||
|
@ -3745,7 +3765,6 @@ xfs_inode_flush(
|
|||
return error;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
xfs_set_dmattrs (
|
||||
bhv_desc_t *bdp,
|
||||
|
@ -3786,10 +3805,6 @@ xfs_set_dmattrs (
|
|||
return error;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* xfs_reclaim
|
||||
*/
|
||||
STATIC int
|
||||
xfs_reclaim(
|
||||
bhv_desc_t *bdp)
|
||||
|
@ -4645,6 +4660,7 @@ xfs_change_file_space(
|
|||
vnodeops_t xfs_vnodeops = {
|
||||
BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
|
||||
.vop_open = xfs_open,
|
||||
.vop_close = xfs_close,
|
||||
.vop_read = xfs_read,
|
||||
#ifdef HAVE_SENDFILE
|
||||
.vop_sendfile = xfs_sendfile,
|
||||
|
|
Loading…
Reference in a new issue