Merge branch 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'osync_cleanup' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
  fsync: wait for data writeout completion before calling ->fsync
  vfs: Remove generic_osync_inode() and sync_page_range{_nolock}()
  fat: Opencode sync_page_range_nolock()
  pohmelfs: Use new syncing helper
  xfs: Convert sync_page_range() to simple filemap_write_and_wait_range()
  ocfs2: Update syncing after splicing to match generic version
  ntfs: Use new syncing helpers and update comments
  ext4: Remove syncing logic from ext4_file_write
  ext3: Remove syncing logic from ext3_file_write
  ext2: Update comment about generic_osync_inode
  vfs: Introduce new helpers for syncing after writing to O_SYNC file or IS_SYNC inode
  vfs: Rename generic_file_aio_write_nolock
  ocfs2: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  pohmelfs: Use __generic_file_aio_write instead of generic_file_aio_write_nolock
  vfs: Remove syncing from generic_file_direct_write() and generic_file_buffered_write()
  vfs: Export __generic_file_aio_write() and add some comments
  vfs: Introduce filemap_fdatawait_range
This commit is contained in:
commit
4142e0d1de
18 changed files with 204 additions and 380 deletions
|
@ -246,7 +246,7 @@ static const struct file_operations raw_fops = {
|
|||
.read = do_sync_read,
|
||||
.aio_read = generic_file_aio_read,
|
||||
.write = do_sync_write,
|
||||
.aio_write = generic_file_aio_write_nolock,
|
||||
.aio_write = blkdev_aio_write,
|
||||
.open = raw_open,
|
||||
.release= raw_release,
|
||||
.ioctl = raw_ioctl,
|
||||
|
|
|
@ -921,16 +921,16 @@ ssize_t pohmelfs_write(struct file *file, const char __user *buf,
|
|||
if (ret)
|
||||
goto err_out_unlock;
|
||||
|
||||
ret = generic_file_aio_write_nolock(&kiocb, &iov, 1, pos);
|
||||
ret = __generic_file_aio_write(&kiocb, &iov, 1, &kiocb.ki_pos);
|
||||
*ppos = kiocb.ki_pos;
|
||||
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
WARN_ON(ret < 0);
|
||||
|
||||
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
if (ret > 0) {
|
||||
ssize_t err;
|
||||
|
||||
err = sync_page_range(inode, mapping, pos, ret);
|
||||
err = generic_write_sync(file, pos, ret);
|
||||
if (err < 0)
|
||||
ret = err;
|
||||
WARN_ON(ret < 0);
|
||||
|
|
|
@ -1404,6 +1404,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
|
|||
return blkdev_ioctl(bdev, mode, cmd, arg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write data to the block device. Only intended for the block device itself
|
||||
* and the raw driver which basically is a fake block device.
|
||||
*
|
||||
* Does not take i_mutex for the write and thus is not for general purpose
|
||||
* use.
|
||||
*
|
||||
* @pos must equal iocb->ki_pos (enforced with BUG_ON). After the write, the
|
||||
* written range is handed to generic_write_sync(); an error from that call
|
||||
* replaces the return value only when the write itself returned a positive
|
||||
* byte count. Otherwise the result of __generic_file_aio_write() is returned
|
||||
* unchanged.
|
||||
*/
|
||||
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
ssize_t ret;
|
||||
|
||||
/* The explicit offset and the kiocb's own position must agree. */
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
/*
* NOTE(review): when ret is -EIOCBQUEUED the negative value is passed as the
* byte count to generic_write_sync() below -- confirm the resulting range is
* what is intended for queued requests.
*/
if (ret > 0 || ret == -EIOCBQUEUED) {
|
||||
ssize_t err;
|
||||
|
||||
err = generic_write_sync(file, pos, ret);
|
||||
/* Keep -EIOCBQUEUED as-is; only a positive byte count is replaced by the sync error. */
if (err < 0 && ret > 0)
|
||||
ret = err;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkdev_aio_write);
|
||||
|
||||
/*
|
||||
* Try to release a page associated with block device when the system
|
||||
* is under memory pressure.
|
||||
|
@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
|
|||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
.aio_read = generic_file_aio_read,
|
||||
.aio_write = generic_file_aio_write_nolock,
|
||||
.aio_write = blkdev_aio_write,
|
||||
.mmap = generic_file_mmap,
|
||||
.fsync = block_fsync,
|
||||
.unlocked_ioctl = block_ioctl,
|
||||
|
|
|
@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
|
|||
unlock_buffer(bh);
|
||||
mark_buffer_dirty_inode(bh, inode);
|
||||
/* We used to sync bh here if IS_SYNC(inode).
|
||||
* But we now rely upon generic_osync_inode()
|
||||
* But we now rely upon generic_write_sync()
|
||||
* and b_inode_buffers. But not for directories.
|
||||
*/
|
||||
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
|
||||
|
|
|
@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
ssize_t ret;
|
||||
int err;
|
||||
|
||||
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
|
||||
|
||||
/*
|
||||
* Skip flushing if there was an error, or if nothing was written.
|
||||
*/
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* If the inode is IS_SYNC, or is O_SYNC and we are doing data
|
||||
* journalling then we need to make sure that we force the transaction
|
||||
* to disk to keep all metadata uptodate synchronously.
|
||||
*/
|
||||
if (file->f_flags & O_SYNC) {
|
||||
/*
|
||||
* If we are non-data-journaled, then the dirty data has
|
||||
* already been flushed to backing store by generic_osync_inode,
|
||||
* and the inode has been flushed too if there have been any
|
||||
* modifications other than mere timestamp updates.
|
||||
*
|
||||
* Open question --- do we care about flushing timestamps too
|
||||
* if the inode is IS_SYNC?
|
||||
*/
|
||||
if (!ext3_should_journal_data(inode))
|
||||
return ret;
|
||||
|
||||
goto force_commit;
|
||||
}
|
||||
|
||||
/*
|
||||
* So we know that there has been no forced data flush. If the inode
|
||||
* is marked IS_SYNC, we need to force one ourselves.
|
||||
*/
|
||||
if (!IS_SYNC(inode))
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Open question #2 --- should we force data to disk here too? If we
|
||||
* don't, the only impact is that data=writeback filesystems won't
|
||||
* flush data to disk automatically on IS_SYNC, only metadata (but
|
||||
* historically, that is what ext2 has done.)
|
||||
*/
|
||||
|
||||
force_commit:
|
||||
err = ext3_force_commit(inode->i_sb);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct file_operations ext3_file_operations = {
|
||||
.llseek = generic_file_llseek,
|
||||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
.aio_read = generic_file_aio_read,
|
||||
.aio_write = ext3_file_write,
|
||||
.aio_write = generic_file_aio_write,
|
||||
.unlocked_ioctl = ext3_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = ext3_compat_ioctl,
|
||||
|
|
|
@ -58,10 +58,7 @@ static ssize_t
|
|||
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_path.dentry->d_inode;
|
||||
ssize_t ret;
|
||||
int err;
|
||||
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
|
||||
|
||||
/*
|
||||
* If we have encountered a bitmap-format file, the size limit
|
||||
|
@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
|
|||
}
|
||||
}
|
||||
|
||||
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
|
||||
/*
|
||||
* Skip flushing if there was an error, or if nothing was written.
|
||||
*/
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* If the inode is IS_SYNC, or is O_SYNC and we are doing data
|
||||
* journalling then we need to make sure that we force the transaction
|
||||
* to disk to keep all metadata uptodate synchronously.
|
||||
*/
|
||||
if (file->f_flags & O_SYNC) {
|
||||
/*
|
||||
* If we are non-data-journaled, then the dirty data has
|
||||
* already been flushed to backing store by generic_osync_inode,
|
||||
* and the inode has been flushed too if there have been any
|
||||
* modifications other than mere timestamp updates.
|
||||
*
|
||||
* Open question --- do we care about flushing timestamps too
|
||||
* if the inode is IS_SYNC?
|
||||
*/
|
||||
if (!ext4_should_journal_data(inode))
|
||||
return ret;
|
||||
|
||||
goto force_commit;
|
||||
}
|
||||
|
||||
/*
|
||||
* So we know that there has been no forced data flush. If the inode
|
||||
* is marked IS_SYNC, we need to force one ourselves.
|
||||
*/
|
||||
if (!IS_SYNC(inode))
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Open question #2 --- should we force data to disk here too? If we
|
||||
* don't, the only impact is that data=writeback filesystems won't
|
||||
* flush data to disk automatically on IS_SYNC, only metadata (but
|
||||
* historically, that is what ext2 has done.)
|
||||
*/
|
||||
|
||||
force_commit:
|
||||
err = ext4_force_commit(inode->i_sb);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
return generic_file_aio_write(iocb, iov, nr_segs, pos);
|
||||
}
|
||||
|
||||
static struct vm_operations_struct ext4_file_vm_ops = {
|
||||
|
|
|
@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
|
|||
|
||||
inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
|
||||
mark_inode_dirty(inode);
|
||||
if (IS_SYNC(inode))
|
||||
err = sync_page_range_nolock(inode, mapping, start, count);
|
||||
if (IS_SYNC(inode)) {
|
||||
int err2;
|
||||
|
||||
/*
|
||||
* Opencode syncing since we don't have a file open to use
|
||||
* standard fsync path.
|
||||
*/
|
||||
err = filemap_fdatawrite_range(mapping, start,
|
||||
start + count - 1);
|
||||
err2 = sync_mapping_buffers(mapping);
|
||||
if (!err)
|
||||
err = err2;
|
||||
err2 = write_inode_now(inode, 1);
|
||||
if (!err)
|
||||
err = err2;
|
||||
if (!err) {
|
||||
err = filemap_fdatawait_range(mapping, start,
|
||||
start + count - 1);
|
||||
}
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
|
|
@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
|
|||
MSDOS_I(inode)->i_start = new_dclus;
|
||||
MSDOS_I(inode)->i_logstart = new_dclus;
|
||||
/*
|
||||
* Since generic_osync_inode() synchronize later if
|
||||
* this is not directory, we don't here.
|
||||
* Since generic_write_sync() synchronizes regular files later,
|
||||
* we sync here only directories.
|
||||
*/
|
||||
if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
|
||||
ret = fat_sync_inode(inode);
|
||||
|
|
|
@ -1242,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
|
|||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(sync_inode);
|
||||
|
||||
/**
|
||||
* generic_osync_inode - flush all dirty data for a given inode to disk
|
||||
* @inode: inode to write
|
||||
* @mapping: the address_space that should be flushed
|
||||
* @what: what to write and wait upon
|
||||
*
|
||||
* This can be called by file_write functions for files which have the
|
||||
* O_SYNC flag set, to flush dirty writes to disk.
|
||||
*
|
||||
* @what is a bitmask, specifying which part of the inode's data should be
|
||||
* written and waited upon.
|
||||
*
|
||||
* OSYNC_DATA: i_mapping's dirty data
|
||||
* OSYNC_METADATA: the buffers at i_mapping->private_list
|
||||
* OSYNC_INODE: the inode itself
|
||||
*/
|
||||
|
||||
int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
|
||||
{
|
||||
int err = 0;
|
||||
int need_write_inode_now = 0;
|
||||
int err2;
|
||||
|
||||
if (what & OSYNC_DATA)
|
||||
err = filemap_fdatawrite(mapping);
|
||||
if (what & (OSYNC_METADATA|OSYNC_DATA)) {
|
||||
err2 = sync_mapping_buffers(mapping);
|
||||
if (!err)
|
||||
err = err2;
|
||||
}
|
||||
if (what & OSYNC_DATA) {
|
||||
err2 = filemap_fdatawait(mapping);
|
||||
if (!err)
|
||||
err = err2;
|
||||
}
|
||||
|
||||
spin_lock(&inode_lock);
|
||||
if ((inode->i_state & I_DIRTY) &&
|
||||
((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
|
||||
need_write_inode_now = 1;
|
||||
spin_unlock(&inode_lock);
|
||||
|
||||
if (need_write_inode_now) {
|
||||
err2 = write_inode_now(inode, 1);
|
||||
if (!err)
|
||||
err = err2;
|
||||
}
|
||||
else
|
||||
inode_sync_wait(inode);
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL(generic_osync_inode);
|
||||
|
|
|
@ -2076,14 +2076,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
|
|||
*ppos = pos;
|
||||
if (cached_page)
|
||||
page_cache_release(cached_page);
|
||||
/* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
|
||||
if (likely(!status)) {
|
||||
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
|
||||
if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
|
||||
status = generic_osync_inode(vi, mapping,
|
||||
OSYNC_METADATA|OSYNC_DATA);
|
||||
}
|
||||
}
|
||||
pagevec_lru_add_file(&lru_pvec);
|
||||
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
|
||||
written ? "written" : "status", (unsigned long)written,
|
||||
|
@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
|||
mutex_lock(&inode->i_mutex);
|
||||
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
int err = sync_page_range(inode, mapping, pos, ret);
|
||||
if (ret > 0) {
|
||||
int err = generic_write_sync(file, pos, ret);
|
||||
if (err < 0)
|
||||
ret = err;
|
||||
}
|
||||
|
@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
|
|||
if (ret == -EIOCBQUEUED)
|
||||
ret = wait_on_sync_kiocb(&kiocb);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
int err = sync_page_range(inode, mapping, *ppos - ret, ret);
|
||||
if (ret > 0) {
|
||||
int err = generic_write_sync(file, *ppos - ret, ret);
|
||||
if (err < 0)
|
||||
ret = err;
|
||||
}
|
||||
|
|
|
@ -384,13 +384,12 @@ MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref,
|
|||
* it is dirty in the inode meta data rather than the data page cache of the
|
||||
* inode, and thus there are no data pages that need writing out. Therefore, a
|
||||
* full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
|
||||
* other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to
|
||||
* ensure ->write_inode is called from generic_osync_inode() and this needs to
|
||||
* happen or the file data would not necessarily hit the device synchronously,
|
||||
* even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC
|
||||
* simply "feels" better than just I_DIRTY_SYNC, since the file data has not
|
||||
* actually hit the block device yet, which is not what I_DIRTY_SYNC on its own
|
||||
* would suggest.
|
||||
* other hand, is not sufficient, because ->write_inode needs to be called even
|
||||
* in case of fdatasync. This needs to happen or the file data would not
|
||||
* necessarily hit the device synchronously, even though the vfs inode has the
|
||||
* O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
|
||||
* I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
|
||||
* which is not what I_DIRTY_SYNC on its own would suggest.
|
||||
*/
|
||||
void __mark_mft_record_dirty(ntfs_inode *ni)
|
||||
{
|
||||
|
|
|
@ -1871,8 +1871,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
|
|||
goto out_dio;
|
||||
}
|
||||
} else {
|
||||
written = generic_file_aio_write_nolock(iocb, iov, nr_segs,
|
||||
*ppos);
|
||||
written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
|
||||
}
|
||||
|
||||
out_dio:
|
||||
|
@ -1880,18 +1879,21 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
|
|||
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
|
||||
|
||||
if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
|
||||
/*
|
||||
* The generic write paths have handled getting data
|
||||
* to disk, but since we don't make use of the dirty
|
||||
* inode list, a manual journal commit is necessary
|
||||
* here.
|
||||
*/
|
||||
if (old_size != i_size_read(inode) ||
|
||||
old_clusters != OCFS2_I(inode)->ip_clusters) {
|
||||
ret = filemap_fdatawrite_range(file->f_mapping, pos,
|
||||
pos + count - 1);
|
||||
if (ret < 0)
|
||||
written = ret;
|
||||
|
||||
if (!ret && (old_size != i_size_read(inode) ||
|
||||
old_clusters != OCFS2_I(inode)->ip_clusters)) {
|
||||
ret = jbd2_journal_force_commit(osb->journal->j_journal);
|
||||
if (ret < 0)
|
||||
written = ret;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = filemap_fdatawait_range(file->f_mapping, pos,
|
||||
pos + count - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
|
|||
|
||||
if (ret > 0) {
|
||||
unsigned long nr_pages;
|
||||
int err;
|
||||
|
||||
*ppos += ret;
|
||||
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
|
||||
/*
|
||||
* If file or inode is SYNC and we actually wrote some data,
|
||||
* sync it.
|
||||
*/
|
||||
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
int err;
|
||||
err = generic_write_sync(out, *ppos, ret);
|
||||
if (err)
|
||||
ret = err;
|
||||
else
|
||||
*ppos += ret;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
err = ocfs2_rw_lock(inode, 1);
|
||||
if (err < 0) {
|
||||
mlog_errno(err);
|
||||
} else {
|
||||
err = generic_osync_inode(inode, mapping,
|
||||
OSYNC_METADATA|OSYNC_DATA);
|
||||
ocfs2_rw_unlock(inode, 1);
|
||||
}
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
if (err)
|
||||
ret = err;
|
||||
}
|
||||
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
|
||||
}
|
||||
|
||||
|
|
22
fs/splice.c
22
fs/splice.c
|
@ -976,25 +976,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
|
|||
|
||||
if (ret > 0) {
|
||||
unsigned long nr_pages;
|
||||
int err;
|
||||
|
||||
*ppos += ret;
|
||||
nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
|
||||
|
||||
/*
|
||||
* If file or inode is SYNC and we actually wrote some data,
|
||||
* sync it.
|
||||
*/
|
||||
if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
int err;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
err = generic_osync_inode(inode, mapping,
|
||||
OSYNC_METADATA|OSYNC_DATA);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
if (err)
|
||||
ret = err;
|
||||
}
|
||||
err = generic_write_sync(out, *ppos, ret);
|
||||
if (err)
|
||||
ret = err;
|
||||
else
|
||||
*ppos += ret;
|
||||
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
|
||||
}
|
||||
|
||||
|
|
56
fs/sync.c
56
fs/sync.c
|
@ -178,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
|
|||
}
|
||||
|
||||
/**
|
||||
* vfs_fsync - perform a fsync or fdatasync on a file
|
||||
* vfs_fsync_range - helper to sync a range of data & metadata to disk
|
||||
* @file: file to sync
|
||||
* @dentry: dentry of @file
|
||||
* @data: only perform a fdatasync operation
|
||||
* @start: offset in bytes of the beginning of data range to sync
|
||||
* @end: offset in bytes of the end of data range (inclusive)
|
||||
* @datasync: perform only datasync
|
||||
*
|
||||
* Write back data and metadata for @file to disk. If @datasync is
|
||||
* set only metadata needed to access modified file data is written.
|
||||
* Write back data in range @start..@end and metadata for @file to disk. If
|
||||
* @datasync is set only metadata needed to access modified file data is
|
||||
* written.
|
||||
*
|
||||
* In case this function is called from nfsd @file may be %NULL and
|
||||
* only @dentry is set. This can only happen when the filesystem
|
||||
* implements the export_operations API.
|
||||
*/
|
||||
int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
|
||||
int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
|
||||
loff_t end, int datasync)
|
||||
{
|
||||
const struct file_operations *fop;
|
||||
struct address_space *mapping;
|
||||
|
@ -214,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
|
|||
goto out;
|
||||
}
|
||||
|
||||
ret = filemap_fdatawrite(mapping);
|
||||
ret = filemap_write_and_wait_range(mapping, start, end);
|
||||
|
||||
/*
|
||||
* We need to protect against concurrent writers, which could cause
|
||||
|
@ -225,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
|
|||
if (!ret)
|
||||
ret = err;
|
||||
mutex_unlock(&mapping->host->i_mutex);
|
||||
err = filemap_fdatawait(mapping);
|
||||
if (!ret)
|
||||
ret = err;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(vfs_fsync_range);
|
||||
|
||||
/**
|
||||
* vfs_fsync - perform a fsync or fdatasync on a file
|
||||
* @file: file to sync
|
||||
* @dentry: dentry of @file
|
||||
* @datasync: only perform a fdatasync operation
|
||||
*
|
||||
* Write back data and metadata for @file to disk. If @datasync is
|
||||
* set only metadata needed to access modified file data is written.
|
||||
*
|
||||
* In case this function is called from nfsd @file may be %NULL and
|
||||
* only @dentry is set. This can only happen when the filesystem
|
||||
* implements the export_operations API.
|
||||
*/
|
||||
/* Whole-file variant: forwards to vfs_fsync_range() with the range 0..LLONG_MAX. */
int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
|
||||
{
|
||||
return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
|
||||
}
|
||||
EXPORT_SYMBOL(vfs_fsync);
|
||||
|
||||
static int do_fsync(unsigned int fd, int datasync)
|
||||
|
@ -256,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
|
|||
return do_fsync(fd, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* generic_write_sync - perform syncing after a write if file / inode is sync
|
||||
* @file: file to which the write happened
|
||||
* @pos: offset where the write started
|
||||
* @count: length of the write
|
||||
*
|
||||
* This is just a simple wrapper around our general syncing function.
|
||||
*
|
||||
* Returns 0 without syncing when @file does not have O_SYNC set and its
|
||||
* inode is not IS_SYNC. Otherwise returns the result of vfs_fsync_range()
|
||||
* over the inclusive byte range @pos .. @pos + @count - 1, called with the
|
||||
* datasync argument set to 1.
|
||||
*/
|
||||
int generic_write_sync(struct file *file, loff_t pos, loff_t count)
|
||||
{
|
||||
/* Nothing to do unless the open file or the inode asks for synchronous writes. */
if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
|
||||
return 0;
|
||||
/* The end offset is inclusive, hence the "- 1". */
return vfs_fsync_range(file, file->f_path.dentry, pos,
|
||||
pos + count - 1, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(generic_write_sync);
|
||||
|
||||
/*
|
||||
* sys_sync_file_range() permits finely controlled syncing over a segment of
|
||||
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
|
||||
|
|
|
@ -817,7 +817,8 @@ xfs_write(
|
|||
xfs_iunlock(xip, iolock);
|
||||
if (need_i_mutex)
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
error2 = sync_page_range(inode, mapping, pos, ret);
|
||||
error2 = filemap_write_and_wait_range(mapping, pos,
|
||||
pos + ret - 1);
|
||||
if (!error)
|
||||
error = error2;
|
||||
if (need_i_mutex)
|
||||
|
|
|
@ -1455,11 +1455,6 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
|
|||
#define DT_SOCK 12
|
||||
#define DT_WHT 14
|
||||
|
||||
#define OSYNC_METADATA (1<<0)
|
||||
#define OSYNC_DATA (1<<1)
|
||||
#define OSYNC_INODE (1<<2)
|
||||
int generic_osync_inode(struct inode *, struct address_space *, int);
|
||||
|
||||
/*
|
||||
* This is the "filldir" function type, used by readdir() to let
|
||||
* the kernel specify what kind of dirent layout it wants to have.
|
||||
|
@ -2086,6 +2081,8 @@ extern int write_inode_now(struct inode *, int);
|
|||
extern int filemap_fdatawrite(struct address_space *);
|
||||
extern int filemap_flush(struct address_space *);
|
||||
extern int filemap_fdatawait(struct address_space *);
|
||||
extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
|
||||
loff_t lend);
|
||||
extern int filemap_write_and_wait(struct address_space *mapping);
|
||||
extern int filemap_write_and_wait_range(struct address_space *mapping,
|
||||
loff_t lstart, loff_t lend);
|
||||
|
@ -2096,7 +2093,10 @@ extern int __filemap_fdatawrite_range(struct address_space *mapping,
|
|||
extern int filemap_fdatawrite_range(struct address_space *mapping,
|
||||
loff_t start, loff_t end);
|
||||
|
||||
extern int vfs_fsync_range(struct file *file, struct dentry *dentry,
|
||||
loff_t start, loff_t end, int datasync);
|
||||
extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
|
||||
extern int generic_write_sync(struct file *file, loff_t pos, loff_t count);
|
||||
extern void sync_supers(void);
|
||||
extern void emergency_sync(void);
|
||||
extern void emergency_remount(void);
|
||||
|
@ -2202,9 +2202,9 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
|
|||
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
|
||||
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
|
||||
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
|
||||
extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
|
||||
loff_t *);
|
||||
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
|
||||
extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *,
|
||||
unsigned long, loff_t);
|
||||
extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
|
||||
unsigned long *, loff_t, loff_t *, size_t, size_t);
|
||||
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
|
||||
|
@ -2214,6 +2214,10 @@ extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t l
|
|||
extern int generic_segment_checks(const struct iovec *iov,
|
||||
unsigned long *nr_segs, size_t *count, int access_flags);
|
||||
|
||||
/* fs/block_dev.c */
|
||||
extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos);
|
||||
|
||||
/* fs/splice.c */
|
||||
extern ssize_t generic_file_splice_read(struct file *, loff_t *,
|
||||
struct pipe_inode_info *, size_t, unsigned int);
|
||||
|
|
|
@ -150,10 +150,6 @@ int write_cache_pages(struct address_space *mapping,
|
|||
struct writeback_control *wbc, writepage_t writepage,
|
||||
void *data);
|
||||
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
|
||||
int sync_page_range(struct inode *inode, struct address_space *mapping,
|
||||
loff_t pos, loff_t count);
|
||||
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
|
||||
loff_t pos, loff_t count);
|
||||
void set_page_dirty_balance(struct page *page, int page_mkwrite);
|
||||
void writeback_set_ratelimit(void);
|
||||
|
||||
|
|
170
mm/filemap.c
170
mm/filemap.c
|
@ -39,11 +39,10 @@
|
|||
/*
|
||||
* FIXME: remove all knowledge of the buffer layer from the core VM
|
||||
*/
|
||||
#include <linux/buffer_head.h> /* for generic_osync_inode */
|
||||
#include <linux/buffer_head.h> /* for try_to_free_buffers */
|
||||
|
||||
#include <asm/mman.h>
|
||||
|
||||
|
||||
/*
|
||||
* Shared mappings implemented 30.11.1994. It's not fully working yet,
|
||||
* though.
|
||||
|
@ -307,68 +306,24 @@ int wait_on_page_writeback_range(struct address_space *mapping,
|
|||
}
|
||||
|
||||
/**
|
||||
* sync_page_range - write and wait on all pages in the passed range
|
||||
* @inode: target inode
|
||||
* @mapping: target address_space
|
||||
* @pos: beginning offset in pages to write
|
||||
* @count: number of bytes to write
|
||||
* filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
|
||||
* @mapping: address space structure to wait for
|
||||
* @start: offset in bytes where the range starts
|
||||
* @end: offset in bytes where the range ends (inclusive)
|
||||
*
|
||||
* Write and wait upon all the pages in the passed range. This is a "data
|
||||
* integrity" operation. It waits upon in-flight writeout before starting and
|
||||
* waiting upon new writeout. If there was an IO error, return it.
|
||||
* Walk the list of under-writeback pages of the given address space
|
||||
* in the given range and wait for all of them.
|
||||
*
|
||||
* We need to re-take i_mutex during the generic_osync_inode list walk because
|
||||
* it is otherwise livelockable.
|
||||
* This is just a simple wrapper so that callers don't have to convert offsets
|
||||
* to page indexes themselves
|
||||
*/
|
||||
int sync_page_range(struct inode *inode, struct address_space *mapping,
|
||||
loff_t pos, loff_t count)
|
||||
int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
|
||||
loff_t end)
|
||||
{
|
||||
pgoff_t start = pos >> PAGE_CACHE_SHIFT;
|
||||
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
|
||||
int ret;
|
||||
|
||||
if (!mapping_cap_writeback_dirty(mapping) || !count)
|
||||
return 0;
|
||||
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
|
||||
if (ret == 0) {
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
if (ret == 0)
|
||||
ret = wait_on_page_writeback_range(mapping, start, end);
|
||||
return ret;
|
||||
return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
|
||||
end >> PAGE_CACHE_SHIFT);
|
||||
}
|
||||
EXPORT_SYMBOL(sync_page_range);
|
||||
|
||||
/**
|
||||
* sync_page_range_nolock - write & wait on all pages in the passed range without locking
|
||||
* @inode: target inode
|
||||
* @mapping: target address_space
|
||||
* @pos: beginning offset in pages to write
|
||||
* @count: number of bytes to write
|
||||
*
|
||||
* Note: Holding i_mutex across sync_page_range_nolock() is not a good idea
|
||||
* as it forces O_SYNC writers to different parts of the same file
|
||||
* to be serialised right until io completion.
|
||||
*/
|
||||
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
|
||||
loff_t pos, loff_t count)
|
||||
{
|
||||
pgoff_t start = pos >> PAGE_CACHE_SHIFT;
|
||||
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
|
||||
int ret;
|
||||
|
||||
if (!mapping_cap_writeback_dirty(mapping) || !count)
|
||||
return 0;
|
||||
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
|
||||
if (ret == 0)
|
||||
ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
|
||||
if (ret == 0)
|
||||
ret = wait_on_page_writeback_range(mapping, start, end);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(sync_page_range_nolock);
|
||||
EXPORT_SYMBOL(filemap_fdatawait_range);
|
||||
|
||||
/**
|
||||
* filemap_fdatawait - wait for all under-writeback pages to complete
|
||||
|
@ -2167,20 +2122,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
|
|||
}
|
||||
*ppos = end;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sync the fs metadata but not the minor inode changes and
|
||||
* of course not the data as we did direct DMA for the IO.
|
||||
* i_mutex is held, which protects generic_osync_inode() from
|
||||
* livelocking. AIO O_DIRECT ops attempt to sync metadata here.
|
||||
*/
|
||||
out:
|
||||
if ((written >= 0 || written == -EIOCBQUEUED) &&
|
||||
((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
|
||||
if (err < 0)
|
||||
written = err;
|
||||
}
|
||||
return written;
|
||||
}
|
||||
EXPORT_SYMBOL(generic_file_direct_write);
|
||||
|
@ -2312,8 +2254,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
|||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
const struct address_space_operations *a_ops = mapping->a_ops;
|
||||
struct inode *inode = mapping->host;
|
||||
ssize_t status;
|
||||
struct iov_iter i;
|
||||
|
||||
|
@ -2323,16 +2263,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
|||
if (likely(status >= 0)) {
|
||||
written += status;
|
||||
*ppos = pos + status;
|
||||
|
||||
/*
|
||||
* For now, when the user asks for O_SYNC, we'll actually give
|
||||
* O_DSYNC
|
||||
*/
|
||||
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
if (!a_ops->writepage || !is_sync_kiocb(iocb))
|
||||
status = generic_osync_inode(inode, mapping,
|
||||
OSYNC_METADATA|OSYNC_DATA);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2348,9 +2278,27 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
|||
}
|
||||
EXPORT_SYMBOL(generic_file_buffered_write);
|
||||
|
||||
static ssize_t
|
||||
__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t *ppos)
|
||||
/**
|
||||
* __generic_file_aio_write - write data to a file
|
||||
* @iocb: IO state structure (file, offset, etc.)
|
||||
* @iov: vector with data to write
|
||||
* @nr_segs: number of segments in the vector
|
||||
* @ppos: position where to write
|
||||
*
|
||||
* This function does all the work needed for actually writing data to a
|
||||
* file. It does all basic checks, removes SUID from the file, updates
|
||||
* modification times and calls proper subroutines depending on whether we
|
||||
* do direct IO or a standard buffered write.
|
||||
*
|
||||
* It expects i_mutex to be grabbed unless we work on a block device or similar
|
||||
* object which does not need locking at all.
|
||||
*
|
||||
* This function does *not* take care of syncing data in case of O_SYNC write.
|
||||
* A caller has to handle it. This is mainly due to the fact that we want to
|
||||
* avoid syncing under i_mutex.
|
||||
*/
|
||||
ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t *ppos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space * mapping = file->f_mapping;
|
||||
|
@ -2447,51 +2395,37 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
|
|||
current->backing_dev_info = NULL;
|
||||
return written ? written : err;
|
||||
}
|
||||
EXPORT_SYMBOL(__generic_file_aio_write);
|
||||
|
||||
/*
 * generic_file_aio_write_nolock - write to a file, then sync the written
 * range if the file or inode is marked synchronous.
 *
 * @pos must equal iocb->ki_pos (enforced with BUG_ON).  The write itself
 * is delegated to __generic_file_aio_write_nolock(), which is handed
 * &iocb->ki_pos as its position pointer.  This wrapper takes no lock
 * itself.
 *
 * Returns the value from __generic_file_aio_write_nolock(), unless a
 * positive result was followed by a failing sync, in which case the
 * negative sync error is returned instead.
 */
ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
|
||||
const struct iovec *iov, unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
ssize_t ret;
|
||||
|
||||
/* Caller's @pos and the iocb's own position must agree. */
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
|
||||
&iocb->ki_pos);
|
||||
|
||||
/* Sync only when something was written and O_SYNC / IS_SYNC applies. */
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
ssize_t err;
|
||||
|
||||
/* Range is the original @pos plus the @ret bytes just written. */
err = sync_page_range_nolock(inode, mapping, pos, ret);
|
||||
if (err < 0)
|
||||
/* A sync failure replaces the byte count as the return value. */
ret = err;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(generic_file_aio_write_nolock);
|
||||
|
||||
/**
|
||||
* generic_file_aio_write - write data to a file
|
||||
* @iocb: IO state structure
|
||||
* @iov: vector with data to write
|
||||
* @nr_segs: number of segments in the vector
|
||||
* @pos: position in file where to write
|
||||
*
|
||||
* This is a wrapper around __generic_file_aio_write() to be used by most
|
||||
* filesystems. It takes care of syncing the file in case of O_SYNC file
|
||||
* and acquires i_mutex as needed.
|
||||
*/
|
||||
ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
struct inode *inode = mapping->host;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
ssize_t ret;
|
||||
|
||||
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
|
||||
&iocb->ki_pos);
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
|
||||
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
|
||||
if (ret > 0 || ret == -EIOCBQUEUED) {
|
||||
ssize_t err;
|
||||
|
||||
err = sync_page_range(inode, mapping, pos, ret);
|
||||
if (err < 0)
|
||||
err = generic_write_sync(file, pos, ret);
|
||||
if (err < 0 && ret > 0)
|
||||
ret = err;
|
||||
}
|
||||
return ret;
|
||||
|
|
Loading…
Reference in a new issue