xfs: fixes for v3.17-rc3

Fix:
 - a direct IO read/buffered read data corruption
 - the associated fallout from the DIO data corruption fix
 - collapse range bugs that are potential data corruption issues.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABAgAGBQJUCkM+AAoJEK3oKUf0dfodUBgP+gJu50/XV4TFRLPlCRxhvN61
 371i3ASls1y7ivhj40NzgbDDAZHM2q8Zqwd//318dFViHhWQDlH/1ga06kscRpZX
 d8cQEbFHApgUGQL5Gdq2l2hvAzYa75H0m6cq3jveyrN2rscjCSmAXwtlcEmx3AR6
 TnCpxuVL5asjEGZYb0KfQACq//rASHJbhukpo1gB4ccZ0boWHOVf5SxuS4remzs9
 y+rlPFNl5RD/WVdnJSvu9zu/nP6op3Ax5r7jZanoKbisKHfd7QOa+k65+Vz0Vq9G
 kxgfhz+yLfkOvcktq+41e1lVBln7fCIlcO9m3b53uxWPx5cla323893UiGYsA4F/
 j/gGlh1qaT6C/1M1JBWDLDx931S78XiR1Y+WbtAU1PO+GuO0IEap3+iqtS2+oNAv
 OrpThLOgqTspK6MJeToCzdn2lRT2BJpcKwxIyDK8g+p9N6qCpyw3DfiKyu0wipGH
 D2D3mtE6drSHNaSceFAz8CrQvPOR7Ygj92QGpGSfkohxap9h6VJR/wNp/oGnpmN0
 qgcxTrHvx3kw1hXssB4gjh6fBDnOUkac0isqxdow22Qt529t9sIanzMBvz+JxHQF
 zeqeFSh96lOXmB7UFBU+QyOhbDp3cJWChHrtY3Esw/+FmG6fxEy8z6pdZsiJYELr
 5tka2richPD+gyXzcZwP
 =tRQo
 -----END PGP SIGNATURE-----

Merge tag 'xfs-for-linus-3.17-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
 "The fixes all address recently discovered data corruption issues.

  The original Direct IO issue was discovered by Chris Mason @ Facebook
  on a production workload which mixed buffered reads with direct reads
  and writes IO to the same file.  The fix for that exposed other issues
  with page invalidation (exposed by millions of fsx operations) failing
  due to dirty buffers beyond EOF.

  Finally, the collapse_range code could also cause problems due to
  racing writeback changing the extent map while it was being shifted
  around.  The commits for that problem are simple mitigation fixes that
  prevent the problem from occuring.  A more robust fix for 3.18 that
  addresses the underlying problem is currently being worked on by
  Brian.

  Summary of fixes:
   - a direct IO read/buffered read data corruption
   - the associated fallout from the DIO data corruption fix
   - collapse range bugs that are potential data corruption issues"

* tag 'xfs-for-linus-3.17-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: trim eofblocks before collapse range
  xfs: xfs_file_collapse_range is delalloc challenged
  xfs: don't log inode unless extent shift makes extent modifications
  xfs: use ranged writeback and invalidation for direct IO
  xfs: don't zero partial page cache pages during O_DIRECT writes
  xfs: don't zero partial page cache pages during O_DIRECT writes
  xfs: don't dirty buffers beyond EOF
This commit is contained in:
Linus Torvalds 2014-09-06 12:13:17 -07:00
commit 11e9739813
4 changed files with 114 additions and 12 deletions

View file

@ -5424,7 +5424,7 @@ xfs_bmap_shift_extents(
struct xfs_bmap_free *flist,
int num_exts)
{
struct xfs_btree_cur *cur;
struct xfs_btree_cur *cur = NULL;
struct xfs_bmbt_rec_host *gotp;
struct xfs_bmbt_irec got;
struct xfs_bmbt_irec left;
@ -5435,7 +5435,7 @@ xfs_bmap_shift_extents(
int error = 0;
int i;
int whichfork = XFS_DATA_FORK;
int logflags;
int logflags = 0;
xfs_filblks_t blockcount = 0;
int total_extents;
@ -5478,16 +5478,11 @@ xfs_bmap_shift_extents(
}
}
/* We are going to change core inode */
logflags = XFS_ILOG_CORE;
if (ifp->if_flags & XFS_IFBROOT) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
cur->bc_private.b.firstblock = *firstblock;
cur->bc_private.b.flist = flist;
cur->bc_private.b.flags = 0;
} else {
cur = NULL;
logflags |= XFS_ILOG_DEXT;
}
/*
@ -5545,11 +5540,14 @@ xfs_bmap_shift_extents(
blockcount = left.br_blockcount +
got.br_blockcount;
xfs_iext_remove(ip, *current_ext, 1, 0);
logflags |= XFS_ILOG_CORE;
if (cur) {
error = xfs_btree_delete(cur, &i);
if (error)
goto del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
} else {
logflags |= XFS_ILOG_DEXT;
}
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@ -5575,6 +5573,7 @@ xfs_bmap_shift_extents(
got.br_startoff = startoff;
}
logflags |= XFS_ILOG_CORE;
if (cur) {
error = xfs_bmbt_update(cur, got.br_startoff,
got.br_startblock,
@ -5582,6 +5581,8 @@ xfs_bmap_shift_extents(
got.br_state);
if (error)
goto del_cursor;
} else {
logflags |= XFS_ILOG_DEXT;
}
(*current_ext)++;
@ -5597,6 +5598,7 @@ xfs_bmap_shift_extents(
xfs_btree_del_cursor(cur,
error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
xfs_trans_log_inode(tp, ip, logflags);
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
return error;
}

View file

@ -1753,11 +1753,72 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
/*
* This is basically a copy of __set_page_dirty_buffers() with one
* small tweak: buffers beyond EOF do not get marked dirty. If we mark them
* dirty, we'll never be able to clean them because we don't write buffers
* beyond EOF, and that means we can't invalidate pages that span EOF
* that have been marked dirty. Further, the dirty state can leak into
* the file interior if the file is extended, resulting in all sorts of
* bad things happening as the state does not match the underlying data.
*
* XXX: this really indicates that bufferheads in XFS need to die. Warts like
* this only exist because of bufferheads and how the generic code manages them.
*/
STATIC int
xfs_vm_set_page_dirty(
struct page *page)
{
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
loff_t end_offset;
loff_t offset;
int newly_dirty;
if (unlikely(!mapping))
return !TestSetPageDirty(page);
end_offset = i_size_read(inode);
offset = page_offset(page);
spin_lock(&mapping->private_lock);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
do {
if (offset < end_offset)
set_buffer_dirty(bh);
bh = bh->b_this_page;
offset += 1 << inode->i_blkbits;
} while (bh != head);
}
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
if (newly_dirty) {
/* sigh - __set_page_dirty() is static, so copy it here, too */
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(!PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
return newly_dirty;
}
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
.set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.write_begin = xfs_vm_write_begin,

View file

@ -1470,6 +1470,26 @@ xfs_collapse_file_space(
start_fsb = XFS_B_TO_FSB(mp, offset + len);
shift_fsb = XFS_B_TO_FSB(mp, len);
/*
* Writeback the entire file and force remove any post-eof blocks. The
* writeback prevents changes to the extent list via concurrent
* writeback and the eofblocks trim prevents the extent shift algorithm
* from running into a post-eof delalloc extent.
*
* XXX: This is a temporary fix until the extent shift loop below is
* converted to use offsets and lookups within the ILOCK rather than
* carrying around the index into the extent list for the next
* iteration.
*/
error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (error)
return error;
if (xfs_can_free_eofblocks(ip, true)) {
error = xfs_free_eofblocks(mp, ip, false);
if (error)
return error;
}
error = xfs_free_file_space(ip, offset, len);
if (error)
return error;

View file

@ -291,12 +291,22 @@ xfs_file_read_iter(
if (inode->i_mapping->nrpages) {
ret = filemap_write_and_wait_range(
VFS_I(ip)->i_mapping,
pos, -1);
pos, pos + size - 1);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
truncate_pagecache_range(VFS_I(ip), pos, -1);
/*
* Invalidate whole pages. This can return an error if
* we fail to invalidate a page, but this should never
* happen on XFS. Warn if it does fail.
*/
ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
pos >> PAGE_CACHE_SHIFT,
(pos + size - 1) >> PAGE_CACHE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
@ -632,10 +642,19 @@ xfs_file_dio_aio_write(
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
pos, -1);
pos, pos + count - 1);
if (ret)
goto out;
truncate_pagecache_range(VFS_I(ip), pos, -1);
/*
* Invalidate whole pages. This can return an error if
* we fail to invalidate a page, but this should never
* happen on XFS. Warn if it does fail.
*/
ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
pos >> PAGE_CACHE_SHIFT,
(pos + count - 1) >> PAGE_CACHE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
/*