From f273ab848b7cbc0088b0ac7457b3769e6566074e Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 28 Sep 2006 11:06:03 +1000 Subject: [PATCH] [XFS] Really fix use after free in xfs_iunpin. The previous attempts to fix the linux inode use-after-free in xfs_iunpin simply made the problem harder to hit. We actually need complete exclusion between xfs_reclaim and xfs_iunpin, as well as ensuring that the i_flags are consistent during both of these functions. Introduce a new spinlock for exclusion and the i_flags, and fix up xfs_iunpin to use igrab before marking the inode dirty. SGI-PV: 952967 SGI-Modid: xfs-linux-melb:xfs-kern:26964a Signed-off-by: David Chinner Signed-off-by: Tim Shimmin --- fs/xfs/linux-2.6/xfs_super.c | 2 ++ fs/xfs/xfs_iget.c | 6 ++++++ fs/xfs/xfs_inode.c | 23 ++++++++++++++++++++--- fs/xfs/xfs_inode.h | 1 + fs/xfs/xfs_itable.c | 1 + fs/xfs/xfs_vnodeops.c | 5 +++++ 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 9df9ed37d219..38c4d128a8c0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -227,7 +227,9 @@ xfs_initialize_vnode( xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip); xfs_set_inodeops(inode); + spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_INEW; + spin_unlock(&ip->i_flags_lock); barrier(); unlock_new_inode(inode); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 9b9379792f37..b73d216ecaf9 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -243,7 +243,9 @@ xfs_iget_core( XFS_STATS_INC(xs_ig_found); + spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_IRECLAIMABLE; + spin_unlock(&ip->i_flags_lock); version = ih->ih_version; read_unlock(&ih->ih_lock); xfs_ihash_promote(ih, ip, version); @@ -297,7 +299,9 @@ xfs_iget_core( if (lock_flags != 0) xfs_ilock(ip, lock_flags); + spin_lock(&ip->i_flags_lock); ip->i_flags &= ~XFS_ISTALE; + spin_unlock(&ip->i_flags_lock); vn_trace_exit(vp, "xfs_iget.found", (inst_t *)__return_address); @@ -367,7 +371,9 @@ xfs_iget_core( ih->ih_next = ip; ip->i_udquot = ip->i_gdquot = NULL; ih->ih_version++; + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_INEW; + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 66a78287a957..c27d7d495aa0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -867,6 +867,7 @@ xfs_iread( ip = kmem_zone_zalloc(xfs_inode_zone, KM_SLEEP); ip->i_ino = ino; ip->i_mount = mp; + spin_lock_init(&ip->i_flags_lock); /* * Get pointer's to the on-disk inode and the buffer containing it. @@ -2214,7 +2215,9 @@ xfs_ifree_cluster( if (ip == free_ip) { if (xfs_iflock_nowait(ip)) { + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_ISTALE; + spin_unlock(&ip->i_flags_lock); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2228,7 +2231,9 @@ xfs_ifree_cluster( if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { if (xfs_iflock_nowait(ip)) { + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_ISTALE; + spin_unlock(&ip->i_flags_lock); if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); @@ -2258,7 +2263,9 @@ xfs_ifree_cluster( AIL_LOCK(mp,s); iip->ili_flush_lsn = iip->ili_item.li_lsn; AIL_UNLOCK(mp, s); + spin_lock(&iip->ili_inode->i_flags_lock); iip->ili_inode->i_flags |= XFS_ISTALE; + spin_unlock(&iip->ili_inode->i_flags_lock); pre_flushed++; } lip = lip->li_bio_list; @@ -2754,19 +2761,29 @@ xfs_iunpin( * call as the inode reclaim may be blocked waiting for * the inode to become unpinned. */ + struct inode *inode = NULL; + + spin_lock(&ip->i_flags_lock); if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { bhv_vnode_t *vp = XFS_ITOV_NULL(ip); /* make sync come back and flush this inode */ if (vp) { - struct inode *inode = vn_to_inode(vp); + inode = vn_to_inode(vp); if (!(inode->i_state & - (I_NEW|I_FREEING|I_CLEAR))) - mark_inode_dirty_sync(inode); + (I_NEW|I_FREEING|I_CLEAR))) { + inode = igrab(inode); + if (inode) + mark_inode_dirty_sync(inode); + } else + inode = NULL; } } + spin_unlock(&ip->i_flags_lock); wake_up(&ip->i_ipin_wait); + if (inode) + iput(inode); } } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fdb89f72db95..e96eb0835fe6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -267,6 +267,7 @@ typedef struct xfs_inode { sema_t i_flock; /* inode flush lock */ atomic_t i_pincount; /* inode pin count */ wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ + spinlock_t i_flags_lock; /* inode i_flags lock */ #ifdef HAVE_REFCACHE struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ struct xfs_inode *i_release; /* inode to unref */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b608b8ab1dd6..136c6b06f1c9 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -577,6 +577,7 @@ xfs_bulkstat( KM_SLEEP); ip->i_ino = ino; ip->i_mount = mp; + spin_lock_init(&ip->i_flags_lock); if (bp) xfs_buf_relse(bp); error = xfs_itobp(mp, NULL, ip, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1a6782eaf5d4..061e2ffdd1de 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -3844,7 +3844,9 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip)); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); + spin_lock(&ip->i_flags_lock); ip->i_flags |= XFS_IRECLAIMABLE; + spin_unlock(&ip->i_flags_lock); XFS_MOUNT_IUNLOCK(mp); } return 0; @@ -3869,8 +3871,10 @@ xfs_finish_reclaim( * us. */ write_lock(&ih->ih_lock); + spin_lock(&ip->i_flags_lock); if ((ip->i_flags & XFS_IRECLAIM) || (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) { + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); if (locked) { xfs_ifunlock(ip); @@ -3879,6 +3883,7 @@ xfs_finish_reclaim( return 1; } ip->i_flags |= XFS_IRECLAIM; + spin_unlock(&ip->i_flags_lock); write_unlock(&ih->ih_lock); /*