[XFS] Fix transaction overrun during writeback.
Prevent transaction overrun in xfs_iomap_write_allocate() if we race with a truncate that overlaps the delalloc range we were planning to allocate. If we race, we may allocate into a hole and that requires block allocation. At this point in time we don't have a reservation for block allocation (apart from metadata blocks) and so allocating into a hole rather than a delalloc region results in overflowing the transaction block reservation. Fix it by only allowing a single extent to be allocated at a time. SGI-PV: 972757 SGI-Modid: xfs-linux-melb:xfs-kern:30005a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
This commit is contained in:
parent
786f486f81
commit
e4143a1cf5
1 changed file with 50 additions and 25 deletions
|
@ -702,6 +702,9 @@ xfs_iomap_write_delay(
|
||||||
* the originating callers request.
|
* the originating callers request.
|
||||||
*
|
*
|
||||||
* Called without a lock on the inode.
|
* Called without a lock on the inode.
|
||||||
|
*
|
||||||
|
* We no longer bother to look at the incoming map - all we have to
|
||||||
|
* guarantee is that whatever we allocate fills the required range.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
xfs_iomap_write_allocate(
|
xfs_iomap_write_allocate(
|
||||||
|
@ -717,9 +720,9 @@ xfs_iomap_write_allocate(
|
||||||
xfs_fsblock_t first_block;
|
xfs_fsblock_t first_block;
|
||||||
xfs_bmap_free_t free_list;
|
xfs_bmap_free_t free_list;
|
||||||
xfs_filblks_t count_fsb;
|
xfs_filblks_t count_fsb;
|
||||||
xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS];
|
xfs_bmbt_irec_t imap;
|
||||||
xfs_trans_t *tp;
|
xfs_trans_t *tp;
|
||||||
int i, nimaps, committed;
|
int nimaps, committed;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
int nres;
|
int nres;
|
||||||
|
|
||||||
|
@ -766,13 +769,38 @@ xfs_iomap_write_allocate(
|
||||||
|
|
||||||
XFS_BMAP_INIT(&free_list, &first_block);
|
XFS_BMAP_INIT(&free_list, &first_block);
|
||||||
|
|
||||||
nimaps = XFS_STRAT_WRITE_IMAPS;
|
|
||||||
/*
|
/*
|
||||||
* Ensure we don't go beyond eof - it is possible
|
* it is possible that the extents have changed since
|
||||||
* the extents changed since we did the read call,
|
* we did the read call as we dropped the ilock for a
|
||||||
* we dropped the ilock in the interim.
|
* while. We have to be careful about truncates or hole
|
||||||
|
* punchs here - we are not allowed to allocate
|
||||||
|
* non-delalloc blocks here.
|
||||||
|
*
|
||||||
|
* The only protection against truncation is the pages
|
||||||
|
* for the range we are being asked to convert are
|
||||||
|
* locked and hence a truncate will block on them
|
||||||
|
* first.
|
||||||
|
*
|
||||||
|
* As a result, if we go beyond the range we really
|
||||||
|
* need and hit an delalloc extent boundary followed by
|
||||||
|
* a hole while we have excess blocks in the map, we
|
||||||
|
* will fill the hole incorrectly and overrun the
|
||||||
|
* transaction reservation.
|
||||||
|
*
|
||||||
|
* Using a single map prevents this as we are forced to
|
||||||
|
* check each map we look for overlap with the desired
|
||||||
|
* range and abort as soon as we find it. Also, given
|
||||||
|
* that we only return a single map, having one beyond
|
||||||
|
* what we can return is probably a bit silly.
|
||||||
|
*
|
||||||
|
* We also need to check that we don't go beyond EOF;
|
||||||
|
* this is a truncate optimisation as a truncate sets
|
||||||
|
* the new file size before block on the pages we
|
||||||
|
* currently have locked under writeback. Because they
|
||||||
|
* are about to be tossed, we don't need to write them
|
||||||
|
* back....
|
||||||
*/
|
*/
|
||||||
|
nimaps = 1;
|
||||||
end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
|
end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
|
||||||
xfs_bmap_last_offset(NULL, ip, &last_block,
|
xfs_bmap_last_offset(NULL, ip, &last_block,
|
||||||
XFS_DATA_FORK);
|
XFS_DATA_FORK);
|
||||||
|
@ -788,7 +816,7 @@ xfs_iomap_write_allocate(
|
||||||
/* Go get the actual blocks */
|
/* Go get the actual blocks */
|
||||||
error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
|
error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
|
||||||
XFS_BMAPI_WRITE, &first_block, 1,
|
XFS_BMAPI_WRITE, &first_block, 1,
|
||||||
imap, &nimaps, &free_list, NULL);
|
&imap, &nimaps, &free_list, NULL);
|
||||||
if (error)
|
if (error)
|
||||||
goto trans_cancel;
|
goto trans_cancel;
|
||||||
|
|
||||||
|
@ -807,27 +835,24 @@ xfs_iomap_write_allocate(
|
||||||
* See if we were able to allocate an extent that
|
* See if we were able to allocate an extent that
|
||||||
* covers at least part of the callers request
|
* covers at least part of the callers request
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < nimaps; i++) {
|
if (unlikely(!imap.br_startblock &&
|
||||||
if (unlikely(!imap[i].br_startblock &&
|
XFS_IS_REALTIME_INODE(ip)))
|
||||||
!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)))
|
return xfs_cmn_err_fsblock_zero(ip, &imap);
|
||||||
return xfs_cmn_err_fsblock_zero(ip, &imap[i]);
|
if ((offset_fsb >= imap.br_startoff) &&
|
||||||
if ((offset_fsb >= imap[i].br_startoff) &&
|
(offset_fsb < (imap.br_startoff +
|
||||||
(offset_fsb < (imap[i].br_startoff +
|
imap.br_blockcount))) {
|
||||||
imap[i].br_blockcount))) {
|
*map = imap;
|
||||||
*map = imap[i];
|
*retmap = 1;
|
||||||
*retmap = 1;
|
XFS_STATS_INC(xs_xstrat_quick);
|
||||||
XFS_STATS_INC(xs_xstrat_quick);
|
return 0;
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
count_fsb -= imap[i].br_blockcount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* So far we have not mapped the requested part of the
|
/*
|
||||||
|
* So far we have not mapped the requested part of the
|
||||||
* file, just surrounding data, try again.
|
* file, just surrounding data, try again.
|
||||||
*/
|
*/
|
||||||
nimaps--;
|
count_fsb -= imap.br_blockcount;
|
||||||
map_start_fsb = imap[nimaps].br_startoff +
|
map_start_fsb = imap.br_startoff + imap.br_blockcount;
|
||||||
imap[nimaps].br_blockcount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
trans_cancel:
|
trans_cancel:
|
||||||
|
|
Loading…
Reference in a new issue