Merge git://oss.sgi.com:8090/xfs/linux-2.6

* git://oss.sgi.com:8090/xfs/linux-2.6:
  [XFS] Don't do I/O beyond eof when unreserving space
  [XFS] Fix use-after-free with buffers
  [XFS] Prevent lockdep false positives when locking two inodes.
  [XFS] Fix barrier status change detection.
  [XFS] Prevent direct I/O from mapping extents beyond eof
  [XFS] Fix regression introduced by remount fixup
  [XFS] Move memory allocations for log tracing out of the critical path
This commit is contained in:
Linus Torvalds 2008-09-19 16:21:59 -07:00
commit ab048fb1aa
7 changed files with 119 additions and 47 deletions

View file

@ -1338,6 +1338,10 @@ __xfs_get_blocks(
offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size;
if (!create && direct && offset >= i_size_read(inode))
return 0;
error = xfs_iomap(XFS_I(inode), offset, size,
create ? flags : BMAPI_READ, &iomap, &niomap);
if (error)

View file

@ -1302,9 +1302,29 @@ xfs_fs_remount(
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
default:
/*
* Logically we would return an error here to prevent
* users from believing they might have changed
* mount options using remount which can't be changed.
*
* But unfortunately mount(8) adds all options from
* mtab and fstab to the mount arguments in some cases
* so we can't blindly reject options, but have to
* check for each specified option if it actually
* differs from the currently set option and only
* reject it if that's the case.
*
* Until that is implemented we return success for
* every remount request, and silently ignore all
* options that we can't actually change.
*/
#if 0
printk(KERN_INFO
"XFS: mount option \"%s\" not supported for remount\n", p);
return -EINVAL;
#else
return 0;
#endif
}
}

View file

@ -732,6 +732,7 @@ xfs_buf_item_init(
bip->bli_item.li_ops = &xfs_buf_item_ops;
bip->bli_item.li_mountp = mp;
bip->bli_buf = bp;
xfs_buf_hold(bp);
bip->bli_format.blf_type = XFS_LI_BUF;
bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
@ -867,6 +868,21 @@ xfs_buf_item_dirty(
return (bip->bli_flags & XFS_BLI_DIRTY);
}
/*
 * Free the memory owned by a buf log item.
 *
 * When XFS_TRANS_DEBUG is defined the bli_orig and bli_logged
 * allocations are freed, when XFS_BLI_TRACE is defined the bli_trace
 * buffer is freed, and finally the item itself is returned to
 * xfs_buf_item_zone.  This function does not touch the buffer the
 * item points at; the callers visible in this file drop their buffer
 * reference with xfs_buf_rele() themselves.
 */
STATIC void
xfs_buf_item_free(
xfs_buf_log_item_t *bip)
{
#ifdef XFS_TRANS_DEBUG
kmem_free(bip->bli_orig);
kmem_free(bip->bli_logged);
#endif /* XFS_TRANS_DEBUG */
#ifdef XFS_BLI_TRACE
ktrace_free(bip->bli_trace);
#endif
/* Free the item last: the pointers released above are read from it. */
kmem_zone_free(xfs_buf_item_zone, bip);
}
/*
* This is called when the buf log item is no longer needed. It should
* free the buf log item associated with the given buffer and clear
@ -887,18 +903,8 @@ xfs_buf_item_relse(
(XFS_BUF_IODONE_FUNC(bp) != NULL)) {
XFS_BUF_CLR_IODONE_FUNC(bp);
}
#ifdef XFS_TRANS_DEBUG
kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
#ifdef XFS_BLI_TRACE
ktrace_free(bip->bli_trace);
#endif
kmem_zone_free(xfs_buf_item_zone, bip);
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
}
@ -1120,6 +1126,7 @@ xfs_buf_iodone(
ASSERT(bip->bli_buf == bp);
xfs_buf_rele(bp);
mp = bip->bli_item.li_mountp;
/*
@ -1136,18 +1143,7 @@ xfs_buf_iodone(
* xfs_trans_delete_ail() drops the AIL lock.
*/
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
#ifdef XFS_TRANS_DEBUG
kmem_free(bip->bli_orig);
bip->bli_orig = NULL;
kmem_free(bip->bli_logged);
bip->bli_logged = NULL;
#endif /* XFS_TRANS_DEBUG */
#ifdef XFS_BLI_TRACE
ktrace_free(bip->bli_trace);
#endif
kmem_zone_free(xfs_buf_item_zone, bip);
xfs_buf_item_free(bip);
}
#if defined(XFS_BLI_TRACE)

View file

@ -149,7 +149,14 @@ xfs_swap_extents(
sbp = &sxp->sx_stat;
xfs_lock_two_inodes(ip, tip, lock_flags);
/*
* we have to do two separate lock calls here to keep lockdep
* happy. If we try to get all the locks in one call, lock will
* report false positives when we drop the ILOCK and regain them
* below.
*/
xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
locked = 1;
/* Verify that both files have the same format */

View file

@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
STATIC int xlog_iclogs_empty(xlog_t *log);
#if defined(XFS_LOG_TRACE)
#define XLOG_TRACE_LOGGRANT_SIZE 2048
#define XLOG_TRACE_ICLOG_SIZE 256
/*
 * Allocate the grant trace buffer for a log and store it in
 * log->l_grant_trace.  The buffer is sized by XLOG_TRACE_LOGGRANT_SIZE
 * and allocated with KM_NOFS.  The return value of ktrace_alloc() is
 * stored without being checked here.
 *
 * Only built when XFS_LOG_TRACE is defined; otherwise the name expands
 * to an empty macro.
 */
void
xlog_trace_loggrant_alloc(xlog_t *log)
{
log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
}
/*
 * Free the grant trace buffer held in log->l_grant_trace.
 * The pointer is passed to ktrace_free() as-is and is not reset
 * afterwards.
 * NOTE(review): assumes ktrace_free() tolerates a NULL argument, since
 * no NULL check is made here - confirm against ktrace_free().
 *
 * Only built when XFS_LOG_TRACE is defined; otherwise the name expands
 * to an empty macro.
 */
void
xlog_trace_loggrant_dealloc(xlog_t *log)
{
ktrace_free(log->l_grant_trace);
}
void
xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
{
unsigned long cnts;
if (!log->l_grant_trace) {
log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
if (!log->l_grant_trace)
return;
}
/* ticket counts are 1 byte each */
cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
@ -156,11 +167,21 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
(void *)((unsigned long)tic->t_unit_res));
}
/*
 * Allocate the trace buffer for an in-core log and store it in
 * iclog->ic_trace.  The buffer is sized by XLOG_TRACE_ICLOG_SIZE and
 * allocated with KM_NOFS.  The return value of ktrace_alloc() is
 * stored without being checked here.
 *
 * Only built when XFS_LOG_TRACE is defined; otherwise the name expands
 * to an empty macro.
 */
void
xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
{
iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
}
/*
 * Free the trace buffer held in iclog->ic_trace.
 * The pointer is passed to ktrace_free() as-is and is not reset
 * afterwards.
 * NOTE(review): assumes ktrace_free() tolerates a NULL argument, since
 * no NULL check is made here - confirm against ktrace_free().
 *
 * Only built when XFS_LOG_TRACE is defined; otherwise the name expands
 * to an empty macro.
 */
void
xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
{
ktrace_free(iclog->ic_trace);
}
void
xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
{
if (!iclog->ic_trace)
iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
ktrace_enter(iclog->ic_trace,
(void *)((unsigned long)state),
(void *)((unsigned long)current_pid()),
@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
(void *)NULL, (void *)NULL);
}
#else
#define xlog_trace_loggrant_alloc(log)
#define xlog_trace_loggrant_dealloc(log)
#define xlog_trace_loggrant(log,tic,string)
#define xlog_trace_iclog_alloc(iclog)
#define xlog_trace_iclog_dealloc(iclog)
#define xlog_trace_iclog(iclog,state)
#endif /* XFS_LOG_TRACE */
@ -1009,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp)
* layer, it means the underlying device no longer supports
* barrier I/O. Warn loudly and turn off barriers.
*/
if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) {
if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) {
l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
xfs_fs_cmn_err(CE_WARN, l->l_mp,
"xlog_iodone: Barriers are no longer supported"
@ -1231,6 +1259,7 @@ xlog_alloc_log(xfs_mount_t *mp,
spin_lock_init(&log->l_grant_lock);
sv_init(&log->l_flush_wait, 0, "flush_wait");
xlog_trace_loggrant_alloc(log);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@ -1285,6 +1314,8 @@ xlog_alloc_log(xfs_mount_t *mp,
sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
xlog_trace_iclog_alloc(iclog);
iclogp = &iclog->ic_next;
}
*iclogp = log->l_iclog; /* complete ring */
@ -1565,11 +1596,7 @@ xlog_dealloc_log(xlog_t *log)
sv_destroy(&iclog->ic_force_wait);
sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
#ifdef XFS_LOG_TRACE
if (iclog->ic_trace != NULL) {
ktrace_free(iclog->ic_trace);
}
#endif
xlog_trace_iclog_dealloc(iclog);
next_iclog = iclog->ic_next;
kmem_free(iclog);
iclog = next_iclog;
@ -1578,14 +1605,7 @@ xlog_dealloc_log(xlog_t *log)
spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
#ifdef XFS_LOG_TRACE
if (log->l_trace != NULL) {
ktrace_free(log->l_trace);
}
if (log->l_grant_trace != NULL) {
ktrace_free(log->l_grant_trace);
}
#endif
xlog_trace_loggrant_dealloc(log);
log->l_mp->m_log = NULL;
kmem_free(log);
} /* xlog_dealloc_log */

View file

@ -448,7 +448,6 @@ typedef struct log {
int l_grant_write_bytes;
#ifdef XFS_LOG_TRACE
struct ktrace *l_trace;
struct ktrace *l_grant_trace;
#endif

View file

@ -1838,6 +1838,12 @@ xfs_lock_inodes(
#endif
}
/*
* xfs_lock_two_inodes() can only be used to lock one type of lock
* at a time - the iolock or the ilock, but not both at once. If
* we lock both at once, lockdep will report false positives saying
* we have violated locking orders.
*/
void
xfs_lock_two_inodes(
xfs_inode_t *ip0,
@ -1848,6 +1854,8 @@ xfs_lock_two_inodes(
int attempts = 0;
xfs_log_item_t *lp;
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
@ -3152,6 +3160,13 @@ xfs_alloc_file_space(
/*
* Zero file bytes between startoff and endoff inclusive.
* The iolock is held exclusive and no blocks are buffered.
*
* This function is used by xfs_free_file_space() to zero
* partial blocks when the range to free is not block aligned.
* When unreserving space with boundaries that are not block
* aligned we round up the start and round down the end
* boundaries and then use this function to zero the parts of
* the blocks that got dropped during the rounding.
*/
STATIC int
xfs_zero_remaining_bytes(
@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes(
int nimap;
int error = 0;
/*
* Avoid doing I/O beyond eof - it's not necessary
* since nothing can read beyond eof. The space will
* be zeroed when the file is extended anyway.
*/
if (startoff >= ip->i_size)
return 0;
if (endoff > ip->i_size)
endoff = ip->i_size;
bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp);