Merge branch 'xfs-buf-iosubmit' into for-next
This commit is contained in:
commit
75e58ce4c8
12 changed files with 284 additions and 360 deletions
|
@ -1122,14 +1122,6 @@ xfs_zero_remaining_bytes(
|
|||
if (endoff > XFS_ISIZE(ip))
|
||||
endoff = XFS_ISIZE(ip);
|
||||
|
||||
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp,
|
||||
BTOBB(mp->m_sb.sb_blocksize), 0);
|
||||
if (!bp)
|
||||
return -ENOMEM;
|
||||
|
||||
xfs_buf_unlock(bp);
|
||||
|
||||
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
|
||||
uint lock_mode;
|
||||
|
||||
|
@ -1152,42 +1144,24 @@ xfs_zero_remaining_bytes(
|
|||
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
|
||||
if (imap.br_state == XFS_EXT_UNWRITTEN)
|
||||
continue;
|
||||
XFS_BUF_UNDONE(bp);
|
||||
XFS_BUF_UNWRITE(bp);
|
||||
XFS_BUF_READ(bp);
|
||||
XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp)) {
|
||||
error = -EIO;
|
||||
break;
|
||||
}
|
||||
xfs_buf_iorequest(bp);
|
||||
error = xfs_buf_iowait(bp);
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp,
|
||||
"xfs_zero_remaining_bytes(read)");
|
||||
break;
|
||||
}
|
||||
error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
|
||||
mp->m_rtdev_targp : mp->m_ddev_targp,
|
||||
xfs_fsb_to_db(ip, imap.br_startblock),
|
||||
BTOBB(mp->m_sb.sb_blocksize),
|
||||
0, &bp, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
memset(bp->b_addr +
|
||||
(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
|
||||
0, lastoffset - offset + 1);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
XFS_BUF_UNREAD(bp);
|
||||
XFS_BUF_WRITE(bp);
|
||||
(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
|
||||
0, lastoffset - offset + 1);
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(mp)) {
|
||||
error = -EIO;
|
||||
break;
|
||||
}
|
||||
xfs_buf_iorequest(bp);
|
||||
error = xfs_buf_iowait(bp);
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp,
|
||||
"xfs_zero_remaining_bytes(write)");
|
||||
break;
|
||||
}
|
||||
error = xfs_bwrite(bp);
|
||||
xfs_buf_relse(bp);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
xfs_buf_free(bp);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
|
357
fs/xfs/xfs_buf.c
357
fs/xfs/xfs_buf.c
|
@ -623,10 +623,11 @@ _xfs_buf_read(
|
|||
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
|
||||
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
|
||||
|
||||
xfs_buf_iorequest(bp);
|
||||
if (flags & XBF_ASYNC)
|
||||
if (flags & XBF_ASYNC) {
|
||||
xfs_buf_submit(bp);
|
||||
return 0;
|
||||
return xfs_buf_iowait(bp);
|
||||
}
|
||||
return xfs_buf_submit_wait(bp);
|
||||
}
|
||||
|
||||
xfs_buf_t *
|
||||
|
@ -687,34 +688,39 @@ xfs_buf_readahead_map(
|
|||
* Read an uncached buffer from disk. Allocates and returns a locked
|
||||
* buffer containing the disk contents or nothing.
|
||||
*/
|
||||
struct xfs_buf *
|
||||
int
|
||||
xfs_buf_read_uncached(
|
||||
struct xfs_buftarg *target,
|
||||
xfs_daddr_t daddr,
|
||||
size_t numblks,
|
||||
int flags,
|
||||
struct xfs_buf **bpp,
|
||||
const struct xfs_buf_ops *ops)
|
||||
{
|
||||
struct xfs_buf *bp;
|
||||
|
||||
*bpp = NULL;
|
||||
|
||||
bp = xfs_buf_get_uncached(target, numblks, flags);
|
||||
if (!bp)
|
||||
return NULL;
|
||||
return -ENOMEM;
|
||||
|
||||
/* set up the buffer for a read IO */
|
||||
ASSERT(bp->b_map_count == 1);
|
||||
bp->b_bn = daddr;
|
||||
bp->b_bn = XFS_BUF_DADDR_NULL; /* always null for uncached buffers */
|
||||
bp->b_maps[0].bm_bn = daddr;
|
||||
bp->b_flags |= XBF_READ;
|
||||
bp->b_ops = ops;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
|
||||
xfs_buf_submit_wait(bp);
|
||||
if (bp->b_error) {
|
||||
int error = bp->b_error;
|
||||
xfs_buf_relse(bp);
|
||||
return NULL;
|
||||
return error;
|
||||
}
|
||||
xfs_buf_iorequest(bp);
|
||||
xfs_buf_iowait(bp);
|
||||
return bp;
|
||||
|
||||
*bpp = bp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -998,53 +1004,56 @@ xfs_buf_wait_unpin(
|
|||
* Buffer Utility Routines
|
||||
*/
|
||||
|
||||
STATIC void
|
||||
xfs_buf_iodone_work(
|
||||
struct work_struct *work)
|
||||
void
|
||||
xfs_buf_ioend(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
struct xfs_buf *bp =
|
||||
container_of(work, xfs_buf_t, b_iodone_work);
|
||||
bool read = !!(bp->b_flags & XBF_READ);
|
||||
bool read = bp->b_flags & XBF_READ;
|
||||
|
||||
trace_xfs_buf_iodone(bp, _RET_IP_);
|
||||
|
||||
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
|
||||
|
||||
/* only validate buffers that were read without errors */
|
||||
if (read && bp->b_ops && !bp->b_error && (bp->b_flags & XBF_DONE))
|
||||
/*
|
||||
* Pull in IO completion errors now. We are guaranteed to be running
|
||||
* single threaded, so we don't need the lock to read b_io_error.
|
||||
*/
|
||||
if (!bp->b_error && bp->b_io_error)
|
||||
xfs_buf_ioerror(bp, bp->b_io_error);
|
||||
|
||||
/* Only validate buffers that were read without errors */
|
||||
if (read && !bp->b_error && bp->b_ops) {
|
||||
ASSERT(!bp->b_iodone);
|
||||
bp->b_ops->verify_read(bp);
|
||||
}
|
||||
|
||||
if (!bp->b_error)
|
||||
bp->b_flags |= XBF_DONE;
|
||||
|
||||
if (bp->b_iodone)
|
||||
(*(bp->b_iodone))(bp);
|
||||
else if (bp->b_flags & XBF_ASYNC)
|
||||
xfs_buf_relse(bp);
|
||||
else {
|
||||
ASSERT(read && bp->b_ops);
|
||||
else
|
||||
complete(&bp->b_iowait);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
xfs_buf_ioend_work(
|
||||
struct work_struct *work)
|
||||
{
|
||||
struct xfs_buf *bp =
|
||||
container_of(work, xfs_buf_t, b_iodone_work);
|
||||
|
||||
xfs_buf_ioend(bp);
|
||||
}
|
||||
|
||||
void
|
||||
xfs_buf_ioend(
|
||||
struct xfs_buf *bp,
|
||||
int schedule)
|
||||
xfs_buf_ioend_async(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
bool read = !!(bp->b_flags & XBF_READ);
|
||||
|
||||
trace_xfs_buf_iodone(bp, _RET_IP_);
|
||||
|
||||
if (bp->b_error == 0)
|
||||
bp->b_flags |= XBF_DONE;
|
||||
|
||||
if (bp->b_iodone || (read && bp->b_ops) || (bp->b_flags & XBF_ASYNC)) {
|
||||
if (schedule) {
|
||||
INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
|
||||
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
|
||||
} else {
|
||||
xfs_buf_iodone_work(&bp->b_iodone_work);
|
||||
}
|
||||
} else {
|
||||
bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
|
||||
complete(&bp->b_iowait);
|
||||
}
|
||||
INIT_WORK(&bp->b_iodone_work, xfs_buf_ioend_work);
|
||||
queue_work(xfslogd_workqueue, &bp->b_iodone_work);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1067,96 +1076,6 @@ xfs_buf_ioerror_alert(
|
|||
(__uint64_t)XFS_BUF_ADDR(bp), func, -bp->b_error, bp->b_length);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when we want to stop a buffer from getting written or read.
|
||||
* We attach the EIO error, muck with its flags, and call xfs_buf_ioend
|
||||
* so that the proper iodone callbacks get called.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_bioerror(
|
||||
xfs_buf_t *bp)
|
||||
{
|
||||
#ifdef XFSERRORDEBUG
|
||||
ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* No need to wait until the buffer is unpinned, we aren't flushing it.
|
||||
*/
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
|
||||
/*
|
||||
* We're calling xfs_buf_ioend, so delete XBF_DONE flag.
|
||||
*/
|
||||
XFS_BUF_UNREAD(bp);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
|
||||
xfs_buf_ioend(bp, 0);
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Same as xfs_bioerror, except that we are releasing the buffer
|
||||
* here ourselves, and avoiding the xfs_buf_ioend call.
|
||||
* This is meant for userdata errors; metadata bufs come with
|
||||
* iodone functions attached, so that we can track down errors.
|
||||
*/
|
||||
int
|
||||
xfs_bioerror_relse(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
int64_t fl = bp->b_flags;
|
||||
/*
|
||||
* No need to wait until the buffer is unpinned.
|
||||
* We aren't flushing it.
|
||||
*
|
||||
* chunkhold expects B_DONE to be set, whether
|
||||
* we actually finish the I/O or not. We don't want to
|
||||
* change that interface.
|
||||
*/
|
||||
XFS_BUF_UNREAD(bp);
|
||||
XFS_BUF_DONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
bp->b_iodone = NULL;
|
||||
if (!(fl & XBF_ASYNC)) {
|
||||
/*
|
||||
* Mark b_error and B_ERROR _both_.
|
||||
* Lot's of chunkcache code assumes that.
|
||||
* There's no reason to mark error for
|
||||
* ASYNC buffers.
|
||||
*/
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
complete(&bp->b_iowait);
|
||||
} else {
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
xfs_bdstrat_cb(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
|
||||
trace_xfs_bdstrat_shut(bp, _RET_IP_);
|
||||
/*
|
||||
* Metadata write that didn't get logged but
|
||||
* written delayed anyway. These aren't associated
|
||||
* with a transaction, and can be ignored.
|
||||
*/
|
||||
if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
|
||||
return xfs_bioerror_relse(bp);
|
||||
else
|
||||
return xfs_bioerror(bp);
|
||||
}
|
||||
|
||||
xfs_buf_iorequest(bp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_bwrite(
|
||||
struct xfs_buf *bp)
|
||||
|
@ -1166,11 +1085,10 @@ xfs_bwrite(
|
|||
ASSERT(xfs_buf_islocked(bp));
|
||||
|
||||
bp->b_flags |= XBF_WRITE;
|
||||
bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
|
||||
bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
|
||||
XBF_WRITE_FAIL | XBF_DONE);
|
||||
|
||||
xfs_bdstrat_cb(bp);
|
||||
|
||||
error = xfs_buf_iowait(bp);
|
||||
error = xfs_buf_submit_wait(bp);
|
||||
if (error) {
|
||||
xfs_force_shutdown(bp->b_target->bt_mount,
|
||||
SHUTDOWN_META_IO_ERROR);
|
||||
|
@ -1178,15 +1096,6 @@ xfs_bwrite(
|
|||
return error;
|
||||
}
|
||||
|
||||
STATIC void
|
||||
_xfs_buf_ioend(
|
||||
xfs_buf_t *bp,
|
||||
int schedule)
|
||||
{
|
||||
if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
|
||||
xfs_buf_ioend(bp, schedule);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
xfs_buf_bio_end_io(
|
||||
struct bio *bio,
|
||||
|
@ -1198,13 +1107,18 @@ xfs_buf_bio_end_io(
|
|||
* don't overwrite existing errors - otherwise we can lose errors on
|
||||
* buffers that require multiple bios to complete.
|
||||
*/
|
||||
if (!bp->b_error)
|
||||
xfs_buf_ioerror(bp, error);
|
||||
if (error) {
|
||||
spin_lock(&bp->b_lock);
|
||||
if (!bp->b_io_error)
|
||||
bp->b_io_error = error;
|
||||
spin_unlock(&bp->b_lock);
|
||||
}
|
||||
|
||||
if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
|
||||
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
|
||||
|
||||
_xfs_buf_ioend(bp, 1);
|
||||
if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
|
||||
xfs_buf_ioend_async(bp);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
|
@ -1283,7 +1197,7 @@ xfs_buf_ioapply_map(
|
|||
} else {
|
||||
/*
|
||||
* This is guaranteed not to be the last io reference count
|
||||
* because the caller (xfs_buf_iorequest) holds a count itself.
|
||||
* because the caller (xfs_buf_submit) holds a count itself.
|
||||
*/
|
||||
atomic_dec(&bp->b_io_remaining);
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
|
@ -1373,53 +1287,131 @@ _xfs_buf_ioapply(
|
|||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
/*
|
||||
* Asynchronous IO submission path. This transfers the buffer lock ownership and
|
||||
* the current reference to the IO. It is not safe to reference the buffer after
|
||||
* a call to this function unless the caller holds an additional reference
|
||||
* itself.
|
||||
*/
|
||||
void
|
||||
xfs_buf_iorequest(
|
||||
xfs_buf_t *bp)
|
||||
xfs_buf_submit(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
trace_xfs_buf_iorequest(bp, _RET_IP_);
|
||||
trace_xfs_buf_submit(bp, _RET_IP_);
|
||||
|
||||
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
|
||||
ASSERT(bp->b_flags & XBF_ASYNC);
|
||||
|
||||
/* on shutdown we stale and complete the buffer immediately */
|
||||
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
bp->b_flags &= ~XBF_DONE;
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_ioend(bp);
|
||||
return;
|
||||
}
|
||||
|
||||
if (bp->b_flags & XBF_WRITE)
|
||||
xfs_buf_wait_unpin(bp);
|
||||
|
||||
/* clear the internal error state to avoid spurious errors */
|
||||
bp->b_io_error = 0;
|
||||
|
||||
/*
|
||||
* The caller's reference is released during I/O completion.
|
||||
* This occurs some time after the last b_io_remaining reference is
|
||||
* released, so after we drop our IO reference we have to have some
|
||||
* other reference to ensure the buffer doesn't go away from underneath
|
||||
* us. Take a direct reference to ensure we have safe access to the
|
||||
* buffer until we are finished with it.
|
||||
*/
|
||||
xfs_buf_hold(bp);
|
||||
|
||||
/*
|
||||
* Set the count to 1 initially, this will stop an I/O
|
||||
* completion callout which happens before we have started
|
||||
* all the I/O from calling xfs_buf_ioend too early.
|
||||
* Set the count to 1 initially, this will stop an I/O completion
|
||||
* callout which happens before we have started all the I/O from calling
|
||||
* xfs_buf_ioend too early.
|
||||
*/
|
||||
atomic_set(&bp->b_io_remaining, 1);
|
||||
_xfs_buf_ioapply(bp);
|
||||
|
||||
/*
|
||||
* If _xfs_buf_ioapply failed, we'll get back here with
|
||||
* only the reference we took above. _xfs_buf_ioend will
|
||||
* drop it to zero, so we'd better not queue it for later,
|
||||
* or we'll free it before it's done.
|
||||
* If _xfs_buf_ioapply failed, we can get back here with only the IO
|
||||
* reference we took above. If we drop it to zero, run completion so
|
||||
* that we don't return to the caller with completion still pending.
|
||||
*/
|
||||
_xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
|
||||
if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
|
||||
if (bp->b_error)
|
||||
xfs_buf_ioend(bp);
|
||||
else
|
||||
xfs_buf_ioend_async(bp);
|
||||
}
|
||||
|
||||
xfs_buf_rele(bp);
|
||||
/* Note: it is not safe to reference bp now we've dropped our ref */
|
||||
}
|
||||
|
||||
/*
|
||||
* Waits for I/O to complete on the buffer supplied. It returns immediately if
|
||||
* no I/O is pending or there is already a pending error on the buffer, in which
|
||||
* case nothing will ever complete. It returns the I/O error code, if any, or
|
||||
* 0 if there was no error.
|
||||
* Synchronous buffer IO submission path, read or write.
|
||||
*/
|
||||
int
|
||||
xfs_buf_iowait(
|
||||
xfs_buf_t *bp)
|
||||
xfs_buf_submit_wait(
|
||||
struct xfs_buf *bp)
|
||||
{
|
||||
int error;
|
||||
|
||||
trace_xfs_buf_submit_wait(bp, _RET_IP_);
|
||||
|
||||
ASSERT(!(bp->b_flags & (_XBF_DELWRI_Q | XBF_ASYNC)));
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
xfs_buf_stale(bp);
|
||||
bp->b_flags &= ~XBF_DONE;
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (bp->b_flags & XBF_WRITE)
|
||||
xfs_buf_wait_unpin(bp);
|
||||
|
||||
/* clear the internal error state to avoid spurious errors */
|
||||
bp->b_io_error = 0;
|
||||
|
||||
/*
|
||||
* For synchronous IO, the IO does not inherit the submitters reference
|
||||
* count, nor the buffer lock. Hence we cannot release the reference we
|
||||
* are about to take until we've waited for all IO completion to occur,
|
||||
* including any xfs_buf_ioend_async() work that may be pending.
|
||||
*/
|
||||
xfs_buf_hold(bp);
|
||||
|
||||
/*
|
||||
* Set the count to 1 initially, this will stop an I/O completion
|
||||
* callout which happens before we have started all the I/O from calling
|
||||
* xfs_buf_ioend too early.
|
||||
*/
|
||||
atomic_set(&bp->b_io_remaining, 1);
|
||||
_xfs_buf_ioapply(bp);
|
||||
|
||||
/*
|
||||
* make sure we run completion synchronously if it raced with us and is
|
||||
* already complete.
|
||||
*/
|
||||
if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
|
||||
xfs_buf_ioend(bp);
|
||||
|
||||
/* wait for completion before gathering the error from the buffer */
|
||||
trace_xfs_buf_iowait(bp, _RET_IP_);
|
||||
|
||||
if (!bp->b_error)
|
||||
wait_for_completion(&bp->b_iowait);
|
||||
|
||||
wait_for_completion(&bp->b_iowait);
|
||||
trace_xfs_buf_iowait_done(bp, _RET_IP_);
|
||||
return bp->b_error;
|
||||
error = bp->b_error;
|
||||
|
||||
/*
|
||||
* all done now, we can release the hold that keeps the buffer
|
||||
* referenced for the entire IO.
|
||||
*/
|
||||
xfs_buf_rele(bp);
|
||||
return error;
|
||||
}
|
||||
|
||||
xfs_caddr_t
|
||||
|
@ -1813,13 +1805,19 @@ __xfs_buf_delwri_submit(
|
|||
blk_start_plug(&plug);
|
||||
list_for_each_entry_safe(bp, n, io_list, b_list) {
|
||||
bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
|
||||
bp->b_flags |= XBF_WRITE;
|
||||
bp->b_flags |= XBF_WRITE | XBF_ASYNC;
|
||||
|
||||
if (!wait) {
|
||||
bp->b_flags |= XBF_ASYNC;
|
||||
/*
|
||||
* We do all IO submission async. This means if we need to wait
|
||||
* for IO completion we need to take an extra reference so the
|
||||
* buffer is still valid on the other side.
|
||||
*/
|
||||
if (wait)
|
||||
xfs_buf_hold(bp);
|
||||
else
|
||||
list_del_init(&bp->b_list);
|
||||
}
|
||||
xfs_bdstrat_cb(bp);
|
||||
|
||||
xfs_buf_submit(bp);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
|
@ -1866,7 +1864,10 @@ xfs_buf_delwri_submit(
|
|||
bp = list_first_entry(&io_list, struct xfs_buf, b_list);
|
||||
|
||||
list_del_init(&bp->b_list);
|
||||
error2 = xfs_buf_iowait(bp);
|
||||
|
||||
/* locking the buffer will wait for async IO completion. */
|
||||
xfs_buf_lock(bp);
|
||||
error2 = bp->b_error;
|
||||
xfs_buf_relse(bp);
|
||||
if (!error)
|
||||
error = error2;
|
||||
|
|
|
@ -158,6 +158,7 @@ typedef struct xfs_buf {
|
|||
struct list_head b_lru; /* lru list */
|
||||
spinlock_t b_lock; /* internal state lock */
|
||||
unsigned int b_state; /* internal state flags */
|
||||
int b_io_error; /* internal IO error state */
|
||||
wait_queue_head_t b_waiters; /* unpin waiters */
|
||||
struct list_head b_list;
|
||||
struct xfs_perag *b_pag; /* contains rbtree root */
|
||||
|
@ -268,9 +269,9 @@ int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
|
|||
|
||||
struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
|
||||
int flags);
|
||||
struct xfs_buf *xfs_buf_read_uncached(struct xfs_buftarg *target,
|
||||
xfs_daddr_t daddr, size_t numblks, int flags,
|
||||
const struct xfs_buf_ops *ops);
|
||||
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
|
||||
size_t numblks, int flags, struct xfs_buf **bpp,
|
||||
const struct xfs_buf_ops *ops);
|
||||
void xfs_buf_hold(struct xfs_buf *bp);
|
||||
|
||||
/* Releasing Buffers */
|
||||
|
@ -286,18 +287,16 @@ extern void xfs_buf_unlock(xfs_buf_t *);
|
|||
|
||||
/* Buffer Read and Write Routines */
|
||||
extern int xfs_bwrite(struct xfs_buf *bp);
|
||||
extern void xfs_buf_ioend(xfs_buf_t *, int);
|
||||
extern void xfs_buf_ioend(struct xfs_buf *bp);
|
||||
extern void xfs_buf_ioerror(xfs_buf_t *, int);
|
||||
extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
|
||||
extern void xfs_buf_iorequest(xfs_buf_t *);
|
||||
extern int xfs_buf_iowait(xfs_buf_t *);
|
||||
extern void xfs_buf_submit(struct xfs_buf *bp);
|
||||
extern int xfs_buf_submit_wait(struct xfs_buf *bp);
|
||||
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
|
||||
xfs_buf_rw_t);
|
||||
#define xfs_buf_zero(bp, off, len) \
|
||||
xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
|
||||
|
||||
extern int xfs_bioerror_relse(struct xfs_buf *);
|
||||
|
||||
/* Buffer Utility Routines */
|
||||
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
|
||||
|
||||
|
|
|
@ -491,7 +491,7 @@ xfs_buf_item_unpin(
|
|||
xfs_buf_ioerror(bp, -EIO);
|
||||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_ioend(bp, 0);
|
||||
xfs_buf_ioend(bp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1081,7 +1081,7 @@ xfs_buf_iodone_callbacks(
|
|||
* a way to shut the filesystem down if the writes keep failing.
|
||||
*
|
||||
* In practice we'll shut the filesystem down soon as non-transient
|
||||
* erorrs tend to affect the whole device and a failing log write
|
||||
* errors tend to affect the whole device and a failing log write
|
||||
* will make us give up. But we really ought to do better here.
|
||||
*/
|
||||
if (XFS_BUF_ISASYNC(bp)) {
|
||||
|
@ -1094,7 +1094,7 @@ xfs_buf_iodone_callbacks(
|
|||
if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
|
||||
bp->b_flags |= XBF_WRITE | XBF_ASYNC |
|
||||
XBF_DONE | XBF_WRITE_FAIL;
|
||||
xfs_buf_iorequest(bp);
|
||||
xfs_buf_submit(bp);
|
||||
} else {
|
||||
xfs_buf_relse(bp);
|
||||
}
|
||||
|
@ -1115,7 +1115,7 @@ xfs_buf_iodone_callbacks(
|
|||
xfs_buf_do_callbacks(bp);
|
||||
bp->b_fspriv = NULL;
|
||||
bp->b_iodone = NULL;
|
||||
xfs_buf_ioend(bp, 0);
|
||||
xfs_buf_ioend(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -172,16 +172,11 @@ xfs_growfs_data_private(
|
|||
if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
|
||||
return error;
|
||||
dpct = pct - mp->m_sb.sb_imax_pct;
|
||||
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
|
||||
error = xfs_buf_read_uncached(mp->m_ddev_targp,
|
||||
XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
|
||||
XFS_FSS_TO_BB(mp, 1), 0, NULL);
|
||||
if (!bp)
|
||||
return -EIO;
|
||||
if (bp->b_error) {
|
||||
error = bp->b_error;
|
||||
xfs_buf_relse(bp);
|
||||
XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
new = nb; /* use new as a temporary here */
|
||||
|
|
|
@ -3062,7 +3062,7 @@ xfs_iflush_cluster(
|
|||
XFS_BUF_UNDONE(bp);
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
xfs_buf_ioend(bp, 0);
|
||||
xfs_buf_ioend(bp);
|
||||
} else {
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_relse(bp);
|
||||
|
|
|
@ -1678,7 +1678,7 @@ xlog_bdstrat(
|
|||
if (iclog->ic_state & XLOG_STATE_IOERROR) {
|
||||
xfs_buf_ioerror(bp, -EIO);
|
||||
xfs_buf_stale(bp);
|
||||
xfs_buf_ioend(bp, 0);
|
||||
xfs_buf_ioend(bp);
|
||||
/*
|
||||
* It would seem logical to return EIO here, but we rely on
|
||||
* the log state machine to propagate I/O errors instead of
|
||||
|
@ -1688,7 +1688,7 @@ xlog_bdstrat(
|
|||
return 0;
|
||||
}
|
||||
|
||||
xfs_buf_iorequest(bp);
|
||||
xfs_buf_submit(bp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -3867,18 +3867,17 @@ xlog_state_ioerror(
|
|||
* This is called from xfs_force_shutdown, when we're forcibly
|
||||
* shutting down the filesystem, typically because of an IO error.
|
||||
* Our main objectives here are to make sure that:
|
||||
* a. the filesystem gets marked 'SHUTDOWN' for all interested
|
||||
* a. if !logerror, flush the logs to disk. Anything modified
|
||||
* after this is ignored.
|
||||
* b. the filesystem gets marked 'SHUTDOWN' for all interested
|
||||
* parties to find out, 'atomically'.
|
||||
* b. those who're sleeping on log reservations, pinned objects and
|
||||
* c. those who're sleeping on log reservations, pinned objects and
|
||||
* other resources get woken up, and be told the bad news.
|
||||
* c. nothing new gets queued up after (a) and (b) are done.
|
||||
* d. if !logerror, flush the iclogs to disk, then seal them off
|
||||
* for business.
|
||||
* d. nothing new gets queued up after (b) and (c) are done.
|
||||
*
|
||||
* Note: for delayed logging the !logerror case needs to flush the regions
|
||||
* held in memory out to the iclogs before flushing them to disk. This needs
|
||||
* to be done before the log is marked as shutdown, otherwise the flush to the
|
||||
* iclogs will fail.
|
||||
* Note: for the !logerror case we need to flush the regions held in memory out
|
||||
* to disk first. This needs to be done before the log is marked as shutdown,
|
||||
* otherwise the iclog writes will fail.
|
||||
*/
|
||||
int
|
||||
xfs_log_force_umount(
|
||||
|
@ -3910,16 +3909,16 @@ xfs_log_force_umount(
|
|||
ASSERT(XLOG_FORCED_SHUTDOWN(log));
|
||||
return 1;
|
||||
}
|
||||
retval = 0;
|
||||
|
||||
/*
|
||||
* Flush the in memory commit item list before marking the log as
|
||||
* being shut down. We need to do it in this order to ensure all the
|
||||
* completed transactions are flushed to disk with the xfs_log_force()
|
||||
* call below.
|
||||
* Flush all the completed transactions to disk before marking the log
|
||||
* as being shut down. We need to do it in this order to ensure that
|
||||
* completed operations are safely on disk before we shut down, and that
|
||||
* we don't have to issue any buffer IO after the shutdown flags are set
|
||||
* to guarantee this.
|
||||
*/
|
||||
if (!logerror)
|
||||
xlog_cil_force(log);
|
||||
_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
|
||||
|
||||
/*
|
||||
* mark the filesystem and the log as in a shutdown state and wake
|
||||
|
@ -3931,18 +3930,11 @@ xfs_log_force_umount(
|
|||
XFS_BUF_DONE(mp->m_sb_bp);
|
||||
|
||||
/*
|
||||
* This flag is sort of redundant because of the mount flag, but
|
||||
* it's good to maintain the separation between the log and the rest
|
||||
* of XFS.
|
||||
* Mark the log and the iclogs with IO error flags to prevent any
|
||||
* further log IO from being issued or completed.
|
||||
*/
|
||||
log->l_flags |= XLOG_IO_ERROR;
|
||||
|
||||
/*
|
||||
* If we hit a log error, we want to mark all the iclogs IOERROR
|
||||
* while we're still holding the loglock.
|
||||
*/
|
||||
if (logerror)
|
||||
retval = xlog_state_ioerror(log);
|
||||
retval = xlog_state_ioerror(log);
|
||||
spin_unlock(&log->l_icloglock);
|
||||
|
||||
/*
|
||||
|
@ -3955,19 +3947,6 @@ xfs_log_force_umount(
|
|||
xlog_grant_head_wake_all(&log->l_reserve_head);
|
||||
xlog_grant_head_wake_all(&log->l_write_head);
|
||||
|
||||
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
|
||||
ASSERT(!logerror);
|
||||
/*
|
||||
* Force the incore logs to disk before shutting the
|
||||
* log down completely.
|
||||
*/
|
||||
_xfs_log_force(mp, XFS_LOG_SYNC, NULL);
|
||||
|
||||
spin_lock(&log->l_icloglock);
|
||||
retval = xlog_state_ioerror(log);
|
||||
spin_unlock(&log->l_icloglock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wake up everybody waiting on xfs_log_force. Wake the CIL push first
|
||||
* as if the log writes were completed. The abort handling in the log
|
||||
|
|
|
@ -193,12 +193,8 @@ xlog_bread_noalign(
|
|||
bp->b_io_length = nbblks;
|
||||
bp->b_error = 0;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(log->l_mp))
|
||||
return -EIO;
|
||||
|
||||
xfs_buf_iorequest(bp);
|
||||
error = xfs_buf_iowait(bp);
|
||||
if (error)
|
||||
error = xfs_buf_submit_wait(bp);
|
||||
if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
return error;
|
||||
}
|
||||
|
@ -378,12 +374,14 @@ xlog_recover_iodone(
|
|||
* We're not going to bother about retrying
|
||||
* this during recovery. One strike!
|
||||
*/
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
xfs_force_shutdown(bp->b_target->bt_mount,
|
||||
SHUTDOWN_META_IO_ERROR);
|
||||
if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
xfs_force_shutdown(bp->b_target->bt_mount,
|
||||
SHUTDOWN_META_IO_ERROR);
|
||||
}
|
||||
}
|
||||
bp->b_iodone = NULL;
|
||||
xfs_buf_ioend(bp, 0);
|
||||
xfs_buf_ioend(bp);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4452,16 +4450,12 @@ xlog_do_recover(
|
|||
XFS_BUF_UNASYNC(bp);
|
||||
bp->b_ops = &xfs_sb_buf_ops;
|
||||
|
||||
if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
|
||||
xfs_buf_relse(bp);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
xfs_buf_iorequest(bp);
|
||||
error = xfs_buf_iowait(bp);
|
||||
error = xfs_buf_submit_wait(bp);
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
ASSERT(0);
|
||||
if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
ASSERT(0);
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
return error;
|
||||
}
|
||||
|
|
|
@ -300,21 +300,15 @@ xfs_readsb(
|
|||
* access to the superblock.
|
||||
*/
|
||||
reread:
|
||||
bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
|
||||
BTOBB(sector_size), 0, buf_ops);
|
||||
if (!bp) {
|
||||
if (loud)
|
||||
xfs_warn(mp, "SB buffer read failed");
|
||||
return -EIO;
|
||||
}
|
||||
if (bp->b_error) {
|
||||
error = bp->b_error;
|
||||
error = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
|
||||
BTOBB(sector_size), 0, &bp, buf_ops);
|
||||
if (error) {
|
||||
if (loud)
|
||||
xfs_warn(mp, "SB validate failed with error %d.", error);
|
||||
/* bad CRC means corrupted metadata */
|
||||
if (error == -EFSBADCRC)
|
||||
error = -EFSCORRUPTED;
|
||||
goto release_buf;
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -544,40 +538,43 @@ xfs_set_inoalignment(xfs_mount_t *mp)
|
|||
* Check that the data (and log if separate) is an ok size.
|
||||
*/
|
||||
STATIC int
|
||||
xfs_check_sizes(xfs_mount_t *mp)
|
||||
xfs_check_sizes(
|
||||
struct xfs_mount *mp)
|
||||
{
|
||||
xfs_buf_t *bp;
|
||||
struct xfs_buf *bp;
|
||||
xfs_daddr_t d;
|
||||
int error;
|
||||
|
||||
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
|
||||
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
|
||||
xfs_warn(mp, "filesystem size mismatch detected");
|
||||
return -EFBIG;
|
||||
}
|
||||
bp = xfs_buf_read_uncached(mp->m_ddev_targp,
|
||||
error = xfs_buf_read_uncached(mp->m_ddev_targp,
|
||||
d - XFS_FSS_TO_BB(mp, 1),
|
||||
XFS_FSS_TO_BB(mp, 1), 0, NULL);
|
||||
if (!bp) {
|
||||
XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
|
||||
if (error) {
|
||||
xfs_warn(mp, "last sector read failed");
|
||||
return -EIO;
|
||||
return error;
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
if (mp->m_logdev_targp != mp->m_ddev_targp) {
|
||||
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
|
||||
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
|
||||
xfs_warn(mp, "log size mismatch detected");
|
||||
return -EFBIG;
|
||||
}
|
||||
bp = xfs_buf_read_uncached(mp->m_logdev_targp,
|
||||
d - XFS_FSB_TO_BB(mp, 1),
|
||||
XFS_FSB_TO_BB(mp, 1), 0, NULL);
|
||||
if (!bp) {
|
||||
xfs_warn(mp, "log device read failed");
|
||||
return -EIO;
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
if (mp->m_logdev_targp == mp->m_ddev_targp)
|
||||
return 0;
|
||||
|
||||
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
|
||||
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
|
||||
xfs_warn(mp, "log size mismatch detected");
|
||||
return -EFBIG;
|
||||
}
|
||||
error = xfs_buf_read_uncached(mp->m_logdev_targp,
|
||||
d - XFS_FSB_TO_BB(mp, 1),
|
||||
XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
|
||||
if (error) {
|
||||
xfs_warn(mp, "log device read failed");
|
||||
return error;
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -921,16 +921,11 @@ xfs_growfs_rt(
|
|||
/*
|
||||
* Read in the last block of the device, make sure it exists.
|
||||
*/
|
||||
bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
|
||||
error = xfs_buf_read_uncached(mp->m_rtdev_targp,
|
||||
XFS_FSB_TO_BB(mp, nrblocks - 1),
|
||||
XFS_FSB_TO_BB(mp, 1), 0, NULL);
|
||||
if (!bp)
|
||||
return -EIO;
|
||||
if (bp->b_error) {
|
||||
error = bp->b_error;
|
||||
xfs_buf_relse(bp);
|
||||
XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
|
||||
/*
|
||||
|
@ -1184,11 +1179,12 @@ xfs_rtallocate_extent(
|
|||
*/
|
||||
int /* error */
|
||||
xfs_rtmount_init(
|
||||
xfs_mount_t *mp) /* file system mount structure */
|
||||
struct xfs_mount *mp) /* file system mount structure */
|
||||
{
|
||||
xfs_buf_t *bp; /* buffer for last block of subvolume */
|
||||
xfs_daddr_t d; /* address of last block of subvolume */
|
||||
xfs_sb_t *sbp; /* filesystem superblock copy in mount */
|
||||
struct xfs_buf *bp; /* buffer for last block of subvolume */
|
||||
struct xfs_sb *sbp; /* filesystem superblock copy in mount */
|
||||
xfs_daddr_t d; /* address of last block of subvolume */
|
||||
int error;
|
||||
|
||||
sbp = &mp->m_sb;
|
||||
if (sbp->sb_rblocks == 0)
|
||||
|
@ -1214,14 +1210,12 @@ xfs_rtmount_init(
|
|||
(unsigned long long) mp->m_sb.sb_rblocks);
|
||||
return -EFBIG;
|
||||
}
|
||||
bp = xfs_buf_read_uncached(mp->m_rtdev_targp,
|
||||
error = xfs_buf_read_uncached(mp->m_rtdev_targp,
|
||||
d - XFS_FSB_TO_BB(mp, 1),
|
||||
XFS_FSB_TO_BB(mp, 1), 0, NULL);
|
||||
if (!bp || bp->b_error) {
|
||||
XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
|
||||
if (error) {
|
||||
xfs_warn(mp, "realtime device size check failed");
|
||||
if (bp)
|
||||
xfs_buf_relse(bp);
|
||||
return -EIO;
|
||||
return error;
|
||||
}
|
||||
xfs_buf_relse(bp);
|
||||
return 0;
|
||||
|
|
|
@ -349,7 +349,8 @@ DEFINE_BUF_EVENT(xfs_buf_free);
|
|||
DEFINE_BUF_EVENT(xfs_buf_hold);
|
||||
DEFINE_BUF_EVENT(xfs_buf_rele);
|
||||
DEFINE_BUF_EVENT(xfs_buf_iodone);
|
||||
DEFINE_BUF_EVENT(xfs_buf_iorequest);
|
||||
DEFINE_BUF_EVENT(xfs_buf_submit);
|
||||
DEFINE_BUF_EVENT(xfs_buf_submit_wait);
|
||||
DEFINE_BUF_EVENT(xfs_buf_bawrite);
|
||||
DEFINE_BUF_EVENT(xfs_buf_lock);
|
||||
DEFINE_BUF_EVENT(xfs_buf_lock_done);
|
||||
|
|
|
@ -318,20 +318,10 @@ xfs_trans_read_buf_map(
|
|||
XFS_BUF_READ(bp);
|
||||
bp->b_ops = ops;
|
||||
|
||||
/*
|
||||
* XXX(hch): clean up the error handling here to be less
|
||||
* of a mess..
|
||||
*/
|
||||
if (XFS_FORCED_SHUTDOWN(mp)) {
|
||||
trace_xfs_bdstrat_shut(bp, _RET_IP_);
|
||||
xfs_bioerror_relse(bp);
|
||||
} else {
|
||||
xfs_buf_iorequest(bp);
|
||||
}
|
||||
|
||||
error = xfs_buf_iowait(bp);
|
||||
error = xfs_buf_submit_wait(bp);
|
||||
if (error) {
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
if (!XFS_FORCED_SHUTDOWN(mp))
|
||||
xfs_buf_ioerror_alert(bp, __func__);
|
||||
xfs_buf_relse(bp);
|
||||
/*
|
||||
* We can gracefully recover from most read
|
||||
|
|
Loading…
Reference in a new issue