xfs: validate metadata LSNs against log on v5 superblocks

Since the onset of v5 superblocks, the LSN of the last modification has
been included in a variety of on-disk data structures. This LSN is used
to provide log recovery ordering guarantees (e.g., to ensure an older
log recovery item is not replayed over a newer target data structure).

While this works correctly from the point a filesystem is formatted and
mounted, userspace tools have some problematic behaviors that defeat
this mechanism. For example, xfs_repair historically zeroes out the log
unconditionally (regardless of whether corruption is detected). If this
occurs, the LSN of the filesystem is reset and the log is now in a
problematic state with respect to on-disk metadata structures that might
have a larger LSN. Until either the log catches up to the highest
previously used metadata LSN or each affected data structure is modified
and written out without incident (which resets the metadata LSN), log
recovery is susceptible to filesystem corruption.

This problem is ultimately addressed and repaired in the associated
userspace tools. The kernel is still responsible to detect the problem
and notify the user that something is wrong. Check the superblock LSN at
mount time and fail the mount if it is invalid. From that point on,
trigger verifier failure on any metadata I/O where an invalid LSN is
detected. This results in a filesystem shutdown and guarantees that we
do not log metadata changes with invalid LSNs on disk. Since this is a
known issue with a known recovery path, present a warning to instruct
the user how to recover.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
Brian Foster 2015-10-12 15:59:25 +11:00 committed by Dave Chinner
parent b7cdc66be5
commit a45086e27d
15 changed files with 180 additions and 10 deletions

View file

@ -482,7 +482,9 @@ xfs_agfl_verify(
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return false;
}
return true;
return xfs_log_check_lsn(mp,
be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
}
static void
@ -2259,9 +2261,13 @@ xfs_agf_verify(
{
struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
return false;
if (!xfs_log_check_lsn(mp,
be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
return false;
}
if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&

View file

@ -41,6 +41,7 @@
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
#include "xfs_dir2.h"
#include "xfs_log.h"
/*
@ -266,6 +267,8 @@ xfs_attr3_leaf_verify(
return false;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
return false;
} else {
if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
return false;

View file

@ -32,6 +32,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_alloc.h"
#include "xfs_log.h"
/*
* Cursor allocation zone.
@ -243,8 +244,14 @@ bool
xfs_btree_lblock_verify_crc(
struct xfs_buf *bp)
{
if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_mount *mp = bp->b_target->bt_mount;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}
return true;
}
@ -275,8 +282,14 @@ bool
xfs_btree_sblock_verify_crc(
struct xfs_buf *bp)
{
if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_mount *mp = bp->b_target->bt_mount;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
return false;
return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
return true;
}

View file

@ -39,6 +39,7 @@
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
/*
* xfs_da_btree.c
@ -150,6 +151,8 @@ xfs_da3_node_verify(
return false;
if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
return false;
} else {
if (ichdr.magic != XFS_DA_NODE_MAGIC)
return false;
@ -322,6 +325,7 @@ xfs_da3_node_create(
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
ichdr.magic = XFS_DA3_NODE_MAGIC;
hdr3->info.blkno = cpu_to_be64(bp->b_bn);
hdr3->info.owner = cpu_to_be64(args->dp->i_ino);

View file

@ -34,6 +34,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_log.h"
/*
* Local function prototypes.
@ -71,6 +72,8 @@ xfs_dir3_block_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
return false;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
return false;

View file

@ -31,6 +31,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
#include "xfs_log.h"
/*
* Check the consistency of the data block.
@ -224,6 +225,8 @@ xfs_dir3_data_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
return false;
} else {
if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
return false;

View file

@ -33,6 +33,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
#include "xfs_log.h"
/*
* Local function declarations.
@ -164,6 +165,8 @@ xfs_dir3_leaf_verify(
return false;
if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
return false;
} else {
if (leaf->hdr.info.magic != cpu_to_be16(magic))
return false;

View file

@ -33,6 +33,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_cksum.h"
#include "xfs_log.h"
/*
* Function declarations.
@ -97,6 +98,8 @@ xfs_dir3_free_verify(
return false;
if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
return false;
} else {
if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
return false;

View file

@ -38,6 +38,7 @@
#include "xfs_icreate_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_log.h"
/*
@ -2500,9 +2501,14 @@ xfs_agi_verify(
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
return false;
if (!xfs_log_check_lsn(mp,
be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
return false;
}
/*
* Validate the magic number of the agi block.
*/

View file

@ -35,6 +35,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_log.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@ -163,6 +164,15 @@ xfs_mount_validate_sb(
"Filesystem can not be safely mounted by this kernel.");
return -EINVAL;
}
} else if (xfs_sb_version_hascrc(sbp)) {
/*
* We can't read verify the sb LSN because the read verifier is
* called before the log is allocated and processed. We know the
* log is set up before write verifier (!check_version) calls,
* so just check it here.
*/
if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
return -EFSCORRUPTED;
}
if (xfs_sb_version_has_pquotino(sbp)) {

View file

@ -31,6 +31,7 @@
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
/*
@ -60,6 +61,7 @@ xfs_symlink_hdr_set(
if (!xfs_sb_version_hascrc(&mp->m_sb))
return 0;
memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
dsl->sl_offset = cpu_to_be32(offset);
dsl->sl_bytes = cpu_to_be32(size);
@ -116,6 +118,8 @@ xfs_symlink_verify(
return false;
if (dsl->sl_owner == 0)
return false;
if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
return false;
return true;
}

View file

@ -3165,11 +3165,19 @@ xlog_state_switch_iclogs(
}
if (log->l_curr_block >= log->l_logBBsize) {
/*
* Rewind the current block before the cycle is bumped to make
* sure that the combined LSN never transiently moves forward
* when the log wraps to the next cycle. This is to support the
* unlocked sample of these fields from xlog_valid_lsn(). Most
* other cases should acquire l_icloglock.
*/
log->l_curr_block -= log->l_logBBsize;
ASSERT(log->l_curr_block >= 0);
smp_wmb();
log->l_curr_cycle++;
if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
log->l_curr_cycle++;
log->l_curr_block -= log->l_logBBsize;
ASSERT(log->l_curr_block >= 0);
}
ASSERT(iclog == log->l_iclog);
log->l_iclog = iclog->ic_next;
@ -4023,3 +4031,45 @@ xlog_iclogs_empty(
return 1;
}
/*
* Verify that an LSN stamped into a piece of metadata is valid. This is
* intended for use in read verifiers on v5 superblocks.
*/
bool
xfs_log_check_lsn(
struct xfs_mount *mp,
xfs_lsn_t lsn)
{
struct xlog *log = mp->m_log;
bool valid;
/*
* norecovery mode skips mount-time log processing and unconditionally
* resets the in-core LSN. We can't validate in this mode, but
* modifications are not allowed anyways so just return true.
*/
if (mp->m_flags & XFS_MOUNT_NORECOVERY)
return true;
/*
* Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
* handled by recovery and thus safe to ignore here.
*/
if (lsn == NULLCOMMITLSN)
return true;
valid = xlog_valid_lsn(mp->m_log, lsn);
/* warn the user about what's gone wrong before verifier failure */
if (!valid) {
spin_lock(&log->l_icloglock);
xfs_warn(mp,
"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
"Please unmount and run xfs_repair (>= v4.3) to resolve.",
CYCLE_LSN(lsn), BLOCK_LSN(lsn),
log->l_curr_cycle, log->l_curr_block);
spin_unlock(&log->l_icloglock);
}
return valid;
}

View file

@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
void xfs_log_worker(struct work_struct *work);
void xfs_log_quiesce(struct xfs_mount *mp);
bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
#endif /* __XFS_LOG_H__ */

View file

@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
remove_wait_queue(wq, &wait);
}
/*
* The LSN is valid so long as it is behind the current LSN. If it isn't, this
* means that the next log record that includes this metadata could have a
* smaller LSN. In turn, this means that the modification in the log would not
* replay.
*/
static inline bool
xlog_valid_lsn(
struct xlog *log,
xfs_lsn_t lsn)
{
int cur_cycle;
int cur_block;
bool valid = true;
/*
* First, sample the current lsn without locking to avoid added
* contention from metadata I/O. The current cycle and block are updated
* (in xlog_state_switch_iclogs()) and read here in a particular order
* to avoid false negatives (e.g., thinking the metadata LSN is valid
* when it is not).
*
* The current block is always rewound before the cycle is bumped in
* xlog_state_switch_iclogs() to ensure the current LSN is never seen in
* a transiently forward state. Instead, we can see the LSN in a
* transiently behind state if we happen to race with a cycle wrap.
*/
cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
smp_rmb();
cur_block = ACCESS_ONCE(log->l_curr_block);
if ((CYCLE_LSN(lsn) > cur_cycle) ||
(CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
/*
* If the metadata LSN appears invalid, it's possible the check
* above raced with a wrap to the next log cycle. Grab the lock
* to check for sure.
*/
spin_lock(&log->l_icloglock);
cur_cycle = log->l_curr_cycle;
cur_block = log->l_curr_block;
spin_unlock(&log->l_icloglock);
if ((CYCLE_LSN(lsn) > cur_cycle) ||
(CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
valid = false;
}
return valid;
}
#endif /* __XFS_LOG_PRIV_H__ */

View file

@ -4609,9 +4609,19 @@ xlog_recover(
int error;
/* find the tail of the log */
if ((error = xlog_find_tail(log, &head_blk, &tail_blk)))
error = xlog_find_tail(log, &head_blk, &tail_blk);
if (error)
return error;
/*
* The superblock was read before the log was available and thus the LSN
* could not be verified. Check the superblock LSN against the current
* LSN now that it's known.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
!xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
return -EINVAL;
if (tail_blk != head_blk) {
/* There used to be a comment here:
*