ext4: fix potential race between online resizing and write operations

commit 1d0c3924a92e69bfa91163bda83c12a994b4d106 upstream.

During an online resize an array of pointers to buffer heads gets
replaced so it can get enlarged.  If there is a racing block
allocation or deallocation which uses the old array, and the old array
has gotten reused this can lead to a GPF or some other random kernel
memory getting modified.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206443
Link: https://lore.kernel.org/r/20200221053458.730016-2-tytso@mit.edu
Reported-by: Suraj Jitindar Singh <surajjs@amazon.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Theodore Ts'o 2020-02-15 16:40:37 -05:00 committed by Greg Kroah-Hartman
parent 38884609b8
commit cc9948abe4
4 changed files with 97 additions and 25 deletions

View file

@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
ext4_group_t ngroups = ext4_get_groups_count(sb);
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct buffer_head *bh_p;
if (block_group >= ngroups) {
ext4_error(sb, "block_group >= groups_count - block_group = %u,"
@ -280,7 +281,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
/*
* sbi_array_rcu_deref returns with rcu unlocked, this is ok since
* the pointer being dereferenced won't be dereferenced again. By
* looking at the usage in add_new_gdb() the value isn't modified,
* just the pointer, and so it remains valid.
*/
if (!bh_p) {
ext4_error(sb, "Group descriptor not loaded - "
"block_group = %u, group_desc = %u, desc = %u",
block_group, group_desc, offset);
@ -288,10 +296,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
}
desc = (struct ext4_group_desc *)(
(__u8 *)sbi->s_group_desc[group_desc]->b_data +
(__u8 *)bh_p->b_data +
offset * EXT4_DESC_SIZE(sb));
if (bh)
*bh = sbi->s_group_desc[group_desc];
*bh = bh_p;
return desc;
}

View file

@ -1372,7 +1372,7 @@ struct ext4_sb_info {
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
struct buffer_head **s_group_desc;
struct buffer_head * __rcu *s_group_desc;
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
@ -1541,6 +1541,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}
/*
* Returns: sbi->field[index]
* Used to access an array element from the following sbi fields which require
* rcu protection to avoid dereferencing an invalid pointer due to reassignment
* - s_group_desc
* - s_group_info
* - s_flex_group
*/
#define sbi_array_rcu_deref(sbi, field, index) \
({ \
typeof(*((sbi)->field)) _v; \
rcu_read_lock(); \
_v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \
rcu_read_unlock(); \
_v; \
})
/*
* Inode dynamic state flags
*/
@ -2564,6 +2581,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
extern bool ext4_empty_dir(struct inode *inode);
/* resize.c */
extern void ext4_kvfree_array_rcu(void *to_free);
extern int ext4_group_add(struct super_block *sb,
struct ext4_new_group_data *input);
extern int ext4_group_extend(struct super_block *sb,

View file

@ -17,6 +17,33 @@
#include "ext4_jbd2.h"
struct ext4_rcu_ptr {
struct rcu_head rcu;
void *ptr;
};
static void ext4_rcu_ptr_callback(struct rcu_head *head)
{
struct ext4_rcu_ptr *ptr;
ptr = container_of(head, struct ext4_rcu_ptr, rcu);
kvfree(ptr->ptr);
kfree(ptr);
}
void ext4_kvfree_array_rcu(void *to_free)
{
struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
if (ptr) {
ptr->ptr = to_free;
call_rcu(&ptr->rcu, ext4_rcu_ptr_callback);
return;
}
synchronize_rcu();
kvfree(to_free);
}
int ext4_resize_begin(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@ -560,8 +587,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
brelse(gdb);
goto out;
}
memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
gdb->b_size);
memcpy(gdb->b_data, sbi_array_rcu_deref(sbi,
s_group_desc, j)->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
err = ext4_handle_dirty_metadata(handle, NULL, gdb);
@ -879,13 +906,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
}
brelse(dind);
o_group_desc = EXT4_SB(sb)->s_group_desc;
rcu_read_lock();
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
ext4_kvfree_array_rcu(o_group_desc);
le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
err = ext4_handle_dirty_super(handle, sb);
@ -929,9 +958,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
o_group_desc = EXT4_SB(sb)->s_group_desc;
rcu_read_lock();
o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc);
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
rcu_read_unlock();
n_group_desc[gdb_num] = gdb_bh;
BUFFER_TRACE(gdb_bh, "get_write_access");
@ -942,9 +973,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
return err;
}
EXT4_SB(sb)->s_group_desc = n_group_desc;
rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc);
EXT4_SB(sb)->s_gdb_count++;
kvfree(o_group_desc);
ext4_kvfree_array_rcu(o_group_desc);
return err;
}
@ -1210,7 +1241,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
* use non-sparse filesystems anymore. This is already checked above.
*/
if (gdb_off) {
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
BUFFER_TRACE(gdb_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, gdb_bh);
@ -1292,7 +1324,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
/*
* get_write_access() has been called on gdb_bh by ext4_add_new_desc().
*/
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num);
/* Update group descriptor block for new group */
gdp = (struct ext4_group_desc *)(gdb_bh->b_data +
gdb_off * EXT4_DESC_SIZE(sb));
@ -1519,7 +1551,8 @@ static int ext4_flex_group_add(struct super_block *sb,
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
gdb_bh = sbi->s_group_desc[gdb_num];
gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc,
gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,

View file

@ -969,6 +969,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct buffer_head **group_desc;
int aborted = 0;
int i, err;
@ -999,9 +1000,12 @@ static void ext4_put_super(struct super_block *sb)
if (!sb_rdonly(sb))
ext4_commit_super(sb, 1);
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kvfree(sbi->s_group_desc);
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@ -3548,7 +3552,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh;
struct buffer_head *bh, **group_desc;
struct ext4_super_block *es = NULL;
struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
ext4_fsblk_t block;
@ -4166,9 +4170,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
sbi->s_group_desc = kvmalloc_array(db_count,
sizeof(struct buffer_head *),
GFP_KERNEL);
rcu_assign_pointer(sbi->s_group_desc,
kvmalloc_array(db_count,
sizeof(struct buffer_head *),
GFP_KERNEL));
if (sbi->s_group_desc == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory");
ret = -ENOMEM;
@ -4184,14 +4189,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
for (i = 0; i < db_count; i++) {
struct buffer_head *bh;
block = descriptor_loc(sb, logical_sb_block, i);
sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
if (!sbi->s_group_desc[i]) {
bh = sb_bread_unmovable(sb, block);
if (!bh) {
ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i);
db_count = i;
goto failed_mount2;
}
rcu_read_lock();
rcu_dereference(sbi->s_group_desc)[i] = bh;
rcu_read_unlock();
}
sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
@ -4588,9 +4598,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
rcu_read_lock();
group_desc = rcu_dereference(sbi->s_group_desc);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kvfree(sbi->s_group_desc);
brelse(group_desc[i]);
kvfree(group_desc);
rcu_read_unlock();
failed_mount:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);