Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (61 commits)
  jbd2: Add MAINTAINERS entry
  jbd2: fix a potential leak of a journal_head on an error path
  ext4: teach ext4_ext_split to calculate extents efficiently
  ext4: Convert ext4 to new truncate calling convention
  ext4: do not normalize block requests from fallocate()
  ext4: enable "punch hole" functionality
  ext4: add "punch hole" flag to ext4_map_blocks()
  ext4: punch out extents
  ext4: add new function ext4_block_zero_page_range()
  ext4: add flag to ext4_has_free_blocks
  ext4: reserve inodes and feature code for 'quota' feature
  ext4: add support for multiple mount protection
  ext4: ensure f_bfree returned by ext4_statfs() is non-negative
  ext4: protect bb_first_free in ext4_trim_all_free() with group lock
  ext4: only load buddy bitmap in ext4_trim_fs() when it is needed
  jbd2: Fix comment to match the code in jbd2__journal_start()
  ext4: fix waiting and sending of a barrier in ext4_sync_file()
  jbd2: Add function jbd2_trans_will_send_data_barrier()
  jbd2: fix sending of data flush on journal commit
  ext4: fix ext4_ext_fiemap_cb() to handle blocks before request range correctly
  ...
commit 35806b4f7c
24 changed files with 2018 additions and 1102 deletions
Documentation/filesystems/ext4.txt
@@ -226,10 +226,6 @@ acl			Enables POSIX Access Control Lists support.
 noacl			This option disables POSIX Access Control List
			support.

-reservation
-
-noreservation
-
 bsddf		(*)	Make 'df' act like BSD.
 minixdf		Make 'df' act like Minix.
MAINTAINERS (13 changed lines)
@@ -3572,9 +3572,16 @@ M:	Andrew Morton <akpm@linux-foundation.org>
 M:	Jan Kara <jack@suse.cz>
 L:	linux-ext4@vger.kernel.org
 S:	Maintained
-F:	fs/jbd*/
-F:	include/linux/ext*jbd*.h
-F:	include/linux/jbd*.h
+F:	fs/jbd/
+F:	include/linux/ext3_jbd.h
+F:	include/linux/jbd.h
+
+JOURNALLING LAYER FOR BLOCK DEVICES (JBD2)
+M:	"Theodore Ts'o" <tytso@mit.edu>
+L:	linux-ext4@vger.kernel.org
+S:	Maintained
+F:	fs/jbd2/
+F:	include/linux/jbd2.h

 JSM Neo PCI based serial card
 M:	Breno Leitao <leitao@linux.vnet.ibm.com>
fs/ext4/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o

 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
-		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
+		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
+		mmp.o

 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
fs/ext4/balloc.c (146 changed lines)
@@ -361,130 +361,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 	return bh;
 }

-/**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
- * @handle:			handle to this transaction
- * @sb:				super block
- * @block:			start physcial block to add to the block group
- * @count:			number of blocks to free
- *
- * This marks the blocks as free in the bitmap. We ask the
- * mballoc to reload the buddy after this by setting group
- * EXT4_GROUP_INFO_NEED_INIT_BIT flag
- */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-			 ext4_fsblk_t block, unsigned long count)
-{
-	struct buffer_head *bitmap_bh = NULL;
-	struct buffer_head *gd_bh;
-	ext4_group_t block_group;
-	ext4_grpblk_t bit;
-	unsigned int i;
-	struct ext4_group_desc *desc;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	int err = 0, ret, blk_free_count;
-	ext4_grpblk_t blocks_freed;
-	struct ext4_group_info *grp;
-
-	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
-
-	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
-	grp = ext4_get_group_info(sb, block_group);
-	/*
-	 * Check to see if we are freeing blocks across a group
-	 * boundary.
-	 */
-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-		goto error_return;
-	}
-	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-	if (!bitmap_bh)
-		goto error_return;
-	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
-	if (!desc)
-		goto error_return;
-
-	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
-	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
-	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
-	    in_range(block + count - 1, ext4_inode_table(sb, desc),
-		     sbi->s_itb_per_group)) {
-		ext4_error(sb, "Adding blocks in system zones - "
-			   "Block = %llu, count = %lu",
-			   block, count);
-		goto error_return;
-	}
-
-	/*
-	 * We are about to add blocks to the bitmap,
-	 * so we need undo access.
-	 */
-	BUFFER_TRACE(bitmap_bh, "getting undo access");
-	err = ext4_journal_get_undo_access(handle, bitmap_bh);
-	if (err)
-		goto error_return;
-
-	/*
-	 * We are about to modify some metadata.  Call the journal APIs
-	 * to unshare ->b_data if a currently-committing transaction is
-	 * using it
-	 */
-	BUFFER_TRACE(gd_bh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, gd_bh);
-	if (err)
-		goto error_return;
-	/*
-	 * make sure we don't allow a parallel init on other groups in the
-	 * same buddy cache
-	 */
-	down_write(&grp->alloc_sem);
-	for (i = 0, blocks_freed = 0; i < count; i++) {
-		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
-						bit + i, bitmap_bh->b_data)) {
-			ext4_error(sb, "bit already cleared for block %llu",
-				   (ext4_fsblk_t)(block + i));
-			BUFFER_TRACE(bitmap_bh, "bit already cleared");
-		} else {
-			blocks_freed++;
-		}
-	}
-	ext4_lock_group(sb, block_group);
-	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
-	ext4_free_blks_set(sb, desc, blk_free_count);
-	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
-	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
-
-	if (sbi->s_log_groups_per_flex) {
-		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(blocks_freed,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
-	}
-	/*
-	 * request to reload the buddy with the
-	 * new bitmap information
-	 */
-	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-	grp->bb_free += blocks_freed;
-	up_write(&grp->alloc_sem);
-
-	/* We dirtied the bitmap block */
-	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
-	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
-
-	/* And the group descriptor block */
-	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
-	ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
-	if (!err)
-		err = ret;
-
-error_return:
-	brelse(bitmap_bh);
-	ext4_std_error(sb, err);
-	return;
-}
-
 /**
  * ext4_has_free_blocks()
  * @sbi:	in-core super block structure.
@@ -493,7 +369,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
  * Check if filesystem has nblocks free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi,
+				s64 nblocks, unsigned int flags)
 {
 	s64 free_blocks, dirty_blocks, root_blocks;
 	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
@@ -507,11 +384,6 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
						EXT4_FREEBLOCKS_WATERMARK) {
 		free_blocks  = percpu_counter_sum_positive(fbc);
 		dirty_blocks = percpu_counter_sum_positive(dbc);
-		if (dirty_blocks < 0) {
-			printk(KERN_CRIT "Dirty block accounting "
-					"went wrong %lld\n",
-					(long long)dirty_blocks);
-		}
 	}
 	/* Check whether we have space after
	 *  accounting for current dirty blocks & root reserved blocks.
@@ -522,7 +394,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 	/* Hm, nope.  Are (enough) root reserved blocks available? */
 	if (sbi->s_resuid == current_fsuid() ||
	    ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) ||
-	    capable(CAP_SYS_RESOURCE)) {
+	    capable(CAP_SYS_RESOURCE) ||
+		(flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+
 		if (free_blocks >= (nblocks + dirty_blocks))
 			return 1;
 	}
@@ -531,9 +405,9 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 }

 int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
-						s64 nblocks)
+			   s64 nblocks, unsigned int flags)
 {
-	if (ext4_has_free_blocks(sbi, nblocks)) {
+	if (ext4_has_free_blocks(sbi, nblocks, flags)) {
 		percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks);
 		return 0;
 	} else
@@ -554,7 +428,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
 */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-	if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
+	if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) ||
	    (*retries)++ > 3 ||
	    !EXT4_SB(sb)->s_journal)
 		return 0;
@@ -577,7 +451,8 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 * error stores in errp pointer
 */
 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-		ext4_fsblk_t goal, unsigned long *count, int *errp)
+				  ext4_fsblk_t goal, unsigned int flags,
+				  unsigned long *count, int *errp)
 {
 	struct ext4_allocation_request ar;
 	ext4_fsblk_t ret;
@@ -587,6 +462,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	ar.inode = inode;
 	ar.goal = goal;
 	ar.len = count ? *count : 1;
+	ar.flags = flags;

 	ret = ext4_mb_new_blocks(handle, &ar, errp);
 	if (count)
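The new flags argument threads allocation policy from each caller down to ext4_has_free_blocks(). As a minimal sketch (the caller below is hypothetical; the constant and the two functions are the ones introduced in this diff), a caller that must not fail for lack of ordinary free blocks could pass EXT4_MB_USE_ROOT_BLOCKS:

/*
 * Sketch only: shows how a caller could opt in to the root-reserved
 * pool. With EXT4_MB_USE_ROOT_BLOCKS set, ext4_has_free_blocks() lets
 * the request dip into the s_resuid/s_resgid reserved blocks even for
 * an unprivileged owner. Handle/inode setup is assumed, not shown.
 */
static ext4_fsblk_t alloc_meta_using_root_pool(handle_t *handle,
					       struct inode *inode,
					       ext4_fsblk_t goal)
{
	unsigned long count = 1;
	int err;

	return ext4_new_meta_blocks(handle, inode, goal,
				    EXT4_MB_USE_ROOT_BLOCKS, &count, &err);
}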
fs/ext4/ext4.h (127 changed lines)
@@ -108,7 +108,8 @@ typedef unsigned int ext4_group_t;
 #define EXT4_MB_DELALLOC_RESERVED	0x0400
 /* We are doing stream allocation */
 #define EXT4_MB_STREAM_ALLOC		0x0800
-
+/* Use reserved root blocks if needed */
+#define EXT4_MB_USE_ROOT_BLOCKS	0x1000

 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -209,6 +210,8 @@ struct ext4_io_submit {
 */
 #define	EXT4_BAD_INO		 1	/* Bad blocks inode */
 #define EXT4_ROOT_INO		 2	/* Root inode */
+#define EXT4_USR_QUOTA_INO	 3	/* User quota inode */
+#define EXT4_GRP_QUOTA_INO	 4	/* Group quota inode */
 #define EXT4_BOOT_LOADER_INO	 5	/* Boot loader inode */
 #define EXT4_UNDEL_DIR_INO	 6	/* Undelete directory inode */
 #define EXT4_RESIZE_INO		 7	/* Reserved group descriptors inode */
@@ -512,6 +515,10 @@ struct ext4_new_group_data {
	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT	(EXT4_GET_BLOCKS_CONVERT|\
					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+	/* Punch out blocks of an extent */
+#define EXT4_GET_BLOCKS_PUNCH_OUT_EXT		0x0020
+	/* Don't normalize allocation size (used for fallocate) */
+#define EXT4_GET_BLOCKS_NO_NORMALIZE		0x0040

 /*
 * Flags used by ext4_free_blocks
@@ -1028,7 +1035,7 @@ struct ext4_super_block {
	__le16	s_want_extra_isize; 	/* New inodes should reserve # bytes */
	__le32	s_flags;		/* Miscellaneous flags */
	__le16  s_raid_stride;		/* RAID stride */
-	__le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+	__le16  s_mmp_update_interval;  /* # seconds to wait in MMP checking */
	__le64  s_mmp_block;            /* Block for multi-mount protection */
	__le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
	__u8	s_log_groups_per_flex;  /* FLEX_BG group size */
@@ -1144,6 +1151,9 @@ struct ext4_sb_info {
	unsigned long s_ext_blocks;
	unsigned long s_ext_extents;
 #endif
+	/* ext4 extent cache stats */
+	unsigned long extent_cache_hits;
+	unsigned long extent_cache_misses;

	/* for buddy allocator */
	struct ext4_group_info ***s_group_info;
@@ -1201,6 +1211,9 @@ struct ext4_sb_info {
	struct ext4_li_request *s_li_request;
	/* Wait multiplier for lazy initialization thread */
	unsigned int s_li_wait_mult;
+
+	/* Kernel thread for multiple mount protection */
+	struct task_struct *s_mmp_tsk;
 };

 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1338,6 +1351,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM		0x0010
 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK	0x0020
 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE	0x0040
+#define EXT4_FEATURE_RO_COMPAT_QUOTA		0x0100

 #define EXT4_FEATURE_INCOMPAT_COMPRESSION	0x0001
 #define EXT4_FEATURE_INCOMPAT_FILETYPE		0x0002
@@ -1351,13 +1365,29 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
 #define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */

+#define EXT2_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT2_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
+#define EXT3_FEATURE_COMPAT_SUPP	EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT3_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
+					 EXT4_FEATURE_INCOMPAT_RECOVER| \
+					 EXT4_FEATURE_INCOMPAT_META_BG)
+#define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
+
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
					 EXT4_FEATURE_INCOMPAT_RECOVER| \
					 EXT4_FEATURE_INCOMPAT_META_BG| \
					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
					 EXT4_FEATURE_INCOMPAT_64BIT| \
-					 EXT4_FEATURE_INCOMPAT_FLEX_BG)
+					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
+					 EXT4_FEATURE_INCOMPAT_MMP)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
					 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -1590,12 +1620,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 */
 struct ext4_lazy_init {
	unsigned long		li_state;
-
-	wait_queue_head_t	li_wait_daemon;
-	wait_queue_head_t	li_wait_task;
-	struct timer_list	li_timer;
-	struct task_struct	*li_task;
-
	struct list_head	li_request_list;
	struct mutex		li_list_mtx;
 };
@@ -1614,6 +1638,67 @@ struct ext4_features {
	struct completion f_kobj_unregister;
 };

+/*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the block number saved in the s_mmp_block field in the
+ * superblock. Programs that check MMP should assume that if
+ * SEQ_FSCK (or any unknown code above SEQ_MAX) is present then it is NOT safe
+ * to use the filesystem, regardless of how old the timestamp is.
+ */
+#define EXT4_MMP_MAGIC     0x004D4D50U /* ASCII for MMP */
+#define EXT4_MMP_SEQ_CLEAN 0xFF4D4D50U /* mmp_seq value for clean unmount */
+#define EXT4_MMP_SEQ_FSCK  0xE24D4D50U /* mmp_seq value when being fscked */
+#define EXT4_MMP_SEQ_MAX   0xE24D4D4FU /* maximum valid mmp_seq value */
+
+struct mmp_struct {
+	__le32	mmp_magic;		/* Magic number for MMP */
+	__le32	mmp_seq;		/* Sequence no. updated periodically */
+
+	/*
+	 * mmp_time, mmp_nodename & mmp_bdevname are only used for information
+	 * purposes and do not affect the correctness of the algorithm
+	 */
+	__le64	mmp_time;		/* Time last updated */
+	char	mmp_nodename[64];	/* Node which last updated MMP block */
+	char	mmp_bdevname[32];	/* Bdev which last updated MMP block */
+
+	/*
+	 * mmp_check_interval is used to verify if the MMP block has been
+	 * updated on the block device. The value is updated based on the
+	 * maximum time to write the MMP block during an update cycle.
+	 */
+	__le16	mmp_check_interval;
+
+	__le16	mmp_pad1;
+	__le32	mmp_pad2[227];
+};
+
+/* arguments passed to the mmp thread */
+struct mmpd_data {
+	struct buffer_head *bh; /* bh from initial read_mmp_block() */
+	struct super_block *sb;  /* super block of the fs */
+};
+
+/*
+ * Check interval multiplier
+ * The MMP block is written every update interval and initially checked every
+ * update interval x the multiplier (the value is then adapted based on the
+ * write latency). The reason is that writes can be delayed under load and we
+ * don't want readers to incorrectly assume that the filesystem is no longer
+ * in use.
+ */
+#define EXT4_MMP_CHECK_MULT		2UL
+
+/*
+ * Minimum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MIN_CHECK_INTERVAL	5UL
+
+/*
+ * Maximum interval for MMP checking in seconds.
+ */
+#define EXT4_MMP_MAX_CHECK_INTERVAL	300UL
+
 /*
 * Function prototypes
 */
@@ -1638,10 +1723,12 @@ extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
 extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
			ext4_group_t group);
 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
-			ext4_fsblk_t goal, unsigned long *count, int *errp);
-extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
-				ext4_fsblk_t block, unsigned long count);
+					 ext4_fsblk_t goal,
+					 unsigned int flags,
+					 unsigned long *count,
+					 int *errp);
+extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
+				  s64 nblocks, unsigned int flags);
 extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
 extern void ext4_check_blocks_bitmap(struct super_block *);
 extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@ -1706,6 +1793,8 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
			     unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
		ext4_group_t i, struct ext4_group_desc *desc);
+extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+				ext4_fsblk_t block, unsigned long count);
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);

 /* inode.c */
@@ -1729,6 +1818,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
 extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
+extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
 extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
 extern void ext4_set_inode_flags(struct inode *);
 extern void ext4_get_inode_flags(struct ext4_inode_info *);
@@ -1738,6 +1828,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
		struct address_space *mapping, loff_t from);
+extern int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1788,6 +1880,10 @@ extern void __ext4_warning(struct super_block *, const char *, unsigned int,
		__LINE__, ## message)
 extern void ext4_msg(struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
+extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp,
+			const char *, unsigned int, const char *);
+#define dump_mmp_msg(sb, mmp, msg)	__dump_mmp_msg(sb, mmp, __func__, \
+					__LINE__, msg)
 extern void __ext4_grp_locked_error(const char *, unsigned int, \
			struct super_block *, ext4_group_t, \
			unsigned long, ext4_fsblk_t, \
@@ -2064,6 +2160,8 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
 extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			       struct ext4_map_blocks *map, int flags);
 extern void ext4_ext_truncate(struct inode *);
+extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
+				loff_t length);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
@@ -2092,6 +2190,9 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
			       int len,
			       struct writeback_control *wbc);

+/* mmp.c */
+extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
+
 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */
 enum ext4_state_bits {
	BH_Uninit	/* blocks are allocated but uninitialized on disk */
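The mmp_check_interval field adapts at runtime, but its starting point follows from the constants above: EXT4_MMP_CHECK_MULT times the superblock's update interval, clamped to the min/max bounds. A sketch of that arithmetic (illustrative helper, not code from the patch):

/*
 * Illustrative only: derive an initial MMP check interval from the
 * superblock's update interval, as the constants above suggest --
 * EXT4_MMP_CHECK_MULT times the update interval, clamped to the
 * [EXT4_MMP_MIN_CHECK_INTERVAL, EXT4_MMP_MAX_CHECK_INTERVAL] range.
 */
static unsigned long mmp_initial_check_interval(unsigned long update_interval)
{
	unsigned long interval = EXT4_MMP_CHECK_MULT * update_interval;

	if (interval < EXT4_MMP_MIN_CHECK_INTERVAL)
		interval = EXT4_MMP_MIN_CHECK_INTERVAL;
	if (interval > EXT4_MMP_MAX_CHECK_INTERVAL)
		interval = EXT4_MMP_MAX_CHECK_INTERVAL;
	return interval;
}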
fs/ext4/ext4_jbd2.c
@@ -6,20 +6,6 @@

 #include <trace/events/ext4.h>

-int __ext4_journal_get_undo_access(const char *where, unsigned int line,
-				   handle_t *handle, struct buffer_head *bh)
-{
-	int err = 0;
-
-	if (ext4_handle_valid(handle)) {
-		err = jbd2_journal_get_undo_access(handle, bh);
-		if (err)
-			ext4_journal_abort_handle(where, line, __func__, bh,
-						  handle, err);
-	}
-	return err;
-}
-
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
				    handle_t *handle, struct buffer_head *bh)
 {
fs/ext4/ext4_jbd2.h
@@ -126,9 +126,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
			       const char *err_fn,
		struct buffer_head *bh, handle_t *handle, int err);

-int __ext4_journal_get_undo_access(const char *where, unsigned int line,
-				   handle_t *handle, struct buffer_head *bh);
-
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
				    handle_t *handle, struct buffer_head *bh);

@@ -146,8 +143,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 int __ext4_handle_dirty_super(const char *where, unsigned int line,
			      handle_t *handle, struct super_block *sb);

-#define ext4_journal_get_undo_access(handle, bh) \
-	__ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
 #define ext4_journal_get_write_access(handle, bh) \
	__ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
 #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
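With the undo-access helpers gone, metadata updates rely solely on the remaining write-access idiom, which this series uses throughout (for example in the relocated ext4_add_groupblocks()). A condensed sketch of that idiom (hypothetical helper name; the two calls are existing ext4 APIs):

/*
 * Sketch of the journaling idiom the remaining helpers support:
 * declare intent to modify a buffer, change it, then mark it dirty
 * in the handle. Locking and BUFFER_TRACE calls trimmed for brevity.
 */
static int update_metadata_block(handle_t *handle, struct buffer_head *bh)
{
	int err = ext4_journal_get_write_access(handle, bh);
	if (err)
		return err;

	/* ... modify bh->b_data under the appropriate lock ... */

	return ext4_handle_dirty_metadata(handle, NULL, bh);
}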
fs/ext4/extents.c (1410 changed lines)
File diff suppressed because it is too large.
fs/ext4/file.c
@@ -272,7 +272,6 @@ const struct file_operations ext4_file_operations = {
 };

 const struct inode_operations ext4_file_inode_operations = {
-	.truncate	= ext4_truncate,
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
 #ifdef CONFIG_EXT4_FS_XATTR
fs/ext4/fsync.c
@@ -36,7 +36,7 @@

 static void dump_completed_IO(struct inode * inode)
 {
-#ifdef	EXT4_DEBUG
+#ifdef	EXT4FS_DEBUG
	struct list_head *cur, *before, *after;
	ext4_io_end_t *io, *io0, *io1;
	unsigned long flags;
@@ -172,6 +172,7 @@ int ext4_sync_file(struct file *file, int datasync)
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
	int ret;
	tid_t commit_tid;
+	bool needs_barrier = false;

	J_ASSERT(ext4_journal_current_handle() == NULL);

@@ -211,22 +212,12 @@ int ext4_sync_file(struct file *file, int datasync)
	}

	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid)) {
-		/*
-		 * When the journal is on a different device than the
-		 * fs data disk, we need to issue the barrier in
-		 * writeback mode.  (In ordered mode, the jbd2 layer
-		 * will take care of issuing the barrier.  In
-		 * data=journal, all of the data blocks are written to
-		 * the journal device.)
-		 */
-		if (ext4_should_writeback_data(inode) &&
-		    (journal->j_fs_dev != journal->j_dev) &&
-		    (journal->j_flags & JBD2_BARRIER))
-			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
-					NULL);
-		ret = jbd2_log_wait_commit(journal, commit_tid);
-	} else if (journal->j_flags & JBD2_BARRIER)
+	if (journal->j_flags & JBD2_BARRIER &&
+	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
+		needs_barrier = true;
+	jbd2_log_start_commit(journal, commit_tid);
+	ret = jbd2_log_wait_commit(journal, commit_tid);
+	if (needs_barrier)
		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 out:
	trace_ext4_sync_file_exit(inode, ret);
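The rewritten ext4_sync_file() now issues its own cache flush only when the journal commit will not already send one on the data device. Restated from the hunk above as a standalone predicate (hypothetical helper name; both symbols are from this series):

/* Restates the barrier decision from the hunk above as a predicate. */
static bool fsync_needs_explicit_flush(journal_t *journal, tid_t commit_tid)
{
	/*
	 * A flush is only needed when barriers are enabled and the
	 * transaction covering this inode will not itself issue a
	 * data-device barrier (e.g. journal on a separate device).
	 */
	return (journal->j_flags & JBD2_BARRIER) &&
	       !jbd2_trans_will_send_data_barrier(journal, commit_tid);
}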
fs/ext4/inode.c (114 changed lines)
@@ -639,8 +639,8 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
	while (target > 0) {
		count = target;
		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext4_new_meta_blocks(handle, inode,
-							goal, &count, err);
+		current_block = ext4_new_meta_blocks(handle, inode, goal,
+						     0, &count, err);
		if (*err)
			goto failed_out;
@@ -1930,7 +1930,7 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
	 * We do still charge estimated metadata to the sb though;
	 * we cannot afford to run out of free blocks.
	 */
-	if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+	if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
		dquot_release_reservation_block(inode, 1);
		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
			yield();
@@ -2796,9 +2796,7 @@ static int write_cache_pages_da(struct address_space *mapping,
				continue;
			}

-			if (PageWriteback(page))
-				wait_on_page_writeback(page);
-
+			wait_on_page_writeback(page);
			BUG_ON(PageWriteback(page));

			if (mpd->next_page != page->index)
@@ -3513,7 +3511,7 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
			loff_t end = offset + iov_length(iov, nr_segs);

			if (end > isize)
-				vmtruncate(inode, isize);
+				ext4_truncate_failed_write(inode);
		}
	}
	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3915,10 +3913,31 @@ void ext4_set_aops(struct inode *inode)
 */
 int ext4_block_truncate_page(handle_t *handle,
		struct address_space *mapping, loff_t from)
+{
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned length;
+	unsigned blocksize;
+	struct inode *inode = mapping->host;
+
+	blocksize = inode->i_sb->s_blocksize;
+	length = blocksize - (offset & (blocksize - 1));
+
+	return ext4_block_zero_page_range(handle, mapping, from, length);
+}
+
+/*
+ * ext4_block_zero_page_range() zeros out a mapping of length 'length'
+ * starting from file offset 'from'.  The range to be zero'd must
+ * be contained with in one block.  If the specified range exceeds
+ * the end of the block it will be shortened to end of the block
+ * that cooresponds to 'from'
+ */
+int ext4_block_zero_page_range(handle_t *handle,
+		struct address_space *mapping, loff_t from, loff_t length)
 {
	ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
-	unsigned blocksize, length, pos;
+	unsigned blocksize, max, pos;
	ext4_lblk_t iblock;
	struct inode *inode = mapping->host;
	struct buffer_head *bh;
@@ -3931,7 +3950,15 @@ int ext4_block_truncate_page(handle_t *handle,
		return -EINVAL;

	blocksize = inode->i_sb->s_blocksize;
-	length = blocksize - (offset & (blocksize - 1));
+	max = blocksize - (offset & (blocksize - 1));
+
+	/*
+	 * correct length if it does not fall between
+	 * 'from' and the end of the block
+	 */
+	if (length > max || length < 0)
+		length = max;

	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
@@ -4380,8 +4407,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,

 int ext4_can_truncate(struct inode *inode)
 {
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return 0;
	if (S_ISREG(inode->i_mode))
		return 1;
	if (S_ISDIR(inode->i_mode))
@@ -4391,6 +4416,31 @@ int ext4_can_truncate(struct inode *inode)
	return 0;
 }

+/*
+ * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * associated with the given offset and length
+ *
+ * @inode:  File inode
+ * @offset: The offset where the hole will begin
+ * @len:    The length of the hole
+ *
+ * Returns: 0 on sucess or negative on failure
+ */
+
+int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	if (!S_ISREG(inode->i_mode))
+		return -ENOTSUPP;
+
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		/* TODO: Add support for non extent hole punching */
+		return -ENOTSUPP;
+	}
+
+	return ext4_ext_punch_hole(file, offset, length);
+}
+
 /*
 * ext4_truncate()
 *
@@ -4617,7 +4667,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
	/*
	 * Figure out the offset within the block group inode table
	 */
-	inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb));
+	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	inode_offset = ((inode->i_ino - 1) %
			EXT4_INODES_PER_GROUP(sb));
	block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
@@ -5311,8 +5361,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)

	if (S_ISREG(inode->i_mode) &&
	    attr->ia_valid & ATTR_SIZE &&
-	    (attr->ia_size < inode->i_size ||
-	     (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
+	    (attr->ia_size < inode->i_size)) {
		handle_t *handle;

		handle = ext4_journal_start(inode, 3);
@@ -5346,14 +5395,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
				goto err_out;
			}
		}
-		/* ext4_truncate will clear the flag */
-		if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
-			ext4_truncate(inode);
	}

-	if ((attr->ia_valid & ATTR_SIZE) &&
-	    attr->ia_size != i_size_read(inode))
-		rc = vmtruncate(inode, attr->ia_size);
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (attr->ia_size != i_size_read(inode)) {
+			truncate_setsize(inode, attr->ia_size);
+			ext4_truncate(inode);
+		} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+			ext4_truncate(inode);
+	}

	if (!rc) {
		setattr_copy(inode, attr);
@@ -5811,15 +5861,19 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
		goto out_unlock;
	}
	ret = 0;
-	if (PageMappedToDisk(page))
-		goto out_unlock;
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+	if (PageMappedToDisk(page)) {
+		up_read(&inode->i_alloc_sem);
+		return VM_FAULT_LOCKED;
+	}

	if (page->index == size >> PAGE_CACHE_SHIFT)
		len = size & ~PAGE_CACHE_MASK;
	else
		len = PAGE_CACHE_SIZE;

-	lock_page(page);
	/*
	 * return if we have all the buffers mapped. This avoid
	 * the need to call write_begin/write_end which does a
@@ -5829,8 +5883,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	if (page_has_buffers(page)) {
		if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
					ext4_bh_unmapped)) {
-			unlock_page(page);
-			goto out_unlock;
+			up_read(&inode->i_alloc_sem);
+			return VM_FAULT_LOCKED;
		}
	}
	unlock_page(page);
@@ -5850,6 +5904,16 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	if (ret < 0)
		goto out_unlock;
	ret = 0;
+
+	/*
+	 * write_begin/end might have created a dirty page and someone
+	 * could wander in and start the IO. Make sure that hasn't
+	 * happened.
+	 */
+	lock_page(page);
+	wait_on_page_writeback(page);
+	up_read(&inode->i_alloc_sem);
+	return VM_FAULT_LOCKED;
 out_unlock:
	if (ret)
		ret = VM_FAULT_SIGBUS;
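From userspace, the new ext4_punch_hole() path is reached through fallocate(2). A minimal hedged example (assumes a kernel where ext4 wires FALLOC_FL_PUNCH_HOLE through to this code, an extent-mapped file named "testfile", and glibc exposure of the flags):

/* Minimal userspace demonstration of hole punching. Per the kernel
 * convention, FALLOC_FL_KEEP_SIZE must accompany FALLOC_FL_PUNCH_HOLE;
 * non-extent files fail, matching the TODO in the hunk above. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR);	/* placeholder file name */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Punch a 1 MiB hole starting at offset 1 MiB. */
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      1 << 20, 1 << 20) < 0)
		perror("fallocate");
	close(fd);
	return 0;
}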
|
@ -787,6 +787,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
|||
struct inode *inode;
|
||||
char *data;
|
||||
char *bitmap;
|
||||
struct ext4_group_info *grinfo;
|
||||
|
||||
mb_debug(1, "init page %lu\n", page->index);
|
||||
|
||||
|
@ -819,6 +820,18 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
|||
if (first_group + i >= ngroups)
|
||||
break;
|
||||
|
||||
grinfo = ext4_get_group_info(sb, first_group + i);
|
||||
/*
|
||||
* If page is uptodate then we came here after online resize
|
||||
* which added some new uninitialized group info structs, so
|
||||
* we must skip all initialized uptodate buddies on the page,
|
||||
* which may be currently in use by an allocating task.
|
||||
*/
|
||||
if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
|
||||
bh[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
err = -EIO;
|
||||
desc = ext4_get_group_desc(sb, first_group + i, NULL);
|
||||
if (desc == NULL)
|
||||
|
@ -871,26 +884,28 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
|||
}
|
||||
|
||||
/* wait for I/O completion */
|
||||
for (i = 0; i < groups_per_page && bh[i]; i++)
|
||||
wait_on_buffer(bh[i]);
|
||||
for (i = 0; i < groups_per_page; i++)
|
||||
if (bh[i])
|
||||
wait_on_buffer(bh[i]);
|
||||
|
||||
err = -EIO;
|
||||
for (i = 0; i < groups_per_page && bh[i]; i++)
|
||||
if (!buffer_uptodate(bh[i]))
|
||||
for (i = 0; i < groups_per_page; i++)
|
||||
if (bh[i] && !buffer_uptodate(bh[i]))
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
first_block = page->index * blocks_per_page;
|
||||
/* init the page */
|
||||
memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
|
||||
for (i = 0; i < blocks_per_page; i++) {
|
||||
int group;
|
||||
struct ext4_group_info *grinfo;
|
||||
|
||||
group = (first_block + i) >> 1;
|
||||
if (group >= ngroups)
|
||||
break;
|
||||
|
||||
if (!bh[group - first_group])
|
||||
/* skip initialized uptodate buddy */
|
||||
continue;
|
||||
|
||||
/*
|
||||
* data carry information regarding this
|
||||
* particular group in the format specified
|
||||
|
@ -919,6 +934,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
|||
* incore got set to the group block bitmap below
|
||||
*/
|
||||
ext4_lock_group(sb, group);
|
||||
/* init the buddy */
|
||||
memset(data, 0xff, blocksize);
|
||||
ext4_mb_generate_buddy(sb, data, incore, group);
|
||||
ext4_unlock_group(sb, group);
|
||||
incore = NULL;
|
||||
|
@ -948,7 +965,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
|||
|
||||
out:
|
||||
if (bh) {
|
||||
for (i = 0; i < groups_per_page && bh[i]; i++)
|
||||
for (i = 0; i < groups_per_page; i++)
|
||||
brelse(bh[i]);
|
||||
if (bh != &bhs)
|
||||
kfree(bh);
|
||||
|
@ -957,22 +974,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
|||
}
|
||||
|
||||
/*
|
||||
* lock the group_info alloc_sem of all the groups
|
||||
* belonging to the same buddy cache page. This
|
||||
* make sure other parallel operation on the buddy
|
||||
* cache doesn't happen whild holding the buddy cache
|
||||
* lock
|
||||
* Lock the buddy and bitmap pages. This make sure other parallel init_group
|
||||
* on the same buddy page doesn't happen whild holding the buddy page lock.
|
||||
* Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
|
||||
* are on the same page e4b->bd_buddy_page is NULL and return value is 0.
|
||||
*/
|
||||
static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
|
||||
ext4_group_t group)
|
||||
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
|
||||
ext4_group_t group, struct ext4_buddy *e4b)
|
||||
{
|
||||
int i;
|
||||
int block, pnum;
|
||||
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
|
||||
int block, pnum, poff;
|
||||
int blocks_per_page;
|
||||
int groups_per_page;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
ext4_group_t first_group;
|
||||
struct ext4_group_info *grp;
|
||||
struct page *page;
|
||||
|
||||
e4b->bd_buddy_page = NULL;
|
||||
e4b->bd_bitmap_page = NULL;
|
||||
|
||||
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
|
||||
/*
|
||||
|
@ -982,57 +998,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
|
|||
*/
|
||||
block = group * 2;
|
||||
pnum = block / blocks_per_page;
|
||||
first_group = pnum * blocks_per_page / 2;
|
||||
poff = block % blocks_per_page;
|
||||
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
|
||||
if (!page)
|
||||
return -EIO;
|
||||
BUG_ON(page->mapping != inode->i_mapping);
|
||||
e4b->bd_bitmap_page = page;
|
||||
e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
|
||||
|
||||
groups_per_page = blocks_per_page >> 1;
|
||||
if (groups_per_page == 0)
|
||||
groups_per_page = 1;
|
||||
/* read all groups the page covers into the cache */
|
||||
for (i = 0; i < groups_per_page; i++) {
|
||||
|
||||
if ((first_group + i) >= ngroups)
|
||||
break;
|
||||
grp = ext4_get_group_info(sb, first_group + i);
|
||||
/* take all groups write allocation
|
||||
* semaphore. This make sure there is
|
||||
* no block allocation going on in any
|
||||
* of that groups
|
||||
*/
|
||||
down_write_nested(&grp->alloc_sem, i);
|
||||
if (blocks_per_page >= 2) {
|
||||
/* buddy and bitmap are on the same page */
|
||||
return 0;
|
||||
}
|
||||
return i;
|
||||
|
||||
block++;
|
||||
pnum = block / blocks_per_page;
|
||||
poff = block % blocks_per_page;
|
||||
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
|
||||
if (!page)
|
||||
return -EIO;
|
||||
BUG_ON(page->mapping != inode->i_mapping);
|
||||
e4b->bd_buddy_page = page;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
|
||||
ext4_group_t group, int locked_group)
|
||||
static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
|
||||
{
|
||||
int i;
|
||||
int block, pnum;
|
||||
int blocks_per_page;
|
||||
ext4_group_t first_group;
|
||||
struct ext4_group_info *grp;
|
||||
|
||||
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
|
||||
/*
|
||||
* the buddy cache inode stores the block bitmap
|
||||
* and buddy information in consecutive blocks.
|
||||
* So for each group we need two blocks.
|
||||
*/
|
||||
block = group * 2;
|
||||
pnum = block / blocks_per_page;
|
||||
first_group = pnum * blocks_per_page / 2;
|
||||
/* release locks on all the groups */
|
||||
for (i = 0; i < locked_group; i++) {
|
||||
|
||||
grp = ext4_get_group_info(sb, first_group + i);
|
||||
/* take all groups write allocation
|
||||
* semaphore. This make sure there is
|
||||
* no block allocation going on in any
|
||||
* of that groups
|
||||
*/
|
||||
up_write(&grp->alloc_sem);
|
||||
if (e4b->bd_bitmap_page) {
|
||||
unlock_page(e4b->bd_bitmap_page);
|
||||
page_cache_release(e4b->bd_bitmap_page);
|
||||
}
|
||||
if (e4b->bd_buddy_page) {
|
||||
unlock_page(e4b->bd_buddy_page);
|
||||
page_cache_release(e4b->bd_buddy_page);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1044,93 +1043,60 @@ static noinline_for_stack
|
|||
int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
|
||||
{
|
||||
|
||||
int ret = 0;
|
||||
void *bitmap;
|
||||
int blocks_per_page;
|
||||
int block, pnum, poff;
|
||||
int num_grp_locked = 0;
|
||||
struct ext4_group_info *this_grp;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct inode *inode = sbi->s_buddy_cache;
|
||||
struct page *page = NULL, *bitmap_page = NULL;
|
||||
struct ext4_buddy e4b;
|
||||
struct page *page;
|
||||
int ret = 0;
|
||||
|
||||
mb_debug(1, "init group %u\n", group);
|
||||
blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
|
||||
this_grp = ext4_get_group_info(sb, group);
|
||||
/*
|
||||
* This ensures that we don't reinit the buddy cache
|
||||
* page which map to the group from which we are already
|
||||
* allocating. If we are looking at the buddy cache we would
|
||||
* have taken a reference using ext4_mb_load_buddy and that
|
||||
* would have taken the alloc_sem lock.
|
||||
* would have pinned buddy page to page cache.
|
||||
*/
|
||||
num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
|
||||
if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
|
||||
ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
|
||||
if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
|
||||
/*
|
||||
* somebody initialized the group
|
||||
* return without doing anything
|
||||
*/
|
||||
ret = 0;
|
||||
goto err;
|
||||
}
|
||||
/*
|
||||
* the buddy cache inode stores the block bitmap
|
||||
* and buddy information in consecutive blocks.
|
||||
* So for each group we need two blocks.
|
||||
*/
|
||||
block = group * 2;
|
||||
pnum = block / blocks_per_page;
|
||||
poff = block % blocks_per_page;
|
||||
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
|
||||
if (page) {
|
||||
BUG_ON(page->mapping != inode->i_mapping);
|
||||
ret = ext4_mb_init_cache(page, NULL);
|
||||
if (ret) {
|
||||
unlock_page(page);
|
||||
goto err;
|
||||
}
|
||||
unlock_page(page);
|
||||
}
|
||||
if (page == NULL || !PageUptodate(page)) {
|
||||
|
||||
page = e4b.bd_bitmap_page;
|
||||
ret = ext4_mb_init_cache(page, NULL);
|
||||
if (ret)
|
||||
goto err;
|
||||
if (!PageUptodate(page)) {
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
mark_page_accessed(page);
|
||||
bitmap_page = page;
|
||||
bitmap = page_address(page) + (poff * sb->s_blocksize);
|
||||
|
||||
/* init buddy cache */
|
||||
block++;
|
||||
pnum = block / blocks_per_page;
|
||||
poff = block % blocks_per_page;
|
||||
page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
|
||||
if (page == bitmap_page) {
|
||||
if (e4b.bd_buddy_page == NULL) {
|
||||
/*
|
||||
* If both the bitmap and buddy are in
|
||||
* the same page we don't need to force
|
||||
* init the buddy
|
||||
*/
|
||||
unlock_page(page);
|
||||
} else if (page) {
|
||||
BUG_ON(page->mapping != inode->i_mapping);
|
||||
ret = ext4_mb_init_cache(page, bitmap);
|
||||
if (ret) {
|
||||
unlock_page(page);
|
||||
goto err;
|
||||
}
|
||||
unlock_page(page);
|
||||
ret = 0;
|
||||
goto err;
|
||||
}
|
||||
if (page == NULL || !PageUptodate(page)) {
|
||||
/* init buddy cache */
|
||||
page = e4b.bd_buddy_page;
|
||||
ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
|
||||
if (ret)
|
||||
goto err;
|
||||
if (!PageUptodate(page)) {
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
}
|
||||
mark_page_accessed(page);
|
||||
err:
|
||||
ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
|
||||
if (bitmap_page)
|
||||
page_cache_release(bitmap_page);
|
||||
if (page)
|
||||
page_cache_release(page);
|
||||
ext4_mb_put_buddy_page_lock(&e4b);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1164,24 +1130,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
|
|||
e4b->bd_group = group;
|
||||
e4b->bd_buddy_page = NULL;
|
||||
e4b->bd_bitmap_page = NULL;
|
||||
e4b->alloc_semp = &grp->alloc_sem;
|
||||
|
||||
/* Take the read lock on the group alloc
|
||||
* sem. This would make sure a parallel
|
||||
* ext4_mb_init_group happening on other
|
||||
* groups mapped by the page is blocked
|
||||
* till we are done with allocation
|
||||
*/
|
||||
repeat_load_buddy:
|
||||
down_read(e4b->alloc_semp);
|
||||
|
||||
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
|
||||
/* we need to check for group need init flag
|
||||
* with alloc_semp held so that we can be sure
|
||||
* that new blocks didn't get added to the group
|
||||
* when we are loading the buddy cache
|
||||
*/
|
||||
up_read(e4b->alloc_semp);
|
||||
/*
|
||||
* we need full data about the group
|
||||
* to make a good selection
|
||||
|
@ -1189,7 +1139,6 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
|
|||
ret = ext4_mb_init_group(sb, group);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto repeat_load_buddy;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1273,15 +1222,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
|
|||
return 0;
|
||||
|
||||
err:
|
||||
if (page)
|
||||
page_cache_release(page);
|
||||
if (e4b->bd_bitmap_page)
|
||||
page_cache_release(e4b->bd_bitmap_page);
|
||||
if (e4b->bd_buddy_page)
|
||||
page_cache_release(e4b->bd_buddy_page);
|
||||
e4b->bd_buddy = NULL;
|
||||
e4b->bd_bitmap = NULL;
|
||||
|
||||
/* Done with the buddy cache */
|
||||
up_read(e4b->alloc_semp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1291,9 +1239,6 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
|
|||
page_cache_release(e4b->bd_bitmap_page);
|
||||
if (e4b->bd_buddy_page)
|
||||
page_cache_release(e4b->bd_buddy_page);
|
||||
/* Done with the buddy cache */
|
||||
if (e4b->alloc_semp)
|
||||
up_read(e4b->alloc_semp);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1606,9 +1551,6 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
|
|||
get_page(ac->ac_bitmap_page);
|
||||
ac->ac_buddy_page = e4b->bd_buddy_page;
|
||||
get_page(ac->ac_buddy_page);
|
||||
/* on allocation we use ac to track the held semaphore */
|
||||
ac->alloc_semp = e4b->alloc_semp;
|
||||
e4b->alloc_semp = NULL;
|
||||
/* store last allocated for subsequent stream allocation */
|
||||
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
|
||||
spin_lock(&sbi->s_md_lock);
|
||||
|
@ -2659,7 +2601,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
|
|||
struct super_block *sb = journal->j_private;
|
||||
struct ext4_buddy e4b;
|
||||
struct ext4_group_info *db;
|
||||
int err, ret, count = 0, count2 = 0;
|
||||
int err, count = 0, count2 = 0;
|
||||
struct ext4_free_data *entry;
|
||||
struct list_head *l, *ltmp;
|
||||
|
||||
|
@ -2669,15 +2611,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
|
|||
mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
|
||||
entry->count, entry->group, entry);
|
||||
|
||||
if (test_opt(sb, DISCARD)) {
|
||||
ret = ext4_issue_discard(sb, entry->group,
|
||||
entry->start_blk, entry->count);
|
||||
if (unlikely(ret == -EOPNOTSUPP)) {
|
||||
ext4_warning(sb, "discard not supported, "
|
||||
"disabling");
|
||||
clear_opt(sb, DISCARD);
|
||||
}
|
||||
}
|
||||
if (test_opt(sb, DISCARD))
|
||||
ext4_issue_discard(sb, entry->group,
|
||||
entry->start_blk, entry->count);
|
||||
|
||||
err = ext4_mb_load_buddy(sb, entry->group, &e4b);
|
||||
/* we expect to find existing buddy because it's pinned */
|
||||
|
@ -4226,15 +4162,12 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
|
|||
spin_unlock(&pa->pa_lock);
|
||||
}
|
||||
}
|
||||
if (ac->alloc_semp)
|
||||
up_read(ac->alloc_semp);
|
||||
if (pa) {
|
||||
/*
|
||||
* We want to add the pa to the right bucket.
|
||||
* Remove it from the list and while adding
|
||||
* make sure the list to which we are adding
|
||||
* doesn't grow big. We need to release
|
||||
* alloc_semp before calling ext4_mb_add_n_trim()
|
||||
* doesn't grow big.
|
||||
*/
|
||||
if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
|
||||
spin_lock(pa->pa_obj_lock);
|
||||
|
@ -4303,7 +4236,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
|
|||
* there is enough free blocks to do block allocation
|
||||
* and verify allocation doesn't exceed the quota limits.
|
||||
*/
|
||||
while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
|
||||
while (ar->len &&
|
||||
ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
|
||||
|
||||
/* let others to free the space */
|
||||
yield();
|
||||
ar->len = ar->len >> 1;
|
||||
|
@ -4313,9 +4248,15 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
|
|||
return 0;
|
||||
}
|
||||
reserv_blks = ar->len;
|
||||
while (ar->len && dquot_alloc_block(ar->inode, ar->len)) {
|
||||
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
|
||||
ar->len--;
|
||||
if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
|
||||
dquot_alloc_block_nofail(ar->inode, ar->len);
|
||||
} else {
|
||||
while (ar->len &&
|
||||
dquot_alloc_block(ar->inode, ar->len)) {
|
||||
|
||||
ar->flags |= EXT4_MB_HINT_NOPREALLOC;
|
||||
ar->len--;
|
||||
}
|
||||
}
|
||||
inquota = ar->len;
|
||||
if (ar->len == 0) {
|
||||
|
@ -4703,6 +4644,127 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
|
|||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* ext4_add_groupblocks() -- Add given blocks to an existing group
|
||||
* @handle: handle to this transaction
|
||||
* @sb: super block
|
||||
* @block: start physcial block to add to the block group
|
||||
* @count: number of blocks to free
|
||||
*
|
||||
* This marks the blocks as free in the bitmap and buddy.
|
||||
*/
|
||||
void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
|
||||
ext4_fsblk_t block, unsigned long count)
|
||||
{
|
||||
struct buffer_head *bitmap_bh = NULL;
|
||||
struct buffer_head *gd_bh;
|
||||
ext4_group_t block_group;
|
||||
ext4_grpblk_t bit;
|
||||
unsigned int i;
|
||||
struct ext4_group_desc *desc;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_buddy e4b;
|
||||
int err = 0, ret, blk_free_count;
|
||||
ext4_grpblk_t blocks_freed;
|
||||
struct ext4_group_info *grp;
|
||||
|
||||
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
|
||||
|
||||
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
|
||||
grp = ext4_get_group_info(sb, block_group);
|
||||
/*
|
||||
* Check to see if we are freeing blocks across a group
|
||||
* boundary.
|
||||
*/
|
||||
if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
|
||||
goto error_return;
|
||||
|
||||
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
|
||||
if (!bitmap_bh)
|
||||
goto error_return;
|
||||
desc = ext4_get_group_desc(sb, block_group, &gd_bh);
|
||||
if (!desc)
|
||||
goto error_return;
|
||||
|
||||
if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
|
||||
in_range(ext4_inode_bitmap(sb, desc), block, count) ||
|
||||
in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
|
||||
in_range(block + count - 1, ext4_inode_table(sb, desc),
|
||||
sbi->s_itb_per_group)) {
|
||||
ext4_error(sb, "Adding blocks in system zones - "
|
||||
"Block = %llu, count = %lu",
|
||||
block, count);
|
||||
goto error_return;
|
||||
}
|
||||
|
||||
BUFFER_TRACE(bitmap_bh, "getting write access");
|
||||
err = ext4_journal_get_write_access(handle, bitmap_bh);
|
||||
if (err)
|
||||
goto error_return;
|
||||
|
||||
/*
|
||||
* We are about to modify some metadata. Call the journal APIs
|
||||
* to unshare ->b_data if a currently-committing transaction is
|
||||
* using it
|
||||
*/
|
||||
BUFFER_TRACE(gd_bh, "get_write_access");
|
||||
err = ext4_journal_get_write_access(handle, gd_bh);
|
||||
if (err)
|
||||
goto error_return;
|
||||
|
||||
for (i = 0, blocks_freed = 0; i < count; i++) {
|
||||
BUFFER_TRACE(bitmap_bh, "clear bit");
|
||||
if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
|
||||
ext4_error(sb, "bit already cleared for block %llu",
|
||||
(ext4_fsblk_t)(block + i));
|
||||
BUFFER_TRACE(bitmap_bh, "bit already cleared");
|
||||
} else {
|
||||
blocks_freed++;
|
||||
}
|
||||
}
|
||||
|
||||
err = ext4_mb_load_buddy(sb, block_group, &e4b);
|
||||
if (err)
|
||||
goto error_return;
|
||||
|
||||
/*
|
||||
* need to update group_info->bb_free and bitmap
|
||||
* with group lock held. generate_buddy look at
|
||||
* them with group lock_held
|
||||
*/
|
||||
ext4_lock_group(sb, block_group);
|
||||
mb_clear_bits(bitmap_bh->b_data, bit, count);
|
||||
mb_free_blocks(NULL, &e4b, bit, count);
|
||||
blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
|
||||
ext4_free_blks_set(sb, desc, blk_free_count);
|
||||
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
|
||||
ext4_unlock_group(sb, block_group);
|
||||
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
|
||||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
|
||||
atomic_add(blocks_freed,
|
||||
&sbi->s_flex_groups[flex_group].free_blocks);
|
||||
}
|
||||
|
||||
ext4_mb_unload_buddy(&e4b);
|
||||
|
||||
/* We dirtied the bitmap block */
|
||||
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
|
||||
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
|
||||
|
||||
/* And the group descriptor block */
|
||||
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
|
||||
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
|
||||
if (!err)
|
||||
err = ret;
|
||||
|
||||
error_return:
|
||||
brelse(bitmap_bh);
|
||||
ext4_std_error(sb, err);
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* ext4_trim_extent -- function to TRIM one single free extent in the group
|
||||
* @sb: super block for the file system
|
||||
|
@ -4715,11 +4777,10 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
|
|||
* one will allocate those blocks, mark it as used in buddy bitmap. This must
|
||||
* be called with under the group lock.
|
||||
*/
|
||||
static int ext4_trim_extent(struct super_block *sb, int start, int count,
|
||||
ext4_group_t group, struct ext4_buddy *e4b)
|
||||
static void ext4_trim_extent(struct super_block *sb, int start, int count,
|
||||
ext4_group_t group, struct ext4_buddy *e4b)
|
||||
{
|
||||
struct ext4_free_extent ex;
|
||||
int ret = 0;
|
||||
|
||||
assert_spin_locked(ext4_group_lock_ptr(sb, group));
|
||||
|
||||
|
@ -4733,12 +4794,9 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
|
|||
*/
|
||||
mb_mark_used(e4b, &ex);
|
||||
ext4_unlock_group(sb, group);
|
||||
|
||||
ret = ext4_issue_discard(sb, group, start, count);
|
||||
|
||||
ext4_issue_discard(sb, group, start, count);
|
||||
ext4_lock_group(sb, group);
|
||||
mb_free_blocks(NULL, e4b, start, ex.fe_len);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -4760,21 +4818,26 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
  * the group buddy bitmap. This is done until whole group is scanned.
  */
 static ext4_grpblk_t
-ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
-		ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+		   ext4_grpblk_t start, ext4_grpblk_t max,
+		   ext4_grpblk_t minblocks)
 {
 	void *bitmap;
 	ext4_grpblk_t next, count = 0;
-	ext4_group_t group;
-	int ret = 0;
+	struct ext4_buddy e4b;
+	int ret;
 
-	BUG_ON(e4b == NULL);
+	ret = ext4_mb_load_buddy(sb, group, &e4b);
+	if (ret) {
+		ext4_error(sb, "Error in loading buddy "
+				"information for %u", group);
+		return ret;
+	}
+	bitmap = e4b.bd_bitmap;
 
-	bitmap = e4b->bd_bitmap;
-	group = e4b->bd_group;
-	start = (e4b->bd_info->bb_first_free > start) ?
-		e4b->bd_info->bb_first_free : start;
 	ext4_lock_group(sb, group);
+	start = (e4b.bd_info->bb_first_free > start) ?
+		e4b.bd_info->bb_first_free : start;
 
 	while (start < max) {
 		start = mb_find_next_zero_bit(bitmap, max, start);
@@ -4783,10 +4846,8 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
 		next = mb_find_next_bit(bitmap, max, start);
 
 		if ((next - start) >= minblocks) {
-			ret = ext4_trim_extent(sb, start,
-					next - start, group, e4b);
-			if (ret < 0)
-				break;
+			ext4_trim_extent(sb, start,
+					 next - start, group, &e4b);
 			count += next - start;
 		}
 		start = next + 1;
@@ -4802,17 +4863,15 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
 			ext4_lock_group(sb, group);
 		}
 
-		if ((e4b->bd_info->bb_free - count) < minblocks)
+		if ((e4b.bd_info->bb_free - count) < minblocks)
 			break;
 	}
 	ext4_unlock_group(sb, group);
+	ext4_mb_unload_buddy(&e4b);
 
 	ext4_debug("trimmed %d blocks in the group %d\n",
 		count, group);
 
-	if (ret < 0)
-		count = ret;
-
 	return count;
 }
@@ -4830,11 +4889,11 @@ ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
  */
 int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 {
-	struct ext4_buddy e4b;
+	struct ext4_group_info *grp;
 	ext4_group_t first_group, last_group;
 	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
 	ext4_grpblk_t cnt = 0, first_block, last_block;
-	uint64_t start, len, minlen, trimmed;
+	uint64_t start, len, minlen, trimmed = 0;
 	ext4_fsblk_t first_data_blk =
 			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 	int ret = 0;
@@ -4842,7 +4901,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	start = range->start >> sb->s_blocksize_bits;
 	len = range->len >> sb->s_blocksize_bits;
 	minlen = range->minlen >> sb->s_blocksize_bits;
-	trimmed = 0;
 
 	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
 		return -EINVAL;
@@ -4863,11 +4921,12 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		return -EINVAL;
 
 	for (group = first_group; group <= last_group; group++) {
-		ret = ext4_mb_load_buddy(sb, group, &e4b);
-		if (ret) {
-			ext4_error(sb, "Error in loading buddy "
-					"information for %u", group);
-			break;
+		grp = ext4_get_group_info(sb, group);
+		/* We only do this if the grp has never been initialized */
+		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+			ret = ext4_mb_init_group(sb, group);
+			if (ret)
+				break;
 		}
 
 		/*
@@ -4880,16 +4939,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 			last_block = first_block + len;
 		len -= last_block - first_block;
 
-		if (e4b.bd_info->bb_free >= minlen) {
-			cnt = ext4_trim_all_free(sb, &e4b, first_block,
+		if (grp->bb_free >= minlen) {
+			cnt = ext4_trim_all_free(sb, group, first_block,
 						last_block, minlen);
 			if (cnt < 0) {
 				ret = cnt;
-				ext4_mb_unload_buddy(&e4b);
 				break;
 			}
 		}
-		ext4_mb_unload_buddy(&e4b);
 		trimmed += cnt;
 		first_block = 0;
 	}
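For context, ext4_trim_fs() above is the kernel-side backend of the FITRIM ioctl, and the fstrim_range fields it parses (start, len, minlen, all in bytes) come straight from user space. A minimal user-space caller looks roughly like this (a sketch; the /mnt mount point and the 64 KiB minlen are arbitrary choices, not anything mandated by the code above):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FITRIM, struct fstrim_range */

int main(void)
{
        struct fstrim_range range = {
                .start  = 0,
                .len    = UINT64_MAX,   /* whole filesystem */
                .minlen = 64 * 1024,    /* skip free extents smaller than 64 KiB */
        };
        int fd = open("/mnt", O_RDONLY); /* fd on the mount point, not the device */

        if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
                perror("FITRIM");
                return 1;
        }
        /* The kernel writes the number of bytes actually discarded back
         * into range.len, which is why ext4_trim_fs() accumulates
         * "trimmed" across block groups. */
        printf("trimmed %llu bytes\n", (unsigned long long) range.len);
        return 0;
}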
@@ -193,11 +193,6 @@ struct ext4_allocation_context {
 	__u8 ac_op;		/* operation, for history only */
 	struct page *ac_bitmap_page;
 	struct page *ac_buddy_page;
-	/*
-	 * pointer to the held semaphore upon successful
-	 * block allocation
-	 */
-	struct rw_semaphore *alloc_semp;
 	struct ext4_prealloc_space *ac_pa;
 	struct ext4_locality_group *ac_lg;
 };
@@ -215,7 +210,6 @@ struct ext4_buddy {
 	struct super_block *bd_sb;
 	__u16 bd_blkbits;
 	ext4_group_t bd_group;
-	struct rw_semaphore *alloc_semp;
 };
 #define EXT4_MB_BITMAP(e4b)	((e4b)->bd_bitmap)
 #define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
@@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 	 * We have the extent map build with the tmp inode.
 	 * Now copy the i_data across
 	 */
-	ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS);
+	ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
 	memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
 
 	/*
351 fs/ext4/mmp.c Normal file
@@ -0,0 +1,351 @@
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
#include <linux/utsname.h>
#include <linux/kthread.h>

#include "ext4.h"

/*
 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
 * faster.
 */
static int write_mmp_block(struct buffer_head *bh)
{
	mark_buffer_dirty(bh);
	lock_buffer(bh);
	bh->b_end_io = end_buffer_write_sync;
	get_bh(bh);
	submit_bh(WRITE_SYNC, bh);
	wait_on_buffer(bh);
	if (unlikely(!buffer_uptodate(bh)))
		return 1;

	return 0;
}

/*
 * Read the MMP block. It _must_ be read from disk and hence we clear the
 * uptodate flag on the buffer.
 */
static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
			  ext4_fsblk_t mmp_block)
{
	struct mmp_struct *mmp;

	if (*bh)
		clear_buffer_uptodate(*bh);

	/* This would be sb_bread(sb, mmp_block), except we need to be sure
	 * that the MD RAID device cache has been bypassed, and that the read
	 * is not blocked in the elevator. */
	if (!*bh)
		*bh = sb_getblk(sb, mmp_block);
	if (*bh) {
		get_bh(*bh);
		lock_buffer(*bh);
		(*bh)->b_end_io = end_buffer_read_sync;
		submit_bh(READ_SYNC, *bh);
		wait_on_buffer(*bh);
		if (!buffer_uptodate(*bh)) {
			brelse(*bh);
			*bh = NULL;
		}
	}
	if (!*bh) {
		ext4_warning(sb, "Error while reading MMP block %llu",
			     mmp_block);
		return -EIO;
	}

	mmp = (struct mmp_struct *)((*bh)->b_data);
	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
		return -EINVAL;

	return 0;
}

/*
 * Dump as much information as possible to help the admin.
 */
void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
		    const char *function, unsigned int line, const char *msg)
{
	__ext4_warning(sb, function, line, msg);
	__ext4_warning(sb, function, line,
		       "MMP failure info: last update time: %llu, last update "
		       "node: %s, last update device: %s\n",
		       (long long unsigned int) le64_to_cpu(mmp->mmp_time),
		       mmp->mmp_nodename, mmp->mmp_bdevname);
}

/*
 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
 */
static int kmmpd(void *data)
{
	struct super_block *sb = ((struct mmpd_data *) data)->sb;
	struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct mmp_struct *mmp;
	ext4_fsblk_t mmp_block;
	u32 seq = 0;
	unsigned long failed_writes = 0;
	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned mmp_check_interval;
	unsigned long last_update_time;
	unsigned long diff;
	int retval;

	mmp_block = le64_to_cpu(es->s_mmp_block);
	mmp = (struct mmp_struct *)(bh->b_data);
	mmp->mmp_time = cpu_to_le64(get_seconds());
	/*
	 * Start with the higher mmp_check_interval and reduce it if
	 * the MMP block is being updated on time.
	 */
	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
				 EXT4_MMP_MIN_CHECK_INTERVAL);
	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	bdevname(bh->b_bdev, mmp->mmp_bdevname);

	memcpy(mmp->mmp_nodename, init_utsname()->sysname,
	       sizeof(mmp->mmp_nodename));

	while (!kthread_should_stop()) {
		if (++seq > EXT4_MMP_SEQ_MAX)
			seq = 1;

		mmp->mmp_seq = cpu_to_le32(seq);
		mmp->mmp_time = cpu_to_le64(get_seconds());
		last_update_time = jiffies;

		retval = write_mmp_block(bh);
		/*
		 * Don't spew too many error messages. Print one every
		 * (s_mmp_update_interval * 60) seconds.
		 */
		if (retval && (failed_writes % 60) == 0) {
			ext4_error(sb, "Error writing to MMP block");
			failed_writes++;
		}

		if (!(le32_to_cpu(es->s_feature_incompat) &
		    EXT4_FEATURE_INCOMPAT_MMP)) {
			ext4_warning(sb, "kmmpd being stopped since MMP feature"
				     " has been disabled.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		if (sb->s_flags & MS_RDONLY) {
			ext4_warning(sb, "kmmpd being stopped since filesystem "
				     "has been remounted as readonly.");
			EXT4_SB(sb)->s_mmp_tsk = NULL;
			goto failed;
		}

		diff = jiffies - last_update_time;
		if (diff < mmp_update_interval * HZ)
			schedule_timeout_interruptible(mmp_update_interval *
						       HZ - diff);

		/*
		 * We need to make sure that more than mmp_check_interval
		 * seconds have not passed since writing. If that has happened
		 * we need to check if the MMP block is as we left it.
		 */
		diff = jiffies - last_update_time;
		if (diff > mmp_check_interval * HZ) {
			struct buffer_head *bh_check = NULL;
			struct mmp_struct *mmp_check;

			retval = read_mmp_block(sb, &bh_check, mmp_block);
			if (retval) {
				ext4_error(sb, "error reading MMP data: %d",
					   retval);

				EXT4_SB(sb)->s_mmp_tsk = NULL;
				goto failed;
			}

			mmp_check = (struct mmp_struct *)(bh_check->b_data);
			if (mmp->mmp_seq != mmp_check->mmp_seq ||
			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
				   sizeof(mmp->mmp_nodename))) {
				dump_mmp_msg(sb, mmp_check,
					     "Error while updating MMP info. "
					     "The filesystem seems to have been"
					     " multiply mounted.");
				ext4_error(sb, "abort");
				goto failed;
			}
			put_bh(bh_check);
		}

		/*
		 * Adjust the mmp_check_interval depending on how much time
		 * it took for the MMP block to be written.
		 */
		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
					     EXT4_MMP_MAX_CHECK_INTERVAL),
					 EXT4_MMP_MIN_CHECK_INTERVAL);
		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
	}

	/*
	 * Unmount seems to be clean.
	 */
	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
	mmp->mmp_time = cpu_to_le64(get_seconds());

	retval = write_mmp_block(bh);

failed:
	kfree(data);
	brelse(bh);
	return retval;
}

/*
 * Get a random new sequence number but make sure it is not greater than
 * EXT4_MMP_SEQ_MAX.
 */
static unsigned int mmp_new_seq(void)
{
	u32 new_seq;

	do {
		get_random_bytes(&new_seq, sizeof(u32));
	} while (new_seq > EXT4_MMP_SEQ_MAX);

	return new_seq;
}

/*
 * Protect the filesystem from being mounted more than once.
 */
int ext4_multi_mount_protect(struct super_block *sb,
				    ext4_fsblk_t mmp_block)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct buffer_head *bh = NULL;
	struct mmp_struct *mmp = NULL;
	struct mmpd_data *mmpd_data;
	u32 seq;
	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
	unsigned int wait_time = 0;
	int retval;

	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
	    mmp_block >= ext4_blocks_count(es)) {
		ext4_warning(sb, "Invalid MMP block in superblock");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;

	mmp = (struct mmp_struct *)(bh->b_data);

	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;

	/*
	 * If check_interval in MMP block is larger, use that instead of
	 * update_interval from the superblock.
	 */
	if (mmp->mmp_check_interval > mmp_check_interval)
		mmp_check_interval = mmp->mmp_check_interval;

	seq = le32_to_cpu(mmp->mmp_seq);
	if (seq == EXT4_MMP_SEQ_CLEAN)
		goto skip;

	if (seq == EXT4_MMP_SEQ_FSCK) {
		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
		goto failed;
	}

	wait_time = min(mmp_check_interval * 2 + 1,
			mmp_check_interval + 60);

	/* Print MMP interval if more than 20 secs. */
	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
		ext4_warning(sb, "MMP interval %u higher than expected, please"
			     " wait.\n", wait_time * 2);

	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

skip:
	/*
	 * write a new random sequence number.
	 */
	mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq());

	retval = write_mmp_block(bh);
	if (retval)
		goto failed;

	/*
	 * wait for MMP interval and check mmp_seq.
	 */
	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
		goto failed;
	}

	retval = read_mmp_block(sb, &bh, mmp_block);
	if (retval)
		goto failed;
	mmp = (struct mmp_struct *)(bh->b_data);
	if (seq != le32_to_cpu(mmp->mmp_seq)) {
		dump_mmp_msg(sb, mmp,
			     "Device is already active on another node.");
		goto failed;
	}

	mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
	if (!mmpd_data) {
		ext4_warning(sb, "not enough memory for mmpd_data");
		goto failed;
	}
	mmpd_data->sb = sb;
	mmpd_data->bh = bh;

	/*
	 * Start a kernel thread to update the MMP block periodically.
	 */
	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
					     bdevname(bh->b_bdev,
						      mmp->mmp_bdevname));
	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
		EXT4_SB(sb)->s_mmp_tsk = NULL;
		kfree(mmpd_data);
		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
			     sb->s_id);
		goto failed;
	}

	return 0;

failed:
	brelse(bh);
	return 1;
}
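Taken together, mmp.c implements a simple ownership protocol: the mounting node proves no one else is updating the MMP block by sampling mmp_seq, waiting a bit more than one check interval, and sampling again; only then does it publish its own random sequence and re-verify. Stripped of the buffer-head plumbing and of the CLEAN/FSCK special handling above, the handshake reduces to roughly the following pseudo-C, where read_seq()/write_seq() are hypothetical helpers doing synchronous, cache-bypassing I/O to the MMP block:

/* Illustrative sketch only -- not the kernel code above. */
static int mmp_claim_device(void)
{
        u32 seq = read_seq();
        u32 mine;

        if (seq == EXT4_MMP_SEQ_FSCK)
                return -EBUSY;                  /* fsck owns the device */
        if (seq != EXT4_MMP_SEQ_CLEAN) {
                sleep(wait_time);               /* ~2x the check interval */
                if (read_seq() != seq)
                        return -EBUSY;          /* another node is alive */
        }

        mine = mmp_new_seq();
        write_seq(mine);                        /* publish our claim */
        sleep(wait_time);
        if (read_seq() != mine)
                return -EBUSY;                  /* lost a mount race */

        return 0;                               /* keep renewing via kmmpd */
}

The doubled wait is what makes this work without any shared locking: a live owner is expected to bump mmp_seq at least once per mmp_check_interval, so an unchanged sequence across the window strongly suggests the device is idle.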
@@ -876,8 +876,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 	 * It needs to call wait_on_page_writeback() to wait for the
 	 * writeback of the page.
 	 */
-	if (PageWriteback(page))
-		wait_on_page_writeback(page);
+	wait_on_page_writeback(page);
 
 	/* Release old bh and drop refs */
 	try_to_release_page(page, 0);
@@ -1413,10 +1413,22 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 	frame->at = entries;
 	frame->bh = bh;
 	bh = bh2;
+
+	ext4_handle_dirty_metadata(handle, dir, frame->bh);
+	ext4_handle_dirty_metadata(handle, dir, bh);
+
 	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-	dx_release (frames);
-	if (!(de))
+	if (!de) {
+		/*
+		 * Even if the block split failed, we have to properly write
+		 * out all the changes we did so far. Otherwise we can end up
+		 * with corrupted filesystem.
+		 */
+		ext4_mark_inode_dirty(handle, dir);
+		dx_release(frames);
 		return retval;
+	}
+	dx_release(frames);
 
 	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
 	brelse(bh);
@@ -2240,6 +2252,7 @@ static int ext4_symlink(struct inode *dir,
 	handle_t *handle;
 	struct inode *inode;
 	int l, err, retries = 0;
+	int credits;
 
 	l = strlen(symname)+1;
 	if (l > dir->i_sb->s_blocksize)
@@ -2247,10 +2260,26 @@ static int ext4_symlink(struct inode *dir,
 
 	dquot_initialize(dir);
 
+	if (l > EXT4_N_BLOCKS * 4) {
+		/*
+		 * For non-fast symlinks, we just allocate inode and put it on
+		 * orphan list in the first transaction => we need bitmap,
+		 * group descriptor, sb, inode block, quota blocks.
+		 */
+		credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+	} else {
+		/*
+		 * Fast symlink. We have to add entry to directory
+		 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
+		 * allocate new inode (bitmap, group descriptor, inode block,
+		 * quota blocks, sb is already counted in previous macros).
+		 */
+		credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+			  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+			  EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+	}
 retry:
-	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+	handle = ext4_journal_start(dir, credits);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
@@ -2263,21 +2292,44 @@ static int ext4_symlink(struct inode *dir,
 	if (IS_ERR(inode))
 		goto out_stop;
 
-	if (l > sizeof(EXT4_I(inode)->i_data)) {
+	if (l > EXT4_N_BLOCKS * 4) {
 		inode->i_op = &ext4_symlink_inode_operations;
 		ext4_set_aops(inode);
 		/*
-		 * page_symlink() calls into ext4_prepare/commit_write.
-		 * We have a transaction open. All is sweetness. It also sets
-		 * i_size in generic_commit_write().
+		 * We cannot call page_symlink() with transaction started
+		 * because it calls into ext4_write_begin() which can wait
+		 * for transaction commit if we are running out of space
+		 * and thus we deadlock. So we have to stop transaction now
+		 * and restart it when symlink contents is written.
+		 *
+		 * To keep fs consistent in case of crash, we have to put inode
+		 * to orphan list in the mean time.
 		 */
+		drop_nlink(inode);
+		err = ext4_orphan_add(handle, inode);
+		ext4_journal_stop(handle);
+		if (err)
+			goto err_drop_inode;
 		err = __page_symlink(inode, symname, l, 1);
+		if (err)
+			goto err_drop_inode;
+		/*
+		 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
+		 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
+		 */
+		handle = ext4_journal_start(dir,
+				EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+				EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
+		if (IS_ERR(handle)) {
+			err = PTR_ERR(handle);
+			goto err_drop_inode;
+		}
+		inc_nlink(inode);
+		err = ext4_orphan_del(handle, inode);
 		if (err) {
+			ext4_journal_stop(handle);
 			clear_nlink(inode);
-			unlock_new_inode(inode);
-			ext4_mark_inode_dirty(handle, inode);
-			iput(inode);
-			goto out_stop;
+			goto err_drop_inode;
 		}
 	} else {
 		/* clear the extent format for fast symlink */
@@ -2293,6 +2345,10 @@ static int ext4_symlink(struct inode *dir,
 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
 		goto retry;
 	return err;
+err_drop_inode:
+	unlock_new_inode(inode);
+	iput(inode);
+	return err;
 }
 
 static int ext4_link(struct dentry *old_dentry,
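The restructuring above is an instance of a general ext4/jbd2 pattern: never hold a transaction handle across an operation (here __page_symlink()) that may itself wait for a transaction commit, and bridge the resulting window with the orphan list so a crash cannot leak a half-built inode. Schematically, with hypothetical helper names and error paths trimmed:

/* Sketch of the orphan-list bracket used by the new ext4_symlink(). */
drop_nlink(inode);                      /* if we crash now...          */
err = ext4_orphan_add(handle, inode);   /* ...recovery frees the inode */
ext4_journal_stop(handle);              /* no handle across page I/O   */

err = write_payload(inode);             /* may wait for a commit (hypothetical) */

handle = ext4_journal_start(dir, credits_for_dir_entry);
inc_nlink(inode);
err = ext4_orphan_del(handle, inode);   /* inode is reachable again    */

The orphan list is used the same way to protect truncate across a crash, which is why recovery-time orphan processing already knows how to dispose of such inodes.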
@@ -203,46 +203,29 @@ static void ext4_end_bio(struct bio *bio, int error)
 	for (i = 0; i < io_end->num_io_pages; i++) {
 		struct page *page = io_end->pages[i]->p_page;
 		struct buffer_head *bh, *head;
-		int partial_write = 0;
+		loff_t offset;
+		loff_t io_end_offset;
 
-		head = page_buffers(page);
-		if (error)
+		if (error) {
 			SetPageError(page);
-		BUG_ON(!head);
-		if (head->b_size != PAGE_CACHE_SIZE) {
-			loff_t offset;
-			loff_t io_end_offset = io_end->offset + io_end->size;
+			set_bit(AS_EIO, &page->mapping->flags);
+			head = page_buffers(page);
+			BUG_ON(!head);
+
+			io_end_offset = io_end->offset + io_end->size;
 
 			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
 			bh = head;
 			do {
 				if ((offset >= io_end->offset) &&
-				    (offset+bh->b_size <= io_end_offset)) {
-					if (error)
-						buffer_io_error(bh);
-
-				}
-				if (buffer_delay(bh))
-					partial_write = 1;
-				else if (!buffer_mapped(bh))
-					clear_buffer_dirty(bh);
-				else if (buffer_dirty(bh))
-					partial_write = 1;
+				    (offset+bh->b_size <= io_end_offset))
+					buffer_io_error(bh);
+
 				offset += bh->b_size;
 				bh = bh->b_this_page;
 			} while (bh != head);
 		}
 
-		/*
-		 * If this is a partial write which happened to make
-		 * all buffers uptodate then we can optimize away a
-		 * bogus readpage() for the next read(). Here we
-		 * 'discover' whether the page went uptodate as a
-		 * result of this (potentially partial) write.
-		 */
-		if (!partial_write)
-			SetPageUptodate(page);
-
 		put_io_page(io_end->pages[i]);
 	}
 	io_end->num_io_pages = 0;
204 fs/ext4/super.c
@@ -75,11 +75,27 @@ static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
 static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
		       const char *dev_name, void *data);
+static inline int ext2_feature_set_ok(struct super_block *sb);
+static inline int ext3_feature_set_ok(struct super_block *sb);
 static int ext4_feature_set_ok(struct super_block *sb, int readonly);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 static void ext4_clear_request_list(void);
 
+#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
+static struct file_system_type ext2_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "ext2",
+	.mount		= ext4_mount,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,
+};
+#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#else
+#define IS_EXT2_SB(sb) (0)
+#endif
+
+
 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext3_fs_type = {
 	.owner		= THIS_MODULE,
@@ -806,6 +822,8 @@ static void ext4_put_super(struct super_block *sb)
 		invalidate_bdev(sbi->journal_bdev);
 		ext4_blkdev_remove(sbi);
 	}
+	if (sbi->s_mmp_tsk)
+		kthread_stop(sbi->s_mmp_tsk);
 	sb->s_fs_info = NULL;
 	/*
 	 * Now that we are completely done shutting down the
@@ -1096,7 +1114,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 	if (!test_opt(sb, INIT_INODE_TABLE))
 		seq_puts(seq, ",noinit_inode_table");
-	else if (sbi->s_li_wait_mult)
+	else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
 		seq_printf(seq, ",init_inode_table=%u",
 			   (unsigned) sbi->s_li_wait_mult);
 
@@ -1187,9 +1205,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 			 const char *data, size_t len, loff_t off);
 
 static const struct dquot_operations ext4_quota_operations = {
-#ifdef CONFIG_QUOTA
 	.get_reserved_space = ext4_get_reserved_space,
-#endif
 	.write_dquot	= ext4_write_dquot,
 	.acquire_dquot	= ext4_acquire_dquot,
 	.release_dquot	= ext4_release_dquot,
@@ -1900,7 +1916,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 		ext4_msg(sb, KERN_WARNING,
 			 "warning: mounting fs with errors, "
			 "running e2fsck is recommended");
-	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
+	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
		 le16_to_cpu(es->s_mnt_count) >=
		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
 		ext4_msg(sb, KERN_WARNING,
@@ -2425,6 +2441,18 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
 			EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
+static ssize_t extent_cache_hits_show(struct ext4_attr *a,
+				      struct ext4_sb_info *sbi, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits);
+}
+
+static ssize_t extent_cache_misses_show(struct ext4_attr *a,
+					struct ext4_sb_info *sbi, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses);
+}
+
 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 					  struct ext4_sb_info *sbi,
 					  const char *buf, size_t count)
@@ -2482,6 +2510,8 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 EXT4_RO_ATTR(delayed_allocation_blocks);
 EXT4_RO_ATTR(session_write_kbytes);
 EXT4_RO_ATTR(lifetime_write_kbytes);
+EXT4_RO_ATTR(extent_cache_hits);
+EXT4_RO_ATTR(extent_cache_misses);
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
		 inode_readahead_blks_store, s_inode_readahead_blks);
 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2497,6 +2527,8 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(delayed_allocation_blocks),
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
+	ATTR_LIST(extent_cache_hits),
+	ATTR_LIST(extent_cache_misses),
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
@@ -2659,12 +2691,6 @@ static void print_daily_error_info(unsigned long arg)
 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
 }
 
-static void ext4_lazyinode_timeout(unsigned long data)
-{
-	struct task_struct *p = (struct task_struct *)data;
-	wake_up_process(p);
-}
-
 /* Find next suitable group and run ext4_init_inode_table */
 static int ext4_run_li_request(struct ext4_li_request *elr)
 {
@@ -2696,11 +2722,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
 	ret = ext4_init_inode_table(sb, group,
				    elr->lr_timeout ? 0 : 1);
 	if (elr->lr_timeout == 0) {
-		timeout = jiffies - timeout;
-		if (elr->lr_sbi->s_li_wait_mult)
-			timeout *= elr->lr_sbi->s_li_wait_mult;
-		else
-			timeout *= 20;
+		timeout = (jiffies - timeout) *
+			  elr->lr_sbi->s_li_wait_mult;
 		elr->lr_timeout = timeout;
 	}
 	elr->lr_next_sched = jiffies + elr->lr_timeout;
@@ -2712,7 +2735,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
 
 /*
  * Remove lr_request from the list_request and free the
- * request tructure. Should be called with li_list_mtx held
+ * request structure. Should be called with li_list_mtx held
 */
 static void ext4_remove_li_request(struct ext4_li_request *elr)
 {
@@ -2730,14 +2753,16 @@ static void ext4_remove_li_request(struct ext4_li_request *elr)
 
 static void ext4_unregister_li_request(struct super_block *sb)
 {
-	struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
-
-	if (!ext4_li_info)
+	mutex_lock(&ext4_li_mtx);
+	if (!ext4_li_info) {
+		mutex_unlock(&ext4_li_mtx);
 		return;
+	}
 
 	mutex_lock(&ext4_li_info->li_list_mtx);
-	ext4_remove_li_request(elr);
+	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
 	mutex_unlock(&ext4_li_info->li_list_mtx);
+	mutex_unlock(&ext4_li_mtx);
 }
 
 static struct task_struct *ext4_lazyinit_task;
@@ -2756,17 +2781,10 @@ static int ext4_lazyinit_thread(void *arg)
 	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
 	struct list_head *pos, *n;
 	struct ext4_li_request *elr;
-	unsigned long next_wakeup;
-	DEFINE_WAIT(wait);
+	unsigned long next_wakeup, cur;
 
 	BUG_ON(NULL == eli);
 
-	eli->li_timer.data = (unsigned long)current;
-	eli->li_timer.function = ext4_lazyinode_timeout;
-
-	eli->li_task = current;
-	wake_up(&eli->li_wait_task);
-
 cont_thread:
 	while (true) {
 		next_wakeup = MAX_JIFFY_OFFSET;
@@ -2797,19 +2815,15 @@ static int ext4_lazyinit_thread(void *arg)
 		if (freezing(current))
 			refrigerator();
 
-		if ((time_after_eq(jiffies, next_wakeup)) ||
+		cur = jiffies;
+		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
 			cond_resched();
 			continue;
 		}
 
-		eli->li_timer.expires = next_wakeup;
-		add_timer(&eli->li_timer);
-		prepare_to_wait(&eli->li_wait_daemon, &wait,
-				TASK_INTERRUPTIBLE);
-		if (time_before(jiffies, next_wakeup))
-			schedule();
-		finish_wait(&eli->li_wait_daemon, &wait);
+		schedule_timeout_interruptible(next_wakeup - cur);
 
 		if (kthread_should_stop()) {
 			ext4_clear_request_list();
 			goto exit_thread;
@@ -2833,12 +2847,7 @@ static int ext4_lazyinit_thread(void *arg)
 		goto cont_thread;
 	}
 	mutex_unlock(&eli->li_list_mtx);
-	del_timer_sync(&ext4_li_info->li_timer);
-	eli->li_task = NULL;
-	wake_up(&eli->li_wait_task);
 
 	kfree(ext4_li_info);
+	ext4_lazyinit_task = NULL;
 	ext4_li_info = NULL;
 	mutex_unlock(&ext4_li_mtx);
 
@@ -2866,7 +2875,6 @@ static int ext4_run_lazyinit_thread(void)
 	if (IS_ERR(ext4_lazyinit_task)) {
 		int err = PTR_ERR(ext4_lazyinit_task);
 		ext4_clear_request_list();
-		del_timer_sync(&ext4_li_info->li_timer);
 		kfree(ext4_li_info);
 		ext4_li_info = NULL;
 		printk(KERN_CRIT "EXT4: error %d creating inode table "
@@ -2875,8 +2883,6 @@ static int ext4_run_lazyinit_thread(void)
 		return err;
 	}
 	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
-
-	wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
 	return 0;
 }
 
@@ -2911,13 +2917,9 @@ static int ext4_li_info_new(void)
 	if (!eli)
 		return -ENOMEM;
 
-	eli->li_task = NULL;
 	INIT_LIST_HEAD(&eli->li_request_list);
 	mutex_init(&eli->li_list_mtx);
 
-	init_waitqueue_head(&eli->li_wait_daemon);
-	init_waitqueue_head(&eli->li_wait_task);
-	init_timer(&eli->li_timer);
 	eli->li_state |= EXT4_LAZYINIT_QUIT;
 
 	ext4_li_info = eli;
@@ -2960,20 +2962,19 @@ static int ext4_register_li_request(struct super_block *sb,
 	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
 	int ret = 0;
 
-	if (sbi->s_li_request != NULL)
+	if (sbi->s_li_request != NULL) {
+		/*
+		 * Reset timeout so it can be computed again, because
+		 * s_li_wait_mult might have changed.
+		 */
+		sbi->s_li_request->lr_timeout = 0;
 		return 0;
+	}
 
 	if (first_not_zeroed == ngroups ||
	    (sb->s_flags & MS_RDONLY) ||
-	    !test_opt(sb, INIT_INODE_TABLE)) {
-		sbi->s_li_request = NULL;
+	    !test_opt(sb, INIT_INODE_TABLE))
 		return 0;
-	}
-
-	if (first_not_zeroed == ngroups) {
-		sbi->s_li_request = NULL;
-		return 0;
-	}
 
 	elr = ext4_li_request_new(sb, first_not_zeroed);
 	if (!elr)
@@ -3166,6 +3167,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
 		set_opt(sb, DELALLOC);
 
+	/*
+	 * set default s_li_wait_mult for lazyinit, for the case there is
+	 * no mount option specified.
+	 */
+	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+
 	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
			   &journal_devnum, &journal_ioprio, NULL, 0)) {
 		ext4_msg(sb, KERN_WARNING,
@@ -3187,6 +3194,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
			 "feature flags set on rev 0 fs, "
			 "running e2fsck is recommended");
 
+	if (IS_EXT2_SB(sb)) {
+		if (ext2_feature_set_ok(sb))
+			ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
+				 "using the ext4 subsystem");
+		else {
+			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
+				 "to feature incompatibilities");
+			goto failed_mount;
+		}
+	}
+
+	if (IS_EXT3_SB(sb)) {
+		if (ext3_feature_set_ok(sb))
+			ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
+				 "using the ext4 subsystem");
+		else {
+			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
+				 "to feature incompatibilities");
+			goto failed_mount;
+		}
+	}
+
 	/*
	 * Check feature flags regardless of the revision level, since we
	 * previously didn't change the revision level when setting the flags,
@@ -3459,6 +3488,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
			  EXT4_HAS_INCOMPAT_FEATURE(sb,
				    EXT4_FEATURE_INCOMPAT_RECOVER));
 
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
+	    !(sb->s_flags & MS_RDONLY))
+		if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
+			goto failed_mount3;
+
 	/*
	 * The first inode we look at is the journal inode.  Don't try
	 * root first: it may be modified in the journal!
@@ -3474,7 +3508,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount_wq;
 	} else {
 		clear_opt(sb, DATA_FLAGS);
-		set_opt(sb, WRITEBACK_DATA);
 		sbi->s_journal = NULL;
 		needs_recovery = 0;
 		goto no_journal;
@@ -3707,6 +3740,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
+	if (sbi->s_mmp_tsk)
+		kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -4242,7 +4277,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	int enable_quota = 0;
 	ext4_group_t g;
 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
-	int err;
+	int err = 0;
 #ifdef CONFIG_QUOTA
 	int i;
 #endif
@@ -4368,6 +4403,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 				goto restore_opts;
 			if (!ext4_setup_super(sb, es, 0))
 				sb->s_flags &= ~MS_RDONLY;
+			if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+						      EXT4_FEATURE_INCOMPAT_MMP))
+				if (ext4_multi_mount_protect(sb,
+						le64_to_cpu(es->s_mmp_block))) {
+					err = -EROFS;
+					goto restore_opts;
+				}
 			enable_quota = 1;
 		}
 	}
@@ -4432,6 +4474,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_super_block *es = sbi->s_es;
 	u64 fsid;
+	s64 bfree;
 
 	if (test_opt(sb, MINIX_DF)) {
 		sbi->s_overhead_last = 0;
@@ -4475,8 +4518,10 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_type = EXT4_SUPER_MAGIC;
 	buf->f_bsize = sb->s_blocksize;
 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
-	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
+	bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
+	/* prevent underflow in case that few free space is available */
+	buf->f_bfree = max_t(s64, bfree, 0);
 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
 	if (buf->f_bfree < ext4_r_blocks_count(es))
 		buf->f_bavail = 0;
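The s64 intermediate above matters because both per-cpu counters are only approximately summed: with delayed allocation, a burst of reservations can make s_dirtyblocks_counter momentarily exceed s_freeblocks_counter, and assigning the negative difference straight into the unsigned f_bfree would wrap to an enormous value. A stand-alone toy illustration of the failure mode and the clamp (the numbers are made up):

#include <stdio.h>

int main(void)
{
        long long freeblocks  = 1000;   /* ~percpu_counter_sum_positive(...) */
        long long dirtyblocks = 1042;   /* delalloc reservations can overshoot */
        long long bfree = freeblocks - dirtyblocks;

        printf("raw u64 view: %llu\n", (unsigned long long) bfree); /* ~2^64 */
        printf("clamped:      %lld\n", bfree > 0 ? bfree : 0);      /* 0 */
        return 0;
}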
@@ -4652,6 +4697,9 @@ static int ext4_quota_off(struct super_block *sb, int type)
 	if (test_opt(sb, DELALLOC))
 		sync_filesystem(sb);
 
+	if (!inode)
+		goto out;
+
 	/* Update modification times of quota files when userspace can
	 * start looking at them */
 	handle = ext4_journal_start(inode, 1);
@@ -4772,14 +4820,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
 }
 
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
-static struct file_system_type ext2_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "ext2",
-	.mount		= ext4_mount,
-	.kill_sb	= kill_block_super,
-	.fs_flags	= FS_REQUIRES_DEV,
-};
-
 static inline void register_as_ext2(void)
 {
 	int err = register_filesystem(&ext2_fs_type);
@@ -4792,10 +4832,22 @@ static inline void unregister_as_ext2(void)
 {
 	unregister_filesystem(&ext2_fs_type);
 }
+
+static inline int ext2_feature_set_ok(struct super_block *sb)
+{
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP))
+		return 0;
+	if (sb->s_flags & MS_RDONLY)
+		return 1;
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))
+		return 0;
+	return 1;
+}
 MODULE_ALIAS("ext2");
 #else
 static inline void register_as_ext2(void) { }
 static inline void unregister_as_ext2(void) { }
+static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
 #endif
 
 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
@@ -4811,10 +4863,24 @@ static inline void unregister_as_ext3(void)
 {
 	unregister_filesystem(&ext3_fs_type);
 }
+
+static inline int ext3_feature_set_ok(struct super_block *sb)
+{
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP))
+		return 0;
+	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+		return 0;
+	if (sb->s_flags & MS_RDONLY)
+		return 1;
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP))
+		return 0;
+	return 1;
+}
 MODULE_ALIAS("ext3");
 #else
 static inline void register_as_ext3(void) { }
 static inline void unregister_as_ext3(void) { }
+static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }
 #endif
 
 static struct file_system_type ext4_fs_type = {
@@ -4898,8 +4964,8 @@ static int __init ext4_init_fs(void)
 	err = init_inodecache();
 	if (err)
 		goto out1;
-	register_as_ext2();
 	register_as_ext3();
+	register_as_ext2();
 	err = register_filesystem(&ext4_fs_type);
 	if (err)
 		goto out;
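With the registration above in place, a kernel built with CONFIG_EXT4_USE_FOR_EXT23 and without the separate ext2/ext3 modules routes plain ext2/ext3 mounts through ext4, with ext2_feature_set_ok()/ext3_feature_set_ok() as the gatekeepers. From user space nothing changes; for example (device and mount point are hypothetical):

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
        /* Served by the ext4 driver when it is registered as "ext2";
         * ext2_feature_set_ok() above decides whether it proceeds. */
        if (mount("/dev/sdb1", "/mnt", "ext2", MS_RDONLY, NULL) < 0) {
                perror("mount");
                return 1;
        }
        return 0;
}

Note that both helpers are deliberately more permissive for read-only mounts: unknown RO_COMPAT features only block writable ones.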
@@ -820,8 +820,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
-			block = ext4_new_meta_blocks(handle, inode,
-						     goal, NULL, &error);
+			block = ext4_new_meta_blocks(handle, inode, goal, 0,
+						     NULL, &error);
 			if (error)
 				goto cleanup;
 
@@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal,
 			ret = err;
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
-		commit_transaction->t_flushed_data_blocks = 1;
 		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
 		smp_mb__after_clear_bit();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -672,12 +671,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		err = 0;
 	}
 
+	write_lock(&journal->j_state_lock);
+	J_ASSERT(commit_transaction->t_state == T_COMMIT);
+	commit_transaction->t_state = T_COMMIT_DFLUSH;
+	write_unlock(&journal->j_state_lock);
 	/*
	 * If the journal is not located on the file system device,
	 * then we must flush the file system device before we issue
	 * the commit record
	 */
-	if (commit_transaction->t_flushed_data_blocks &&
+	if (commit_transaction->t_need_data_flush &&
	    (journal->j_fs_dev != journal->j_dev) &&
	    (journal->j_flags & JBD2_BARRIER))
 		blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
@@ -754,8 +757,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)
		   required. */
 		JBUFFER_TRACE(jh, "file as BJ_Forget");
 		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
-		/* Wake up any transactions which were waiting for this
-		   IO to complete */
+		/*
+		 * Wake up any transactions which were waiting for this IO to
+		 * complete. The barrier must be here so that changes by
+		 * jbd2_journal_file_buffer() take effect before wake_up_bit()
+		 * does the waitqueue check.
+		 */
+		smp_mb();
 		wake_up_bit(&bh->b_state, BH_Unshadow);
 		JBUFFER_TRACE(jh, "brelse shadowed buffer");
 		__brelse(bh);
@@ -794,6 +802,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 		jbd2_journal_abort(journal, err);
 
 	jbd_debug(3, "JBD: commit phase 5\n");
+	write_lock(&journal->j_state_lock);
+	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
+	commit_transaction->t_state = T_COMMIT_JFLUSH;
+	write_unlock(&journal->j_state_lock);
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -949,7 +961,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 	jbd_debug(3, "JBD: commit phase 7\n");
 
-	J_ASSERT(commit_transaction->t_state == T_COMMIT);
+	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
 
 	commit_transaction->t_start = jiffies;
 	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
@@ -479,9 +479,12 @@ int __jbd2_log_space_left(journal_t *journal)
 int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 {
 	/*
-	 * Are we already doing a recent enough commit?
+	 * The only transaction we can possibly wait upon is the
+	 * currently running transaction (if it exists).  Otherwise,
+	 * the target tid must be an old one.
	 */
-	if (!tid_geq(journal->j_commit_request, target)) {
+	if (journal->j_running_transaction &&
+	    journal->j_running_transaction->t_tid == target) {
 		/*
		 * We want a new commit: OK, mark the request and wakeup the
		 * commit thread.  We do _not_ do the commit ourselves.
@@ -493,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
			  journal->j_commit_sequence);
 		wake_up(&journal->j_wait_commit);
 		return 1;
-	}
+	} else if (!tid_geq(journal->j_commit_request, target))
+		/* This should never happen, but if it does, preserve
+		   the evidence before kjournald goes into a loop and
+		   increments j_commit_sequence beyond all recognition. */
+		WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+			  journal->j_commit_request,
+			  journal->j_commit_sequence,
+			  target, journal->j_running_transaction ?
+			  journal->j_running_transaction->t_tid : 0);
 	return 0;
 }
 
@@ -576,6 +587,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
 	return ret;
 }
 
+/*
+ * Return 1 if a given transaction has not yet sent barrier request
+ * connected with a transaction commit. If 0 is returned, transaction
+ * may or may not have sent the barrier. Used to avoid sending barrier
+ * twice in common cases.
+ */
+int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
+{
+	int ret = 0;
+	transaction_t *commit_trans;
+
+	if (!(journal->j_flags & JBD2_BARRIER))
+		return 0;
+	read_lock(&journal->j_state_lock);
+	/* Transaction already committed? */
+	if (tid_geq(journal->j_commit_sequence, tid))
+		goto out;
+	commit_trans = journal->j_committing_transaction;
+	if (!commit_trans || commit_trans->t_tid != tid) {
+		ret = 1;
+		goto out;
+	}
+	/*
+	 * Transaction is being committed and we already proceeded to
+	 * submitting a flush to fs partition?
+	 */
+	if (journal->j_fs_dev != journal->j_dev) {
+		if (!commit_trans->t_need_data_flush ||
+		    commit_trans->t_state >= T_COMMIT_DFLUSH)
+			goto out;
+	} else {
+		if (commit_trans->t_state >= T_COMMIT_JFLUSH)
+			goto out;
+	}
+	ret = 1;
+out:
+	read_unlock(&journal->j_state_lock);
+	return ret;
+}
+EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
+
 /*
 * Wait for a specified commit to complete.
 * The caller may not hold the journal lock.
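The intended consumer of jbd2_trans_will_send_data_barrier() is fsync-style code deciding whether it must issue its own flush to the data device. A condensed sketch of that usage, simplified from what ext4_sync_file() does elsewhere in this merge (real callers also handle the no-journal and nobarrier cases):

/* Sketch: only flush the disk ourselves if the committing transaction
 * will not already send a barrier for us. */
static int fsync_with_barrier(journal_t *journal, tid_t commit_tid,
                              struct block_device *bdev)
{
        bool needs_barrier = false;
        int ret;

        if ((journal->j_flags & JBD2_BARRIER) &&
            !jbd2_trans_will_send_data_barrier(journal, commit_tid))
                needs_barrier = true;
        ret = jbd2_log_wait_commit(journal, commit_tid);
        if (needs_barrier)
                blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
        return ret;
}

Because the helper may return 0 conservatively (the transaction "may or may not have sent the barrier"), a false negative only costs an extra flush, never a missed one.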
@@ -82,7 +82,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 */
 
 /*
- * Update transiaction's maximum wait time, if debugging is enabled.
+ * Update transaction's maximum wait time, if debugging is enabled.
 *
 * In order for t_max_wait to be reliable, it must be protected by a
 * lock. But doing so will mean that start_this_handle() can not be
@@ -91,11 +91,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 * means that maximum wait time reported by the jbd2_run_stats
 * tracepoint will always be zero.
 */
-static inline void update_t_max_wait(transaction_t *transaction)
+static inline void update_t_max_wait(transaction_t *transaction,
+				     unsigned long ts)
 {
 #ifdef CONFIG_JBD2_DEBUG
-	unsigned long ts = jiffies;
-
 	if (jbd2_journal_enable_debug &&
	    time_after(transaction->t_start, ts)) {
 		ts = jbd2_time_diff(ts, transaction->t_start);
@@ -121,6 +120,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 	tid_t		tid;
 	int		needed, need_to_start;
 	int		nblocks = handle->h_buffer_credits;
+	unsigned long ts = jiffies;
 
 	if (nblocks > journal->j_max_transaction_buffers) {
 		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -271,7 +271,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 	/* OK, account for the buffers that this operation expects to
	 * use and add the handle to the running transaction.
	 */
-	update_t_max_wait(transaction);
+	update_t_max_wait(transaction, ts);
 	handle->h_transaction = transaction;
 	atomic_inc(&transaction->t_updates);
 	atomic_inc(&transaction->t_handle_count);
@@ -316,7 +316,8 @@ static handle_t *new_handle(int nblocks)
 * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
- * Return a pointer to a newly allocated handle, or NULL on failure
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
 */
 handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
 {
@@ -921,8 +922,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
	 */
 	JBUFFER_TRACE(jh, "cancelling revoke");
 	jbd2_journal_cancel_revoke(handle, jh);
-	jbd2_journal_put_journal_head(jh);
 out:
+	jbd2_journal_put_journal_head(jh);
 	return err;
 }
 
@@ -2147,6 +2148,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
	    jinode->i_next_transaction == transaction)
 		goto done;
 
+	/*
+	 * We only ever set this variable to 1 so the test is safe. Since
+	 * t_need_data_flush is likely to be set, we do the test to save some
+	 * cacheline bouncing
+	 */
+	if (!transaction->t_need_data_flush)
+		transaction->t_need_data_flush = 1;
 	/* On some different transaction's list - should be
	 * the committing one */
 	if (jinode->i_transaction) {
@@ -529,9 +529,10 @@ struct transaction_s
 	enum {
 		T_RUNNING,
 		T_LOCKED,
-		T_RUNDOWN,
 		T_FLUSH,
 		T_COMMIT,
+		T_COMMIT_DFLUSH,
+		T_COMMIT_JFLUSH,
 		T_FINISHED
 	} t_state;
 
@@ -658,7 +659,9 @@ struct transaction_s
	 * waiting for it to finish.
	 */
 	unsigned int t_synchronous_commit:1;
-	unsigned int t_flushed_data_blocks:1;
+
+	/* Disk flush needs to be sent to fs partition [no locking] */
+	int			t_need_data_flush;
 
 	/*
	 * For use by the filesystem to store fs-specific data
@@ -1228,6 +1231,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 int jbd2_journal_force_commit_nested(journal_t *journal);
 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
+int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
 
 void __jbd2_log_wait_for_space(journal_t *journal);
 extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);