Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (33 commits) ext4: Regularize mount options ext4: fix locking typo in mballoc which could cause soft lockup hangs ext4: fix typo which causes a memory leak on error path jbd2: Update locking coments ext4: Rename pa_linear to pa_type ext4: add checks of block references for non-extent inodes ext4: Check for an valid i_mode when reading the inode from disk ext4: Use WRITE_SYNC for commits which are caused by fsync() ext4: Add auto_da_alloc mount option ext4: Use struct flex_groups to calculate get_orlov_stats() ext4: Use atomic_t's in struct flex_groups ext4: remove /proc tuning knobs ext4: Add sysfs support ext4: Track lifetime disk writes ext4: Fix discard of inode prealloc space with delayed allocation. ext4: Automatically allocate delay allocated blocks on rename ext4: Automatically allocate delay allocated blocks on close ext4: add EXT4_IOC_ALLOC_DA_BLKS ioctl ext4: Simplify delalloc code by removing mpage_da_writepages() ext4: Save stack space by removing fake buffer heads ...
This commit is contained in:
commit
395d73413c
23 changed files with 1223 additions and 599 deletions
81
Documentation/ABI/testing/sysfs-fs-ext4
Normal file
81
Documentation/ABI/testing/sysfs-fs-ext4
Normal file
|
@ -0,0 +1,81 @@
|
|||
What: /sys/fs/ext4/<disk>/mb_stats
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
Controls whether the multiblock allocator should
|
||||
collect statistics, which are shown during the unmount.
|
||||
1 means to collect statistics, 0 means not to collect
|
||||
statistics.
|
||||
|
||||
What: /sys/fs/ext4/<disk>/mb_group_prealloc
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
The multiblock allocator will round up allocation
|
||||
requests to a multiple of this tuning parameter if the
|
||||
stripe size is not set in the ext4 superblock
|
||||
|
||||
What: /sys/fs/ext4/<disk>/mb_max_to_scan
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
The maximum number of extents the multiblock allocator
|
||||
will search to find the best extent
|
||||
|
||||
What: /sys/fs/ext4/<disk>/mb_min_to_scan
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
The minimum number of extents the multiblock allocator
|
||||
will search to find the best extent
|
||||
|
||||
What: /sys/fs/ext4/<disk>/mb_order2_req
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
Tuning parameter which controls the minimum size for
|
||||
requests (as a power of 2) where the buddy cache is
|
||||
used
|
||||
|
||||
What: /sys/fs/ext4/<disk>/mb_stream_req
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
Files which have fewer blocks than this tunable
|
||||
parameter will have their blocks allocated out of a
|
||||
block group specific preallocation pool, so that small
|
||||
files are packed closely together. Each large file
|
||||
will have its blocks allocated out of its own unique
|
||||
preallocation pool.
|
||||
|
||||
What: /sys/fs/ext4/<disk>/inode_readahead
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
Tuning parameter which controls the maximum number of
|
||||
inode table blocks that ext4's inode table readahead
|
||||
algorithm will pre-read into the buffer cache
|
||||
|
||||
What: /sys/fs/ext4/<disk>/delayed_allocation_blocks
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
This file is read-only and shows the number of blocks
|
||||
that are dirty in the page cache, but which do not
|
||||
have their location in the filesystem allocated yet.
|
||||
|
||||
What: /sys/fs/ext4/<disk>/lifetime_write_kbytes
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
This file is read-only and shows the number of kilobytes
|
||||
of data that have been written to this filesystem since it was
|
||||
created.
|
||||
|
||||
What: /sys/fs/ext4/<disk>/session_write_kbytes
|
||||
Date: March 2008
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
This file is read-only and shows the number of
|
||||
kilobytes of data that have been written to this
|
||||
filesystem since it was mounted.
|
|
@ -85,7 +85,7 @@ Note: More extensive information for getting started with ext4 can be
|
|||
* extent format more robust in face of on-disk corruption due to magics,
|
||||
* internal redundancy in tree
|
||||
* improved file allocation (multi-block alloc)
|
||||
* fix 32000 subdirectory limit
|
||||
* lift 32000 subdirectory limit imposed by i_links_count[1]
|
||||
* nsec timestamps for mtime, atime, ctime, create time
|
||||
* inode version field on disk (NFSv4, Lustre)
|
||||
* reduced e2fsck time via uninit_bg feature
|
||||
|
@ -100,6 +100,9 @@ Note: More extensive information for getting started with ext4 can be
|
|||
* efficient new ordered mode in JBD2 and ext4 (avoid using buffer head to force
|
||||
the ordering)
|
||||
|
||||
[1] Filesystems with a block size of 1k may see a limit imposed by the
|
||||
directory hash tree having a maximum depth of two.
|
||||
|
||||
2.2 Candidate features for future inclusion
|
||||
|
||||
* Online defrag (patches available but not well tested)
|
||||
|
@ -180,8 +183,8 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata
|
|||
performance.
|
||||
|
||||
barrier=<0|1(*)> This enables/disables the use of write barriers in
|
||||
the jbd code. barrier=0 disables, barrier=1 enables.
|
||||
This also requires an IO stack which can support
|
||||
barrier(*) the jbd code. barrier=0 disables, barrier=1 enables.
|
||||
nobarrier This also requires an IO stack which can support
|
||||
barriers, and if jbd gets an error on a barrier
|
||||
write, it will disable again with a warning.
|
||||
Write barriers enforce proper on-disk ordering
|
||||
|
@ -189,6 +192,9 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in
|
|||
safe to use, at some performance penalty. If
|
||||
your disks are battery-backed in one way or another,
|
||||
disabling barriers may safely improve performance.
|
||||
The mount options "barrier" and "nobarrier" can
|
||||
also be used to enable or disable barriers, for
|
||||
consistency with other ext4 mount options.
|
||||
|
||||
inode_readahead=n This tuning parameter controls the maximum
|
||||
number of inode table blocks that ext4's inode
|
||||
|
@ -310,6 +316,24 @@ journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the
|
|||
a slightly higher priority than the default I/O
|
||||
priority.
|
||||
|
||||
auto_da_alloc(*) Many broken applications don't use fsync() when
|
||||
noauto_da_alloc replacing existing files via patterns such as
|
||||
fd = open("foo.new")/write(fd,..)/close(fd)/
|
||||
rename("foo.new", "foo"), or worse yet,
|
||||
fd = open("foo", O_TRUNC)/write(fd,..)/close(fd).
|
||||
If auto_da_alloc is enabled, ext4 will detect
|
||||
the replace-via-rename and replace-via-truncate
|
||||
patterns and force that any delayed allocation
|
||||
blocks are allocated such that at the next
|
||||
journal commit, in the default data=ordered
|
||||
mode, the data blocks of the new file are forced
|
||||
to disk before the rename() operation is
|
||||
committed. This provides roughly the same level
|
||||
of guarantees as ext3, and avoids the
|
||||
"zero-length" problem that can happen when a
|
||||
system crashes before the delayed allocation
|
||||
blocks are forced to disk.
|
||||
|
||||
Data Mode
|
||||
=========
|
||||
There are 3 different data modes:
|
||||
|
|
|
@ -940,27 +940,6 @@ Table 1-10: Files in /proc/fs/ext4/<devname>
|
|||
File Content
|
||||
mb_groups details of multiblock allocator buddy cache of free blocks
|
||||
mb_history multiblock allocation history
|
||||
stats controls whether the multiblock allocator should start
|
||||
collecting statistics, which are shown during the unmount
|
||||
group_prealloc the multiblock allocator will round up allocation
|
||||
requests to a multiple of this tuning parameter if the
|
||||
stripe size is not set in the ext4 superblock
|
||||
max_to_scan The maximum number of extents the multiblock allocator
|
||||
will search to find the best extent
|
||||
min_to_scan The minimum number of extents the multiblock allocator
|
||||
will search to find the best extent
|
||||
order2_req Tuning parameter which controls the minimum size for
|
||||
requests (as a power of 2) where the buddy cache is
|
||||
used
|
||||
stream_req Files which have fewer blocks than this tunable
|
||||
parameter will have their blocks allocated out of a
|
||||
block group specific preallocation pool, so that small
|
||||
files are packed closely together. Each large file
|
||||
will have its blocks allocated out of its own unique
|
||||
preallocation pool.
|
||||
inode_readahead Tuning parameter which controls the maximum number of
|
||||
inode table blocks that ext4's inode table readahead
|
||||
algorithm will pre-read into the buffer cache
|
||||
..............................................................................
|
||||
|
||||
|
||||
|
|
|
@ -55,7 +55,8 @@ static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block,
|
|||
}
|
||||
|
||||
static int ext4_group_used_meta_blocks(struct super_block *sb,
|
||||
ext4_group_t block_group)
|
||||
ext4_group_t block_group,
|
||||
struct ext4_group_desc *gdp)
|
||||
{
|
||||
ext4_fsblk_t tmp;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
@ -63,10 +64,6 @@ static int ext4_group_used_meta_blocks(struct super_block *sb,
|
|||
int used_blocks = sbi->s_itb_per_group + 2;
|
||||
|
||||
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
|
||||
struct ext4_group_desc *gdp;
|
||||
struct buffer_head *bh;
|
||||
|
||||
gdp = ext4_get_group_desc(sb, block_group, &bh);
|
||||
if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp),
|
||||
block_group))
|
||||
used_blocks--;
|
||||
|
@ -177,7 +174,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
|
|||
*/
|
||||
mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
|
||||
}
|
||||
return free_blocks - ext4_group_used_meta_blocks(sb, block_group);
|
||||
return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
|
||||
}
|
||||
|
||||
|
||||
|
@ -473,9 +470,8 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
|
|||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
|
||||
spin_lock(sb_bgl_lock(sbi, flex_group));
|
||||
sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
|
||||
spin_unlock(sb_bgl_lock(sbi, flex_group));
|
||||
atomic_add(blocks_freed,
|
||||
&sbi->s_flex_groups[flex_group].free_blocks);
|
||||
}
|
||||
/*
|
||||
* request to reload the buddy with the
|
||||
|
|
|
@ -67,7 +67,8 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
|
|||
unsigned int offset)
|
||||
{
|
||||
const char *error_msg = NULL;
|
||||
const int rlen = ext4_rec_len_from_disk(de->rec_len);
|
||||
const int rlen = ext4_rec_len_from_disk(de->rec_len,
|
||||
dir->i_sb->s_blocksize);
|
||||
|
||||
if (rlen < EXT4_DIR_REC_LEN(1))
|
||||
error_msg = "rec_len is smaller than minimal";
|
||||
|
@ -178,10 +179,11 @@ static int ext4_readdir(struct file *filp,
|
|||
* least that it is non-zero. A
|
||||
* failure will be detected in the
|
||||
* dirent test below. */
|
||||
if (ext4_rec_len_from_disk(de->rec_len)
|
||||
< EXT4_DIR_REC_LEN(1))
|
||||
if (ext4_rec_len_from_disk(de->rec_len,
|
||||
sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
|
||||
break;
|
||||
i += ext4_rec_len_from_disk(de->rec_len);
|
||||
i += ext4_rec_len_from_disk(de->rec_len,
|
||||
sb->s_blocksize);
|
||||
}
|
||||
offset = i;
|
||||
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
|
||||
|
@ -203,7 +205,8 @@ static int ext4_readdir(struct file *filp,
|
|||
ret = stored;
|
||||
goto out;
|
||||
}
|
||||
offset += ext4_rec_len_from_disk(de->rec_len);
|
||||
offset += ext4_rec_len_from_disk(de->rec_len,
|
||||
sb->s_blocksize);
|
||||
if (le32_to_cpu(de->inode)) {
|
||||
/* We might block in the next section
|
||||
* if the data destination is
|
||||
|
@ -225,7 +228,8 @@ static int ext4_readdir(struct file *filp,
|
|||
goto revalidate;
|
||||
stored++;
|
||||
}
|
||||
filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
|
||||
filp->f_pos += ext4_rec_len_from_disk(de->rec_len,
|
||||
sb->s_blocksize);
|
||||
}
|
||||
offset = 0;
|
||||
brelse(bh);
|
||||
|
|
|
@ -32,14 +32,6 @@
|
|||
*/
|
||||
#undef EXT4FS_DEBUG
|
||||
|
||||
/*
|
||||
* Define EXT4_RESERVATION to reserve data blocks for expanding files
|
||||
*/
|
||||
#define EXT4_DEFAULT_RESERVE_BLOCKS 8
|
||||
/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
|
||||
#define EXT4_MAX_RESERVE_BLOCKS 1027
|
||||
#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
|
||||
|
||||
/*
|
||||
* Debug code
|
||||
*/
|
||||
|
@ -54,8 +46,6 @@
|
|||
#define ext4_debug(f, a...) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define EXT4_MULTIBLOCK_ALLOCATOR 1
|
||||
|
||||
/* prefer goal again. length */
|
||||
#define EXT4_MB_HINT_MERGE 1
|
||||
/* blocks already reserved */
|
||||
|
@ -180,8 +170,9 @@ struct ext4_group_desc
|
|||
*/
|
||||
|
||||
struct flex_groups {
|
||||
__u32 free_inodes;
|
||||
__u32 free_blocks;
|
||||
atomic_t free_inodes;
|
||||
atomic_t free_blocks;
|
||||
atomic_t used_dirs;
|
||||
};
|
||||
|
||||
#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
|
||||
|
@ -249,6 +240,30 @@ struct flex_groups {
|
|||
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
|
||||
#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
|
||||
|
||||
/* Flags that should be inherited by new inodes from their parent. */
|
||||
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
|
||||
EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
|
||||
EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
|
||||
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
|
||||
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
|
||||
|
||||
/* Flags that are appropriate for regular files (all but dir-specific ones). */
|
||||
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
|
||||
|
||||
/* Flags that are appropriate for non-directories/regular files. */
|
||||
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
|
||||
|
||||
/* Mask out flags that are inappropriate for the given type of inode. */
|
||||
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
|
||||
{
|
||||
if (S_ISDIR(mode))
|
||||
return flags;
|
||||
else if (S_ISREG(mode))
|
||||
return flags & EXT4_REG_FLMASK;
|
||||
else
|
||||
return flags & EXT4_OTHER_FLMASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inode dynamic state flags
|
||||
*/
|
||||
|
@ -256,6 +271,7 @@ struct flex_groups {
|
|||
#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
|
||||
#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
|
||||
#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
|
||||
#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
|
||||
|
||||
/* Used to pass group descriptor data when online resize is done */
|
||||
struct ext4_new_group_input {
|
||||
|
@ -303,7 +319,9 @@ struct ext4_new_group_data {
|
|||
#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
|
||||
#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
|
||||
#define EXT4_IOC_MIGRATE _IO('f', 9)
|
||||
/* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
|
||||
/* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
|
||||
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
|
||||
|
||||
/*
|
||||
* ioctl commands in 32 bit emulation
|
||||
|
@ -531,7 +549,7 @@ do { \
|
|||
#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
|
||||
#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
|
||||
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
|
||||
#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
|
||||
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
|
||||
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
|
||||
#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
|
||||
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
|
||||
|
@ -666,7 +684,8 @@ struct ext4_super_block {
|
|||
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
|
||||
__u8 s_reserved_char_pad2;
|
||||
__le16 s_reserved_pad;
|
||||
__u32 s_reserved[162]; /* Padding to the end of the block */
|
||||
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
|
||||
__u32 s_reserved[160]; /* Padding to the end of the block */
|
||||
};
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
@ -813,6 +832,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
|||
#define EXT4_DEF_MIN_BATCH_TIME 0
|
||||
#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
|
||||
|
||||
/*
|
||||
* Minimum number of groups in a flexgroup before we separate out
|
||||
* directories into the first block group of a flexgroup
|
||||
*/
|
||||
#define EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME 4
|
||||
|
||||
/*
|
||||
* Structure of a directory entry
|
||||
*/
|
||||
|
@ -865,24 +890,6 @@ struct ext4_dir_entry_2 {
|
|||
~EXT4_DIR_ROUND)
|
||||
#define EXT4_MAX_REC_LEN ((1<<16)-1)
|
||||
|
||||
static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
|
||||
{
|
||||
unsigned len = le16_to_cpu(dlen);
|
||||
|
||||
if (len == EXT4_MAX_REC_LEN || len == 0)
|
||||
return 1 << 16;
|
||||
return len;
|
||||
}
|
||||
|
||||
static inline __le16 ext4_rec_len_to_disk(unsigned len)
|
||||
{
|
||||
if (len == (1 << 16))
|
||||
return cpu_to_le16(EXT4_MAX_REC_LEN);
|
||||
else if (len > (1 << 16))
|
||||
BUG();
|
||||
return cpu_to_le16(len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash Tree Directory indexing
|
||||
* (c) Daniel Phillips, 2001
|
||||
|
@ -970,22 +977,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
|
|||
|
||||
extern struct proc_dir_entry *ext4_proc_root;
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
extern const struct file_operations ext4_ui_proc_fops;
|
||||
|
||||
#define EXT4_PROC_HANDLER(name, var) \
|
||||
do { \
|
||||
proc = proc_create_data(name, mode, sbi->s_proc, \
|
||||
&ext4_ui_proc_fops, &sbi->s_##var); \
|
||||
if (proc == NULL) { \
|
||||
printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \
|
||||
goto err_out; \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define EXT4_PROC_HANDLER(name, var)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Function prototypes
|
||||
*/
|
||||
|
@ -1092,6 +1083,7 @@ extern int ext4_can_truncate(struct inode *inode);
|
|||
extern void ext4_truncate(struct inode *);
|
||||
extern void ext4_set_inode_flags(struct inode *);
|
||||
extern void ext4_get_inode_flags(struct ext4_inode_info *);
|
||||
extern int ext4_alloc_da_blocks(struct inode *inode);
|
||||
extern void ext4_set_aops(struct inode *inode);
|
||||
extern int ext4_writepage_trans_blocks(struct inode *);
|
||||
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
|
||||
|
@ -1107,7 +1099,10 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
|
|||
|
||||
/* migrate.c */
|
||||
extern int ext4_ext_migrate(struct inode *);
|
||||
|
||||
/* namei.c */
|
||||
extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
|
||||
extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
|
||||
extern int ext4_orphan_add(handle_t *, struct inode *);
|
||||
extern int ext4_orphan_del(handle_t *, struct inode *);
|
||||
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
|
||||
|
|
|
@ -241,5 +241,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
|
|||
extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
|
||||
ext4_lblk_t *, ext4_fsblk_t *);
|
||||
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
|
||||
extern int ext4_ext_check_inode(struct inode *inode);
|
||||
#endif /* _EXT4_EXTENTS */
|
||||
|
||||
|
|
|
@ -33,9 +33,6 @@ typedef __u32 ext4_lblk_t;
|
|||
/* data type for block group number */
|
||||
typedef unsigned int ext4_group_t;
|
||||
|
||||
#define rsv_start rsv_window._rsv_start
|
||||
#define rsv_end rsv_window._rsv_end
|
||||
|
||||
/*
|
||||
* storage for cached extent
|
||||
*/
|
||||
|
@ -125,6 +122,9 @@ struct ext4_inode_info {
|
|||
struct list_head i_prealloc_list;
|
||||
spinlock_t i_prealloc_lock;
|
||||
|
||||
/* ialloc */
|
||||
ext4_group_t i_last_alloc_group;
|
||||
|
||||
/* allocation reservation info for delalloc */
|
||||
unsigned int i_reserved_data_blocks;
|
||||
unsigned int i_reserved_meta_blocks;
|
||||
|
|
|
@ -62,12 +62,10 @@ struct ext4_sb_info {
|
|||
struct percpu_counter s_freeinodes_counter;
|
||||
struct percpu_counter s_dirs_counter;
|
||||
struct percpu_counter s_dirtyblocks_counter;
|
||||
struct blockgroup_lock s_blockgroup_lock;
|
||||
struct blockgroup_lock *s_blockgroup_lock;
|
||||
struct proc_dir_entry *s_proc;
|
||||
|
||||
/* root of the per fs reservation window tree */
|
||||
spinlock_t s_rsv_window_lock;
|
||||
struct rb_root s_rsv_window_root;
|
||||
struct kobject s_kobj;
|
||||
struct completion s_kobj_unregister;
|
||||
|
||||
/* Journaling */
|
||||
struct inode *s_journal_inode;
|
||||
|
@ -146,6 +144,10 @@ struct ext4_sb_info {
|
|||
/* locality groups */
|
||||
struct ext4_locality_group *s_locality_groups;
|
||||
|
||||
/* for write statistics */
|
||||
unsigned long s_sectors_written_start;
|
||||
u64 s_kbytes_written;
|
||||
|
||||
unsigned int s_log_groups_per_flex;
|
||||
struct flex_groups *s_flex_groups;
|
||||
};
|
||||
|
@ -153,7 +155,7 @@ struct ext4_sb_info {
|
|||
static inline spinlock_t *
|
||||
sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
|
||||
{
|
||||
return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
|
||||
return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
|
||||
}
|
||||
|
||||
#endif /* _EXT4_SB */
|
||||
|
|
|
@ -152,6 +152,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
|
|||
ext4_fsblk_t bg_start;
|
||||
ext4_fsblk_t last_block;
|
||||
ext4_grpblk_t colour;
|
||||
ext4_group_t block_group;
|
||||
int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
|
||||
int depth;
|
||||
|
||||
if (path) {
|
||||
|
@ -170,10 +172,31 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
|
|||
}
|
||||
|
||||
/* OK. use inode's group */
|
||||
bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
|
||||
block_group = ei->i_block_group;
|
||||
if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
|
||||
/*
|
||||
* If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
|
||||
* block groups per flexgroup, reserve the first block
|
||||
* group for directories and special files. Regular
|
||||
* files will start at the second block group. This
|
||||
* tends to speed up directory access and improves
|
||||
* fsck times.
|
||||
*/
|
||||
block_group &= ~(flex_size-1);
|
||||
if (S_ISREG(inode->i_mode))
|
||||
block_group++;
|
||||
}
|
||||
bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
|
||||
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
|
||||
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
|
||||
|
||||
/*
|
||||
* If we are doing delayed allocation, we don't need take
|
||||
* colour into account.
|
||||
*/
|
||||
if (test_opt(inode->i_sb, DELALLOC))
|
||||
return bg_start;
|
||||
|
||||
if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
|
||||
colour = (current->pid % 16) *
|
||||
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
|
||||
|
@ -301,7 +324,64 @@ ext4_ext_max_entries(struct inode *inode, int depth)
|
|||
return max;
|
||||
}
|
||||
|
||||
static int __ext4_ext_check_header(const char *function, struct inode *inode,
|
||||
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
|
||||
{
|
||||
ext4_fsblk_t block = ext_pblock(ext);
|
||||
int len = ext4_ext_get_actual_len(ext);
|
||||
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
|
||||
if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
|
||||
((block + len) > ext4_blocks_count(es))))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int ext4_valid_extent_idx(struct inode *inode,
|
||||
struct ext4_extent_idx *ext_idx)
|
||||
{
|
||||
ext4_fsblk_t block = idx_pblock(ext_idx);
|
||||
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
|
||||
if (unlikely(block < le32_to_cpu(es->s_first_data_block) ||
|
||||
(block > ext4_blocks_count(es))))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int ext4_valid_extent_entries(struct inode *inode,
|
||||
struct ext4_extent_header *eh,
|
||||
int depth)
|
||||
{
|
||||
struct ext4_extent *ext;
|
||||
struct ext4_extent_idx *ext_idx;
|
||||
unsigned short entries;
|
||||
if (eh->eh_entries == 0)
|
||||
return 1;
|
||||
|
||||
entries = le16_to_cpu(eh->eh_entries);
|
||||
|
||||
if (depth == 0) {
|
||||
/* leaf entries */
|
||||
ext = EXT_FIRST_EXTENT(eh);
|
||||
while (entries) {
|
||||
if (!ext4_valid_extent(inode, ext))
|
||||
return 0;
|
||||
ext++;
|
||||
entries--;
|
||||
}
|
||||
} else {
|
||||
ext_idx = EXT_FIRST_INDEX(eh);
|
||||
while (entries) {
|
||||
if (!ext4_valid_extent_idx(inode, ext_idx))
|
||||
return 0;
|
||||
ext_idx++;
|
||||
entries--;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __ext4_ext_check(const char *function, struct inode *inode,
|
||||
struct ext4_extent_header *eh,
|
||||
int depth)
|
||||
{
|
||||
|
@ -329,11 +409,15 @@ static int __ext4_ext_check_header(const char *function, struct inode *inode,
|
|||
error_msg = "invalid eh_entries";
|
||||
goto corrupted;
|
||||
}
|
||||
if (!ext4_valid_extent_entries(inode, eh, depth)) {
|
||||
error_msg = "invalid extent entries";
|
||||
goto corrupted;
|
||||
}
|
||||
return 0;
|
||||
|
||||
corrupted:
|
||||
ext4_error(inode->i_sb, function,
|
||||
"bad header in inode #%lu: %s - magic %x, "
|
||||
"bad header/extent in inode #%lu: %s - magic %x, "
|
||||
"entries %u, max %u(%u), depth %u(%u)",
|
||||
inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
|
||||
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
|
||||
|
@ -342,8 +426,13 @@ static int __ext4_ext_check_header(const char *function, struct inode *inode,
|
|||
return -EIO;
|
||||
}
|
||||
|
||||
#define ext4_ext_check_header(inode, eh, depth) \
|
||||
__ext4_ext_check_header(__func__, inode, eh, depth)
|
||||
#define ext4_ext_check(inode, eh, depth) \
|
||||
__ext4_ext_check(__func__, inode, eh, depth)
|
||||
|
||||
int ext4_ext_check_inode(struct inode *inode)
|
||||
{
|
||||
return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
|
||||
}
|
||||
|
||||
#ifdef EXT_DEBUG
|
||||
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
|
||||
|
@ -547,9 +636,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
|
|||
|
||||
eh = ext_inode_hdr(inode);
|
||||
depth = ext_depth(inode);
|
||||
if (ext4_ext_check_header(inode, eh, depth))
|
||||
return ERR_PTR(-EIO);
|
||||
|
||||
|
||||
/* account possible depth increase */
|
||||
if (!path) {
|
||||
|
@ -565,6 +651,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
|
|||
i = depth;
|
||||
/* walk through the tree */
|
||||
while (i) {
|
||||
int need_to_validate = 0;
|
||||
|
||||
ext_debug("depth %d: num %d, max %d\n",
|
||||
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
|
||||
|
||||
|
@ -573,10 +661,17 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
|
|||
path[ppos].p_depth = i;
|
||||
path[ppos].p_ext = NULL;
|
||||
|
||||
bh = sb_bread(inode->i_sb, path[ppos].p_block);
|
||||
if (!bh)
|
||||
bh = sb_getblk(inode->i_sb, path[ppos].p_block);
|
||||
if (unlikely(!bh))
|
||||
goto err;
|
||||
|
||||
if (!bh_uptodate_or_lock(bh)) {
|
||||
if (bh_submit_read(bh) < 0) {
|
||||
put_bh(bh);
|
||||
goto err;
|
||||
}
|
||||
/* validate the extent entries */
|
||||
need_to_validate = 1;
|
||||
}
|
||||
eh = ext_block_hdr(bh);
|
||||
ppos++;
|
||||
BUG_ON(ppos > depth);
|
||||
|
@ -584,7 +679,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
|
|||
path[ppos].p_hdr = eh;
|
||||
i--;
|
||||
|
||||
if (ext4_ext_check_header(inode, eh, i))
|
||||
if (need_to_validate && ext4_ext_check(inode, eh, i))
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -1181,7 +1276,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
|
|||
return -EIO;
|
||||
eh = ext_block_hdr(bh);
|
||||
/* subtract from p_depth to get proper eh_depth */
|
||||
if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
|
||||
if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
|
||||
put_bh(bh);
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -1194,7 +1289,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
|
|||
if (bh == NULL)
|
||||
return -EIO;
|
||||
eh = ext_block_hdr(bh);
|
||||
if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
|
||||
if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
|
||||
put_bh(bh);
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -2137,7 +2232,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
|
|||
return -ENOMEM;
|
||||
}
|
||||
path[0].p_hdr = ext_inode_hdr(inode);
|
||||
if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) {
|
||||
if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
@ -2191,7 +2286,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
|
|||
err = -EIO;
|
||||
break;
|
||||
}
|
||||
if (ext4_ext_check_header(inode, ext_block_hdr(bh),
|
||||
if (ext4_ext_check(inode, ext_block_hdr(bh),
|
||||
depth - i - 1)) {
|
||||
err = -EIO;
|
||||
break;
|
||||
|
|
|
@ -33,9 +33,14 @@
|
|||
*/
|
||||
static int ext4_release_file(struct inode *inode, struct file *filp)
|
||||
{
|
||||
if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
|
||||
ext4_alloc_da_blocks(inode);
|
||||
EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
|
||||
}
|
||||
/* if we are the last writer on the inode, drop the block reservation */
|
||||
if ((filp->f_mode & FMODE_WRITE) &&
|
||||
(atomic_read(&inode->i_writecount) == 1))
|
||||
(atomic_read(&inode->i_writecount) == 1) &&
|
||||
!EXT4_I(inode)->i_reserved_data_blocks)
|
||||
{
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
ext4_discard_preallocations(inode);
|
||||
|
|
271
fs/ext4/ialloc.c
271
fs/ext4/ialloc.c
|
@ -189,7 +189,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
|||
struct ext4_super_block *es;
|
||||
struct ext4_sb_info *sbi;
|
||||
int fatal = 0, err, count, cleared;
|
||||
ext4_group_t flex_group;
|
||||
|
||||
if (atomic_read(&inode->i_count) > 1) {
|
||||
printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
|
||||
|
@ -268,6 +267,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
|||
if (is_directory) {
|
||||
count = ext4_used_dirs_count(sb, gdp) - 1;
|
||||
ext4_used_dirs_set(sb, gdp, count);
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
ext4_group_t f;
|
||||
|
||||
f = ext4_flex_group(sbi, block_group);
|
||||
atomic_dec(&sbi->s_flex_groups[f].free_inodes);
|
||||
}
|
||||
|
||||
}
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi,
|
||||
block_group, gdp);
|
||||
|
@ -277,10 +283,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
|||
percpu_counter_dec(&sbi->s_dirs_counter);
|
||||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
flex_group = ext4_flex_group(sbi, block_group);
|
||||
spin_lock(sb_bgl_lock(sbi, flex_group));
|
||||
sbi->s_flex_groups[flex_group].free_inodes++;
|
||||
spin_unlock(sb_bgl_lock(sbi, flex_group));
|
||||
ext4_group_t f;
|
||||
|
||||
f = ext4_flex_group(sbi, block_group);
|
||||
atomic_inc(&sbi->s_flex_groups[f].free_inodes);
|
||||
}
|
||||
}
|
||||
BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
|
||||
|
@ -360,9 +366,9 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
|
|||
sbi->s_log_groups_per_flex;
|
||||
|
||||
find_close_to_parent:
|
||||
flexbg_free_blocks = flex_group[best_flex].free_blocks;
|
||||
flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks);
|
||||
flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
|
||||
if (flex_group[best_flex].free_inodes &&
|
||||
if (atomic_read(&flex_group[best_flex].free_inodes) &&
|
||||
flex_freeb_ratio > free_block_ratio)
|
||||
goto found_flexbg;
|
||||
|
||||
|
@ -375,24 +381,24 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
|
|||
if (i == parent_fbg_group || i == parent_fbg_group - 1)
|
||||
continue;
|
||||
|
||||
flexbg_free_blocks = flex_group[i].free_blocks;
|
||||
flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks);
|
||||
flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex;
|
||||
|
||||
if (flex_freeb_ratio > free_block_ratio &&
|
||||
flex_group[i].free_inodes) {
|
||||
(atomic_read(&flex_group[i].free_inodes))) {
|
||||
best_flex = i;
|
||||
goto found_flexbg;
|
||||
}
|
||||
|
||||
if (flex_group[best_flex].free_inodes == 0 ||
|
||||
(flex_group[i].free_blocks >
|
||||
flex_group[best_flex].free_blocks &&
|
||||
flex_group[i].free_inodes))
|
||||
if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) ||
|
||||
((atomic_read(&flex_group[i].free_blocks) >
|
||||
atomic_read(&flex_group[best_flex].free_blocks)) &&
|
||||
atomic_read(&flex_group[i].free_inodes)))
|
||||
best_flex = i;
|
||||
}
|
||||
|
||||
if (!flex_group[best_flex].free_inodes ||
|
||||
!flex_group[best_flex].free_blocks)
|
||||
if (!atomic_read(&flex_group[best_flex].free_inodes) ||
|
||||
!atomic_read(&flex_group[best_flex].free_blocks))
|
||||
return -1;
|
||||
|
||||
found_flexbg:
|
||||
|
@ -410,6 +416,42 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
|
|||
return 0;
|
||||
}
|
||||
|
||||
struct orlov_stats {
|
||||
__u32 free_inodes;
|
||||
__u32 free_blocks;
|
||||
__u32 used_dirs;
|
||||
};
|
||||
|
||||
/*
|
||||
* Helper function for Orlov's allocator; returns critical information
|
||||
* for a particular block group or flex_bg. If flex_size is 1, then g
|
||||
* is a block group number; otherwise it is a flex_bg number.
|
||||
*/
|
||||
void get_orlov_stats(struct super_block *sb, ext4_group_t g,
|
||||
int flex_size, struct orlov_stats *stats)
|
||||
{
|
||||
struct ext4_group_desc *desc;
|
||||
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
|
||||
|
||||
if (flex_size > 1) {
|
||||
stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
|
||||
stats->free_blocks = atomic_read(&flex_group[g].free_blocks);
|
||||
stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
|
||||
return;
|
||||
}
|
||||
|
||||
desc = ext4_get_group_desc(sb, g, NULL);
|
||||
if (desc) {
|
||||
stats->free_inodes = ext4_free_inodes_count(sb, desc);
|
||||
stats->free_blocks = ext4_free_blks_count(sb, desc);
|
||||
stats->used_dirs = ext4_used_dirs_count(sb, desc);
|
||||
} else {
|
||||
stats->free_inodes = 0;
|
||||
stats->free_blocks = 0;
|
||||
stats->used_dirs = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Orlov's allocator for directories.
|
||||
*
|
||||
|
@ -425,35 +467,34 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
|
|||
* it has too many directories already (max_dirs) or
|
||||
* it has too few free inodes left (min_inodes) or
|
||||
* it has too few free blocks left (min_blocks) or
|
||||
* it's already running too large debt (max_debt).
|
||||
* Parent's group is preferred, if it doesn't satisfy these
|
||||
* conditions we search cyclically through the rest. If none
|
||||
* of the groups look good we just look for a group with more
|
||||
* free inodes than average (starting at parent's group).
|
||||
*
|
||||
* Debt is incremented each time we allocate a directory and decremented
|
||||
* when we allocate an inode, within 0--255.
|
||||
*/
|
||||
|
||||
#define INODE_COST 64
|
||||
#define BLOCK_COST 256
|
||||
|
||||
static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
||||
ext4_group_t *group)
|
||||
ext4_group_t *group, int mode)
|
||||
{
|
||||
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_super_block *es = sbi->s_es;
|
||||
ext4_group_t ngroups = sbi->s_groups_count;
|
||||
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
|
||||
unsigned int freei, avefreei;
|
||||
ext4_fsblk_t freeb, avefreeb;
|
||||
ext4_fsblk_t blocks_per_dir;
|
||||
unsigned int ndirs;
|
||||
int max_debt, max_dirs, min_inodes;
|
||||
int max_dirs, min_inodes;
|
||||
ext4_grpblk_t min_blocks;
|
||||
ext4_group_t i;
|
||||
ext4_group_t i, grp, g;
|
||||
struct ext4_group_desc *desc;
|
||||
struct orlov_stats stats;
|
||||
int flex_size = ext4_flex_bg_size(sbi);
|
||||
|
||||
if (flex_size > 1) {
|
||||
ngroups = (ngroups + flex_size - 1) >>
|
||||
sbi->s_log_groups_per_flex;
|
||||
parent_group >>= sbi->s_log_groups_per_flex;
|
||||
}
|
||||
|
||||
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
|
||||
avefreei = freei / ngroups;
|
||||
|
@ -462,71 +503,97 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
|||
do_div(avefreeb, ngroups);
|
||||
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
|
||||
|
||||
if ((parent == sb->s_root->d_inode) ||
|
||||
(EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
|
||||
if (S_ISDIR(mode) &&
|
||||
((parent == sb->s_root->d_inode) ||
|
||||
(EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL))) {
|
||||
int best_ndir = inodes_per_group;
|
||||
ext4_group_t grp;
|
||||
int ret = -1;
|
||||
|
||||
get_random_bytes(&grp, sizeof(grp));
|
||||
parent_group = (unsigned)grp % ngroups;
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
grp = (parent_group + i) % ngroups;
|
||||
desc = ext4_get_group_desc(sb, grp, NULL);
|
||||
if (!desc || !ext4_free_inodes_count(sb, desc))
|
||||
g = (parent_group + i) % ngroups;
|
||||
get_orlov_stats(sb, g, flex_size, &stats);
|
||||
if (!stats.free_inodes)
|
||||
continue;
|
||||
if (ext4_used_dirs_count(sb, desc) >= best_ndir)
|
||||
if (stats.used_dirs >= best_ndir)
|
||||
continue;
|
||||
if (ext4_free_inodes_count(sb, desc) < avefreei)
|
||||
if (stats.free_inodes < avefreei)
|
||||
continue;
|
||||
if (ext4_free_blks_count(sb, desc) < avefreeb)
|
||||
if (stats.free_blocks < avefreeb)
|
||||
continue;
|
||||
*group = grp;
|
||||
grp = g;
|
||||
ret = 0;
|
||||
best_ndir = ext4_used_dirs_count(sb, desc);
|
||||
best_ndir = stats.used_dirs;
|
||||
}
|
||||
if (ret)
|
||||
goto fallback;
|
||||
found_flex_bg:
|
||||
if (flex_size == 1) {
|
||||
*group = grp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* We pack inodes at the beginning of the flexgroup's
|
||||
* inode tables. Block allocation decisions will do
|
||||
* something similar, although regular files will
|
||||
* start at 2nd block group of the flexgroup. See
|
||||
* ext4_ext_find_goal() and ext4_find_near().
|
||||
*/
|
||||
grp *= flex_size;
|
||||
for (i = 0; i < flex_size; i++) {
|
||||
if (grp+i >= sbi->s_groups_count)
|
||||
break;
|
||||
desc = ext4_get_group_desc(sb, grp+i, NULL);
|
||||
if (desc && ext4_free_inodes_count(sb, desc)) {
|
||||
*group = grp+i;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (ret == 0)
|
||||
return ret;
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
blocks_per_dir = ext4_blocks_count(es) - freeb;
|
||||
do_div(blocks_per_dir, ndirs);
|
||||
|
||||
max_dirs = ndirs / ngroups + inodes_per_group / 16;
|
||||
min_inodes = avefreei - inodes_per_group / 4;
|
||||
min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4;
|
||||
min_inodes = avefreei - inodes_per_group*flex_size / 4;
|
||||
if (min_inodes < 1)
|
||||
min_inodes = 1;
|
||||
min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4;
|
||||
|
||||
max_debt = EXT4_BLOCKS_PER_GROUP(sb);
|
||||
max_debt /= max_t(int, blocks_per_dir, BLOCK_COST);
|
||||
if (max_debt * INODE_COST > inodes_per_group)
|
||||
max_debt = inodes_per_group / INODE_COST;
|
||||
if (max_debt > 255)
|
||||
max_debt = 255;
|
||||
if (max_debt == 0)
|
||||
max_debt = 1;
|
||||
/*
|
||||
* Start looking in the flex group where we last allocated an
|
||||
* inode for this parent directory
|
||||
*/
|
||||
if (EXT4_I(parent)->i_last_alloc_group != ~0) {
|
||||
parent_group = EXT4_I(parent)->i_last_alloc_group;
|
||||
if (flex_size > 1)
|
||||
parent_group >>= sbi->s_log_groups_per_flex;
|
||||
}
|
||||
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
*group = (parent_group + i) % ngroups;
|
||||
desc = ext4_get_group_desc(sb, *group, NULL);
|
||||
if (!desc || !ext4_free_inodes_count(sb, desc))
|
||||
grp = (parent_group + i) % ngroups;
|
||||
get_orlov_stats(sb, grp, flex_size, &stats);
|
||||
if (stats.used_dirs >= max_dirs)
|
||||
continue;
|
||||
if (ext4_used_dirs_count(sb, desc) >= max_dirs)
|
||||
if (stats.free_inodes < min_inodes)
|
||||
continue;
|
||||
if (ext4_free_inodes_count(sb, desc) < min_inodes)
|
||||
if (stats.free_blocks < min_blocks)
|
||||
continue;
|
||||
if (ext4_free_blks_count(sb, desc) < min_blocks)
|
||||
continue;
|
||||
return 0;
|
||||
goto found_flex_bg;
|
||||
}
|
||||
|
||||
fallback:
|
||||
ngroups = sbi->s_groups_count;
|
||||
avefreei = freei / ngroups;
|
||||
parent_group = EXT4_I(parent)->i_block_group;
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
*group = (parent_group + i) % ngroups;
|
||||
desc = ext4_get_group_desc(sb, *group, NULL);
|
||||
grp = (parent_group + i) % ngroups;
|
||||
desc = ext4_get_group_desc(sb, grp, NULL);
|
||||
if (desc && ext4_free_inodes_count(sb, desc) &&
|
||||
ext4_free_inodes_count(sb, desc) >= avefreei)
|
||||
ext4_free_inodes_count(sb, desc) >= avefreei) {
|
||||
*group = grp;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (avefreei) {
|
||||
|
@ -542,12 +609,51 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
|||
}
|
||||
|
||||
static int find_group_other(struct super_block *sb, struct inode *parent,
|
||||
ext4_group_t *group)
|
||||
ext4_group_t *group, int mode)
|
||||
{
|
||||
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
||||
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
||||
struct ext4_group_desc *desc;
|
||||
ext4_group_t i;
|
||||
ext4_group_t i, last;
|
||||
int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
|
||||
|
||||
/*
|
||||
* Try to place the inode in the same flex group as its
|
||||
* parent. If we can't find space, use the Orlov algorithm to
|
||||
* find another flex group, and store that information in the
|
||||
* parent directory's inode information so that we use that flex
|
||||
* group for future allocations.
|
||||
*/
|
||||
if (flex_size > 1) {
|
||||
int retry = 0;
|
||||
|
||||
try_again:
|
||||
parent_group &= ~(flex_size-1);
|
||||
last = parent_group + flex_size;
|
||||
if (last > ngroups)
|
||||
last = ngroups;
|
||||
for (i = parent_group; i < last; i++) {
|
||||
desc = ext4_get_group_desc(sb, i, NULL);
|
||||
if (desc && ext4_free_inodes_count(sb, desc)) {
|
||||
*group = i;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
|
||||
retry = 1;
|
||||
parent_group = EXT4_I(parent)->i_last_alloc_group;
|
||||
goto try_again;
|
||||
}
|
||||
/*
|
||||
* If this didn't work, use the Orlov search algorithm
|
||||
* to find a new flex group; we pass in the mode to
|
||||
* avoid the topdir algorithms.
|
||||
*/
|
||||
*group = parent_group + flex_size;
|
||||
if (*group > ngroups)
|
||||
*group = 0;
|
||||
return find_group_orlov(sb, parent, group, mode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to place the inode in its parent directory
|
||||
|
@ -665,6 +771,11 @@ static int ext4_claim_inode(struct super_block *sb,
|
|||
if (S_ISDIR(mode)) {
|
||||
count = ext4_used_dirs_count(sb, gdp) + 1;
|
||||
ext4_used_dirs_set(sb, gdp, count);
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
ext4_group_t f = ext4_flex_group(sbi, group);
|
||||
|
||||
atomic_inc(&sbi->s_flex_groups[f].free_inodes);
|
||||
}
|
||||
}
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
|
||||
err_ret:
|
||||
|
@ -716,10 +827,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
|||
sbi = EXT4_SB(sb);
|
||||
es = sbi->s_es;
|
||||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
|
||||
ret2 = find_group_flex(sb, dir, &group);
|
||||
if (ret2 == -1) {
|
||||
ret2 = find_group_other(sb, dir, &group);
|
||||
ret2 = find_group_other(sb, dir, &group, mode);
|
||||
if (ret2 == 0 && once)
|
||||
once = 0;
|
||||
printk(KERN_NOTICE "ext4: find_group_flex "
|
||||
|
@ -733,11 +844,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
|||
if (test_opt(sb, OLDALLOC))
|
||||
ret2 = find_group_dir(sb, dir, &group);
|
||||
else
|
||||
ret2 = find_group_orlov(sb, dir, &group);
|
||||
ret2 = find_group_orlov(sb, dir, &group, mode);
|
||||
} else
|
||||
ret2 = find_group_other(sb, dir, &group);
|
||||
ret2 = find_group_other(sb, dir, &group, mode);
|
||||
|
||||
got_group:
|
||||
EXT4_I(dir)->i_last_alloc_group = group;
|
||||
err = -ENOSPC;
|
||||
if (ret2 == -1)
|
||||
goto out;
|
||||
|
@ -858,9 +970,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
|||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
flex_group = ext4_flex_group(sbi, group);
|
||||
spin_lock(sb_bgl_lock(sbi, flex_group));
|
||||
sbi->s_flex_groups[flex_group].free_inodes--;
|
||||
spin_unlock(sb_bgl_lock(sbi, flex_group));
|
||||
atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
|
||||
}
|
||||
|
||||
inode->i_uid = current_fsuid();
|
||||
|
@ -885,19 +995,16 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
|||
ei->i_disksize = 0;
|
||||
|
||||
/*
|
||||
* Don't inherit extent flag from directory. We set extent flag on
|
||||
* newly created directory and file only if -o extent mount option is
|
||||
* specified
|
||||
* Don't inherit extent flag from directory, amongst others. We set
|
||||
* extent flag on newly created directory and file only if -o extent
|
||||
* mount option is specified
|
||||
*/
|
||||
ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
|
||||
if (S_ISLNK(mode))
|
||||
ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
|
||||
/* dirsync only applies to directories */
|
||||
if (!S_ISDIR(mode))
|
||||
ei->i_flags &= ~EXT4_DIRSYNC_FL;
|
||||
ei->i_flags =
|
||||
ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
|
||||
ei->i_file_acl = 0;
|
||||
ei->i_dtime = 0;
|
||||
ei->i_block_group = group;
|
||||
ei->i_last_alloc_group = ~0;
|
||||
|
||||
ext4_set_inode_flags(inode);
|
||||
if (IS_DIRSYNC(inode))
|
||||
|
|
424
fs/ext4/inode.c
424
fs/ext4/inode.c
|
@ -371,6 +371,34 @@ static int ext4_block_to_path(struct inode *inode,
|
|||
return n;
|
||||
}
|
||||
|
||||
static int __ext4_check_blockref(const char *function, struct inode *inode,
|
||||
unsigned int *p, unsigned int max) {
|
||||
|
||||
unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
|
||||
unsigned int *bref = p;
|
||||
while (bref < p+max) {
|
||||
if (unlikely(*bref >= maxblocks)) {
|
||||
ext4_error(inode->i_sb, function,
|
||||
"block reference %u >= max (%u) "
|
||||
"in inode #%lu, offset=%d",
|
||||
*bref, maxblocks,
|
||||
inode->i_ino, (int)(bref-p));
|
||||
return -EIO;
|
||||
}
|
||||
bref++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#define ext4_check_indirect_blockref(inode, bh) \
|
||||
__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \
|
||||
EXT4_ADDR_PER_BLOCK((inode)->i_sb))
|
||||
|
||||
#define ext4_check_inode_blockref(inode) \
|
||||
__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \
|
||||
EXT4_NDIR_BLOCKS)
|
||||
|
||||
/**
|
||||
* ext4_get_branch - read the chain of indirect blocks leading to data
|
||||
* @inode: inode in question
|
||||
|
@ -415,9 +443,22 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
|
|||
if (!p->key)
|
||||
goto no_block;
|
||||
while (--depth) {
|
||||
bh = sb_bread(sb, le32_to_cpu(p->key));
|
||||
if (!bh)
|
||||
bh = sb_getblk(sb, le32_to_cpu(p->key));
|
||||
if (unlikely(!bh))
|
||||
goto failure;
|
||||
|
||||
if (!bh_uptodate_or_lock(bh)) {
|
||||
if (bh_submit_read(bh) < 0) {
|
||||
put_bh(bh);
|
||||
goto failure;
|
||||
}
|
||||
/* validate block references */
|
||||
if (ext4_check_indirect_blockref(inode, bh)) {
|
||||
put_bh(bh);
|
||||
goto failure;
|
||||
}
|
||||
}
|
||||
|
||||
add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
|
||||
/* Reader: end */
|
||||
if (!p->key)
|
||||
|
@ -459,6 +500,8 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
|
|||
ext4_fsblk_t bg_start;
|
||||
ext4_fsblk_t last_block;
|
||||
ext4_grpblk_t colour;
|
||||
ext4_group_t block_group;
|
||||
int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
|
||||
|
||||
/* Try to find previous block */
|
||||
for (p = ind->p - 1; p >= start; p--) {
|
||||
|
@ -474,9 +517,22 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
|
|||
* It is going to be referred to from the inode itself? OK, just put it
|
||||
* into the same cylinder group then.
|
||||
*/
|
||||
bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group);
|
||||
block_group = ei->i_block_group;
|
||||
if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
|
||||
block_group &= ~(flex_size-1);
|
||||
if (S_ISREG(inode->i_mode))
|
||||
block_group++;
|
||||
}
|
||||
bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
|
||||
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
|
||||
|
||||
/*
|
||||
* If we are doing delayed allocation, we don't need to take
|
||||
* colour into account.
|
||||
*/
|
||||
if (test_opt(inode->i_sb, DELALLOC))
|
||||
return bg_start;
|
||||
|
||||
if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
|
||||
colour = (current->pid % 16) *
|
||||
(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
|
||||
|
@ -1052,9 +1108,16 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
|
|||
/*
|
||||
* free those over-booking quota for metadata blocks
|
||||
*/
|
||||
|
||||
if (mdb_free)
|
||||
vfs_dq_release_reservation_block(inode, mdb_free);
|
||||
|
||||
/*
|
||||
* If we have done all the pending block allocations and if
|
||||
* there aren't any writers on the inode, we can discard the
|
||||
* inode's preallocations.
|
||||
*/
|
||||
if (!total && (atomic_read(&inode->i_writecount) == 0))
|
||||
ext4_discard_preallocations(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1688,9 +1751,10 @@ static void ext4_da_page_release_reservation(struct page *page,
|
|||
|
||||
struct mpage_da_data {
|
||||
struct inode *inode;
|
||||
struct buffer_head lbh; /* extent of blocks */
|
||||
sector_t b_blocknr; /* start block number of extent */
|
||||
size_t b_size; /* size of extent */
|
||||
unsigned long b_state; /* state of the extent */
|
||||
unsigned long first_page, next_page; /* extent of pages */
|
||||
get_block_t *get_block;
|
||||
struct writeback_control *wbc;
|
||||
int io_done;
|
||||
int pages_written;
|
||||
|
@ -1704,7 +1768,6 @@ struct mpage_da_data {
|
|||
* @mpd->inode: inode
|
||||
* @mpd->first_page: first page of the extent
|
||||
* @mpd->next_page: page after the last page of the extent
|
||||
* @mpd->get_block: the filesystem's block mapper function
|
||||
*
|
||||
* By the time mpage_da_submit_io() is called we expect all blocks
|
||||
* to be allocated. this may be wrong if allocation failed.
|
||||
|
@ -1724,7 +1787,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
|
|||
/*
|
||||
* We need to start from the first_page to the next_page - 1
|
||||
* to make sure we also write the mapped dirty buffer_heads.
|
||||
* If we look at mpd->lbh.b_blocknr we would only be looking
|
||||
* If we look at mpd->b_blocknr we would only be looking
|
||||
* at the currently mapped buffer_heads.
|
||||
*/
|
||||
index = mpd->first_page;
|
||||
|
@ -1914,68 +1977,111 @@ static void ext4_print_free_blocks(struct inode *inode)
|
|||
return;
|
||||
}
|
||||
|
||||
#define EXT4_DELALLOC_RSVED 1
|
||||
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
int ret;
|
||||
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
|
||||
loff_t disksize = EXT4_I(inode)->i_disksize;
|
||||
handle_t *handle = NULL;
|
||||
|
||||
handle = ext4_journal_current_handle();
|
||||
BUG_ON(!handle);
|
||||
ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
|
||||
bh_result, create, 0, EXT4_DELALLOC_RSVED);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
|
||||
if (ext4_should_order_data(inode)) {
|
||||
int retval;
|
||||
retval = ext4_jbd2_file_inode(handle, inode);
|
||||
if (retval)
|
||||
/*
|
||||
* Failed to add inode for ordered mode. Don't
|
||||
* update file size
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update on-disk size along with block allocation; we don't
|
||||
* use 'extend_disksize' as size may change within already
|
||||
* allocated block -bzzz
|
||||
*/
|
||||
disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
|
||||
if (disksize > i_size_read(inode))
|
||||
disksize = i_size_read(inode);
|
||||
if (disksize > EXT4_I(inode)->i_disksize) {
|
||||
ext4_update_i_disksize(inode, disksize);
|
||||
ret = ext4_mark_inode_dirty(handle, inode);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* mpage_da_map_blocks - go through given space
|
||||
*
|
||||
* @mpd->lbh - bh describing space
|
||||
* @mpd->get_block - the filesystem's block mapper function
|
||||
* @mpd - describes the extent to map (b_blocknr, b_size, b_state)
|
||||
*
|
||||
* The function skips space we know is already mapped to disk blocks.
|
||||
*
|
||||
*/
|
||||
static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
||||
static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
||||
{
|
||||
int err = 0;
|
||||
struct buffer_head new;
|
||||
struct buffer_head *lbh = &mpd->lbh;
|
||||
sector_t next;
|
||||
|
||||
/*
|
||||
* We consider only non-mapped and non-allocated blocks
|
||||
*/
|
||||
if (buffer_mapped(lbh) && !buffer_delay(lbh))
|
||||
if ((mpd->b_state & (1 << BH_Mapped)) &&
|
||||
!(mpd->b_state & (1 << BH_Delay)))
|
||||
return 0;
|
||||
new.b_state = lbh->b_state;
|
||||
new.b_state = mpd->b_state;
|
||||
new.b_blocknr = 0;
|
||||
new.b_size = lbh->b_size;
|
||||
next = lbh->b_blocknr;
|
||||
new.b_size = mpd->b_size;
|
||||
next = mpd->b_blocknr;
|
||||
/*
|
||||
* If we didn't accumulate anything
|
||||
* to write simply return
|
||||
*/
|
||||
if (!new.b_size)
|
||||
return 0;
|
||||
err = mpd->get_block(mpd->inode, next, &new, 1);
|
||||
if (err) {
|
||||
|
||||
/* If get block returns with error
|
||||
* we simply return. Later writepage
|
||||
* will redirty the page and writepages
|
||||
* will find the dirty page again
|
||||
err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
|
||||
if (err) {
|
||||
/*
|
||||
* If get block returns with error we simply
|
||||
* return. Later writepage will redirty the page and
|
||||
* writepages will find the dirty page again
|
||||
*/
|
||||
if (err == -EAGAIN)
|
||||
return 0;
|
||||
|
||||
if (err == -ENOSPC &&
|
||||
ext4_count_free_blocks(mpd->inode->i_sb)) {
|
||||
ext4_count_free_blocks(mpd->inode->i_sb)) {
|
||||
mpd->retval = err;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* get block failure will cause us
|
||||
* to loop in writepages. Because
|
||||
* a_ops->writepage won't be able to
|
||||
* make progress. The page will be redirtied
|
||||
* by writepage and writepages will again
|
||||
* try to write the same.
|
||||
* get block failure will cause us to loop in
|
||||
* writepages, because a_ops->writepage won't be able
|
||||
* to make progress. The page will be redirtied by
|
||||
* writepage and writepages will again try to write
|
||||
* the same.
|
||||
*/
|
||||
printk(KERN_EMERG "%s block allocation failed for inode %lu "
|
||||
"at logical offset %llu with max blocks "
|
||||
"%zd with error %d\n",
|
||||
__func__, mpd->inode->i_ino,
|
||||
(unsigned long long)next,
|
||||
lbh->b_size >> mpd->inode->i_blkbits, err);
|
||||
mpd->b_size >> mpd->inode->i_blkbits, err);
|
||||
printk(KERN_EMERG "This should not happen.!! "
|
||||
"Data will be lost\n");
|
||||
if (err == -ENOSPC) {
|
||||
|
@ -1983,7 +2089,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
|||
}
|
||||
/* invalidate all the pages */
|
||||
ext4_da_block_invalidatepages(mpd, next,
|
||||
lbh->b_size >> mpd->inode->i_blkbits);
|
||||
mpd->b_size >> mpd->inode->i_blkbits);
|
||||
return err;
|
||||
}
|
||||
BUG_ON(new.b_size == 0);
|
||||
|
@ -1995,7 +2101,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
|||
* If blocks are delayed marked, we need to
|
||||
* put actual blocknr and drop delayed bit
|
||||
*/
|
||||
if (buffer_delay(lbh) || buffer_unwritten(lbh))
|
||||
if ((mpd->b_state & (1 << BH_Delay)) ||
|
||||
(mpd->b_state & (1 << BH_Unwritten)))
|
||||
mpage_put_bnr_to_bhs(mpd, next, &new);
|
||||
|
||||
return 0;
|
||||
|
@ -2014,12 +2121,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
|||
* the function is used to collect contig. blocks in same state
|
||||
*/
|
||||
static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
|
||||
sector_t logical, struct buffer_head *bh)
|
||||
sector_t logical, size_t b_size,
|
||||
unsigned long b_state)
|
||||
{
|
||||
sector_t next;
|
||||
size_t b_size = bh->b_size;
|
||||
struct buffer_head *lbh = &mpd->lbh;
|
||||
int nrblocks = lbh->b_size >> mpd->inode->i_blkbits;
|
||||
int nrblocks = mpd->b_size >> mpd->inode->i_blkbits;
|
||||
|
||||
/* check if the reserved journal credits might overflow */
|
||||
if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
|
||||
|
@ -2046,19 +2152,19 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
|
|||
/*
|
||||
* First block in the extent
|
||||
*/
|
||||
if (lbh->b_size == 0) {
|
||||
lbh->b_blocknr = logical;
|
||||
lbh->b_size = b_size;
|
||||
lbh->b_state = bh->b_state & BH_FLAGS;
|
||||
if (mpd->b_size == 0) {
|
||||
mpd->b_blocknr = logical;
|
||||
mpd->b_size = b_size;
|
||||
mpd->b_state = b_state & BH_FLAGS;
|
||||
return;
|
||||
}
|
||||
|
||||
next = lbh->b_blocknr + nrblocks;
|
||||
next = mpd->b_blocknr + nrblocks;
|
||||
/*
|
||||
* Can we merge the block to our big extent?
|
||||
*/
|
||||
if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
|
||||
lbh->b_size += b_size;
|
||||
if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) {
|
||||
mpd->b_size += b_size;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2087,7 +2193,7 @@ static int __mpage_da_writepage(struct page *page,
|
|||
{
|
||||
struct mpage_da_data *mpd = data;
|
||||
struct inode *inode = mpd->inode;
|
||||
struct buffer_head *bh, *head, fake;
|
||||
struct buffer_head *bh, *head;
|
||||
sector_t logical;
|
||||
|
||||
if (mpd->io_done) {
|
||||
|
@ -2129,9 +2235,9 @@ static int __mpage_da_writepage(struct page *page,
|
|||
/*
|
||||
* ... and blocks
|
||||
*/
|
||||
mpd->lbh.b_size = 0;
|
||||
mpd->lbh.b_state = 0;
|
||||
mpd->lbh.b_blocknr = 0;
|
||||
mpd->b_size = 0;
|
||||
mpd->b_state = 0;
|
||||
mpd->b_blocknr = 0;
|
||||
}
|
||||
|
||||
mpd->next_page = page->index + 1;
|
||||
|
@ -2139,16 +2245,8 @@ static int __mpage_da_writepage(struct page *page,
|
|||
(PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||
|
||||
if (!page_has_buffers(page)) {
|
||||
/*
|
||||
* There is no attached buffer heads yet (mmap?)
|
||||
* we treat the page asfull of dirty blocks
|
||||
*/
|
||||
bh = &fake;
|
||||
bh->b_size = PAGE_CACHE_SIZE;
|
||||
bh->b_state = 0;
|
||||
set_buffer_dirty(bh);
|
||||
set_buffer_uptodate(bh);
|
||||
mpage_add_bh_to_extent(mpd, logical, bh);
|
||||
mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
|
||||
(1 << BH_Dirty) | (1 << BH_Uptodate));
|
||||
if (mpd->io_done)
|
||||
return MPAGE_DA_EXTENT_TAIL;
|
||||
} else {
|
||||
|
@ -2166,8 +2264,10 @@ static int __mpage_da_writepage(struct page *page,
|
|||
* with the page in ext4_da_writepage
|
||||
*/
|
||||
if (buffer_dirty(bh) &&
|
||||
(!buffer_mapped(bh) || buffer_delay(bh))) {
|
||||
mpage_add_bh_to_extent(mpd, logical, bh);
|
||||
(!buffer_mapped(bh) || buffer_delay(bh))) {
|
||||
mpage_add_bh_to_extent(mpd, logical,
|
||||
bh->b_size,
|
||||
bh->b_state);
|
||||
if (mpd->io_done)
|
||||
return MPAGE_DA_EXTENT_TAIL;
|
||||
} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
|
||||
|
@ -2179,9 +2279,8 @@ static int __mpage_da_writepage(struct page *page,
|
|||
* unmapped buffer_head later we need to
|
||||
* use the b_state flag of that buffer_head.
|
||||
*/
|
||||
if (mpd->lbh.b_size == 0)
|
||||
mpd->lbh.b_state =
|
||||
bh->b_state & BH_FLAGS;
|
||||
if (mpd->b_size == 0)
|
||||
mpd->b_state = bh->b_state & BH_FLAGS;
|
||||
}
|
||||
logical++;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
@ -2190,51 +2289,6 @@ static int __mpage_da_writepage(struct page *page,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* mpage_da_writepages - walk the list of dirty pages of the given
|
||||
* address space, allocates non-allocated blocks, maps newly-allocated
|
||||
* blocks to existing bhs and issue IO them
|
||||
*
|
||||
* @mapping: address space structure to write
|
||||
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
|
||||
* @get_block: the filesystem's block mapper function.
|
||||
*
|
||||
* This is a library function, which implements the writepages()
|
||||
* address_space_operation.
|
||||
*/
|
||||
static int mpage_da_writepages(struct address_space *mapping,
|
||||
struct writeback_control *wbc,
|
||||
struct mpage_da_data *mpd)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!mpd->get_block)
|
||||
return generic_writepages(mapping, wbc);
|
||||
|
||||
mpd->lbh.b_size = 0;
|
||||
mpd->lbh.b_state = 0;
|
||||
mpd->lbh.b_blocknr = 0;
|
||||
mpd->first_page = 0;
|
||||
mpd->next_page = 0;
|
||||
mpd->io_done = 0;
|
||||
mpd->pages_written = 0;
|
||||
mpd->retval = 0;
|
||||
|
||||
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
|
||||
/*
|
||||
* Handle last extent of pages
|
||||
*/
|
||||
if (!mpd->io_done && mpd->next_page != mpd->first_page) {
|
||||
if (mpage_da_map_blocks(mpd) == 0)
|
||||
mpage_da_submit_io(mpd);
|
||||
|
||||
mpd->io_done = 1;
|
||||
ret = MPAGE_DA_EXTENT_TAIL;
|
||||
}
|
||||
wbc->nr_to_write -= mpd->pages_written;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* this is a special callback for ->write_begin() only
|
||||
* its intention is to return mapped block or reserve space
|
||||
|
@ -2274,51 +2328,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
|||
|
||||
return ret;
|
||||
}
|
||||
#define EXT4_DELALLOC_RSVED 1
|
||||
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
int ret;
|
||||
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
|
||||
loff_t disksize = EXT4_I(inode)->i_disksize;
|
||||
handle_t *handle = NULL;
|
||||
|
||||
handle = ext4_journal_current_handle();
|
||||
BUG_ON(!handle);
|
||||
ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
|
||||
bh_result, create, 0, EXT4_DELALLOC_RSVED);
|
||||
if (ret > 0) {
|
||||
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
|
||||
if (ext4_should_order_data(inode)) {
|
||||
int retval;
|
||||
retval = ext4_jbd2_file_inode(handle, inode);
|
||||
if (retval)
|
||||
/*
|
||||
* Failed to add inode for ordered
|
||||
* mode. Don't update file size
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update on-disk size along with block allocation
|
||||
* we don't use 'extend_disksize' as size may change
|
||||
* within already allocated block -bzzz
|
||||
*/
|
||||
disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
|
||||
if (disksize > i_size_read(inode))
|
||||
disksize = i_size_read(inode);
|
||||
if (disksize > EXT4_I(inode)->i_disksize) {
|
||||
ext4_update_i_disksize(inode, disksize);
|
||||
ret = ext4_mark_inode_dirty(handle, inode);
|
||||
return ret;
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
|
||||
{
|
||||
|
@ -2569,8 +2578,38 @@ static int ext4_da_writepages(struct address_space *mapping,
|
|||
dump_stack();
|
||||
goto out_writepages;
|
||||
}
|
||||
mpd.get_block = ext4_da_get_block_write;
|
||||
ret = mpage_da_writepages(mapping, wbc, &mpd);
|
||||
|
||||
/*
|
||||
* Now call __mpage_da_writepage to find the next
|
||||
* contiguous region of logical blocks that need
|
||||
* blocks to be allocated by ext4. We don't actually
|
||||
* submit the blocks for I/O here, even though
|
||||
* write_cache_pages thinks it will, and will set the
|
||||
* pages as clean for write before calling
|
||||
* __mpage_da_writepage().
|
||||
*/
|
||||
mpd.b_size = 0;
|
||||
mpd.b_state = 0;
|
||||
mpd.b_blocknr = 0;
|
||||
mpd.first_page = 0;
|
||||
mpd.next_page = 0;
|
||||
mpd.io_done = 0;
|
||||
mpd.pages_written = 0;
|
||||
mpd.retval = 0;
|
||||
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
|
||||
&mpd);
|
||||
/*
|
||||
* If we have a contiguous extent of pages and we
|
||||
* haven't done the I/O yet, map the blocks and submit
|
||||
* them for I/O.
|
||||
*/
|
||||
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
|
||||
if (mpage_da_map_blocks(&mpd) == 0)
|
||||
mpage_da_submit_io(&mpd);
|
||||
mpd.io_done = 1;
|
||||
ret = MPAGE_DA_EXTENT_TAIL;
|
||||
}
|
||||
wbc->nr_to_write -= mpd.pages_written;
|
||||
|
||||
ext4_journal_stop(handle);
|
||||
|
||||
|
@ -2846,6 +2885,48 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Force all delayed allocation blocks to be allocated for a given inode.
|
||||
*/
|
||||
int ext4_alloc_da_blocks(struct inode *inode)
|
||||
{
|
||||
if (!EXT4_I(inode)->i_reserved_data_blocks &&
|
||||
!EXT4_I(inode)->i_reserved_meta_blocks)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We do something simple for now. The filemap_flush() will
|
||||
* also start triggering a write of the data blocks, which is
|
||||
* not strictly speaking necessary (and for users of
|
||||
* laptop_mode, not even desirable). However, to do otherwise
|
||||
* would require replicating code paths in:
|
||||
*
|
||||
* ext4_da_writepages() ->
|
||||
* write_cache_pages() ---> (via passed in callback function)
|
||||
* __mpage_da_writepage() -->
|
||||
* mpage_add_bh_to_extent()
|
||||
* mpage_da_map_blocks()
|
||||
*
|
||||
* The problem is that write_cache_pages(), located in
|
||||
* mm/page-writeback.c, marks pages clean in preparation for
|
||||
* doing I/O, which is not desirable if we're not planning on
|
||||
* doing I/O at all.
|
||||
*
|
||||
* We could call write_cache_pages(), and then redirty all of
|
||||
* the pages by calling redirty_page_for_writeback() but that
|
||||
* would be ugly in the extreme. So instead we would need to
|
||||
* replicate parts of the code in the above functions,
|
||||
* simplifying them because we wouldn't actually intend to
|
||||
* write out the pages, but rather only collect contiguous
|
||||
* logical block extents, call the multi-block allocator, and
|
||||
* then update the buffer heads with the block allocations.
|
||||
*
|
||||
* For now, though, we'll cheat by calling filemap_flush(),
|
||||
* which will map the blocks, and start the I/O, but not
|
||||
* actually wait for the I/O to complete.
|
||||
*/
|
||||
return filemap_flush(inode->i_mapping);
|
||||
}
|
||||
|
||||
/*
|
||||
* bmap() is special. It gets used by applications such as lilo and by
|
||||
|
@ -3868,6 +3949,9 @@ void ext4_truncate(struct inode *inode)
|
|||
if (!ext4_can_truncate(inode))
|
||||
return;
|
||||
|
||||
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
|
||||
ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
|
||||
|
||||
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
|
||||
ext4_ext_truncate(inode);
|
||||
return;
|
||||
|
@ -4110,12 +4194,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
|
|||
unsigned num;
|
||||
|
||||
table = ext4_inode_table(sb, gdp);
|
||||
/* Make sure s_inode_readahead_blks is a power of 2 */
|
||||
while (EXT4_SB(sb)->s_inode_readahead_blks &
|
||||
(EXT4_SB(sb)->s_inode_readahead_blks-1))
|
||||
EXT4_SB(sb)->s_inode_readahead_blks =
|
||||
(EXT4_SB(sb)->s_inode_readahead_blks &
|
||||
(EXT4_SB(sb)->s_inode_readahead_blks-1));
|
||||
/* s_inode_readahead_blks is always a power of 2 */
|
||||
b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1);
|
||||
if (table > b)
|
||||
b = table;
|
||||
|
@ -4287,6 +4366,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
ei->i_disksize = inode->i_size;
|
||||
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
|
||||
ei->i_block_group = iloc.block_group;
|
||||
ei->i_last_alloc_group = ~0;
|
||||
/*
|
||||
* NOTE! The in-memory inode i_data array is in little-endian order
|
||||
* even on big-endian machines: we do NOT byteswap the block numbers!
|
||||
|
@ -4329,6 +4409,20 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
|
||||
}
|
||||
|
||||
if (ei->i_flags & EXT4_EXTENTS_FL) {
|
||||
/* Validate extent which is part of inode */
|
||||
ret = ext4_ext_check_inode(inode);
|
||||
} else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
|
||||
(S_ISLNK(inode->i_mode) &&
|
||||
!ext4_inode_is_fast_symlink(inode))) {
|
||||
/* Validate block references which are part of inode */
|
||||
ret = ext4_check_inode_blockref(inode);
|
||||
}
|
||||
if (ret) {
|
||||
brelse(bh);
|
||||
goto bad_inode;
|
||||
}
|
||||
|
||||
if (S_ISREG(inode->i_mode)) {
|
||||
inode->i_op = &ext4_file_inode_operations;
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
|
@ -4345,7 +4439,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
inode->i_op = &ext4_symlink_inode_operations;
|
||||
ext4_set_aops(inode);
|
||||
}
|
||||
} else {
|
||||
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
|
||||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
|
||||
inode->i_op = &ext4_special_inode_operations;
|
||||
if (raw_inode->i_block[0])
|
||||
init_special_inode(inode, inode->i_mode,
|
||||
|
@ -4353,6 +4448,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
else
|
||||
init_special_inode(inode, inode->i_mode,
|
||||
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
|
||||
} else {
|
||||
brelse(bh);
|
||||
ret = -EIO;
|
||||
ext4_error(inode->i_sb, __func__,
|
||||
"bogus i_mode (%o) for inode=%lu",
|
||||
inode->i_mode, inode->i_ino);
|
||||
goto bad_inode;
|
||||
}
|
||||
brelse(iloc.bh);
|
||||
ext4_set_inode_flags(inode);
|
||||
|
|
|
@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
if (!S_ISDIR(inode->i_mode))
|
||||
flags &= ~EXT4_DIRSYNC_FL;
|
||||
flags = ext4_mask_flags(inode->i_mode, flags);
|
||||
|
||||
err = -EPERM;
|
||||
mutex_lock(&inode->i_mutex);
|
||||
|
@ -263,6 +262,20 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|||
return err;
|
||||
}
|
||||
|
||||
case EXT4_IOC_ALLOC_DA_BLKS:
|
||||
{
|
||||
int err;
|
||||
if (!is_owner_or_cap(inode))
|
||||
return -EACCES;
|
||||
|
||||
err = mnt_want_write(filp->f_path.mnt);
|
||||
if (err)
|
||||
return err;
|
||||
err = ext4_alloc_da_blocks(inode);
|
||||
mnt_drop_write(filp->f_path.mnt);
|
||||
return err;
|
||||
}
|
||||
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
|
|
|
@ -46,22 +46,23 @@
|
|||
* The allocation request involve request for multiple number of blocks
|
||||
* near to the goal(block) value specified.
|
||||
*
|
||||
* During initialization phase of the allocator we decide to use the group
|
||||
* preallocation or inode preallocation depending on the size file. The
|
||||
* size of the file could be the resulting file size we would have after
|
||||
* allocation or the current file size which ever is larger. If the size is
|
||||
* less that sbi->s_mb_stream_request we select the group
|
||||
* preallocation. The default value of s_mb_stream_request is 16
|
||||
* blocks. This can also be tuned via
|
||||
* /proc/fs/ext4/<partition>/stream_req. The value is represented in terms
|
||||
* of number of blocks.
|
||||
* During initialization phase of the allocator we decide to use the
|
||||
* group preallocation or inode preallocation depending on the size of
|
||||
* the file. The size of the file could be the resulting file size we
|
||||
* would have after allocation, or the current file size, which ever
|
||||
* is larger. If the size is less than sbi->s_mb_stream_request we
|
||||
* select to use the group preallocation. The default value of
|
||||
* s_mb_stream_request is 16 blocks. This can also be tuned via
|
||||
* /sys/fs/ext4/<partition>/mb_stream_req. The value is represented in
|
||||
* terms of number of blocks.
|
||||
*
|
||||
* The main motivation for having small file use group preallocation is to
|
||||
* ensure that we have small file closer in the disk.
|
||||
* ensure that we have small files closer together on the disk.
|
||||
*
|
||||
* First stage the allocator looks at the inode prealloc list
|
||||
* ext4_inode_info->i_prealloc_list contain list of prealloc spaces for
|
||||
* this particular inode. The inode prealloc space is represented as:
|
||||
* First stage the allocator looks at the inode prealloc list,
|
||||
* ext4_inode_info->i_prealloc_list, which contains list of prealloc
|
||||
* spaces for this particular inode. The inode prealloc space is
|
||||
* represented as:
|
||||
*
|
||||
* pa_lstart -> the logical start block for this prealloc space
|
||||
* pa_pstart -> the physical start block for this prealloc space
|
||||
|
@ -121,29 +122,29 @@
|
|||
* list. In case of inode preallocation we follow a list of heuristics
|
||||
* based on file size. This can be found in ext4_mb_normalize_request. If
|
||||
* we are doing a group prealloc we try to normalize the request to
|
||||
* sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is set to
|
||||
* sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
|
||||
* 512 blocks. This can be tuned via
|
||||
* /proc/fs/ext4/<partition/group_prealloc. The value is represented in
|
||||
* /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
|
||||
* terms of number of blocks. If we have mounted the file system with -O
|
||||
* stripe=<value> option the group prealloc request is normalized to the
|
||||
* stripe value (sbi->s_stripe)
|
||||
*
|
||||
* The regular allocator(using the buddy cache) support few tunables.
|
||||
* The regular allocator(using the buddy cache) supports few tunables.
|
||||
*
|
||||
* /proc/fs/ext4/<partition>/min_to_scan
|
||||
* /proc/fs/ext4/<partition>/max_to_scan
|
||||
* /proc/fs/ext4/<partition>/order2_req
|
||||
* /sys/fs/ext4/<partition>/mb_min_to_scan
|
||||
* /sys/fs/ext4/<partition>/mb_max_to_scan
|
||||
* /sys/fs/ext4/<partition>/mb_order2_req
|
||||
*
|
||||
* The regular allocator use buddy scan only if the request len is power of
|
||||
* The regular allocator uses buddy scan only if the request len is power of
|
||||
* 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
|
||||
* value of s_mb_order2_reqs can be tuned via
|
||||
* /proc/fs/ext4/<partition>/order2_req. If the request len is equal to
|
||||
* /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
|
||||
* stripe size (sbi->s_stripe), we try to search for contiguous block in
|
||||
* stripe size. This should result in better allocation on RAID setup. If
|
||||
* not we search in the specific group using bitmap for best extents. The
|
||||
* tunable min_to_scan and max_to_scan controll the behaviour here.
|
||||
* stripe size. This should result in better allocation on RAID setups. If
|
||||
* not, we search in the specific group using bitmap for best extents. The
|
||||
* tunable min_to_scan and max_to_scan control the behaviour here.
|
||||
* min_to_scan indicates how long the mballoc __must__ look for a best
|
||||
* extent and max_to_scanindicate how long the mballoc __can__ look for a
|
||||
* extent and max_to_scan indicates how long the mballoc __can__ look for a
|
||||
* best extent in the found extents. Searching for the blocks starts with
|
||||
* the group specified as the goal value in allocation context via
|
||||
* ac_g_ex. Each group is first checked based on the criteria whether it
|
||||
|
@ -337,8 +338,6 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
|
|||
ext4_group_t group);
|
||||
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
|
||||
ext4_group_t group);
|
||||
static int ext4_mb_init_per_dev_proc(struct super_block *sb);
|
||||
static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
|
||||
static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
|
||||
|
||||
|
||||
|
@ -1726,6 +1725,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
|
|||
{
|
||||
unsigned free, fragments;
|
||||
unsigned i, bits;
|
||||
int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
|
||||
struct ext4_group_desc *desc;
|
||||
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
|
||||
|
||||
|
@ -1747,6 +1747,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
|
|||
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
|
||||
return 0;
|
||||
|
||||
/* Avoid using the first bg of a flexgroup for data files */
|
||||
if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
|
||||
(flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
|
||||
((group % flex_size) == 0))
|
||||
return 0;
|
||||
|
||||
bits = ac->ac_sb->s_blocksize_bits + 1;
|
||||
for (i = ac->ac_2order; i <= bits; i++)
|
||||
if (grp->bb_counters[i] > 0)
|
||||
|
@ -1971,7 +1977,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
|
|||
/*
|
||||
* We search using buddy data only if the order of the request
|
||||
* is greater than or equal to sbi->s_mb_order2_reqs
|
||||
* You can tune it via /proc/fs/ext4/<partition>/order2_req
|
||||
* You can tune it via /sys/fs/ext4/<partition>/mb_order2_req
|
||||
*/
|
||||
if (i >= sbi->s_mb_order2_reqs) {
|
||||
/*
|
||||
|
@ -2693,7 +2699,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
|
|||
i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
|
||||
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
|
||||
if (sbi->s_mb_maxs == NULL) {
|
||||
kfree(sbi->s_mb_maxs);
|
||||
kfree(sbi->s_mb_offsets);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
@ -2746,7 +2752,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
|
|||
spin_lock_init(&lg->lg_prealloc_lock);
|
||||
}
|
||||
|
||||
ext4_mb_init_per_dev_proc(sb);
|
||||
ext4_mb_history_init(sb);
|
||||
|
||||
if (sbi->s_journal)
|
||||
|
@ -2829,7 +2834,6 @@ int ext4_mb_release(struct super_block *sb)
|
|||
|
||||
free_percpu(sbi->s_locality_groups);
|
||||
ext4_mb_history_release(sb);
|
||||
ext4_mb_destroy_per_dev_proc(sb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -2890,62 +2894,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
|
|||
mb_debug("freed %u blocks in %u structures\n", count, count2);
|
||||
}
|
||||
|
||||
#define EXT4_MB_STATS_NAME "stats"
|
||||
#define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan"
|
||||
#define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan"
|
||||
#define EXT4_MB_ORDER2_REQ "order2_req"
|
||||
#define EXT4_MB_STREAM_REQ "stream_req"
|
||||
#define EXT4_MB_GROUP_PREALLOC "group_prealloc"
|
||||
|
||||
static int ext4_mb_init_per_dev_proc(struct super_block *sb)
|
||||
{
|
||||
#ifdef CONFIG_PROC_FS
|
||||
mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct proc_dir_entry *proc;
|
||||
|
||||
if (sbi->s_proc == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats);
|
||||
EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan);
|
||||
EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan);
|
||||
EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs);
|
||||
EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request);
|
||||
EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc);
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
|
||||
return -ENOMEM;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
|
||||
{
|
||||
#ifdef CONFIG_PROC_FS
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
||||
if (sbi->s_proc == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
|
||||
remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __init init_ext4_mballoc(void)
|
||||
{
|
||||
ext4_pspace_cachep =
|
||||
|
@ -3096,9 +3044,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
|
|||
if (sbi->s_log_groups_per_flex) {
|
||||
ext4_group_t flex_group = ext4_flex_group(sbi,
|
||||
ac->ac_b_ex.fe_group);
|
||||
spin_lock(sb_bgl_lock(sbi, flex_group));
|
||||
sbi->s_flex_groups[flex_group].free_blocks -= ac->ac_b_ex.fe_len;
|
||||
spin_unlock(sb_bgl_lock(sbi, flex_group));
|
||||
atomic_sub(ac->ac_b_ex.fe_len,
|
||||
&sbi->s_flex_groups[flex_group].free_blocks);
|
||||
}
|
||||
|
||||
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
|
||||
|
@ -3116,7 +3063,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
|
|||
* here we normalize request for locality group
|
||||
* Group requests are normalized to s_stripe size if we set the same via mount
|
||||
* option. If not we set it to s_mb_group_prealloc which can be configured via
|
||||
* /proc/fs/ext4/<partition>/group_prealloc
|
||||
* /sys/fs/ext4/<partition>/mb_group_prealloc
|
||||
*
|
||||
* XXX: should we try to preallocate more than the group has now?
|
||||
*/
|
||||
|
@ -3608,8 +3555,11 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
|
|||
spin_unlock(&pa->pa_lock);
|
||||
|
||||
grp_blk = pa->pa_pstart;
|
||||
/* If linear, pa_pstart may be in the next group when pa is used up */
|
||||
if (pa->pa_linear)
|
||||
/*
|
||||
* If doing group-based preallocation, pa_pstart may be in the
|
||||
* next group when pa is used up
|
||||
*/
|
||||
if (pa->pa_type == MB_GROUP_PA)
|
||||
grp_blk--;
|
||||
|
||||
ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
|
||||
|
@ -3704,7 +3654,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
|
|||
INIT_LIST_HEAD(&pa->pa_inode_list);
|
||||
INIT_LIST_HEAD(&pa->pa_group_list);
|
||||
pa->pa_deleted = 0;
|
||||
pa->pa_linear = 0;
|
||||
pa->pa_type = MB_INODE_PA;
|
||||
|
||||
mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
|
||||
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
|
||||
|
@ -3767,7 +3717,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
|
|||
INIT_LIST_HEAD(&pa->pa_inode_list);
|
||||
INIT_LIST_HEAD(&pa->pa_group_list);
|
||||
pa->pa_deleted = 0;
|
||||
pa->pa_linear = 1;
|
||||
pa->pa_type = MB_GROUP_PA;
|
||||
|
||||
mb_debug("new group pa %p: %llu/%u for %u\n", pa,
|
||||
pa->pa_pstart, pa->pa_len, pa->pa_lstart);
|
||||
|
@ -4021,7 +3971,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
|
|||
list_del_rcu(&pa->pa_inode_list);
|
||||
spin_unlock(pa->pa_obj_lock);
|
||||
|
||||
if (pa->pa_linear)
|
||||
if (pa->pa_type == MB_GROUP_PA)
|
||||
ext4_mb_release_group_pa(&e4b, pa, ac);
|
||||
else
|
||||
ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
|
||||
|
@ -4121,7 +4071,7 @@ void ext4_discard_preallocations(struct inode *inode)
|
|||
spin_unlock(&ei->i_prealloc_lock);
|
||||
|
||||
list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
|
||||
BUG_ON(pa->pa_linear != 0);
|
||||
BUG_ON(pa->pa_type != MB_INODE_PA);
|
||||
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
|
||||
|
||||
err = ext4_mb_load_buddy(sb, group, &e4b);
|
||||
|
@ -4232,7 +4182,7 @@ static inline void ext4_mb_show_ac(struct ext4_allocation_context *ac)
|
|||
* file is determined by the current size or the resulting size after
|
||||
* allocation which ever is larger
|
||||
*
|
||||
* One can tune this size via /proc/fs/ext4/<partition>/stream_req
|
||||
* One can tune this size via /sys/fs/ext4/<partition>/mb_stream_req
|
||||
*/
|
||||
static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
|
||||
{
|
||||
|
@ -4373,7 +4323,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
|
|||
continue;
|
||||
}
|
||||
/* only lg prealloc space */
|
||||
BUG_ON(!pa->pa_linear);
|
||||
BUG_ON(pa->pa_type != MB_GROUP_PA);
|
||||
|
||||
/* seems this one can be freed ... */
|
||||
pa->pa_deleted = 1;
|
||||
|
@ -4442,7 +4392,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
|
|||
pa_inode_list) {
|
||||
spin_lock(&tmp_pa->pa_lock);
|
||||
if (tmp_pa->pa_deleted) {
|
||||
spin_unlock(&pa->pa_lock);
|
||||
spin_unlock(&tmp_pa->pa_lock);
|
||||
continue;
|
||||
}
|
||||
if (!added && pa->pa_free < tmp_pa->pa_free) {
|
||||
|
@ -4479,7 +4429,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
|
|||
{
|
||||
struct ext4_prealloc_space *pa = ac->ac_pa;
|
||||
if (pa) {
|
||||
if (pa->pa_linear) {
|
||||
if (pa->pa_type == MB_GROUP_PA) {
|
||||
/* see comment in ext4_mb_use_group_pa() */
|
||||
spin_lock(&pa->pa_lock);
|
||||
pa->pa_pstart += ac->ac_b_ex.fe_len;
|
||||
|
@ -4499,7 +4449,7 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
|
|||
* doesn't grow big. We need to release
|
||||
* alloc_semp before calling ext4_mb_add_n_trim()
|
||||
*/
|
||||
if (pa->pa_linear && likely(pa->pa_free)) {
|
||||
if ((pa->pa_type == MB_GROUP_PA) && likely(pa->pa_free)) {
|
||||
spin_lock(pa->pa_obj_lock);
|
||||
list_del_rcu(&pa->pa_inode_list);
|
||||
spin_unlock(pa->pa_obj_lock);
|
||||
|
@ -4936,9 +4886,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
|
|||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
|
||||
spin_lock(sb_bgl_lock(sbi, flex_group));
|
||||
sbi->s_flex_groups[flex_group].free_blocks += count;
|
||||
spin_unlock(sb_bgl_lock(sbi, flex_group));
|
||||
atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
|
||||
}
|
||||
|
||||
ext4_mb_release_desc(&e4b);
|
||||
|
|
|
@ -132,12 +132,15 @@ struct ext4_prealloc_space {
|
|||
ext4_lblk_t pa_lstart; /* log. block */
|
||||
unsigned short pa_len; /* len of preallocated chunk */
|
||||
unsigned short pa_free; /* how many blocks are free */
|
||||
unsigned short pa_linear; /* consumed in one direction
|
||||
* strictly, for grp prealloc */
|
||||
unsigned short pa_type; /* pa type. inode or group */
|
||||
spinlock_t *pa_obj_lock;
|
||||
struct inode *pa_inode; /* hack, for history only */
|
||||
};
|
||||
|
||||
enum {
|
||||
MB_INODE_PA = 0,
|
||||
MB_GROUP_PA = 1
|
||||
};
|
||||
|
||||
struct ext4_free_extent {
|
||||
ext4_lblk_t fe_logical;
|
||||
|
@ -247,7 +250,6 @@ static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
|
|||
|
||||
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
|
||||
|
||||
struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
|
||||
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
|
||||
struct ext4_free_extent *fex)
|
||||
{
|
||||
|
|
164
fs/ext4/namei.c
164
fs/ext4/namei.c
|
@ -161,12 +161,12 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
|
|||
struct dx_frame *frame,
|
||||
int *err);
|
||||
static void dx_release(struct dx_frame *frames);
|
||||
static int dx_make_map(struct ext4_dir_entry_2 *de, int size,
|
||||
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
|
||||
struct dx_hash_info *hinfo, struct dx_map_entry map[]);
|
||||
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
|
||||
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
|
||||
struct dx_map_entry *offsets, int count);
|
||||
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size);
|
||||
struct dx_map_entry *offsets, int count, unsigned blocksize);
|
||||
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
|
||||
static void dx_insert_block(struct dx_frame *frame,
|
||||
u32 hash, ext4_lblk_t block);
|
||||
static int ext4_htree_next_block(struct inode *dir, __u32 hash,
|
||||
|
@ -180,14 +180,38 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
|
|||
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
|
||||
struct inode *inode);
|
||||
|
||||
unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
|
||||
{
|
||||
unsigned len = le16_to_cpu(dlen);
|
||||
|
||||
if (len == EXT4_MAX_REC_LEN || len == 0)
|
||||
return blocksize;
|
||||
return (len & 65532) | ((len & 3) << 16);
|
||||
}
|
||||
|
||||
__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
|
||||
{
|
||||
if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
|
||||
BUG();
|
||||
if (len < 65536)
|
||||
return cpu_to_le16(len);
|
||||
if (len == blocksize) {
|
||||
if (blocksize == 65536)
|
||||
return cpu_to_le16(EXT4_MAX_REC_LEN);
|
||||
else
|
||||
return cpu_to_le16(0);
|
||||
}
|
||||
return cpu_to_le16((len & 65532) | ((len >> 16) & 3));
|
||||
}
|
||||
|
||||
/*
|
||||
* p is at least 6 bytes before the end of page
|
||||
*/
|
||||
static inline struct ext4_dir_entry_2 *
|
||||
ext4_next_entry(struct ext4_dir_entry_2 *p)
|
||||
ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
|
||||
{
|
||||
return (struct ext4_dir_entry_2 *)((char *)p +
|
||||
ext4_rec_len_from_disk(p->rec_len));
|
||||
ext4_rec_len_from_disk(p->rec_len, blocksize));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -294,7 +318,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
|
|||
space += EXT4_DIR_REC_LEN(de->name_len);
|
||||
names++;
|
||||
}
|
||||
de = ext4_next_entry(de);
|
||||
de = ext4_next_entry(de, size);
|
||||
}
|
||||
printk("(%i)\n", names);
|
||||
return (struct stats) { names, space, 1 };
|
||||
|
@ -585,7 +609,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
|
|||
top = (struct ext4_dir_entry_2 *) ((char *) de +
|
||||
dir->i_sb->s_blocksize -
|
||||
EXT4_DIR_REC_LEN(0));
|
||||
for (; de < top; de = ext4_next_entry(de)) {
|
||||
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
|
||||
if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
|
||||
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
|
||||
+((char *)de - bh->b_data))) {
|
||||
|
@ -663,7 +687,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
|
|||
}
|
||||
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
|
||||
de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
|
||||
de = ext4_next_entry(de);
|
||||
de = ext4_next_entry(de, dir->i_sb->s_blocksize);
|
||||
if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
|
||||
goto errout;
|
||||
count++;
|
||||
|
@ -713,15 +737,15 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
|
|||
* Create map of hash values, offsets, and sizes, stored at end of block.
|
||||
* Returns number of entries mapped.
|
||||
*/
|
||||
static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
|
||||
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
|
||||
static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
|
||||
struct dx_hash_info *hinfo,
|
||||
struct dx_map_entry *map_tail)
|
||||
{
|
||||
int count = 0;
|
||||
char *base = (char *) de;
|
||||
struct dx_hash_info h = *hinfo;
|
||||
|
||||
while ((char *) de < base + size)
|
||||
{
|
||||
while ((char *) de < base + blocksize) {
|
||||
if (de->name_len && de->inode) {
|
||||
ext4fs_dirhash(de->name, de->name_len, &h);
|
||||
map_tail--;
|
||||
|
@ -732,7 +756,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
|
|||
cond_resched();
|
||||
}
|
||||
/* XXX: do we need to check rec_len == 0 case? -Chris */
|
||||
de = ext4_next_entry(de);
|
||||
de = ext4_next_entry(de, blocksize);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
@ -832,7 +856,8 @@ static inline int search_dirblock(struct buffer_head *bh,
|
|||
return 1;
|
||||
}
|
||||
/* prevent looping on a bad block */
|
||||
de_len = ext4_rec_len_from_disk(de->rec_len);
|
||||
de_len = ext4_rec_len_from_disk(de->rec_len,
|
||||
dir->i_sb->s_blocksize);
|
||||
if (de_len <= 0)
|
||||
return -1;
|
||||
offset += de_len;
|
||||
|
@ -996,7 +1021,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
|
|||
de = (struct ext4_dir_entry_2 *) bh->b_data;
|
||||
top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
|
||||
EXT4_DIR_REC_LEN(0));
|
||||
for (; de < top; de = ext4_next_entry(de)) {
|
||||
for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
|
||||
int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
|
||||
+ ((char *) de - bh->b_data);
|
||||
|
||||
|
@ -1052,8 +1077,16 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
|
|||
return ERR_PTR(-EIO);
|
||||
}
|
||||
inode = ext4_iget(dir->i_sb, ino);
|
||||
if (IS_ERR(inode))
|
||||
return ERR_CAST(inode);
|
||||
if (unlikely(IS_ERR(inode))) {
|
||||
if (PTR_ERR(inode) == -ESTALE) {
|
||||
ext4_error(dir->i_sb, __func__,
|
||||
"deleted inode referenced: %u",
|
||||
ino);
|
||||
return ERR_PTR(-EIO);
|
||||
} else {
|
||||
return ERR_CAST(inode);
|
||||
}
|
||||
}
|
||||
}
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
|
@ -1109,7 +1142,8 @@ static inline void ext4_set_de_type(struct super_block *sb,
|
|||
* Returns pointer to last entry moved.
|
||||
*/
|
||||
static struct ext4_dir_entry_2 *
|
||||
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
|
||||
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
|
||||
unsigned blocksize)
|
||||
{
|
||||
unsigned rec_len = 0;
|
||||
|
||||
|
@ -1118,7 +1152,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
|
|||
rec_len = EXT4_DIR_REC_LEN(de->name_len);
|
||||
memcpy (to, de, rec_len);
|
||||
((struct ext4_dir_entry_2 *) to)->rec_len =
|
||||
ext4_rec_len_to_disk(rec_len);
|
||||
ext4_rec_len_to_disk(rec_len, blocksize);
|
||||
de->inode = 0;
|
||||
map++;
|
||||
to += rec_len;
|
||||
|
@ -1130,19 +1164,19 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
|
|||
* Compact each dir entry in the range to the minimal rec_len.
|
||||
* Returns pointer to last entry in range.
|
||||
*/
|
||||
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
|
||||
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
|
||||
{
|
||||
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
|
||||
unsigned rec_len = 0;
|
||||
|
||||
prev = to = de;
|
||||
while ((char*)de < base + size) {
|
||||
next = ext4_next_entry(de);
|
||||
while ((char*)de < base + blocksize) {
|
||||
next = ext4_next_entry(de, blocksize);
|
||||
if (de->inode && de->name_len) {
|
||||
rec_len = EXT4_DIR_REC_LEN(de->name_len);
|
||||
if (de > to)
|
||||
memmove(to, de, rec_len);
|
||||
to->rec_len = ext4_rec_len_to_disk(rec_len);
|
||||
to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
|
||||
prev = to;
|
||||
to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
|
||||
}
|
||||
|
@ -1215,10 +1249,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
|
|||
hash2, split, count-split));
|
||||
|
||||
/* Fancy dance to stay within two buffers */
|
||||
de2 = dx_move_dirents(data1, data2, map + split, count - split);
|
||||
de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
|
||||
de = dx_pack_dirents(data1, blocksize);
|
||||
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
|
||||
de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
|
||||
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
|
||||
blocksize);
|
||||
de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2,
|
||||
blocksize);
|
||||
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
|
||||
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
|
||||
|
||||
|
@ -1268,6 +1304,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
|||
const char *name = dentry->d_name.name;
|
||||
int namelen = dentry->d_name.len;
|
||||
unsigned int offset = 0;
|
||||
unsigned int blocksize = dir->i_sb->s_blocksize;
|
||||
unsigned short reclen;
|
||||
int nlen, rlen, err;
|
||||
char *top;
|
||||
|
@ -1275,7 +1312,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
|||
reclen = EXT4_DIR_REC_LEN(namelen);
|
||||
if (!de) {
|
||||
de = (struct ext4_dir_entry_2 *)bh->b_data;
|
||||
top = bh->b_data + dir->i_sb->s_blocksize - reclen;
|
||||
top = bh->b_data + blocksize - reclen;
|
||||
while ((char *) de <= top) {
|
||||
if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
|
||||
bh, offset)) {
|
||||
|
@ -1287,7 +1324,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
|||
return -EEXIST;
|
||||
}
|
||||
nlen = EXT4_DIR_REC_LEN(de->name_len);
|
||||
rlen = ext4_rec_len_from_disk(de->rec_len);
|
||||
rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
|
||||
if ((de->inode? rlen - nlen: rlen) >= reclen)
|
||||
break;
|
||||
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
|
||||
|
@ -1306,11 +1343,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
|
|||
|
||||
/* By now the buffer is marked for journaling */
|
||||
nlen = EXT4_DIR_REC_LEN(de->name_len);
|
||||
rlen = ext4_rec_len_from_disk(de->rec_len);
|
||||
rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
|
||||
if (de->inode) {
|
||||
struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
|
||||
de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
|
||||
de->rec_len = ext4_rec_len_to_disk(nlen);
|
||||
de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
|
||||
de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
|
||||
de = de1;
|
||||
}
|
||||
de->file_type = EXT4_FT_UNKNOWN;
|
||||
|
@ -1380,7 +1417,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
|
|||
/* The 0th block becomes the root, move the dirents out */
|
||||
fde = &root->dotdot;
|
||||
de = (struct ext4_dir_entry_2 *)((char *)fde +
|
||||
ext4_rec_len_from_disk(fde->rec_len));
|
||||
ext4_rec_len_from_disk(fde->rec_len, blocksize));
|
||||
if ((char *) de >= (((char *) root) + blocksize)) {
|
||||
ext4_error(dir->i_sb, __func__,
|
||||
"invalid rec_len for '..' in inode %lu",
|
||||
|
@ -1402,12 +1439,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
|
|||
memcpy (data1, de, len);
|
||||
de = (struct ext4_dir_entry_2 *) data1;
|
||||
top = data1 + len;
|
||||
while ((char *)(de2 = ext4_next_entry(de)) < top)
|
||||
while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
|
||||
de = de2;
|
||||
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
|
||||
de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de,
|
||||
blocksize);
|
||||
/* Initialize the root; the dot dirents already exist */
|
||||
de = (struct ext4_dir_entry_2 *) (&root->dotdot);
|
||||
de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
|
||||
de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
|
||||
blocksize);
|
||||
memset (&root->info, 0, sizeof(root->info));
|
||||
root->info.info_length = sizeof(root->info);
|
||||
root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
|
||||
|
@ -1488,7 +1527,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
|
|||
return retval;
|
||||
de = (struct ext4_dir_entry_2 *) bh->b_data;
|
||||
de->inode = 0;
|
||||
de->rec_len = ext4_rec_len_to_disk(blocksize);
|
||||
de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
|
||||
return add_dirent_to_buf(handle, dentry, inode, de, bh);
|
||||
}
|
||||
|
||||
|
@ -1551,7 +1590,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
|
|||
goto cleanup;
|
||||
node2 = (struct dx_node *)(bh2->b_data);
|
||||
entries2 = node2->entries;
|
||||
node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
|
||||
node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
|
||||
sb->s_blocksize);
|
||||
node2->fake.inode = 0;
|
||||
BUFFER_TRACE(frame->bh, "get_write_access");
|
||||
err = ext4_journal_get_write_access(handle, frame->bh);
|
||||
|
@ -1639,6 +1679,7 @@ static int ext4_delete_entry(handle_t *handle,
|
|||
struct buffer_head *bh)
|
||||
{
|
||||
struct ext4_dir_entry_2 *de, *pde;
|
||||
unsigned int blocksize = dir->i_sb->s_blocksize;
|
||||
int i;
|
||||
|
||||
i = 0;
|
||||
|
@ -1652,8 +1693,11 @@ static int ext4_delete_entry(handle_t *handle,
|
|||
ext4_journal_get_write_access(handle, bh);
|
||||
if (pde)
|
||||
pde->rec_len = ext4_rec_len_to_disk(
|
||||
ext4_rec_len_from_disk(pde->rec_len) +
|
||||
ext4_rec_len_from_disk(de->rec_len));
|
||||
ext4_rec_len_from_disk(pde->rec_len,
|
||||
blocksize) +
|
||||
ext4_rec_len_from_disk(de->rec_len,
|
||||
blocksize),
|
||||
blocksize);
|
||||
else
|
||||
de->inode = 0;
|
||||
dir->i_version++;
|
||||
|
@ -1661,9 +1705,9 @@ static int ext4_delete_entry(handle_t *handle,
|
|||
ext4_handle_dirty_metadata(handle, dir, bh);
|
||||
return 0;
|
||||
}
|
||||
i += ext4_rec_len_from_disk(de->rec_len);
|
||||
i += ext4_rec_len_from_disk(de->rec_len, blocksize);
|
||||
pde = de;
|
||||
de = ext4_next_entry(de);
|
||||
de = ext4_next_entry(de, blocksize);
|
||||
}
|
||||
return -ENOENT;
|
||||
}
|
||||
|
@ -1793,6 +1837,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|||
struct inode *inode;
|
||||
struct buffer_head *dir_block;
|
||||
struct ext4_dir_entry_2 *de;
|
||||
unsigned int blocksize = dir->i_sb->s_blocksize;
|
||||
int err, retries = 0;
|
||||
|
||||
if (EXT4_DIR_LINK_MAX(dir))
|
||||
|
@ -1824,13 +1869,14 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
|
|||
de = (struct ext4_dir_entry_2 *) dir_block->b_data;
|
||||
de->inode = cpu_to_le32(inode->i_ino);
|
||||
de->name_len = 1;
|
||||
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
|
||||
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
|
||||
blocksize);
|
||||
strcpy(de->name, ".");
|
||||
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
|
||||
de = ext4_next_entry(de);
|
||||
de = ext4_next_entry(de, blocksize);
|
||||
de->inode = cpu_to_le32(dir->i_ino);
|
||||
de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
|
||||
EXT4_DIR_REC_LEN(1));
|
||||
de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(1),
|
||||
blocksize);
|
||||
de->name_len = 2;
|
||||
strcpy(de->name, "..");
|
||||
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
|
||||
|
@ -1885,7 +1931,7 @@ static int empty_dir(struct inode *inode)
|
|||
return 1;
|
||||
}
|
||||
de = (struct ext4_dir_entry_2 *) bh->b_data;
|
||||
de1 = ext4_next_entry(de);
|
||||
de1 = ext4_next_entry(de, sb->s_blocksize);
|
||||
if (le32_to_cpu(de->inode) != inode->i_ino ||
|
||||
!le32_to_cpu(de1->inode) ||
|
||||
strcmp(".", de->name) ||
|
||||
|
@ -1896,9 +1942,9 @@ static int empty_dir(struct inode *inode)
|
|||
brelse(bh);
|
||||
return 1;
|
||||
}
|
||||
offset = ext4_rec_len_from_disk(de->rec_len) +
|
||||
ext4_rec_len_from_disk(de1->rec_len);
|
||||
de = ext4_next_entry(de1);
|
||||
offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
|
||||
ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
|
||||
de = ext4_next_entry(de1, sb->s_blocksize);
|
||||
while (offset < inode->i_size) {
|
||||
if (!bh ||
|
||||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
|
||||
|
@ -1927,8 +1973,8 @@ static int empty_dir(struct inode *inode)
|
|||
brelse(bh);
|
||||
return 0;
|
||||
}
|
||||
offset += ext4_rec_len_from_disk(de->rec_len);
|
||||
de = ext4_next_entry(de);
|
||||
offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
|
||||
de = ext4_next_entry(de, sb->s_blocksize);
|
||||
}
|
||||
brelse(bh);
|
||||
return 1;
|
||||
|
@ -2297,8 +2343,8 @@ static int ext4_link(struct dentry *old_dentry,
|
|||
return err;
|
||||
}
|
||||
|
||||
#define PARENT_INO(buffer) \
|
||||
(ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)
|
||||
#define PARENT_INO(buffer, size) \
|
||||
(ext4_next_entry((struct ext4_dir_entry_2 *)(buffer), size)->inode)
|
||||
|
||||
/*
|
||||
* Anybody can rename anything with this: the permission checks are left to the
|
||||
|
@ -2311,7 +2357,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||
struct inode *old_inode, *new_inode;
|
||||
struct buffer_head *old_bh, *new_bh, *dir_bh;
|
||||
struct ext4_dir_entry_2 *old_de, *new_de;
|
||||
int retval;
|
||||
int retval, force_da_alloc = 0;
|
||||
|
||||
old_bh = new_bh = dir_bh = NULL;
|
||||
|
||||
|
@ -2358,7 +2404,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||
dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
|
||||
if (!dir_bh)
|
||||
goto end_rename;
|
||||
if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
|
||||
if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
|
||||
old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
|
||||
goto end_rename;
|
||||
retval = -EMLINK;
|
||||
if (!new_inode && new_dir != old_dir &&
|
||||
|
@ -2430,7 +2477,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||
if (dir_bh) {
|
||||
BUFFER_TRACE(dir_bh, "get_write_access");
|
||||
ext4_journal_get_write_access(handle, dir_bh);
|
||||
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
|
||||
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
|
||||
cpu_to_le32(new_dir->i_ino);
|
||||
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
|
||||
ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
|
||||
ext4_dec_count(handle, old_dir);
|
||||
|
@ -2449,6 +2497,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||
ext4_mark_inode_dirty(handle, new_inode);
|
||||
if (!new_inode->i_nlink)
|
||||
ext4_orphan_add(handle, new_inode);
|
||||
if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
|
||||
force_da_alloc = 1;
|
||||
}
|
||||
retval = 0;
|
||||
|
||||
|
@ -2457,6 +2507,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|||
brelse(old_bh);
|
||||
brelse(new_bh);
|
||||
ext4_journal_stop(handle);
|
||||
if (retval == 0 && force_da_alloc)
|
||||
ext4_alloc_da_blocks(old_inode);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
|
|
@ -938,10 +938,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
|
|||
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
|
||||
ext4_group_t flex_group;
|
||||
flex_group = ext4_flex_group(sbi, input->group);
|
||||
sbi->s_flex_groups[flex_group].free_blocks +=
|
||||
input->free_blocks_count;
|
||||
sbi->s_flex_groups[flex_group].free_inodes +=
|
||||
EXT4_INODES_PER_GROUP(sb);
|
||||
atomic_add(input->free_blocks_count,
|
||||
&sbi->s_flex_groups[flex_group].free_blocks);
|
||||
atomic_add(EXT4_INODES_PER_GROUP(sb),
|
||||
&sbi->s_flex_groups[flex_group].free_inodes);
|
||||
}
|
||||
|
||||
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
|
||||
|
|
327
fs/ext4/super.c
327
fs/ext4/super.c
|
@ -35,6 +35,7 @@
|
|||
#include <linux/quotaops.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/marker.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/crc16.h>
|
||||
|
@ -48,6 +49,7 @@
|
|||
#include "group.h"
|
||||
|
||||
struct proc_dir_entry *ext4_proc_root;
|
||||
static struct kset *ext4_kset;
|
||||
|
||||
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
|
||||
unsigned long journal_devnum);
|
||||
|
@ -577,9 +579,9 @@ static void ext4_put_super(struct super_block *sb)
|
|||
ext4_commit_super(sb, es, 1);
|
||||
}
|
||||
if (sbi->s_proc) {
|
||||
remove_proc_entry("inode_readahead_blks", sbi->s_proc);
|
||||
remove_proc_entry(sb->s_id, ext4_proc_root);
|
||||
}
|
||||
kobject_del(&sbi->s_kobj);
|
||||
|
||||
for (i = 0; i < sbi->s_gdb_count; i++)
|
||||
brelse(sbi->s_group_desc[i]);
|
||||
|
@ -615,6 +617,17 @@ static void ext4_put_super(struct super_block *sb)
|
|||
ext4_blkdev_remove(sbi);
|
||||
}
|
||||
sb->s_fs_info = NULL;
|
||||
/*
|
||||
* Now that we are completely done shutting down the
|
||||
* superblock, we need to actually destroy the kobject.
|
||||
*/
|
||||
unlock_kernel();
|
||||
unlock_super(sb);
|
||||
kobject_put(&sbi->s_kobj);
|
||||
wait_for_completion(&sbi->s_kobj_unregister);
|
||||
lock_super(sb);
|
||||
lock_kernel();
|
||||
kfree(sbi->s_blockgroup_lock);
|
||||
kfree(sbi);
|
||||
return;
|
||||
}
|
||||
|
@ -803,8 +816,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
|||
if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
|
||||
seq_puts(seq, ",noacl");
|
||||
#endif
|
||||
if (!test_opt(sb, RESERVATION))
|
||||
seq_puts(seq, ",noreservation");
|
||||
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
|
||||
seq_printf(seq, ",commit=%u",
|
||||
(unsigned) (sbi->s_commit_interval / HZ));
|
||||
|
@ -855,6 +866,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
|
|||
if (test_opt(sb, DATA_ERR_ABORT))
|
||||
seq_puts(seq, ",data_err=abort");
|
||||
|
||||
if (test_opt(sb, NO_AUTO_DA_ALLOC))
|
||||
seq_puts(seq, ",noauto_da_alloc");
|
||||
|
||||
ext4_show_quota_options(seq, sb);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1004,7 +1018,7 @@ enum {
|
|||
Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
|
||||
Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
|
||||
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
|
||||
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
|
||||
Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
|
||||
Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
|
||||
Opt_journal_update, Opt_journal_dev,
|
||||
Opt_journal_checksum, Opt_journal_async_commit,
|
||||
|
@ -1012,8 +1026,8 @@ enum {
|
|||
Opt_data_err_abort, Opt_data_err_ignore,
|
||||
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
|
||||
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
|
||||
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
|
||||
Opt_grpquota, Opt_i_version,
|
||||
Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
|
||||
Opt_usrquota, Opt_grpquota, Opt_i_version,
|
||||
Opt_stripe, Opt_delalloc, Opt_nodelalloc,
|
||||
Opt_inode_readahead_blks, Opt_journal_ioprio
|
||||
};
|
||||
|
@ -1039,8 +1053,6 @@ static const match_table_t tokens = {
|
|||
{Opt_nouser_xattr, "nouser_xattr"},
|
||||
{Opt_acl, "acl"},
|
||||
{Opt_noacl, "noacl"},
|
||||
{Opt_reservation, "reservation"},
|
||||
{Opt_noreservation, "noreservation"},
|
||||
{Opt_noload, "noload"},
|
||||
{Opt_nobh, "nobh"},
|
||||
{Opt_bh, "bh"},
|
||||
|
@ -1068,6 +1080,8 @@ static const match_table_t tokens = {
|
|||
{Opt_quota, "quota"},
|
||||
{Opt_usrquota, "usrquota"},
|
||||
{Opt_barrier, "barrier=%u"},
|
||||
{Opt_barrier, "barrier"},
|
||||
{Opt_nobarrier, "nobarrier"},
|
||||
{Opt_i_version, "i_version"},
|
||||
{Opt_stripe, "stripe=%u"},
|
||||
{Opt_resize, "resize"},
|
||||
|
@ -1075,6 +1089,9 @@ static const match_table_t tokens = {
|
|||
{Opt_nodelalloc, "nodelalloc"},
|
||||
{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
|
||||
{Opt_journal_ioprio, "journal_ioprio=%u"},
|
||||
{Opt_auto_da_alloc, "auto_da_alloc=%u"},
|
||||
{Opt_auto_da_alloc, "auto_da_alloc"},
|
||||
{Opt_noauto_da_alloc, "noauto_da_alloc"},
|
||||
{Opt_err, NULL},
|
||||
};
|
||||
|
||||
|
@ -1207,12 +1224,6 @@ static int parse_options(char *options, struct super_block *sb,
|
|||
"not supported\n");
|
||||
break;
|
||||
#endif
|
||||
case Opt_reservation:
|
||||
set_opt(sbi->s_mount_opt, RESERVATION);
|
||||
break;
|
||||
case Opt_noreservation:
|
||||
clear_opt(sbi->s_mount_opt, RESERVATION);
|
||||
break;
|
||||
case Opt_journal_update:
|
||||
/* @@@ FIXME */
|
||||
/* Eventually we will want to be able to create
|
||||
|
@ -1415,9 +1426,14 @@ static int parse_options(char *options, struct super_block *sb,
|
|||
case Opt_abort:
|
||||
set_opt(sbi->s_mount_opt, ABORT);
|
||||
break;
|
||||
case Opt_nobarrier:
|
||||
clear_opt(sbi->s_mount_opt, BARRIER);
|
||||
break;
|
||||
case Opt_barrier:
|
||||
if (match_int(&args[0], &option))
|
||||
return 0;
|
||||
if (match_int(&args[0], &option)) {
|
||||
set_opt(sbi->s_mount_opt, BARRIER);
|
||||
break;
|
||||
}
|
||||
if (option)
|
||||
set_opt(sbi->s_mount_opt, BARRIER);
|
||||
else
|
||||
|
@ -1463,6 +1479,11 @@ static int parse_options(char *options, struct super_block *sb,
|
|||
return 0;
|
||||
if (option < 0 || option > (1 << 30))
|
||||
return 0;
|
||||
if (option & (option - 1)) {
|
||||
printk(KERN_ERR "EXT4-fs: inode_readahead_blks"
|
||||
" must be a power of 2\n");
|
||||
return 0;
|
||||
}
|
||||
sbi->s_inode_readahead_blks = option;
|
||||
break;
|
||||
case Opt_journal_ioprio:
|
||||
|
@ -1473,6 +1494,19 @@ static int parse_options(char *options, struct super_block *sb,
|
|||
*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
|
||||
option);
|
||||
break;
|
||||
case Opt_noauto_da_alloc:
|
||||
set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
|
||||
break;
|
||||
case Opt_auto_da_alloc:
|
||||
if (match_int(&args[0], &option)) {
|
||||
clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
|
||||
break;
|
||||
}
|
||||
if (option)
|
||||
clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
|
||||
else
|
||||
set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
|
||||
break;
|
||||
default:
|
||||
printk(KERN_ERR
|
||||
"EXT4-fs: Unrecognized mount option \"%s\" "
|
||||
|
@ -1612,10 +1646,12 @@ static int ext4_fill_flex_info(struct super_block *sb)
|
|||
gdp = ext4_get_group_desc(sb, i, &bh);
|
||||
|
||||
flex_group = ext4_flex_group(sbi, i);
|
||||
sbi->s_flex_groups[flex_group].free_inodes +=
|
||||
ext4_free_inodes_count(sb, gdp);
|
||||
sbi->s_flex_groups[flex_group].free_blocks +=
|
||||
ext4_free_blks_count(sb, gdp);
|
||||
atomic_set(&sbi->s_flex_groups[flex_group].free_inodes,
|
||||
ext4_free_inodes_count(sb, gdp));
|
||||
atomic_set(&sbi->s_flex_groups[flex_group].free_blocks,
|
||||
ext4_free_blks_count(sb, gdp));
|
||||
atomic_set(&sbi->s_flex_groups[flex_group].used_dirs,
|
||||
ext4_used_dirs_count(sb, gdp));
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
@ -1991,6 +2027,181 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* sysfs support */
|
||||
|
||||
/*
 * Descriptor for one per-filesystem ext4 sysfs attribute: the generic
 * sysfs attribute plus ext4-specific callbacks that are handed the
 * owning ext4_sb_info instead of a raw kobject.
 */
struct ext4_attr {
|
||||
struct attribute attr;
|
||||
/* Formats the attribute's current value into the char buffer. */
ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);
|
||||
/* Consumes a new value from the buffer; size_t is the input length. */
ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,
|
||||
const char *, size_t);
|
||||
/*
 * Byte offset of the backing field inside struct ext4_sb_info; filled
 * in by EXT4_ATTR_OFFSET() and read by sbi_ui_show()/sbi_ui_store().
 */
int offset;
|
||||
};
|
||||
|
||||
/*
 * Parse an unsigned integer out of a sysfs write buffer.
 *
 * Whitespace before and after the number (such as the newline appended
 * by "echo") is skipped.  The number is converted by simple_strtoul()
 * with base 0, i.e. the base is taken from the number's prefix.
 *
 * @buf:   NUL-terminated input string
 * @max:   largest value that is accepted
 * @value: receives the converted number; it is written even when the
 *         function goes on to fail
 *
 * Returns 0 on success, or -EINVAL when no digits are present, when
 * anything other than whitespace follows the number, or when the number
 * is larger than @max.
 */
static int parse_strtoul(const char *buf,
			 unsigned long max, unsigned long *value)
{
	char *endp;

	while (*buf && isspace(*buf))
		buf++;
	*value = simple_strtoul(buf, &endp, 0);

	/*
	 * Nothing was consumed: the input was empty or blank.  Reject it
	 * rather than silently treating it as a request to store 0.
	 */
	if (endp == buf)
		return -EINVAL;

	while (*endp && isspace(*endp))
		endp++;
	if (*endp || *value > max)
		return -EINVAL;

	return 0;
}
|
||||
|
||||
static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi,
|
||||
char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
(s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter));
|
||||
}
|
||||
|
||||
static ssize_t session_write_kbytes_show(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi, char *buf)
|
||||
{
|
||||
struct super_block *sb = sbi->s_buddy_cache->i_sb;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lu\n",
|
||||
(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
|
||||
sbi->s_sectors_written_start) >> 1);
|
||||
}
|
||||
|
||||
static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi, char *buf)
|
||||
{
|
||||
struct super_block *sb = sbi->s_buddy_cache->i_sb;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
sbi->s_kbytes_written +
|
||||
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
|
||||
EXT4_SB(sb)->s_sectors_written_start) >> 1));
|
||||
}
|
||||
|
||||
static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
unsigned long t;
|
||||
|
||||
if (parse_strtoul(buf, 0x40000000, &t))
|
||||
return -EINVAL;
|
||||
|
||||
/* inode_readahead_blks must be a power of 2 */
|
||||
if (t & (t-1))
|
||||
return -EINVAL;
|
||||
|
||||
sbi->s_inode_readahead_blks = t;
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t sbi_ui_show(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi, char *buf)
|
||||
{
|
||||
unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
|
||||
}
|
||||
|
||||
static ssize_t sbi_ui_store(struct ext4_attr *a,
|
||||
struct ext4_sb_info *sbi,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset);
|
||||
unsigned long t;
|
||||
|
||||
if (parse_strtoul(buf, 0xffffffff, &t))
|
||||
return -EINVAL;
|
||||
*ui = t;
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
 * Define a static struct ext4_attr named ext4_attr_<_name> with the given
 * mode and show/store callbacks, and record in .offset where the member
 * _elname lives inside struct ext4_sb_info.
 */
#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \
|
||||
static struct ext4_attr ext4_attr_##_name = { \
|
||||
.attr = {.name = __stringify(_name), .mode = _mode }, \
|
||||
.show = _show, \
|
||||
.store = _store, \
|
||||
.offset = offsetof(struct ext4_sb_info, _elname), \
|
||||
}
|
||||
/* Define ext4_attr_<name> via __ATTR(); no .offset is supplied. */
#define EXT4_ATTR(name, mode, show, store) \
|
||||
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
|
||||
|
||||
/* Mode 0444 attribute served by <name>_show, with no store callback. */
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
|
||||
/* Mode 0644 attribute served by <name>_show and <name>_store. */
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
|
||||
/*
 * Mode 0644 attribute backed directly by the unsigned int member elname
 * of struct ext4_sb_info, using the generic sbi_ui_show/sbi_ui_store.
 */
#define EXT4_RW_ATTR_SBI_UI(name, elname) \
|
||||
EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
|
||||
/* Address of the embedded struct attribute, for use in attribute arrays. */
#define ATTR_LIST(name) &ext4_attr_##name.attr
|
||||
|
||||
/* Read-only attributes, each served by its own <name>_show function. */
EXT4_RO_ATTR(delayed_allocation_blocks);
|
||||
EXT4_RO_ATTR(session_write_kbytes);
|
||||
EXT4_RO_ATTR(lifetime_write_kbytes);
|
||||
/*
 * inode_readahead_blks is shown by the generic handler but has its own
 * store function, which additionally validates the written value.
 */
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
|
||||
inode_readahead_blks_store, s_inode_readahead_blks);
|
||||
/* Read/write attributes mapped straight onto ext4_sb_info members. */
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
|
||||
|
||||
/*
 * NULL-terminated list of the attributes defined above; installed as
 * the default attribute set of ext4_ktype.
 */
static struct attribute *ext4_attrs[] = {
|
||||
ATTR_LIST(delayed_allocation_blocks),
|
||||
ATTR_LIST(session_write_kbytes),
|
||||
ATTR_LIST(lifetime_write_kbytes),
|
||||
ATTR_LIST(inode_readahead_blks),
|
||||
ATTR_LIST(mb_stats),
|
||||
ATTR_LIST(mb_max_to_scan),
|
||||
ATTR_LIST(mb_min_to_scan),
|
||||
ATTR_LIST(mb_order2_req),
|
||||
ATTR_LIST(mb_stream_req),
|
||||
ATTR_LIST(mb_group_prealloc),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static ssize_t ext4_attr_show(struct kobject *kobj,
|
||||
struct attribute *attr, char *buf)
|
||||
{
|
||||
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
|
||||
s_kobj);
|
||||
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
|
||||
|
||||
return a->show ? a->show(a, sbi, buf) : 0;
|
||||
}
|
||||
|
||||
static ssize_t ext4_attr_store(struct kobject *kobj,
|
||||
struct attribute *attr,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
|
||||
s_kobj);
|
||||
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
|
||||
|
||||
return a->store ? a->store(a, sbi, buf, len) : 0;
|
||||
}
|
||||
|
||||
static void ext4_sb_release(struct kobject *kobj)
|
||||
{
|
||||
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
|
||||
s_kobj);
|
||||
complete(&sbi->s_kobj_unregister);
|
||||
}
|
||||
|
||||
|
||||
/*
 * sysfs operations shared by all ext4 per-filesystem attributes; both
 * entry points dispatch to the callbacks stored in struct ext4_attr.
 */
static struct sysfs_ops ext4_attr_ops = {
|
||||
.show = ext4_attr_show,
|
||||
.store = ext4_attr_store,
|
||||
};
|
||||
|
||||
/*
 * kobject type for the s_kobj embedded in struct ext4_sb_info: supplies
 * the default attribute list, the show/store dispatchers, and the
 * release callback that signals s_kobj_unregister.
 */
static struct kobj_type ext4_ktype = {
|
||||
.default_attrs = ext4_attrs,
|
||||
.sysfs_ops = &ext4_attr_ops,
|
||||
.release = ext4_sb_release,
|
||||
};
|
||||
|
||||
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
__releases(kernel_lock)
|
||||
__acquires(kernel_lock)
|
||||
|
@ -2021,12 +2232,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
|
||||
if (!sbi)
|
||||
return -ENOMEM;
|
||||
|
||||
sbi->s_blockgroup_lock =
|
||||
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
|
||||
if (!sbi->s_blockgroup_lock) {
|
||||
kfree(sbi);
|
||||
return -ENOMEM;
|
||||
}
|
||||
sb->s_fs_info = sbi;
|
||||
sbi->s_mount_opt = 0;
|
||||
sbi->s_resuid = EXT4_DEF_RESUID;
|
||||
sbi->s_resgid = EXT4_DEF_RESGID;
|
||||
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
|
||||
sbi->s_sb_block = sb_block;
|
||||
sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part,
|
||||
sectors[1]);
|
||||
|
||||
unlock_kernel();
|
||||
|
||||
|
@ -2064,6 +2284,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
sb->s_magic = le16_to_cpu(es->s_magic);
|
||||
if (sb->s_magic != EXT4_SUPER_MAGIC)
|
||||
goto cantfind_ext4;
|
||||
sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
|
||||
|
||||
/* Set defaults before we parse the mount options */
|
||||
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
|
||||
|
@ -2101,7 +2322,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
|
||||
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
|
||||
|
||||
set_opt(sbi->s_mount_opt, RESERVATION);
|
||||
set_opt(sbi->s_mount_opt, BARRIER);
|
||||
|
||||
/*
|
||||
|
@ -2325,14 +2545,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
#ifdef CONFIG_PROC_FS
|
||||
if (ext4_proc_root)
|
||||
sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
|
||||
|
||||
if (sbi->s_proc)
|
||||
proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
|
||||
&ext4_ui_proc_fops,
|
||||
&sbi->s_inode_readahead_blks);
|
||||
#endif
|
||||
|
||||
bgl_lock_init(&sbi->s_blockgroup_lock);
|
||||
bgl_lock_init(sbi->s_blockgroup_lock);
|
||||
|
||||
for (i = 0; i < db_count; i++) {
|
||||
block = descriptor_loc(sb, logical_sb_block, i);
|
||||
|
@ -2564,6 +2779,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
goto failed_mount4;
|
||||
}
|
||||
|
||||
sbi->s_kobj.kset = ext4_kset;
|
||||
init_completion(&sbi->s_kobj_unregister);
|
||||
err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
|
||||
"%s", sb->s_id);
|
||||
if (err) {
|
||||
ext4_mb_release(sb);
|
||||
ext4_ext_release(sb);
|
||||
goto failed_mount4;
|
||||
};
|
||||
|
||||
/*
|
||||
* akpm: core read_super() calls in here with the superblock locked.
|
||||
* That deadlocks, because orphan cleanup needs to lock the superblock
|
||||
|
@ -2618,7 +2843,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
kfree(sbi->s_group_desc);
|
||||
failed_mount:
|
||||
if (sbi->s_proc) {
|
||||
remove_proc_entry("inode_readahead_blks", sbi->s_proc);
|
||||
remove_proc_entry(sb->s_id, ext4_proc_root);
|
||||
}
|
||||
#ifdef CONFIG_QUOTA
|
||||
|
@ -2913,6 +3137,10 @@ static int ext4_commit_super(struct super_block *sb,
|
|||
set_buffer_uptodate(sbh);
|
||||
}
|
||||
es->s_wtime = cpu_to_le32(get_seconds());
|
||||
es->s_kbytes_written =
|
||||
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
|
||||
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
|
||||
EXT4_SB(sb)->s_sectors_written_start) >> 1));
|
||||
ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
|
||||
&EXT4_SB(sb)->s_freeblocks_counter));
|
||||
es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
|
||||
|
@ -3647,45 +3875,6 @@ static int ext4_get_sb(struct file_system_type *fs_type,
|
|||
return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static int ext4_ui_proc_show(struct seq_file *m, void *v)
|
||||
{
|
||||
unsigned int *p = m->private;
|
||||
|
||||
seq_printf(m, "%u\n", *p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ext4_ui_proc_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
|
||||
}
|
||||
|
||||
static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
|
||||
char str[32];
|
||||
|
||||
if (cnt >= sizeof(str))
|
||||
return -EINVAL;
|
||||
if (copy_from_user(str, buf, cnt))
|
||||
return -EFAULT;
|
||||
|
||||
*p = simple_strtoul(str, NULL, 0);
|
||||
return cnt;
|
||||
}
|
||||
|
||||
const struct file_operations ext4_ui_proc_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ext4_ui_proc_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
.write = ext4_ui_proc_write,
|
||||
};
|
||||
#endif
|
||||
|
||||
static struct file_system_type ext4_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = "ext4",
|
||||
|
@ -3719,6 +3908,9 @@ static int __init init_ext4_fs(void)
|
|||
{
|
||||
int err;
|
||||
|
||||
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
|
||||
if (!ext4_kset)
|
||||
return -ENOMEM;
|
||||
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
|
||||
err = init_ext4_mballoc();
|
||||
if (err)
|
||||
|
@ -3760,6 +3952,7 @@ static void __exit exit_ext4_fs(void)
|
|||
exit_ext4_xattr();
|
||||
exit_ext4_mballoc();
|
||||
remove_proc_entry("fs/ext4", NULL);
|
||||
kset_unregister(ext4_kset);
|
||||
}
|
||||
|
||||
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
|
||||
|
|
|
@ -367,6 +367,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
|||
int tag_bytes = journal_tag_bytes(journal);
|
||||
struct buffer_head *cbh = NULL; /* For transactional checksums */
|
||||
__u32 crc32_sum = ~0;
|
||||
int write_op = WRITE;
|
||||
|
||||
/*
|
||||
* First job: lock down the current transaction and wait for
|
||||
|
@ -401,6 +402,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
|||
spin_lock(&journal->j_state_lock);
|
||||
commit_transaction->t_state = T_LOCKED;
|
||||
|
||||
if (commit_transaction->t_synchronous_commit)
|
||||
write_op = WRITE_SYNC;
|
||||
stats.u.run.rs_wait = commit_transaction->t_max_wait;
|
||||
stats.u.run.rs_locked = jiffies;
|
||||
stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
|
||||
|
@ -680,7 +683,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
|||
clear_buffer_dirty(bh);
|
||||
set_buffer_uptodate(bh);
|
||||
bh->b_end_io = journal_end_buffer_io_sync;
|
||||
submit_bh(WRITE, bh);
|
||||
submit_bh(write_op, bh);
|
||||
}
|
||||
cond_resched();
|
||||
stats.u.run.rs_blocks_logged += bufs;
|
||||
|
|
|
@ -55,6 +55,25 @@
|
|||
* need do nothing.
|
||||
* RevokeValid set, Revoked set:
|
||||
* buffer has been revoked.
|
||||
*
|
||||
* Locking rules:
|
||||
* We keep two hash tables of revoke records. One hashtable belongs to the
|
||||
* running transaction (is pointed to by journal->j_revoke), the other one
|
||||
* belongs to the committing transaction. Accesses to the second hash table
|
||||
* happen only from the kjournald and no other thread touches this table. Also
|
||||
* journal_switch_revoke_table() which switches which hashtable belongs to the
|
||||
* running and which to the committing transaction is called only from
|
||||
* kjournald. Therefore we need no locks when accessing the hashtable belonging
|
||||
* to the committing transaction.
|
||||
*
|
||||
* All users operating on the hash table belonging to the running transaction
|
||||
* have a handle to the transaction. Therefore they are safe from kjournald
|
||||
* switching hash tables under them. For operations on the lists of entries in
|
||||
* the hash table j_revoke_lock is used.
|
||||
*
|
||||
* Finally, the replay code also uses the hash tables, but at that moment no one else
|
||||
* can touch them (filesystem isn't mounted yet) and hence no locking is
|
||||
* needed.
|
||||
*/
|
||||
|
||||
#ifndef __KERNEL__
|
||||
|
@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
|
|||
* the second time we would still have a pending revoke to cancel. So,
|
||||
* do not trust the Revoked bit on buffers unless RevokeValid is also
|
||||
* set.
|
||||
*
|
||||
* The caller must have the journal locked.
|
||||
*/
|
||||
int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
|
||||
{
|
||||
|
@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
|
|||
/*
|
||||
* Write revoke records to the journal for all entries in the current
|
||||
* revoke hash, deleting the entries as we go.
|
||||
*
|
||||
* Called with the journal lock held.
|
||||
*/
|
||||
|
||||
void jbd2_journal_write_revoke_records(journal_t *journal,
|
||||
transaction_t *transaction)
|
||||
{
|
||||
|
|
|
@ -1315,6 +1315,8 @@ int jbd2_journal_stop(handle_t *handle)
|
|||
}
|
||||
}
|
||||
|
||||
if (handle->h_sync)
|
||||
transaction->t_synchronous_commit = 1;
|
||||
current->journal_info = NULL;
|
||||
spin_lock(&journal->j_state_lock);
|
||||
spin_lock(&transaction->t_handle_lock);
|
||||
|
|
|
@ -648,6 +648,12 @@ struct transaction_s
|
|||
*/
|
||||
int t_handle_count;
|
||||
|
||||
/*
|
||||
* This transaction is being forced and some process is
|
||||
* waiting for it to finish.
|
||||
*/
|
||||
int t_synchronous_commit:1;
|
||||
|
||||
/*
|
||||
* For use by the filesystem to store fs-specific data
|
||||
* structures associated with the transaction
|
||||
|
|
Loading…
Reference in a new issue