ext4: return 32/64-bit dir name hash according to usage type
Traditionally ext2/3/4 has returned a 32-bit hash value from llseek() to appease NFSv2, which can only handle a 32-bit cookie for seekdir() and telldir(). However, this causes problems if there are 32-bit hash collisions, since the NFSv2 server can get stuck resending the same entries from the directory repeatedly.

Allow ext4 to return a full 64-bit hash (both major and minor) for telldir to decrease the chance of hash collisions. This still needs integration on the NFS side.

Patch-updated-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de> (blame me if something is not correct)

Signed-off-by: Fan Yong <yong.fan@whamcloud.com>
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
6a8a13e038
commit
d1f5273e9a
3 changed files with 176 additions and 48 deletions
214
fs/ext4/dir.c
214
fs/ext4/dir.c
|
@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = {
|
||||||
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
|
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
|
||||||
};
|
};
|
||||||
|
|
||||||
static int ext4_readdir(struct file *, void *, filldir_t);
|
|
||||||
static int ext4_dx_readdir(struct file *filp,
|
static int ext4_dx_readdir(struct file *filp,
|
||||||
void *dirent, filldir_t filldir);
|
void *dirent, filldir_t filldir);
|
||||||
static int ext4_release_dir(struct inode *inode,
|
|
||||||
struct file *filp);
|
|
||||||
|
|
||||||
const struct file_operations ext4_dir_operations = {
|
|
||||||
.llseek = ext4_llseek,
|
|
||||||
.read = generic_read_dir,
|
|
||||||
.readdir = ext4_readdir, /* we take BKL. needed?*/
|
|
||||||
.unlocked_ioctl = ext4_ioctl,
|
|
||||||
#ifdef CONFIG_COMPAT
|
|
||||||
.compat_ioctl = ext4_compat_ioctl,
|
|
||||||
#endif
|
|
||||||
.fsync = ext4_sync_file,
|
|
||||||
.release = ext4_release_dir,
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
static unsigned char get_dtype(struct super_block *sb, int filetype)
|
static unsigned char get_dtype(struct super_block *sb, int filetype)
|
||||||
{
|
{
|
||||||
|
@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
|
||||||
return (ext4_filetype_table[filetype]);
|
return (ext4_filetype_table[filetype]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the given dir-inode refers to an htree-indexed directory
|
||||||
|
* (or a directory which chould potentially get coverted to use htree
|
||||||
|
* indexing).
|
||||||
|
*
|
||||||
|
* Return 1 if it is a dx dir, 0 if not
|
||||||
|
*/
|
||||||
|
static int is_dx_dir(struct inode *inode)
|
||||||
|
{
|
||||||
|
struct super_block *sb = inode->i_sb;
|
||||||
|
|
||||||
|
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
|
||||||
|
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
|
||||||
|
((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
|
||||||
|
((inode->i_size >> sb->s_blocksize_bits) == 1)))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return 0 if the directory entry is OK, and 1 if there is a problem
|
* Return 0 if the directory entry is OK, and 1 if there is a problem
|
||||||
*
|
*
|
||||||
|
@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp,
|
||||||
unsigned int offset;
|
unsigned int offset;
|
||||||
int i, stored;
|
int i, stored;
|
||||||
struct ext4_dir_entry_2 *de;
|
struct ext4_dir_entry_2 *de;
|
||||||
struct super_block *sb;
|
|
||||||
int err;
|
int err;
|
||||||
struct inode *inode = filp->f_path.dentry->d_inode;
|
struct inode *inode = filp->f_path.dentry->d_inode;
|
||||||
|
struct super_block *sb = inode->i_sb;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
int dir_has_error = 0;
|
int dir_has_error = 0;
|
||||||
|
|
||||||
sb = inode->i_sb;
|
if (is_dx_dir(inode)) {
|
||||||
|
|
||||||
if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
|
|
||||||
EXT4_FEATURE_COMPAT_DIR_INDEX) &&
|
|
||||||
((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) ||
|
|
||||||
((inode->i_size >> sb->s_blocksize_bits) == 1))) {
|
|
||||||
err = ext4_dx_readdir(filp, dirent, filldir);
|
err = ext4_dx_readdir(filp, dirent, filldir);
|
||||||
if (err != ERR_BAD_DX_DIR) {
|
if (err != ERR_BAD_DX_DIR) {
|
||||||
ret = err;
|
ret = err;
|
||||||
|
@ -254,22 +253,134 @@ static int ext4_readdir(struct file *filp,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int is_32bit_api(void)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_COMPAT
|
||||||
|
return is_compat_task();
|
||||||
|
#else
|
||||||
|
return (BITS_PER_LONG == 32);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These functions convert from the major/minor hash to an f_pos
|
* These functions convert from the major/minor hash to an f_pos
|
||||||
* value.
|
* value for dx directories
|
||||||
*
|
*
|
||||||
* Currently we only use major hash numer. This is unfortunate, but
|
* Upper layer (for example NFS) should specify FMODE_32BITHASH or
|
||||||
* on 32-bit machines, the same VFS interface is used for lseek and
|
* FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
|
||||||
* llseek, so if we use the 64 bit offset, then the 32-bit versions of
|
* directly on both 32-bit and 64-bit nodes, under such case, neither
|
||||||
* lseek/telldir/seekdir will blow out spectacularly, and from within
|
* FMODE_32BITHASH nor FMODE_64BITHASH is specified.
|
||||||
* the ext2 low-level routine, we don't know if we're being called by
|
|
||||||
* a 64-bit version of the system call or the 32-bit version of the
|
|
||||||
* system call. Worse yet, NFSv2 only allows for a 32-bit readdir
|
|
||||||
* cookie. Sigh.
|
|
||||||
*/
|
*/
|
||||||
#define hash2pos(major, minor) (major >> 1)
|
/*
 * Encode a (major, minor) hash pair as a directory f_pos value.
 *
 * When FMODE_32BITHASH is set on the file, or when FMODE_64BITHASH is not
 * set and is_32bit_api() is true, only the major hash shifted right by one
 * bit is returned.  Otherwise the shifted major hash is placed in the upper
 * 32 bits and the minor hash in the lower 32 bits of the result.
 *
 * The "#define pos2maj_hash"/"#define pos2min_hash" rows interleaved below
 * are the removed side of this diff and are not part of this function.
 */
static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
|
||||||
#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
|
{
|
||||||
#define pos2min_hash(pos) (0)
|
if ((filp->f_mode & FMODE_32BITHASH) ||
|
||||||
|
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
|
||||||
|
return major >> 1;
|
||||||
|
else
|
||||||
|
return ((__u64)(major >> 1) << 32) | (__u64)minor;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Recover the major hash from a directory position.
 *
 * In 32-bit mode (FMODE_32BITHASH set, or FMODE_64BITHASH clear and
 * is_32bit_api() true) the position itself holds the shifted major hash;
 * otherwise the shifted major hash lives in the upper 32 bits.  Either way
 * the value is shifted left by one bit and truncated to 32 bits.
 */
static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
{
	int narrow = (filp->f_mode & FMODE_32BITHASH) ||
		     (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api());

	/* 64-bit layout: bring the upper half down before decoding. */
	if (!narrow)
		pos >>= 32;

	return (pos << 1) & 0xffffffff;
}
|
||||||
|
|
||||||
|
/*
 * Recover the minor hash from a directory position.
 *
 * 32-bit positions carry no minor hash, so 0 is returned when
 * FMODE_32BITHASH is set, or when FMODE_64BITHASH is clear and
 * is_32bit_api() is true.  Otherwise the minor hash is the low 32 bits.
 */
static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
{
	if (filp->f_mode & FMODE_32BITHASH)
		return 0;

	if (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())
		return 0;

	return pos & 0xffffffff;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return 32- or 64-bit end-of-file for dx directories
|
||||||
|
*/
|
||||||
|
static inline loff_t ext4_get_htree_eof(struct file *filp)
|
||||||
|
{
|
||||||
|
if ((filp->f_mode & FMODE_32BITHASH) ||
|
||||||
|
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
|
||||||
|
return EXT4_HTREE_EOF_32BIT;
|
||||||
|
else
|
||||||
|
return EXT4_HTREE_EOF_64BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ext4_dir_llseek() based on generic_file_llseek() to handle both
|
||||||
|
* non-htree and htree directories, where the "offset" is in terms
|
||||||
|
* of the filename hash value instead of the byte offset.
|
||||||
|
*
|
||||||
|
* NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX)
|
||||||
|
* will be invalid once the directory was converted into a dx directory
|
||||||
|
*/
|
||||||
|
loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
|
||||||
|
{
|
||||||
|
struct inode *inode = file->f_mapping->host;
|
||||||
|
loff_t ret = -EINVAL;
|
||||||
|
int dx_dir = is_dx_dir(inode);
|
||||||
|
|
||||||
|
mutex_lock(&inode->i_mutex);
|
||||||
|
|
||||||
|
/* NOTE: relative offsets with dx directories might not work
|
||||||
|
* as expected, as it is difficult to figure out the
|
||||||
|
* correct offset between dx hashes */
|
||||||
|
|
||||||
|
switch (origin) {
|
||||||
|
case SEEK_END:
|
||||||
|
if (unlikely(offset > 0))
|
||||||
|
goto out_err; /* not supported for directories */
|
||||||
|
|
||||||
|
/* so only negative offsets are left, does that have a
|
||||||
|
* meaning for directories at all? */
|
||||||
|
if (dx_dir)
|
||||||
|
offset += ext4_get_htree_eof(file);
|
||||||
|
else
|
||||||
|
offset += inode->i_size;
|
||||||
|
break;
|
||||||
|
case SEEK_CUR:
|
||||||
|
/*
|
||||||
|
* Here we special-case the lseek(fd, 0, SEEK_CUR)
|
||||||
|
* position-querying operation. Avoid rewriting the "same"
|
||||||
|
* f_pos value back to the file because a concurrent read(),
|
||||||
|
* write() or lseek() might have altered it
|
||||||
|
*/
|
||||||
|
if (offset == 0) {
|
||||||
|
offset = file->f_pos;
|
||||||
|
goto out_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset += file->f_pos;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(offset < 0))
|
||||||
|
goto out_err;
|
||||||
|
|
||||||
|
if (!dx_dir) {
|
||||||
|
if (offset > inode->i_sb->s_maxbytes)
|
||||||
|
goto out_err;
|
||||||
|
} else if (offset > ext4_get_htree_eof(file))
|
||||||
|
goto out_err;
|
||||||
|
|
||||||
|
/* Special lock needed here? */
|
||||||
|
if (offset != file->f_pos) {
|
||||||
|
file->f_pos = offset;
|
||||||
|
file->f_version = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
out_ok:
|
||||||
|
ret = offset;
|
||||||
|
out_err:
|
||||||
|
mutex_unlock(&inode->i_mutex);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This structure holds the nodes of the red-black tree used to store
|
* This structure holds the nodes of the red-black tree used to store
|
||||||
|
@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * Allocate a zero-filled dir_private_info (kzalloc, GFP_KERNEL) and set
 * its current major/minor hash from the given position.  Returns the new
 * structure, or NULL if the allocation fails.
 *
 * Each row below shows the removed line followed by the added line of this
 * diff hunk: the added version takes the struct file as well, so the
 * position is decoded via pos2maj_hash(filp, pos) / pos2min_hash(filp, pos).
 */
static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos)
|
static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
|
||||||
|
loff_t pos)
|
||||||
{
|
{
|
||||||
struct dir_private_info *p;
|
struct dir_private_info *p;
|
||||||
|
|
||||||
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
|
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
|
||||||
if (!p)
|
if (!p)
|
||||||
return NULL;
|
return NULL;
|
||||||
p->curr_hash = pos2maj_hash(pos);
|
p->curr_hash = pos2maj_hash(filp, pos);
|
||||||
p->curr_minor_hash = pos2min_hash(pos);
|
p->curr_minor_hash = pos2min_hash(filp, pos);
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,7 +541,7 @@ static int call_filldir(struct file *filp, void *dirent,
|
||||||
"null fname?!?\n");
|
"null fname?!?\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
curr_pos = hash2pos(fname->hash, fname->minor_hash);
|
curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
|
||||||
while (fname) {
|
while (fname) {
|
||||||
error = filldir(dirent, fname->name,
|
error = filldir(dirent, fname->name,
|
||||||
fname->name_len, curr_pos,
|
fname->name_len, curr_pos,
|
||||||
|
@ -454,13 +566,13 @@ static int ext4_dx_readdir(struct file *filp,
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!info) {
|
if (!info) {
|
||||||
info = ext4_htree_create_dir_info(filp->f_pos);
|
info = ext4_htree_create_dir_info(filp, filp->f_pos);
|
||||||
if (!info)
|
if (!info)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
filp->private_data = info;
|
filp->private_data = info;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (filp->f_pos == EXT4_HTREE_EOF)
|
if (filp->f_pos == ext4_get_htree_eof(filp))
|
||||||
return 0; /* EOF */
|
return 0; /* EOF */
|
||||||
|
|
||||||
/* Some one has messed with f_pos; reset the world */
|
/* Some one has messed with f_pos; reset the world */
|
||||||
|
@ -468,8 +580,8 @@ static int ext4_dx_readdir(struct file *filp,
|
||||||
free_rb_tree_fname(&info->root);
|
free_rb_tree_fname(&info->root);
|
||||||
info->curr_node = NULL;
|
info->curr_node = NULL;
|
||||||
info->extra_fname = NULL;
|
info->extra_fname = NULL;
|
||||||
info->curr_hash = pos2maj_hash(filp->f_pos);
|
info->curr_hash = pos2maj_hash(filp, filp->f_pos);
|
||||||
info->curr_minor_hash = pos2min_hash(filp->f_pos);
|
info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -501,7 +613,7 @@ static int ext4_dx_readdir(struct file *filp,
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
filp->f_pos = EXT4_HTREE_EOF;
|
filp->f_pos = ext4_get_htree_eof(filp);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
info->curr_node = rb_first(&info->root);
|
info->curr_node = rb_first(&info->root);
|
||||||
|
@ -521,7 +633,7 @@ static int ext4_dx_readdir(struct file *filp,
|
||||||
info->curr_minor_hash = fname->minor_hash;
|
info->curr_minor_hash = fname->minor_hash;
|
||||||
} else {
|
} else {
|
||||||
if (info->next_hash == ~0) {
|
if (info->next_hash == ~0) {
|
||||||
filp->f_pos = EXT4_HTREE_EOF;
|
filp->f_pos = ext4_get_htree_eof(filp);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
info->curr_hash = info->next_hash;
|
info->curr_hash = info->next_hash;
|
||||||
|
@ -540,3 +652,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const struct file_operations ext4_dir_operations = {
|
||||||
|
.llseek = ext4_dir_llseek,
|
||||||
|
.read = generic_read_dir,
|
||||||
|
.readdir = ext4_readdir,
|
||||||
|
.unlocked_ioctl = ext4_ioctl,
|
||||||
|
#ifdef CONFIG_COMPAT
|
||||||
|
.compat_ioctl = ext4_compat_ioctl,
|
||||||
|
#endif
|
||||||
|
.fsync = ext4_sync_file,
|
||||||
|
.release = ext4_release_dir,
|
||||||
|
};
|
||||||
|
|
|
@ -1612,7 +1612,11 @@ struct dx_hash_info
|
||||||
u32 *seed;
|
u32 *seed;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define EXT4_HTREE_EOF 0x7fffffff
|
|
||||||
|
/*
 * 32 and 64 bit signed EOF for dx directories: the largest positive value
 * of a signed 32-bit and of a signed 64-bit offset, respectively.
 */
#define EXT4_HTREE_EOF_32BIT	0x7fffffffUL
#define EXT4_HTREE_EOF_64BIT	0x7fffffffffffffffULL
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Control parameters used by ext4_htree_next_block
|
* Control parameters used by ext4_htree_next_block
|
||||||
|
|
|
@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
hash = hash & ~1;
|
hash = hash & ~1;
|
||||||
if (hash == (EXT4_HTREE_EOF << 1))
|
if (hash == (EXT4_HTREE_EOF_32BIT << 1))
|
||||||
hash = (EXT4_HTREE_EOF-1) << 1;
|
hash = (EXT4_HTREE_EOF_32BIT - 1) << 1;
|
||||||
hinfo->hash = hash;
|
hinfo->hash = hash;
|
||||||
hinfo->minor_hash = minor_hash;
|
hinfo->minor_hash = minor_hash;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in a new issue