fs: allow for more than 2^31 files
Andrew, could you please review this patch? You are probably the right guy to take it, because it crosses the fs and net trees. Note: /proc/sys/fs/file-nr is a read-only file, so this patch doesn't depend on the previous patch (sysctl: fix min/max handling in __do_proc_doulongvec_minmax()). Thanks! [PATCH V4] fs: allow for more than 2^31 files Robin Holt tried to boot a 16TB system and found af_unix was overflowing a 32-bit value: <quote> We were seeing a failure which prevented boot. The kernel was incapable of creating either a named pipe or unix domain socket. This comes down to a common kernel function called unix_create1() which does: atomic_inc(&unix_nr_socks); if (atomic_read(&unix_nr_socks) > 2 * get_max_files()) goto out; The function get_max_files() is a simple return of files_stat.max_files. files_stat.max_files is a signed integer and is computed in fs/file_table.c's files_init(). n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = n; In our case, mempages (total_ram_pages) is approx 3,758,096,384 (0xe0000000). That leaves max_files at approximately 1,503,238,553. This causes 2 * get_max_files() to integer overflow. </quote> The fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long integers, and change af_unix to use an atomic_long_t instead of atomic_t. get_max_files() is changed to return an unsigned long. get_nr_files() is changed to return a long. unix_nr_socks is changed from atomic_t to atomic_long_t, although this is not strictly needed to address Robin's problem.
Before patch (on a 64-bit kernel): # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max -18446744071562067968 After patch: # echo 2147483648 >/proc/sys/fs/file-max # cat /proc/sys/fs/file-max 2147483648 # cat /proc/sys/fs/file-nr 704 0 2147483648 Reported-by: Robin Holt <holt@sgi.com> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Acked-by: David Miller <davem@davemloft.net> Reviewed-by: Robin Holt <holt@sgi.com> Tested-by: Robin Holt <holt@sgi.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
fde214d414
commit
7e360c38ab
4 changed files with 21 additions and 24 deletions
|
@ -60,7 +60,7 @@ static inline void file_free(struct file *f)
|
||||||
/*
|
/*
|
||||||
* Return the total number of open files in the system
|
* Return the total number of open files in the system
|
||||||
*/
|
*/
|
||||||
static int get_nr_files(void)
|
static long get_nr_files(void)
|
||||||
{
|
{
|
||||||
return percpu_counter_read_positive(&nr_files);
|
return percpu_counter_read_positive(&nr_files);
|
||||||
}
|
}
|
||||||
|
@ -68,7 +68,7 @@ static int get_nr_files(void)
|
||||||
/*
|
/*
|
||||||
* Return the maximum number of open files in the system
|
* Return the maximum number of open files in the system
|
||||||
*/
|
*/
|
||||||
int get_max_files(void)
|
unsigned long get_max_files(void)
|
||||||
{
|
{
|
||||||
return files_stat.max_files;
|
return files_stat.max_files;
|
||||||
}
|
}
|
||||||
|
@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write,
|
||||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||||
{
|
{
|
||||||
files_stat.nr_files = get_nr_files();
|
files_stat.nr_files = get_nr_files();
|
||||||
return proc_dointvec(table, write, buffer, lenp, ppos);
|
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
int proc_nr_files(ctl_table *table, int write,
|
int proc_nr_files(ctl_table *table, int write,
|
||||||
|
@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write,
|
||||||
struct file *get_empty_filp(void)
|
struct file *get_empty_filp(void)
|
||||||
{
|
{
|
||||||
const struct cred *cred = current_cred();
|
const struct cred *cred = current_cred();
|
||||||
static int old_max;
|
static long old_max;
|
||||||
struct file * f;
|
struct file * f;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -140,8 +140,7 @@ struct file *get_empty_filp(void)
|
||||||
over:
|
over:
|
||||||
/* Ran out of filps - report that */
|
/* Ran out of filps - report that */
|
||||||
if (get_nr_files() > old_max) {
|
if (get_nr_files() > old_max) {
|
||||||
printk(KERN_INFO "VFS: file-max limit %d reached\n",
|
pr_info("VFS: file-max limit %lu reached\n", get_max_files());
|
||||||
get_max_files());
|
|
||||||
old_max = get_nr_files();
|
old_max = get_nr_files();
|
||||||
}
|
}
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -487,7 +486,7 @@ void mark_files_ro(struct super_block *sb)
|
||||||
|
|
||||||
void __init files_init(unsigned long mempages)
|
void __init files_init(unsigned long mempages)
|
||||||
{
|
{
|
||||||
int n;
|
unsigned long n;
|
||||||
|
|
||||||
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
|
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
|
||||||
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
|
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
|
||||||
|
@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
n = (mempages * (PAGE_SIZE / 1024)) / 10;
|
n = (mempages * (PAGE_SIZE / 1024)) / 10;
|
||||||
files_stat.max_files = n;
|
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
|
||||||
if (files_stat.max_files < NR_FILE)
|
|
||||||
files_stat.max_files = NR_FILE;
|
|
||||||
files_defer_init();
|
files_defer_init();
|
||||||
lg_lock_init(files_lglock);
|
lg_lock_init(files_lglock);
|
||||||
percpu_counter_init(&nr_files, 0);
|
percpu_counter_init(&nr_files, 0);
|
||||||
|
|
|
@ -34,9 +34,9 @@
|
||||||
|
|
||||||
/* And dynamically-tunable limits and defaults: */
|
/* And dynamically-tunable limits and defaults: */
|
||||||
struct files_stat_struct {
|
struct files_stat_struct {
|
||||||
int nr_files; /* read only */
|
unsigned long nr_files; /* read only */
|
||||||
int nr_free_files; /* read only */
|
unsigned long nr_free_files; /* read only */
|
||||||
int max_files; /* tunable */
|
unsigned long max_files; /* tunable */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct inodes_stat_t {
|
struct inodes_stat_t {
|
||||||
|
@ -400,7 +400,7 @@ extern void __init inode_init_early(void);
|
||||||
extern void __init files_init(unsigned long);
|
extern void __init files_init(unsigned long);
|
||||||
|
|
||||||
extern struct files_stat_struct files_stat;
|
extern struct files_stat_struct files_stat;
|
||||||
extern int get_max_files(void);
|
extern unsigned long get_max_files(void);
|
||||||
extern int sysctl_nr_open;
|
extern int sysctl_nr_open;
|
||||||
extern struct inodes_stat_t inodes_stat;
|
extern struct inodes_stat_t inodes_stat;
|
||||||
extern int leases_enable, lease_break_time;
|
extern int leases_enable, lease_break_time;
|
||||||
|
|
|
@ -1352,16 +1352,16 @@ static struct ctl_table fs_table[] = {
|
||||||
{
|
{
|
||||||
.procname = "file-nr",
|
.procname = "file-nr",
|
||||||
.data = &files_stat,
|
.data = &files_stat,
|
||||||
.maxlen = 3*sizeof(int),
|
.maxlen = sizeof(files_stat),
|
||||||
.mode = 0444,
|
.mode = 0444,
|
||||||
.proc_handler = proc_nr_files,
|
.proc_handler = proc_nr_files,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.procname = "file-max",
|
.procname = "file-max",
|
||||||
.data = &files_stat.max_files,
|
.data = &files_stat.max_files,
|
||||||
.maxlen = sizeof(int),
|
.maxlen = sizeof(files_stat.max_files),
|
||||||
.mode = 0644,
|
.mode = 0644,
|
||||||
.proc_handler = proc_dointvec,
|
.proc_handler = proc_doulongvec_minmax,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
.procname = "nr_open",
|
.procname = "nr_open",
|
||||||
|
|
|
@ -117,7 +117,7 @@
|
||||||
|
|
||||||
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
|
static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
|
||||||
static DEFINE_SPINLOCK(unix_table_lock);
|
static DEFINE_SPINLOCK(unix_table_lock);
|
||||||
static atomic_t unix_nr_socks = ATOMIC_INIT(0);
|
static atomic_long_t unix_nr_socks;
|
||||||
|
|
||||||
#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
|
#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
|
||||||
|
|
||||||
|
@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
|
||||||
if (u->addr)
|
if (u->addr)
|
||||||
unix_release_addr(u->addr);
|
unix_release_addr(u->addr);
|
||||||
|
|
||||||
atomic_dec(&unix_nr_socks);
|
atomic_long_dec(&unix_nr_socks);
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
||||||
local_bh_enable();
|
local_bh_enable();
|
||||||
#ifdef UNIX_REFCNT_DEBUG
|
#ifdef UNIX_REFCNT_DEBUG
|
||||||
printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
|
printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
|
||||||
atomic_read(&unix_nr_socks));
|
atomic_long_read(&unix_nr_socks));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
|
||||||
struct sock *sk = NULL;
|
struct sock *sk = NULL;
|
||||||
struct unix_sock *u;
|
struct unix_sock *u;
|
||||||
|
|
||||||
atomic_inc(&unix_nr_socks);
|
atomic_long_inc(&unix_nr_socks);
|
||||||
if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
|
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
|
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
|
||||||
|
@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
|
||||||
unix_insert_socket(unix_sockets_unbound, sk);
|
unix_insert_socket(unix_sockets_unbound, sk);
|
||||||
out:
|
out:
|
||||||
if (sk == NULL)
|
if (sk == NULL)
|
||||||
atomic_dec(&unix_nr_socks);
|
atomic_long_dec(&unix_nr_socks);
|
||||||
else {
|
else {
|
||||||
local_bh_disable();
|
local_bh_disable();
|
||||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
|
||||||
|
|
Loading…
Reference in a new issue