[PATCH] Define struct pspace
Define a per-container pid space object, and create one instance of this object, init_pspace, to define the entire pid space. Subsequent patches will provide/use interfaces to create/destroy pid spaces. It's a subset/rework of Eric Biederman's patch http://lkml.org/lkml/2006/2/6/285 . Signed-off-by: Eric Biederman <ebiederm@xmission.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Serge Hallyn <serue@us.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Kirill Korotaev <dev@sw.ru> Cc: Andrey Savochkin <saw@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
aa5a6662f9
commit
3fbc964864
4 changed files with 40 additions and 24 deletions
|
@ -45,6 +45,7 @@
|
||||||
#include <linux/sysrq.h>
|
#include <linux/sysrq.h>
|
||||||
#include <linux/vmalloc.h>
|
#include <linux/vmalloc.h>
|
||||||
#include <linux/crash_dump.h>
|
#include <linux/crash_dump.h>
|
||||||
|
#include <linux/pspace.h>
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
#include <asm/pgtable.h>
|
#include <asm/pgtable.h>
|
||||||
#include <asm/io.h>
|
#include <asm/io.h>
|
||||||
|
@ -91,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
|
||||||
LOAD_INT(a), LOAD_FRAC(a),
|
LOAD_INT(a), LOAD_FRAC(a),
|
||||||
LOAD_INT(b), LOAD_FRAC(b),
|
LOAD_INT(b), LOAD_FRAC(b),
|
||||||
LOAD_INT(c), LOAD_FRAC(c),
|
LOAD_INT(c), LOAD_FRAC(c),
|
||||||
nr_running(), nr_threads, last_pid);
|
nr_running(), nr_threads, init_pspace.last_pid);
|
||||||
return proc_calc_metrics(page, start, off, count, eof, len);
|
return proc_calc_metrics(page, start, off, count, eof, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,4 +13,11 @@ struct pidmap {
|
||||||
|
|
||||||
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
|
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
|
||||||
|
|
||||||
|
struct pspace {
|
||||||
|
struct pidmap pidmap[PIDMAP_ENTRIES];
|
||||||
|
int last_pid;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern struct pspace init_pspace;
|
||||||
|
|
||||||
#endif /* _LINUX_PSPACE_H */
|
#endif /* _LINUX_PSPACE_H */
|
||||||
|
|
|
@ -118,7 +118,6 @@ extern unsigned long avenrun[]; /* Load averages */
|
||||||
|
|
||||||
extern unsigned long total_forks;
|
extern unsigned long total_forks;
|
||||||
extern int nr_threads;
|
extern int nr_threads;
|
||||||
extern int last_pid;
|
|
||||||
DECLARE_PER_CPU(unsigned long, process_counts);
|
DECLARE_PER_CPU(unsigned long, process_counts);
|
||||||
extern int nr_processes(void);
|
extern int nr_processes(void);
|
||||||
extern unsigned long nr_running(void);
|
extern unsigned long nr_running(void);
|
||||||
|
|
53
kernel/pid.c
53
kernel/pid.c
|
@ -34,7 +34,6 @@ static int pidhash_shift;
|
||||||
static kmem_cache_t *pid_cachep;
|
static kmem_cache_t *pid_cachep;
|
||||||
|
|
||||||
int pid_max = PID_MAX_DEFAULT;
|
int pid_max = PID_MAX_DEFAULT;
|
||||||
int last_pid;
|
|
||||||
|
|
||||||
#define RESERVED_PIDS 300
|
#define RESERVED_PIDS 300
|
||||||
|
|
||||||
|
@ -43,7 +42,12 @@ int pid_max_max = PID_MAX_LIMIT;
|
||||||
|
|
||||||
#define BITS_PER_PAGE (PAGE_SIZE*8)
|
#define BITS_PER_PAGE (PAGE_SIZE*8)
|
||||||
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
|
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
|
||||||
#define mk_pid(map, off) (((map) - pidmap_array)*BITS_PER_PAGE + (off))
|
|
||||||
|
static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
|
||||||
|
{
|
||||||
|
return (map - pspace->pidmap)*BITS_PER_PAGE + off;
|
||||||
|
}
|
||||||
|
|
||||||
#define find_next_offset(map, off) \
|
#define find_next_offset(map, off) \
|
||||||
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
|
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
|
||||||
|
|
||||||
|
@ -53,8 +57,12 @@ int pid_max_max = PID_MAX_LIMIT;
|
||||||
* value does not cause lots of bitmaps to be allocated, but
|
* value does not cause lots of bitmaps to be allocated, but
|
||||||
* the scheme scales to up to 4 million PIDs, runtime.
|
* the scheme scales to up to 4 million PIDs, runtime.
|
||||||
*/
|
*/
|
||||||
static struct pidmap pidmap_array[PIDMAP_ENTRIES] =
|
struct pspace init_pspace = {
|
||||||
{ [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
|
.pidmap = {
|
||||||
|
[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
|
||||||
|
},
|
||||||
|
.last_pid = 0
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note: disable interrupts while the pidmap_lock is held as an
|
* Note: disable interrupts while the pidmap_lock is held as an
|
||||||
|
@ -69,40 +77,41 @@ static struct pidmap pidmap_array[PIDMAP_ENTRIES] =
|
||||||
* irq handlers that take it we can leave the interrupts enabled.
|
* irq handlers that take it we can leave the interrupts enabled.
|
||||||
* For now it is easier to be safe than to prove it can't happen.
|
* For now it is easier to be safe than to prove it can't happen.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
|
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
|
||||||
|
|
||||||
static fastcall void free_pidmap(int pid)
|
static fastcall void free_pidmap(struct pspace *pspace, int pid)
|
||||||
{
|
{
|
||||||
struct pidmap *map = pidmap_array + pid / BITS_PER_PAGE;
|
struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE;
|
||||||
int offset = pid & BITS_PER_PAGE_MASK;
|
int offset = pid & BITS_PER_PAGE_MASK;
|
||||||
|
|
||||||
clear_bit(offset, map->page);
|
clear_bit(offset, map->page);
|
||||||
atomic_inc(&map->nr_free);
|
atomic_inc(&map->nr_free);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int alloc_pidmap(void)
|
static int alloc_pidmap(struct pspace *pspace)
|
||||||
{
|
{
|
||||||
int i, offset, max_scan, pid, last = last_pid;
|
int i, offset, max_scan, pid, last = pspace->last_pid;
|
||||||
struct pidmap *map;
|
struct pidmap *map;
|
||||||
|
|
||||||
pid = last + 1;
|
pid = last + 1;
|
||||||
if (pid >= pid_max)
|
if (pid >= pid_max)
|
||||||
pid = RESERVED_PIDS;
|
pid = RESERVED_PIDS;
|
||||||
offset = pid & BITS_PER_PAGE_MASK;
|
offset = pid & BITS_PER_PAGE_MASK;
|
||||||
map = &pidmap_array[pid/BITS_PER_PAGE];
|
map = &pspace->pidmap[pid/BITS_PER_PAGE];
|
||||||
max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
|
max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
|
||||||
for (i = 0; i <= max_scan; ++i) {
|
for (i = 0; i <= max_scan; ++i) {
|
||||||
if (unlikely(!map->page)) {
|
if (unlikely(!map->page)) {
|
||||||
unsigned long page = get_zeroed_page(GFP_KERNEL);
|
void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||||
/*
|
/*
|
||||||
* Free the page if someone raced with us
|
* Free the page if someone raced with us
|
||||||
* installing it:
|
* installing it:
|
||||||
*/
|
*/
|
||||||
spin_lock_irq(&pidmap_lock);
|
spin_lock_irq(&pidmap_lock);
|
||||||
if (map->page)
|
if (map->page)
|
||||||
free_page(page);
|
kfree(page);
|
||||||
else
|
else
|
||||||
map->page = (void *)page;
|
map->page = page;
|
||||||
spin_unlock_irq(&pidmap_lock);
|
spin_unlock_irq(&pidmap_lock);
|
||||||
if (unlikely(!map->page))
|
if (unlikely(!map->page))
|
||||||
break;
|
break;
|
||||||
|
@ -111,11 +120,11 @@ static int alloc_pidmap(void)
|
||||||
do {
|
do {
|
||||||
if (!test_and_set_bit(offset, map->page)) {
|
if (!test_and_set_bit(offset, map->page)) {
|
||||||
atomic_dec(&map->nr_free);
|
atomic_dec(&map->nr_free);
|
||||||
last_pid = pid;
|
pspace->last_pid = pid;
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
offset = find_next_offset(map, offset);
|
offset = find_next_offset(map, offset);
|
||||||
pid = mk_pid(map, offset);
|
pid = mk_pid(pspace, map, offset);
|
||||||
/*
|
/*
|
||||||
* find_next_offset() found a bit, the pid from it
|
* find_next_offset() found a bit, the pid from it
|
||||||
* is in-bounds, and if we fell back to the last
|
* is in-bounds, and if we fell back to the last
|
||||||
|
@ -126,16 +135,16 @@ static int alloc_pidmap(void)
|
||||||
(i != max_scan || pid < last ||
|
(i != max_scan || pid < last ||
|
||||||
!((last+1) & BITS_PER_PAGE_MASK)));
|
!((last+1) & BITS_PER_PAGE_MASK)));
|
||||||
}
|
}
|
||||||
if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
|
if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
|
||||||
++map;
|
++map;
|
||||||
offset = 0;
|
offset = 0;
|
||||||
} else {
|
} else {
|
||||||
map = &pidmap_array[0];
|
map = &pspace->pidmap[0];
|
||||||
offset = RESERVED_PIDS;
|
offset = RESERVED_PIDS;
|
||||||
if (unlikely(last == offset))
|
if (unlikely(last == offset))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
pid = mk_pid(map, offset);
|
pid = mk_pid(pspace, map, offset);
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -182,7 +191,7 @@ fastcall void free_pid(struct pid *pid)
|
||||||
hlist_del_rcu(&pid->pid_chain);
|
hlist_del_rcu(&pid->pid_chain);
|
||||||
spin_unlock_irqrestore(&pidmap_lock, flags);
|
spin_unlock_irqrestore(&pidmap_lock, flags);
|
||||||
|
|
||||||
free_pidmap(pid->nr);
|
free_pidmap(&init_pspace, pid->nr);
|
||||||
call_rcu(&pid->rcu, delayed_put_pid);
|
call_rcu(&pid->rcu, delayed_put_pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,7 +205,7 @@ struct pid *alloc_pid(void)
|
||||||
if (!pid)
|
if (!pid)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
nr = alloc_pidmap();
|
nr = alloc_pidmap(&init_pspace);
|
||||||
if (nr < 0)
|
if (nr < 0)
|
||||||
goto out_free;
|
goto out_free;
|
||||||
|
|
||||||
|
@ -363,10 +372,10 @@ void __init pidhash_init(void)
|
||||||
|
|
||||||
void __init pidmap_init(void)
|
void __init pidmap_init(void)
|
||||||
{
|
{
|
||||||
pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
|
init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
|
||||||
/* Reserve PID 0. We never call free_pidmap(0) */
|
/* Reserve PID 0. We never call free_pidmap(0) */
|
||||||
set_bit(0, pidmap_array->page);
|
set_bit(0, init_pspace.pidmap[0].page);
|
||||||
atomic_dec(&pidmap_array->nr_free);
|
atomic_dec(&init_pspace.pidmap[0].nr_free);
|
||||||
|
|
||||||
pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
|
pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
|
||||||
__alignof__(struct pid),
|
__alignof__(struct pid),
|
||||||
|
|
Loading…
Reference in a new issue