Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slub: Dont define useless label in the !CONFIG_CMPXCHG_LOCAL case
  slab,rcu: don't assume the size of struct rcu_head
  slub,rcu: don't assume the size of struct rcu_head
  slub: automatically reserve bytes at the end of slab
  Lockless (and preemptless) fastpaths for slub
  slub: Get rid of slab_free_hook_irq()
  slub: min_partial needs to be in first cacheline
  slub: fix ksize() build error
  slub: fix kmemcheck calls to match ksize() hints
  Revert "slab: Fix missing DEBUG_SLAB last user"
  mm: Remove support for kmem_cache_name()
This commit is contained in:
Linus Torvalds 2011-03-22 09:36:23 -07:00
commit 14577beb82
5 changed files with 333 additions and 103 deletions

View file

@ -105,7 +105,6 @@ void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *);
void kmem_cache_free(struct kmem_cache *, void *); void kmem_cache_free(struct kmem_cache *, void *);
unsigned int kmem_cache_size(struct kmem_cache *); unsigned int kmem_cache_size(struct kmem_cache *);
const char *kmem_cache_name(struct kmem_cache *);
/* /*
* Please use this macro to create slab caches. Simply specify the * Please use this macro to create slab caches. Simply specify the

View file

@ -35,7 +35,10 @@ enum stat_item {
NR_SLUB_STAT_ITEMS }; NR_SLUB_STAT_ITEMS };
struct kmem_cache_cpu { struct kmem_cache_cpu {
void **freelist; /* Pointer to first free per cpu object */ void **freelist; /* Pointer to next available object */
#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long tid; /* Globally unique transaction id */
#endif
struct page *page; /* The slab from which we are allocating */ struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */ int node; /* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS #ifdef CONFIG_SLUB_STATS
@ -70,6 +73,7 @@ struct kmem_cache {
struct kmem_cache_cpu __percpu *cpu_slab; struct kmem_cache_cpu __percpu *cpu_slab;
/* Used for retrieving partial slabs etc */ /* Used for retrieving partial slabs etc */
unsigned long flags; unsigned long flags;
unsigned long min_partial;
int size; /* The size of an object including meta data */ int size; /* The size of an object including meta data */
int objsize; /* The size of an object without meta data */ int objsize; /* The size of an object without meta data */
int offset; /* Free pointer offset. */ int offset; /* Free pointer offset. */
@ -83,7 +87,7 @@ struct kmem_cache {
void (*ctor)(void *); void (*ctor)(void *);
int inuse; /* Offset to metadata */ int inuse; /* Offset to metadata */
int align; /* Alignment */ int align; /* Alignment */
unsigned long min_partial; int reserved; /* Reserved bytes at the end of slabs */
const char *name; /* Name (only for display!) */ const char *name; /* Name (only for display!) */
struct list_head list; /* List of slab caches */ struct list_head list; /* List of slab caches */
#ifdef CONFIG_SYSFS #ifdef CONFIG_SYSFS

View file

@ -190,22 +190,6 @@ typedef unsigned int kmem_bufctl_t;
#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
/*
* struct slab
*
* Manages the objs in a slab. Placed either at the beginning of mem allocated
* for a slab, or allocated from an general cache.
* Slabs are chained into three list: fully used, partial, fully free slabs.
*/
struct slab {
struct list_head list;
unsigned long colouroff;
void *s_mem; /* including colour offset */
unsigned int inuse; /* num of objs active in slab */
kmem_bufctl_t free;
unsigned short nodeid;
};
/* /*
* struct slab_rcu * struct slab_rcu
* *
@ -219,8 +203,6 @@ struct slab {
* *
* rcu_read_lock before reading the address, then rcu_read_unlock after * rcu_read_lock before reading the address, then rcu_read_unlock after
* taking the spinlock within the structure expected at that address. * taking the spinlock within the structure expected at that address.
*
* We assume struct slab_rcu can overlay struct slab when destroying.
*/ */
struct slab_rcu { struct slab_rcu {
struct rcu_head head; struct rcu_head head;
@ -228,6 +210,27 @@ struct slab_rcu {
void *addr; void *addr;
}; };
/*
 * struct slab
 *
 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 * for a slab, or allocated from a general cache.
 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 *
 * The bookkeeping fields sit in an anonymous struct that shares a union with
 * a struct slab_rcu. Because of the union, sizeof(struct slab) is never
 * smaller than sizeof(struct slab_rcu), whatever size struct rcu_head has.
 */
struct slab {
union {
/* Regular bookkeeping view of the descriptor. */
struct {
struct list_head list;
unsigned long colouroff;
void *s_mem; /* including colour offset */
unsigned int inuse; /* num of objs active in slab */
kmem_bufctl_t free;
unsigned short nodeid;
};
/*
 * NOTE(review): appears to exist only so the union covers a full
 * struct slab_rcu; no access by this name is visible here - confirm.
 */
struct slab_rcu __slab_cover_slab_rcu;
};
};
/* /*
* struct array_cache * struct array_cache
* *
@ -2147,8 +2150,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
* *
* @name must be valid until the cache is destroyed. This implies that * @name must be valid until the cache is destroyed. This implies that
* the module calling this has to destroy the cache before getting unloaded. * the module calling this has to destroy the cache before getting unloaded.
* Note that kmem_cache_name() is not guaranteed to return the same pointer,
* therefore applications must manage it themselves.
* *
* The flags are * The flags are
* *
@ -2288,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
if (ralign < align) { if (ralign < align) {
ralign = align; ralign = align;
} }
/* disable debug if not aligning with REDZONE_ALIGN */ /* disable debug if necessary */
if (ralign & (__alignof__(unsigned long long) - 1)) if (ralign > __alignof__(unsigned long long))
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/* /*
* 4) Store it. * 4) Store it.
@ -2315,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/ */
if (flags & SLAB_RED_ZONE) { if (flags & SLAB_RED_ZONE) {
/* add space for red zone words */ /* add space for red zone words */
cachep->obj_offset += align; cachep->obj_offset += sizeof(unsigned long long);
size += align + sizeof(unsigned long long); size += 2 * sizeof(unsigned long long);
} }
if (flags & SLAB_STORE_USER) { if (flags & SLAB_STORE_USER) {
/* user store requires one word storage behind the end of /* user store requires one word storage behind the end of
@ -3840,12 +3841,6 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep)
} }
EXPORT_SYMBOL(kmem_cache_size); EXPORT_SYMBOL(kmem_cache_size);
const char *kmem_cache_name(struct kmem_cache *cachep)
{
return cachep->name;
}
EXPORT_SYMBOL_GPL(kmem_cache_name);
/* /*
* This initializes kmem_list3 or resizes various caches for all nodes. * This initializes kmem_list3 or resizes various caches for all nodes.
*/ */

View file

@ -666,12 +666,6 @@ unsigned int kmem_cache_size(struct kmem_cache *c)
} }
EXPORT_SYMBOL(kmem_cache_size); EXPORT_SYMBOL(kmem_cache_size);
const char *kmem_cache_name(struct kmem_cache *c)
{
return c->name;
}
EXPORT_SYMBOL(kmem_cache_name);
int kmem_cache_shrink(struct kmem_cache *d) int kmem_cache_shrink(struct kmem_cache *d)
{ {
return 0; return 0;

356
mm/slub.c
View file

@ -281,11 +281,40 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
return (p - addr) / s->size; return (p - addr) / s->size;
} }
/*
 * Return how many bytes of an object from cache @s the caller may use.
 *
 * The answer is picked from three candidates, most restrictive first:
 *   - s->objsize when red zoning or poisoning is active (CONFIG_SLUB_DEBUG
 *     only), because debugging uses the padding behind the object;
 *   - s->inuse when the cache is SLAB_DESTROY_BY_RCU or SLAB_STORE_USER,
 *     because the free pointer / user tracking lives behind that offset;
 *   - s->size otherwise, i.e. the object plus all of its padding.
 */
static inline size_t slab_ksize(const struct kmem_cache *s)
{
	/* Default: all the padding etc can be used for the allocation. */
	size_t usable = s->size;

	/* Only the space before the free pointer / user info is usable. */
	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
		usable = s->inuse;

#ifdef CONFIG_SLUB_DEBUG
	/* Checked last so that it overrides the cases above. */
	if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
		usable = s->objsize;
#endif

	return usable;
}
/*
 * Number of objects of @size bytes that fit into a slab of 2^@order pages
 * once @reserved bytes have been set aside.
 */
static inline int order_objects(int order, unsigned long size, int reserved)
{
	unsigned long usable_bytes = (PAGE_SIZE << order) - reserved;

	return usable_bytes / size;
}
static inline struct kmem_cache_order_objects oo_make(int order, static inline struct kmem_cache_order_objects oo_make(int order,
unsigned long size) unsigned long size, int reserved)
{ {
struct kmem_cache_order_objects x = { struct kmem_cache_order_objects x = {
(order << OO_SHIFT) + (PAGE_SIZE << order) / size (order << OO_SHIFT) + order_objects(order, size, reserved)
}; };
return x; return x;
@ -617,7 +646,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
return 1; return 1;
start = page_address(page); start = page_address(page);
length = (PAGE_SIZE << compound_order(page)); length = (PAGE_SIZE << compound_order(page)) - s->reserved;
end = start + length; end = start + length;
remainder = length % s->size; remainder = length % s->size;
if (!remainder) if (!remainder)
@ -698,7 +727,7 @@ static int check_slab(struct kmem_cache *s, struct page *page)
return 0; return 0;
} }
maxobj = (PAGE_SIZE << compound_order(page)) / s->size; maxobj = order_objects(compound_order(page), s->size, s->reserved);
if (page->objects > maxobj) { if (page->objects > maxobj) {
slab_err(s, page, "objects %u > max %u", slab_err(s, page, "objects %u > max %u",
s->name, page->objects, maxobj); s->name, page->objects, maxobj);
@ -748,7 +777,7 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
nr++; nr++;
} }
max_objects = (PAGE_SIZE << compound_order(page)) / s->size; max_objects = order_objects(compound_order(page), s->size, s->reserved);
if (max_objects > MAX_OBJS_PER_PAGE) if (max_objects > MAX_OBJS_PER_PAGE)
max_objects = MAX_OBJS_PER_PAGE; max_objects = MAX_OBJS_PER_PAGE;
@ -800,21 +829,31 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object) static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
{ {
flags &= gfp_allowed_mask; flags &= gfp_allowed_mask;
kmemcheck_slab_alloc(s, flags, object, s->objsize); kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags); kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, flags);
} }
static inline void slab_free_hook(struct kmem_cache *s, void *x) static inline void slab_free_hook(struct kmem_cache *s, void *x)
{ {
kmemleak_free_recursive(x, s->flags); kmemleak_free_recursive(x, s->flags);
}
static inline void slab_free_hook_irq(struct kmem_cache *s, void *object) /*
{ * Trouble is that we may no longer disable interrupts in the fast path
kmemcheck_slab_free(s, object, s->objsize); * So in order to make the debug calls that expect irqs to be
debug_check_no_locks_freed(object, s->objsize); * disabled we need to disable interrupts temporarily.
*/
#if defined(CONFIG_KMEMCHECK) || defined(CONFIG_LOCKDEP)
{
unsigned long flags;
local_irq_save(flags);
kmemcheck_slab_free(s, x, s->objsize);
debug_check_no_locks_freed(x, s->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS)) if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(object, s->objsize); debug_check_no_obj_freed(x, s->objsize);
local_irq_restore(flags);
}
#endif
} }
/* /*
@ -1101,9 +1140,6 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
static inline void slab_free_hook(struct kmem_cache *s, void *x) {} static inline void slab_free_hook(struct kmem_cache *s, void *x) {}
static inline void slab_free_hook_irq(struct kmem_cache *s,
void *object) {}
#endif /* CONFIG_SLUB_DEBUG */ #endif /* CONFIG_SLUB_DEBUG */
/* /*
@ -1249,21 +1285,38 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_pages(page, order); __free_pages(page, order);
} }
/*
 * True when the lru field of struct page is smaller than a struct rcu_head.
 * Both operands are sizeof expressions, so this is a compile-time constant;
 * NULL is only used to name the field and is never dereferenced.
 */
#define need_reserve_slab_rcu \
(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
static void rcu_free_slab(struct rcu_head *h) static void rcu_free_slab(struct rcu_head *h)
{ {
struct page *page; struct page *page;
if (need_reserve_slab_rcu)
page = virt_to_head_page(h);
else
page = container_of((struct list_head *)h, struct page, lru); page = container_of((struct list_head *)h, struct page, lru);
__free_slab(page->slab, page); __free_slab(page->slab, page);
} }
static void free_slab(struct kmem_cache *s, struct page *page) static void free_slab(struct kmem_cache *s, struct page *page)
{ {
if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
struct rcu_head *head;
if (need_reserve_slab_rcu) {
int order = compound_order(page);
int offset = (PAGE_SIZE << order) - s->reserved;
VM_BUG_ON(s->reserved != sizeof(*head));
head = page_address(page) + offset;
} else {
/* /*
* RCU free overloads the RCU head over the LRU * RCU free overloads the RCU head over the LRU
*/ */
struct rcu_head *head = (void *)&page->lru; head = (void *)&page->lru;
}
call_rcu(head, rcu_free_slab); call_rcu(head, rcu_free_slab);
} else } else
@ -1487,6 +1540,77 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
} }
} }
#ifdef CONFIG_CMPXCHG_LOCAL
#ifdef CONFIG_PREEMPT
/*
* Calculate the next globally unique transaction id for disambiguation
* during cmpxchg. The transaction ids start with the cpu number and are then
* incremented by TID_STEP (CONFIG_NR_CPUS rounded up to a power of two).
*/
#define TID_STEP roundup_pow_of_two(CONFIG_NR_CPUS)
#else
/*
* No preemption supported therefore also no need to check for
* different cpus.
*/
#define TID_STEP 1
#endif
/* Advance @tid by one transaction, i.e. by TID_STEP. */
static inline unsigned long next_tid(unsigned long tid)
{
	unsigned long successor = tid + TID_STEP;

	return successor;
}
/* Extract the cpu part of @tid: the remainder modulo TID_STEP. */
static inline unsigned int tid_to_cpu(unsigned long tid)
{
	unsigned long cpu_part = tid % TID_STEP;

	return cpu_part;
}
/* Extract the event counter part of @tid: the quotient by TID_STEP. */
static inline unsigned long tid_to_event(unsigned long tid)
{
	unsigned long event_part = tid / TID_STEP;

	return event_part;
}
/* Initial transaction id for @cpu: the cpu number itself. */
static inline unsigned int init_tid(int cpu)
{
	unsigned int first_tid = cpu;

	return first_tid;
}
/*
 * Log why a cmpxchg on the per cpu freelist/tid pair has to be redone.
 *
 * @n:   label printed at the start of the message
 * @s:   cache whose per cpu slab was being updated
 * @tid: transaction id that was sampled before the cmpxchg
 *
 * Compiles to an empty function unless SLUB_DEBUG_CMPXCHG is defined.
 * Otherwise the sampled @tid is compared against the tid currently stored
 * in this cpu's kmem_cache_cpu and one of three reasons is printed: the cpu
 * part differs (only checked with CONFIG_PREEMPT), the event part differs,
 * or neither differs.
 *
 * The conversions match the helper return types: tid_to_cpu() yields
 * unsigned int (%u) and tid_to_event() yields unsigned long (%lu).
 */
static inline void note_cmpxchg_failure(const char *n,
		const struct kmem_cache *s, unsigned long tid)
{
#ifdef SLUB_DEBUG_CMPXCHG
	unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid);

	printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name);

#ifdef CONFIG_PREEMPT
	if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
		printk("due to cpu change %u -> %u\n",
			tid_to_cpu(tid), tid_to_cpu(actual_tid));
	else
#endif
	/* With CONFIG_PREEMPT this "if" is the else-branch of the check above. */
	if (tid_to_event(tid) != tid_to_event(actual_tid))
		printk("due to cpu running other code. Event %lu->%lu\n",
			tid_to_event(tid), tid_to_event(actual_tid));
	else
		printk("for unknown reason: actual=%lx was=%lx target=%lx\n",
			actual_tid, tid, next_tid(tid));
#endif
}
#endif
/*
 * Seed the transaction id of every possible cpu's kmem_cache_cpu of @s with
 * init_tid(cpu). Does nothing unless both CONFIG_CMPXCHG_LOCAL and
 * CONFIG_PREEMPT are defined.
 */
void init_kmem_cache_cpus(struct kmem_cache *s)
{
#if defined(CONFIG_CMPXCHG_LOCAL) && defined(CONFIG_PREEMPT)
	int cpu;

	for_each_possible_cpu(cpu) {
		struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);

		c->tid = init_tid(cpu);
	}
#endif
}
/* /*
* Remove the cpu slab * Remove the cpu slab
*/ */
@ -1518,6 +1642,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
page->inuse--; page->inuse--;
} }
c->page = NULL; c->page = NULL;
#ifdef CONFIG_CMPXCHG_LOCAL
c->tid = next_tid(c->tid);
#endif
unfreeze_slab(s, page, tail); unfreeze_slab(s, page, tail);
} }
@ -1652,6 +1779,19 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
{ {
void **object; void **object;
struct page *new; struct page *new;
#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long flags;
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
/*
* We may have been preempted and rescheduled on a different
* cpu before disabling interrupts. Need to reload cpu area
* pointer.
*/
c = this_cpu_ptr(s->cpu_slab);
#endif
#endif
/* We handle __GFP_ZERO in the caller */ /* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO; gfpflags &= ~__GFP_ZERO;
@ -1678,6 +1818,10 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
c->node = page_to_nid(c->page); c->node = page_to_nid(c->page);
unlock_out: unlock_out:
slab_unlock(c->page); slab_unlock(c->page);
#ifdef CONFIG_CMPXCHG_LOCAL
c->tid = next_tid(c->tid);
local_irq_restore(flags);
#endif
stat(s, ALLOC_SLOWPATH); stat(s, ALLOC_SLOWPATH);
return object; return object;
@ -1739,23 +1883,76 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
{ {
void **object; void **object;
struct kmem_cache_cpu *c; struct kmem_cache_cpu *c;
#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long tid;
#else
unsigned long flags; unsigned long flags;
#endif
if (slab_pre_alloc_hook(s, gfpflags)) if (slab_pre_alloc_hook(s, gfpflags))
return NULL; return NULL;
#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_save(flags); local_irq_save(flags);
#else
redo:
#endif
/*
* Must read kmem_cache cpu data via this cpu ptr. Preemption is
* enabled. We may switch back and forth between cpus while
* reading from one cpu area. That does not matter as long
* as we end up on the original cpu again when doing the cmpxchg.
*/
c = __this_cpu_ptr(s->cpu_slab); c = __this_cpu_ptr(s->cpu_slab);
#ifdef CONFIG_CMPXCHG_LOCAL
/*
* The transaction ids are globally unique per cpu and per operation on
* a per cpu queue. Thus they can guarantee that the cmpxchg_double
* occurs on the right processor and that there was no operation on the
* linked list in between.
*/
tid = c->tid;
barrier();
#endif
object = c->freelist; object = c->freelist;
if (unlikely(!object || !node_match(c, node))) if (unlikely(!object || !node_match(c, node)))
object = __slab_alloc(s, gfpflags, node, addr, c); object = __slab_alloc(s, gfpflags, node, addr, c);
else { else {
#ifdef CONFIG_CMPXCHG_LOCAL
/*
* The cmpxchg will only match if there was no additional
* operation and if we are on the right processor.
*
* The cmpxchg does the following atomically (without lock semantics!)
* 1. Relocate first pointer to the current per cpu area.
* 2. Verify that tid and freelist have not been changed
* 3. If they were not changed replace tid and freelist
*
* Since this is without lock semantics the protection is only against
* code executing on this cpu *not* from access by other cpus.
*/
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
get_freepointer(s, object), next_tid(tid)))) {
note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}
#else
c->freelist = get_freepointer(s, object); c->freelist = get_freepointer(s, object);
#endif
stat(s, ALLOC_FASTPATH); stat(s, ALLOC_FASTPATH);
} }
#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags); local_irq_restore(flags);
#endif
if (unlikely(gfpflags & __GFP_ZERO) && object) if (unlikely(gfpflags & __GFP_ZERO) && object)
memset(object, 0, s->objsize); memset(object, 0, s->objsize);
@ -1833,9 +2030,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
{ {
void *prior; void *prior;
void **object = (void *)x; void **object = (void *)x;
#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long flags;
stat(s, FREE_SLOWPATH); local_irq_save(flags);
#endif
slab_lock(page); slab_lock(page);
stat(s, FREE_SLOWPATH);
if (kmem_cache_debug(s)) if (kmem_cache_debug(s))
goto debug; goto debug;
@ -1865,6 +2066,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
out_unlock: out_unlock:
slab_unlock(page); slab_unlock(page);
#ifdef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
#endif
return; return;
slab_empty: slab_empty:
@ -1876,6 +2080,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_REMOVE_PARTIAL); stat(s, FREE_REMOVE_PARTIAL);
} }
slab_unlock(page); slab_unlock(page);
#ifdef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags);
#endif
stat(s, FREE_SLAB); stat(s, FREE_SLAB);
discard_slab(s, page); discard_slab(s, page);
return; return;
@ -1902,23 +2109,56 @@ static __always_inline void slab_free(struct kmem_cache *s,
{ {
void **object = (void *)x; void **object = (void *)x;
struct kmem_cache_cpu *c; struct kmem_cache_cpu *c;
#ifdef CONFIG_CMPXCHG_LOCAL
unsigned long tid;
#else
unsigned long flags; unsigned long flags;
#endif
slab_free_hook(s, x); slab_free_hook(s, x);
#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_save(flags); local_irq_save(flags);
#else
redo:
#endif
/*
* Determine the current cpu's per cpu slab.
* The cpu may change afterward. However that does not matter since
* data is retrieved via this pointer. If we are on the same cpu
* during the cmpxchg then the free will succeed.
*/
c = __this_cpu_ptr(s->cpu_slab); c = __this_cpu_ptr(s->cpu_slab);
slab_free_hook_irq(s, x); #ifdef CONFIG_CMPXCHG_LOCAL
tid = c->tid;
barrier();
#endif
if (likely(page == c->page && c->node != NUMA_NO_NODE)) { if (likely(page == c->page && c->node != NUMA_NO_NODE)) {
set_freepointer(s, object, c->freelist); set_freepointer(s, object, c->freelist);
#ifdef CONFIG_CMPXCHG_LOCAL
if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
c->freelist, tid,
object, next_tid(tid)))) {
note_cmpxchg_failure("slab_free", s, tid);
goto redo;
}
#else
c->freelist = object; c->freelist = object;
#endif
stat(s, FREE_FASTPATH); stat(s, FREE_FASTPATH);
} else } else
__slab_free(s, page, x, addr); __slab_free(s, page, x, addr);
#ifndef CONFIG_CMPXCHG_LOCAL
local_irq_restore(flags); local_irq_restore(flags);
#endif
} }
void kmem_cache_free(struct kmem_cache *s, void *x) void kmem_cache_free(struct kmem_cache *s, void *x)
@ -1988,13 +2228,13 @@ static int slub_nomerge;
* the smallest order which will fit the object. * the smallest order which will fit the object.
*/ */
static inline int slab_order(int size, int min_objects, static inline int slab_order(int size, int min_objects,
int max_order, int fract_leftover) int max_order, int fract_leftover, int reserved)
{ {
int order; int order;
int rem; int rem;
int min_order = slub_min_order; int min_order = slub_min_order;
if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE) if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
return get_order(size * MAX_OBJS_PER_PAGE) - 1; return get_order(size * MAX_OBJS_PER_PAGE) - 1;
for (order = max(min_order, for (order = max(min_order,
@ -2003,10 +2243,10 @@ static inline int slab_order(int size, int min_objects,
unsigned long slab_size = PAGE_SIZE << order; unsigned long slab_size = PAGE_SIZE << order;
if (slab_size < min_objects * size) if (slab_size < min_objects * size + reserved)
continue; continue;
rem = slab_size % size; rem = (slab_size - reserved) % size;
if (rem <= slab_size / fract_leftover) if (rem <= slab_size / fract_leftover)
break; break;
@ -2016,7 +2256,7 @@ static inline int slab_order(int size, int min_objects,
return order; return order;
} }
static inline int calculate_order(int size) static inline int calculate_order(int size, int reserved)
{ {
int order; int order;
int min_objects; int min_objects;
@ -2034,14 +2274,14 @@ static inline int calculate_order(int size)
min_objects = slub_min_objects; min_objects = slub_min_objects;
if (!min_objects) if (!min_objects)
min_objects = 4 * (fls(nr_cpu_ids) + 1); min_objects = 4 * (fls(nr_cpu_ids) + 1);
max_objects = (PAGE_SIZE << slub_max_order)/size; max_objects = order_objects(slub_max_order, size, reserved);
min_objects = min(min_objects, max_objects); min_objects = min(min_objects, max_objects);
while (min_objects > 1) { while (min_objects > 1) {
fraction = 16; fraction = 16;
while (fraction >= 4) { while (fraction >= 4) {
order = slab_order(size, min_objects, order = slab_order(size, min_objects,
slub_max_order, fraction); slub_max_order, fraction, reserved);
if (order <= slub_max_order) if (order <= slub_max_order)
return order; return order;
fraction /= 2; fraction /= 2;
@ -2053,14 +2293,14 @@ static inline int calculate_order(int size)
* We were unable to place multiple objects in a slab. Now * We were unable to place multiple objects in a slab. Now
* lets see if we can place a single object there. * lets see if we can place a single object there.
*/ */
order = slab_order(size, 1, slub_max_order, 1); order = slab_order(size, 1, slub_max_order, 1, reserved);
if (order <= slub_max_order) if (order <= slub_max_order)
return order; return order;
/* /*
* Doh this slab cannot be placed using slub_max_order. * Doh this slab cannot be placed using slub_max_order.
*/ */
order = slab_order(size, 1, MAX_ORDER, 1); order = slab_order(size, 1, MAX_ORDER, 1, reserved);
if (order < MAX_ORDER) if (order < MAX_ORDER)
return order; return order;
return -ENOSYS; return -ENOSYS;
@ -2110,9 +2350,23 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu));
#ifdef CONFIG_CMPXCHG_LOCAL
/*
* Must align to double word boundary for the double cmpxchg instructions
* to work.
*/
s->cpu_slab = __alloc_percpu(sizeof(struct kmem_cache_cpu), 2 * sizeof(void *));
#else
/* Regular alignment is sufficient */
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
#endif
return s->cpu_slab != NULL; if (!s->cpu_slab)
return 0;
init_kmem_cache_cpus(s);
return 1;
} }
static struct kmem_cache *kmem_cache_node; static struct kmem_cache *kmem_cache_node;
@ -2311,7 +2565,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
if (forced_order >= 0) if (forced_order >= 0)
order = forced_order; order = forced_order;
else else
order = calculate_order(size); order = calculate_order(size, s->reserved);
if (order < 0) if (order < 0)
return 0; return 0;
@ -2329,8 +2583,8 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
/* /*
* Determine the number of objects per slab * Determine the number of objects per slab
*/ */
s->oo = oo_make(order, size); s->oo = oo_make(order, size, s->reserved);
s->min = oo_make(get_order(size), size); s->min = oo_make(get_order(size), size, s->reserved);
if (oo_objects(s->oo) > oo_objects(s->max)) if (oo_objects(s->oo) > oo_objects(s->max))
s->max = s->oo; s->max = s->oo;
@ -2349,6 +2603,10 @@ static int kmem_cache_open(struct kmem_cache *s,
s->objsize = size; s->objsize = size;
s->align = align; s->align = align;
s->flags = kmem_cache_flags(size, flags, name, ctor); s->flags = kmem_cache_flags(size, flags, name, ctor);
s->reserved = 0;
if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU))
s->reserved = sizeof(struct rcu_head);
if (!calculate_sizes(s, -1)) if (!calculate_sizes(s, -1))
goto error; goto error;
@ -2399,12 +2657,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s)
} }
EXPORT_SYMBOL(kmem_cache_size); EXPORT_SYMBOL(kmem_cache_size);
const char *kmem_cache_name(struct kmem_cache *s)
{
return s->name;
}
EXPORT_SYMBOL(kmem_cache_name);
static void list_slab_objects(struct kmem_cache *s, struct page *page, static void list_slab_objects(struct kmem_cache *s, struct page *page,
const char *text) const char *text)
{ {
@ -2696,7 +2948,6 @@ EXPORT_SYMBOL(__kmalloc_node);
size_t ksize(const void *object) size_t ksize(const void *object)
{ {
struct page *page; struct page *page;
struct kmem_cache *s;
if (unlikely(object == ZERO_SIZE_PTR)) if (unlikely(object == ZERO_SIZE_PTR))
return 0; return 0;
@ -2707,28 +2958,8 @@ size_t ksize(const void *object)
WARN_ON(!PageCompound(page)); WARN_ON(!PageCompound(page));
return PAGE_SIZE << compound_order(page); return PAGE_SIZE << compound_order(page);
} }
s = page->slab;
#ifdef CONFIG_SLUB_DEBUG return slab_ksize(page->slab);
/*
* Debugging requires use of the padding between object
* and whatever may come after it.
*/
if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
return s->objsize;
#endif
/*
* If we have the need to store the freelist pointer
* back there or track user information then we can
* only use the space before that information.
*/
if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
return s->inuse;
/*
* Else we can use all the padding etc for the allocation
*/
return s->size;
} }
EXPORT_SYMBOL(ksize); EXPORT_SYMBOL(ksize);
@ -4017,6 +4248,12 @@ static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf)
} }
SLAB_ATTR_RO(destroy_by_rcu); SLAB_ATTR_RO(destroy_by_rcu);
/*
 * Show handler for the read-only "reserved" attribute: writes s->reserved
 * as a decimal number followed by a newline into @buf and returns the
 * number of characters written.
 */
static ssize_t reserved_show(struct kmem_cache *s, char *buf)
{
	int written = sprintf(buf, "%d\n", s->reserved);

	return written;
}
SLAB_ATTR_RO(reserved);
#ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG
static ssize_t slabs_show(struct kmem_cache *s, char *buf) static ssize_t slabs_show(struct kmem_cache *s, char *buf)
{ {
@ -4303,6 +4540,7 @@ static struct attribute *slab_attrs[] = {
&reclaim_account_attr.attr, &reclaim_account_attr.attr,
&destroy_by_rcu_attr.attr, &destroy_by_rcu_attr.attr,
&shrink_attr.attr, &shrink_attr.attr,
&reserved_attr.attr,
#ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG
&total_objects_attr.attr, &total_objects_attr.attr,
&slabs_attr.attr, &slabs_attr.attr,