2007-05-06 15:49:36 -06:00
|
|
|
#ifndef _LINUX_SLUB_DEF_H
|
|
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SLUB : A Slab allocator without object queues.
|
|
|
|
*
|
2008-07-04 10:59:22 -06:00
|
|
|
* (C) 2007 SGI, Christoph Lameter
|
2007-05-06 15:49:36 -06:00
|
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
|
|
#include <linux/gfp.h>
|
|
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <linux/kobject.h>
|
2009-07-07 03:32:59 -06:00
|
|
|
#include <linux/kmemleak.h>
|
2007-05-06 15:49:36 -06:00
|
|
|
|
2010-05-26 03:22:17 -06:00
|
|
|
#include <trace/events/kmem.h>
|
|
|
|
|
2008-02-07 18:47:41 -07:00
|
|
|
/*
 * Event counters kept per cpu by SLUB.  Each enumerator is an index into
 * the stat[] array of struct kmem_cache_cpu (which only exists when
 * CONFIG_SLUB_STATS is set).  NR_SLUB_STAT_ITEMS must remain the last
 * entry: it is the number of counters, not a counter itself.
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	NR_SLUB_STAT_ITEMS
};
|
|
|
|
|
2007-10-16 02:26:05 -06:00
|
|
|
/*
 * Per cpu allocation state of a slab cache.  struct kmem_cache points at
 * this structure through its cpu_slab member.
 */
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to first free per cpu object */
	struct page *page;	/* The slab from which we are allocating */
	int node;		/* The node of the page (or -1 for debug) */
#ifdef CONFIG_SLUB_STATS
	/* One counter per enum stat_item event */
	unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
};
|
2007-10-16 02:26:05 -06:00
|
|
|
|
2007-05-06 15:49:36 -06:00
|
|
|
struct kmem_cache_node {
|
|
|
|
spinlock_t list_lock; /* Protect partial list and nr_partial */
|
|
|
|
unsigned long nr_partial;
|
|
|
|
struct list_head partial;
|
2007-07-17 05:03:24 -06:00
|
|
|
#ifdef CONFIG_SLUB_DEBUG
|
2008-04-14 09:53:02 -06:00
|
|
|
atomic_long_t nr_slabs;
|
2008-04-14 10:11:40 -06:00
|
|
|
atomic_long_t total_objects;
|
2007-05-06 15:49:42 -06:00
|
|
|
struct list_head full;
|
2007-07-17 05:03:24 -06:00
|
|
|
#endif
|
2007-05-06 15:49:36 -06:00
|
|
|
};
|
|
|
|
|
2008-04-14 10:11:31 -06:00
|
|
|
/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 */
struct kmem_cache_order_objects {
	unsigned long x;	/* Order and object count packed in one word */
};
|
|
|
|
|
2007-05-06 15:49:36 -06:00
|
|
|
/*
|
|
|
|
* Slab cache management.
|
|
|
|
*/
|
|
|
|
struct kmem_cache {
|
2009-12-18 15:26:20 -07:00
|
|
|
struct kmem_cache_cpu *cpu_slab;
|
2007-05-06 15:49:36 -06:00
|
|
|
/* Used for retriving partial slabs etc */
|
|
|
|
unsigned long flags;
|
|
|
|
int size; /* The size of an object including meta data */
|
|
|
|
int objsize; /* The size of an object without meta data */
|
|
|
|
int offset; /* Free pointer offset. */
|
2008-04-14 10:11:31 -06:00
|
|
|
struct kmem_cache_order_objects oo;
|
2007-05-06 15:49:36 -06:00
|
|
|
|
|
|
|
/* Allocation and freeing of slabs */
|
2008-04-14 10:11:40 -06:00
|
|
|
struct kmem_cache_order_objects max;
|
2008-04-14 10:11:40 -06:00
|
|
|
struct kmem_cache_order_objects min;
|
2008-02-14 15:21:32 -07:00
|
|
|
gfp_t allocflags; /* gfp flags to use on each alloc */
|
2007-05-06 15:49:36 -06:00
|
|
|
int refcount; /* Refcount for slab cache destroy */
|
2008-07-25 20:45:34 -06:00
|
|
|
void (*ctor)(void *);
|
2007-05-06 15:49:36 -06:00
|
|
|
int inuse; /* Offset to metadata */
|
|
|
|
int align; /* Alignment */
|
2009-02-22 18:40:07 -07:00
|
|
|
unsigned long min_partial;
|
2007-05-06 15:49:36 -06:00
|
|
|
const char *name; /* Name (only for display!) */
|
|
|
|
struct list_head list; /* List of slab caches */
|
2007-07-17 05:03:24 -06:00
|
|
|
#ifdef CONFIG_SLUB_DEBUG
|
2007-05-06 15:49:36 -06:00
|
|
|
struct kobject kobj; /* For sysfs */
|
2007-07-17 05:03:24 -06:00
|
|
|
#endif
|
2007-05-06 15:49:36 -06:00
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
2008-01-08 00:20:26 -07:00
|
|
|
/*
|
|
|
|
* Defragmentation by allocating from a remote node.
|
|
|
|
*/
|
|
|
|
int remote_node_defrag_ratio;
|
2007-05-06 15:49:36 -06:00
|
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
2010-05-21 15:41:35 -06:00
|
|
|
#else
|
|
|
|
/* Avoid an extra cache line for UP */
|
|
|
|
struct kmem_cache_node local_node;
|
2007-05-06 15:49:36 -06:00
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Kmalloc subsystem.
 *
 * KMALLOC_MIN_SIZE is the smallest kmalloc object size: 8 bytes unless the
 * architecture has already defined ARCH_KMALLOC_MINALIGN to something
 * larger.  KMALLOC_SHIFT_LOW is the log2 of that size.
 */
#if defined(ARCH_KMALLOC_MINALIGN) && ARCH_KMALLOC_MINALIGN > 8
#define KMALLOC_MIN_SIZE ARCH_KMALLOC_MINALIGN
#else
#define KMALLOC_MIN_SIZE 8
#endif

#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
|
2007-05-06 15:49:36 -06:00
|
|
|
|
2010-05-19 05:02:14 -06:00
|
|
|
/*
 * Default minimum alignments, used when the architecture did not provide
 * its own values: the alignment of unsigned long long.
 */
#ifndef ARCH_KMALLOC_MINALIGN
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

#ifndef ARCH_SLAB_MINALIGN
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
|
|
|
|
|
2009-02-17 10:05:07 -07:00
|
|
|
/*
 * Maximum kmalloc object size handled by SLUB. Larger object allocations
 * are passed through to the page allocator. The page allocator "fastpath"
 * is relatively slow so we need this value sufficiently high so that
 * performance critical objects are allocated through the SLUB fastpath.
 *
 * This should be dropped to PAGE_SIZE / 2 once the page allocator
 * "fastpath" becomes competitive with the slab allocator fastpaths.
 */
#define SLUB_MAX_SIZE (2 * PAGE_SIZE)

#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
|
2009-02-17 10:05:07 -07:00
|
|
|
|
2009-12-18 15:26:21 -07:00
|
|
|
/*
 * SLUB_DMA is the gfp bit that selects DMA caches; KMALLOC_CACHES is the
 * number of entries in the kmalloc_caches[] array.
 */
#ifdef CONFIG_ZONE_DMA
#define SLUB_DMA __GFP_DMA
/* Reserve extra caches for potential DMA use */
#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT)
#else
/* Disable DMA functionality */
#define SLUB_DMA ((__force gfp_t)0)
#define KMALLOC_CACHES SLUB_PAGE_SHIFT
#endif
|
|
|
|
|
2007-05-06 15:49:36 -06:00
|
|
|
/*
|
|
|
|
* We keep the general caches in an array of slab caches that are used for
|
|
|
|
* 2^x bytes of allocations.
|
|
|
|
*/
|
2009-12-18 15:26:21 -07:00
|
|
|
extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES];
|
2007-05-06 15:49:36 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Sorry that the following has to be that ugly but some versions of GCC
|
|
|
|
* have trouble with constant propagation and loops.
|
|
|
|
*/
|
2007-08-31 01:48:45 -06:00
|
|
|
static __always_inline int kmalloc_index(size_t size)
|
2007-05-06 15:49:36 -06:00
|
|
|
{
|
2007-06-08 14:46:49 -06:00
|
|
|
if (!size)
|
|
|
|
return 0;
|
2007-05-06 15:49:38 -06:00
|
|
|
|
2007-06-16 11:16:13 -06:00
|
|
|
if (size <= KMALLOC_MIN_SIZE)
|
|
|
|
return KMALLOC_SHIFT_LOW;
|
|
|
|
|
2009-08-28 05:28:54 -06:00
|
|
|
if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
|
2007-05-06 15:49:36 -06:00
|
|
|
return 1;
|
2009-08-28 05:28:54 -06:00
|
|
|
if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
|
2007-05-06 15:49:36 -06:00
|
|
|
return 2;
|
|
|
|
if (size <= 8) return 3;
|
|
|
|
if (size <= 16) return 4;
|
|
|
|
if (size <= 32) return 5;
|
|
|
|
if (size <= 64) return 6;
|
|
|
|
if (size <= 128) return 7;
|
|
|
|
if (size <= 256) return 8;
|
|
|
|
if (size <= 512) return 9;
|
|
|
|
if (size <= 1024) return 10;
|
|
|
|
if (size <= 2 * 1024) return 11;
|
2008-02-16 00:45:26 -07:00
|
|
|
if (size <= 4 * 1024) return 12;
|
2007-10-16 02:24:38 -06:00
|
|
|
/*
|
|
|
|
* The following is only needed to support architectures with a larger page
|
|
|
|
* size than 4k.
|
|
|
|
*/
|
2007-05-06 15:49:36 -06:00
|
|
|
if (size <= 8 * 1024) return 13;
|
|
|
|
if (size <= 16 * 1024) return 14;
|
|
|
|
if (size <= 32 * 1024) return 15;
|
|
|
|
if (size <= 64 * 1024) return 16;
|
|
|
|
if (size <= 128 * 1024) return 17;
|
|
|
|
if (size <= 256 * 1024) return 18;
|
2007-10-16 02:24:38 -06:00
|
|
|
if (size <= 512 * 1024) return 19;
|
2007-05-06 15:49:36 -06:00
|
|
|
if (size <= 1024 * 1024) return 20;
|
|
|
|
if (size <= 2 * 1024 * 1024) return 21;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* What we really wanted to do and cannot do because of compiler issues is:
|
|
|
|
* int i;
|
|
|
|
* for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
|
|
|
|
* if (size <= (1 << i))
|
|
|
|
* return i;
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the slab cache for a given combination of allocation flags and size.
|
|
|
|
*
|
|
|
|
* This ought to end up with a global pointer to the right cache
|
|
|
|
* in kmalloc_caches.
|
|
|
|
*/
|
2007-08-31 01:48:45 -06:00
|
|
|
static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
|
2007-05-06 15:49:36 -06:00
|
|
|
{
|
|
|
|
int index = kmalloc_index(size);
|
|
|
|
|
|
|
|
if (index == 0)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
return &kmalloc_caches[index];
|
|
|
|
}
|
|
|
|
|
slob: initial NUMA support
This adds preliminary NUMA support to SLOB, primarily aimed at systems with
small nodes (tested all the way down to a 128kB SRAM block), whether
asymmetric or otherwise.
We follow the same conventions as SLAB/SLUB, preferring current node
placement for new pages, or with explicit placement, if a node has been
specified. Presently on UP NUMA this has the side-effect of preferring
node#0 allocations (since numa_node_id() == 0, though this could be
reworked if we could hand off a pfn to determine node placement), so
single-CPU NUMA systems will want to place smaller nodes further out in
terms of node id. Once a page has been bound to a node (via explicit node
id typing), we only do block allocations from partial free pages that have
a matching node id in the page flags.
The current implementation does have some scalability problems, in that all
partial free pages are tracked in the global freelist (with contention due
to the single spinlock). However, these are things that are being reworked
for SMP scalability first, while things like per-node freelists can easily
be built on top of this sort of functionality once it's been added.
More background can be found in:
http://marc.info/?l=linux-mm&m=118117916022379&w=2
http://marc.info/?l=linux-mm&m=118170446306199&w=2
http://marc.info/?l=linux-mm&m=118187859420048&w=2
and subsequent threads.
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 00:38:22 -06:00
|
|
|
/* Out-of-line allocation entry points used by the inline helpers below. */
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags);
void *__kmalloc(size_t size, gfp_t flags);
|
|
|
|
|
2009-12-11 00:45:30 -07:00
|
|
|
#ifdef CONFIG_TRACING
|
2008-08-19 11:43:26 -06:00
|
|
|
extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
|
|
|
|
#else
|
|
|
|
static __always_inline void *
|
|
|
|
kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
|
|
|
|
{
|
|
|
|
return kmem_cache_alloc(s, gfpflags);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-02-11 13:47:46 -07:00
|
|
|
/*
 * Serve an allocation directly from the page allocator.
 *
 * The request is rounded up to get_order(size) pages, obtained with
 * __GFP_COMP added to the caller's flags.  The result is reported to
 * kmemleak and to the kmalloc tracepoint (with PAGE_SIZE << order as the
 * allocated size) before being returned.  The return value of
 * __get_free_pages() is passed through unchecked.
 */
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
	unsigned int order = get_order(size);
	void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);

	kmemleak_alloc(ret, size, 1, flags);
	trace_kmalloc(_THIS_IP_, ret, size, PAGE_SIZE << order, flags);

	return ret;
}
|
|
|
|
|
2007-08-31 01:48:45 -06:00
|
|
|
/*
 * kmalloc - allocate memory of the given size.
 *
 * When the size is a compile time constant the cache lookup is resolved
 * inline:
 *  - sizes above SLUB_MAX_SIZE go to kmalloc_large();
 *  - requests without SLUB_DMA look up their cache via kmalloc_slab();
 *    if there is none (zero size) ZERO_SIZE_PTR is returned, otherwise the
 *    object comes from kmem_cache_alloc_notrace() and is traced here.
 * Everything else (non constant size, or SLUB_DMA set) is handed to
 * __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	void *ret;

	if (__builtin_constant_p(size)) {
		if (size > SLUB_MAX_SIZE)
			return kmalloc_large(size, flags);

		if (!(flags & SLUB_DMA)) {
			struct kmem_cache *s = kmalloc_slab(size);

			if (!s)
				return ZERO_SIZE_PTR;

			ret = kmem_cache_alloc_notrace(s, flags);

			trace_kmalloc(_THIS_IP_, ret, size, s->size, flags);

			return ret;
		}
	}
	return __kmalloc(size, flags);
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
slob: initial NUMA support
This adds preliminary NUMA support to SLOB, primarily aimed at systems with
small nodes (tested all the way down to a 128kB SRAM block), whether
asymmetric or otherwise.
We follow the same conventions as SLAB/SLUB, preferring current node
placement for new pages, or with explicit placement, if a node has been
specified. Presently on UP NUMA this has the side-effect of preferring
node#0 allocations (since numa_node_id() == 0, though this could be
reworked if we could hand off a pfn to determine node placement), so
single-CPU NUMA systems will want to place smaller nodes further out in
terms of node id. Once a page has been bound to a node (via explicit node
id typing), we only do block allocations from partial free pages that have
a matching node id in the page flags.
The current implementation does have some scalability problems, in that all
partial free pages are tracked in the global freelist (with contention due
to the single spinlock). However, these are things that are being reworked
for SMP scalability first, while things like per-node freelists can easily
be built on top of this sort of functionality once it's been added.
More background can be found in:
http://marc.info/?l=linux-mm&m=118117916022379&w=2
http://marc.info/?l=linux-mm&m=118170446306199&w=2
http://marc.info/?l=linux-mm&m=118187859420048&w=2
and subsequent threads.
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-07-16 00:38:22 -06:00
|
|
|
/* Out-of-line node-aware allocation entry points (CONFIG_NUMA only). */
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node);
|
2007-05-06 15:49:36 -06:00
|
|
|
|
2009-12-11 00:45:30 -07:00
|
|
|
#ifdef CONFIG_TRACING
|
2008-08-19 11:43:26 -06:00
|
|
|
extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
|
|
|
|
gfp_t gfpflags,
|
|
|
|
int node);
|
|
|
|
#else
|
|
|
|
static __always_inline void *
|
|
|
|
kmem_cache_alloc_node_notrace(struct kmem_cache *s,
|
|
|
|
gfp_t gfpflags,
|
|
|
|
int node)
|
|
|
|
{
|
|
|
|
return kmem_cache_alloc_node(s, gfpflags, node);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2007-08-31 01:48:45 -06:00
|
|
|
/*
 * kmalloc_node - allocate memory of the given size for a given node.
 *
 * For a compile time constant size of at most SLUB_MAX_SIZE without
 * SLUB_DMA, the cache is looked up inline via kmalloc_slab(); if there is
 * none (zero size) ZERO_SIZE_PTR is returned, otherwise the object comes
 * from kmem_cache_alloc_node_notrace() and is traced here.  All other
 * requests are handed to __kmalloc_node().
 */
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
	void *ret;

	if (__builtin_constant_p(size) &&
	    size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) {
		struct kmem_cache *s = kmalloc_slab(size);

		if (!s)
			return ZERO_SIZE_PTR;

		ret = kmem_cache_alloc_node_notrace(s, flags, node);

		trace_kmalloc_node(_THIS_IP_, ret,
				   size, s->size, flags, node);

		return ret;
	}
	return __kmalloc_node(size, flags, node);
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|