2008-07-25 20:44:36 -06:00
|
|
|
#include <linux/mm.h>
|
2006-01-08 02:01:43 -07:00
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/string.h>
|
|
|
|
#include <linux/module.h>
|
2006-03-24 04:18:42 -07:00
|
|
|
#include <linux/err.h>
|
2008-07-26 16:22:28 -06:00
|
|
|
#include <linux/sched.h>
|
2006-03-24 04:18:42 -07:00
|
|
|
#include <asm/uaccess.h>
|
2006-01-08 02:01:43 -07:00
|
|
|
|
mm: nommu: sort mm->mmap list properly
When I was reading nommu code, I found that it handles the vma list/tree
in an unusual way. IIUC, because there can be more than one
identical/overlapped vma in the list/tree, it sorts the tree more
strictly and does a linear search on the tree. But this isn't applied to
the list (i.e. the list could be constructed in a different order than
the tree so that we can't use the list when finding the first vma in that
order).
Since inserting/sorting a vma in the tree and linking it into the list are
done at the same time, we can easily construct both of them in the same
order. And since a linear search on the tree could be more costly than one
on the list, the search can be converted to use the list.
Also, after the commit 297c5eee3724 ("mm: make the vma list be doubly
linked") made the list doubly linked, there were a couple of places in the
code that needed to be fixed to construct the list properly.
Patch 1/6 is a preparation. It keeps the list sorted the same way as the
tree and constructs the doubly-linked list properly. Patch 2/6 is a simple
optimization for the vma deletion. Patch 3/6 and 4/6 convert tree
traversal to list traversal and the rest are simple fixes and cleanups.
This patch:
@vma added into @mm should be sorted by start addr, end addr and VMA
struct addr in that order because we may get identical VMAs in the @mm.
However this was true only for the rbtree, not for the list.
This patch fixes this by remembering 'rb_prev' during the tree traversal
like find_vma_prepare() does and linking the @vma via __vma_link_list().
After this patch, we can iterate the whole VMAs in correct order simply by
using @mm->mmap list.
[akpm@linux-foundation.org: avoid duplicating __vma_link_list()]
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-24 18:11:22 -06:00
|
|
|
#include "internal.h"
|
|
|
|
|
2009-04-10 07:36:00 -06:00
|
|
|
#define CREATE_TRACE_POINTS
|
2009-04-14 17:39:12 -06:00
|
|
|
#include <trace/events/kmem.h>
|
2009-04-10 07:36:00 -06:00
|
|
|
|
2006-01-08 02:01:43 -07:00
|
|
|
/**
|
|
|
|
* kstrdup - allocate space for and copy an existing string
|
|
|
|
* @s: the string to duplicate
|
|
|
|
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
|
|
|
|
*/
|
|
|
|
char *kstrdup(const char *s, gfp_t gfp)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
if (!s)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
len = strlen(s) + 1;
|
2006-10-04 03:15:25 -06:00
|
|
|
buf = kmalloc_track_caller(len, gfp);
|
2006-01-08 02:01:43 -07:00
|
|
|
if (buf)
|
|
|
|
memcpy(buf, s, len);
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(kstrdup);
|
2006-03-24 04:18:42 -07:00
|
|
|
|
2007-07-17 19:37:02 -06:00
|
|
|
/**
|
|
|
|
* kstrndup - allocate space for and copy an existing string
|
|
|
|
* @s: the string to duplicate
|
|
|
|
* @max: read at most @max chars from @s
|
|
|
|
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
|
|
|
|
*/
|
|
|
|
char *kstrndup(const char *s, size_t max, gfp_t gfp)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
if (!s)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
len = strnlen(s, max);
|
|
|
|
buf = kmalloc_track_caller(len+1, gfp);
|
|
|
|
if (buf) {
|
|
|
|
memcpy(buf, s, len);
|
|
|
|
buf[len] = '\0';
|
|
|
|
}
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(kstrndup);
|
|
|
|
|
[PATCH] kmemdup: introduce
One of idiomatic ways to duplicate a region of memory is
dst = kmalloc(len, GFP_KERNEL);
if (!dst)
return -ENOMEM;
memcpy(dst, src, len);
which is neat code except a programmer needs to write size twice. Which
sometimes leads to mistakes. If len passed to kmalloc is smaller than len
passed to memcpy, it's straight overwrite-beyond-end. If len passed to
memcpy is smaller than len passed to kmalloc, it's either a) legit
behaviour ;-), or b) cloned buffer will contain garbage in second half.
Slight trolling of commit lists shows several duplication bugs
done exactly because of diverged lengths:
Linux:
[CRYPTO]: Fix memcpy/memset args.
[PATCH] memcpy/memset fixes
OpenBSD:
kerberosV/src/lib/asn1: der_copy.c:1.4
If programmer is given only one place to play with lengths, I believe, such
mistakes could be avoided.
With kmemdup, the snippet above will be rewritten as:
dst = kmemdup(src, len, GFP_KERNEL);
if (!dst)
return -ENOMEM;
This also leads to smaller code (kzalloc effect). Quick grep shows
200+ places where kmemdup() can be used.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:27:20 -06:00
|
|
|
/**
|
|
|
|
* kmemdup - duplicate region of memory
|
|
|
|
*
|
|
|
|
* @src: memory region to duplicate
|
|
|
|
* @len: memory region length
|
|
|
|
* @gfp: GFP mask to use
|
|
|
|
*/
|
|
|
|
void *kmemdup(const void *src, size_t len, gfp_t gfp)
|
|
|
|
{
|
|
|
|
void *p;
|
|
|
|
|
2006-10-04 03:15:25 -06:00
|
|
|
p = kmalloc_track_caller(len, gfp);
|
[PATCH] kmemdup: introduce
One of idiomatic ways to duplicate a region of memory is
dst = kmalloc(len, GFP_KERNEL);
if (!dst)
return -ENOMEM;
memcpy(dst, src, len);
which is neat code except a programmer needs to write size twice. Which
sometimes leads to mistakes. If len passed to kmalloc is smaller that len
passed to memcpy, it's straight overwrite-beyond-end. If len passed to
memcpy is smaller than len passed to kmalloc, it's either a) legit
behaviour ;-), or b) cloned buffer will contain garbage in second half.
Slight trolling of commit lists shows several duplications bugs
done exactly because of diverged lenghts:
Linux:
[CRYPTO]: Fix memcpy/memset args.
[PATCH] memcpy/memset fixes
OpenBSD:
kerberosV/src/lib/asn1: der_copy.c:1.4
If programmer is given only one place to play with lengths, I believe, such
mistakes could be avoided.
With kmemdup, the snippet above will be rewritten as:
dst = kmemdup(src, len, GFP_KERNEL);
if (!dst)
return -ENOMEM;
This also leads to smaller code (kzalloc effect). Quick grep shows
200+ places where kmemdup() can be used.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01 00:27:20 -06:00
|
|
|
if (p)
|
|
|
|
memcpy(p, src, len);
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(kmemdup);
|
|
|
|
|
2009-03-31 16:23:16 -06:00
|
|
|
/**
|
|
|
|
* memdup_user - duplicate memory region from user space
|
|
|
|
*
|
|
|
|
* @src: source address in user space
|
|
|
|
* @len: number of bytes to copy
|
|
|
|
*
|
|
|
|
* Returns an ERR_PTR() on failure.
|
|
|
|
*/
|
|
|
|
void *memdup_user(const void __user *src, size_t len)
|
|
|
|
{
|
|
|
|
void *p;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Always use GFP_KERNEL, since copy_from_user() can sleep and
|
|
|
|
* cause pagefault, which makes it pointless to use GFP_NOFS
|
|
|
|
* or GFP_ATOMIC.
|
|
|
|
*/
|
|
|
|
p = kmalloc_track_caller(len, GFP_KERNEL);
|
|
|
|
if (!p)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
if (copy_from_user(p, src, len)) {
|
|
|
|
kfree(p);
|
|
|
|
return ERR_PTR(-EFAULT);
|
|
|
|
}
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(memdup_user);
|
|
|
|
|
2007-07-17 05:03:21 -06:00
|
|
|
/**
|
2008-07-26 18:49:33 -06:00
|
|
|
* __krealloc - like krealloc() but don't free @p.
|
2007-07-17 05:03:21 -06:00
|
|
|
* @p: object to reallocate memory for.
|
|
|
|
* @new_size: how many bytes of memory are required.
|
|
|
|
* @flags: the type of memory to allocate.
|
|
|
|
*
|
2008-07-26 18:49:33 -06:00
|
|
|
* This function is like krealloc() except it never frees the originally
|
|
|
|
* allocated buffer. Use this if you don't want to free the buffer immediately
|
|
|
|
* like, for example, with RCU.
|
2007-07-17 05:03:21 -06:00
|
|
|
*/
|
2008-07-26 18:49:33 -06:00
|
|
|
void *__krealloc(const void *p, size_t new_size, gfp_t flags)
|
2007-07-17 05:03:21 -06:00
|
|
|
{
|
|
|
|
void *ret;
|
2007-10-16 02:24:46 -06:00
|
|
|
size_t ks = 0;
|
2007-07-17 05:03:21 -06:00
|
|
|
|
2008-07-26 18:49:33 -06:00
|
|
|
if (unlikely(!new_size))
|
2007-07-17 05:03:22 -06:00
|
|
|
return ZERO_SIZE_PTR;
|
2007-07-17 05:03:21 -06:00
|
|
|
|
2007-10-16 02:24:46 -06:00
|
|
|
if (p)
|
|
|
|
ks = ksize(p);
|
|
|
|
|
2007-07-17 05:03:21 -06:00
|
|
|
if (ks >= new_size)
|
|
|
|
return (void *)p;
|
|
|
|
|
|
|
|
ret = kmalloc_track_caller(new_size, flags);
|
2008-07-26 18:49:33 -06:00
|
|
|
if (ret && p)
|
2007-11-14 18:00:01 -07:00
|
|
|
memcpy(ret, p, ks);
|
2008-07-26 18:49:33 -06:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(__krealloc);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* krealloc - reallocate memory. The contents will remain unchanged.
|
|
|
|
* @p: object to reallocate memory for.
|
|
|
|
* @new_size: how many bytes of memory are required.
|
|
|
|
* @flags: the type of memory to allocate.
|
|
|
|
*
|
|
|
|
* The contents of the object pointed to are preserved up to the
|
|
|
|
* lesser of the new and old sizes. If @p is %NULL, krealloc()
|
|
|
|
* behaves exactly like kmalloc(). If @size is 0 and @p is not a
|
|
|
|
* %NULL pointer, the object pointed to is freed.
|
|
|
|
*/
|
|
|
|
void *krealloc(const void *p, size_t new_size, gfp_t flags)
|
|
|
|
{
|
|
|
|
void *ret;
|
|
|
|
|
|
|
|
if (unlikely(!new_size)) {
|
2007-07-17 05:03:21 -06:00
|
|
|
kfree(p);
|
2008-07-26 18:49:33 -06:00
|
|
|
return ZERO_SIZE_PTR;
|
2007-07-17 05:03:21 -06:00
|
|
|
}
|
2008-07-26 18:49:33 -06:00
|
|
|
|
|
|
|
ret = __krealloc(p, new_size, flags);
|
|
|
|
if (ret && p != ret)
|
|
|
|
kfree(p);
|
|
|
|
|
2007-07-17 05:03:21 -06:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(krealloc);
|
|
|
|
|
2009-02-20 16:38:41 -07:00
|
|
|
/**
 * kzfree - like kfree but zero memory
 * @p: object to free memory of
 *
 * The memory of the object @p points to is zeroed before it is freed.
 * If @p is %NULL or ZERO_SIZE_PTR, kzfree() does nothing.
 *
 * Note: the number of bytes cleared comes from ksize(), i.e. the whole
 * allocated buffer, which can be a good deal bigger than the size that
 * was requested from kmalloc(). So be careful when using this function
 * in performance sensitive code.
 */
void kzfree(const void *p)
{
	void *obj = (void *)p;

	if (unlikely(ZERO_OR_NULL_PTR(obj)))
		return;

	memset(obj, 0, ksize(obj));
	kfree(obj);
}
EXPORT_SYMBOL(kzfree);
|
|
|
|
|
2006-03-24 04:18:42 -07:00
|
|
|
/*
|
|
|
|
* strndup_user - duplicate an existing string from user space
|
|
|
|
* @s: The string to duplicate
|
|
|
|
* @n: Maximum number of bytes to copy, including the trailing NUL.
|
|
|
|
*/
|
|
|
|
char *strndup_user(const char __user *s, long n)
|
|
|
|
{
|
|
|
|
char *p;
|
|
|
|
long length;
|
|
|
|
|
|
|
|
length = strnlen_user(s, n);
|
|
|
|
|
|
|
|
if (!length)
|
|
|
|
return ERR_PTR(-EFAULT);
|
|
|
|
|
|
|
|
if (length > n)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
2010-08-09 18:18:26 -06:00
|
|
|
p = memdup_user(s, length);
|
2006-03-24 04:18:42 -07:00
|
|
|
|
2010-08-09 18:18:26 -06:00
|
|
|
if (IS_ERR(p))
|
|
|
|
return p;
|
2006-03-24 04:18:42 -07:00
|
|
|
|
|
|
|
p[length - 1] = '\0';
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(strndup_user);
|
2008-07-25 20:44:36 -06:00
|
|
|
|
mm: nommu: sort mm->mmap list properly
When I was reading nommu code, I found that it handles the vma list/tree
in an unusual way. IIUC, because there can be more than one
identical/overlapped vma in the list/tree, it sorts the tree more
strictly and does a linear search on the tree. But this isn't applied to
the list (i.e. the list could be constructed in a different order than
the tree so that we can't use the list when finding the first vma in that
order).
Since inserting/sorting a vma in the tree and linking it into the list are
done at the same time, we can easily construct both of them in the same
order. And since a linear search on the tree could be more costly than one
on the list, the search can be converted to use the list.
Also, after the commit 297c5eee3724 ("mm: make the vma list be doubly
linked") made the list doubly linked, there were a couple of places in the
code that needed to be fixed to construct the list properly.
Patch 1/6 is a preparation. It keeps the list sorted the same way as the
tree and constructs the doubly-linked list properly. Patch 2/6 is a simple
optimization for the vma deletion. Patch 3/6 and 4/6 convert tree
traversal to list traversal and the rest are simple fixes and cleanups.
This patch:
@vma added into @mm should be sorted by start addr, end addr and VMA
struct addr in that order because we may get identical VMAs in the @mm.
However this was true only for the rbtree, not for the list.
This patch fixes this by remembering 'rb_prev' during the tree traversal
like find_vma_prepare() does and linking the @vma via __vma_link_list().
After this patch, we can iterate the whole VMAs in correct order simply by
using @mm->mmap list.
[akpm@linux-foundation.org: avoid duplicating __vma_link_list()]
Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-24 18:11:22 -06:00
|
|
|
void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
|
|
|
|
struct vm_area_struct *prev, struct rb_node *rb_parent)
|
|
|
|
{
|
|
|
|
struct vm_area_struct *next;
|
|
|
|
|
|
|
|
vma->vm_prev = prev;
|
|
|
|
if (prev) {
|
|
|
|
next = prev->vm_next;
|
|
|
|
prev->vm_next = vma;
|
|
|
|
} else {
|
|
|
|
mm->mmap = vma;
|
|
|
|
if (rb_parent)
|
|
|
|
next = rb_entry(rb_parent,
|
|
|
|
struct vm_area_struct, vm_rb);
|
|
|
|
else
|
|
|
|
next = NULL;
|
|
|
|
}
|
|
|
|
vma->vm_next = next;
|
|
|
|
if (next)
|
|
|
|
next->vm_prev = vma;
|
|
|
|
}
|
|
|
|
|
2010-01-15 18:01:35 -07:00
|
|
|
#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
/*
 * Default mmap layout, compiled only for MMU builds where
 * HAVE_ARCH_PICK_MMAP_LAYOUT is not defined: the mmap base is
 * TASK_UNMAPPED_BASE and the area hooks are the arch_* ones.
 */
void arch_pick_mmap_layout(struct mm_struct *mm)
{
	mm->get_unmapped_area = arch_get_unmapped_area;
	mm->unmap_area = arch_unmap_area;
	mm->mmap_base = TASK_UNMAPPED_BASE;
}
#endif
|
2008-08-12 16:52:52 -06:00
|
|
|
|
2010-08-22 05:08:57 -06:00
|
|
|
/*
 * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
 * back to the regular GUP.
 *
 * This is the weak default: if the architecture does not provide its own
 * version of this function, simply return with no page pinned.
 */
int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
						int nr_pages, int write,
						struct page **pages)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__get_user_pages_fast);
|
|
|
|
|
2009-04-13 15:40:05 -06:00
|
|
|
/**
|
|
|
|
* get_user_pages_fast() - pin user pages in memory
|
|
|
|
* @start: starting user address
|
|
|
|
* @nr_pages: number of pages from start to pin
|
|
|
|
* @write: whether pages will be written to
|
|
|
|
* @pages: array that receives pointers to the pages pinned.
|
|
|
|
* Should be at least nr_pages long.
|
|
|
|
*
|
|
|
|
* Returns number of pages pinned. This may be fewer than the number
|
|
|
|
* requested. If nr_pages is 0 or negative, returns 0. If no pages
|
|
|
|
* were pinned, returns -errno.
|
2009-06-16 16:31:39 -06:00
|
|
|
*
|
|
|
|
* get_user_pages_fast provides equivalent functionality to get_user_pages,
|
|
|
|
* operating on current and current->mm, with force=0 and vma=NULL. However
|
|
|
|
* unlike get_user_pages, it must be called without mmap_sem held.
|
|
|
|
*
|
|
|
|
* get_user_pages_fast may take mmap_sem and page table locks, so no
|
|
|
|
* assumptions can be made about lack of locking. get_user_pages_fast is to be
|
|
|
|
* implemented in a way that is advantageous (vs get_user_pages()) when the
|
|
|
|
* user memory area is already faulted in and present in ptes. However if the
|
|
|
|
* pages have to be faulted in, it may turn out to be slightly slower so
|
|
|
|
* callers need to carefully consider what to use. On many architectures,
|
|
|
|
* get_user_pages_fast simply falls back to get_user_pages.
|
2009-04-13 15:40:05 -06:00
|
|
|
*/
|
2008-08-12 16:52:52 -06:00
|
|
|
int __attribute__((weak)) get_user_pages_fast(unsigned long start,
|
|
|
|
int nr_pages, int write, struct page **pages)
|
|
|
|
{
|
|
|
|
struct mm_struct *mm = current->mm;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
down_read(&mm->mmap_sem);
|
|
|
|
ret = get_user_pages(current, mm, start, nr_pages,
|
|
|
|
write, 0, pages, NULL);
|
|
|
|
up_read(&mm->mmap_sem);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(get_user_pages_fast);
|
2009-03-23 07:12:24 -06:00
|
|
|
|
|
|
|
/* Tracepoint definitions: export the tracepoint symbols from <trace/events/kmem.h>. */
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(kmalloc);
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(kfree);
|
|
|
|
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
|