Merge branch 'akpm' (Andrew's fixups)
Merge random fixes from Andrew Morton.

* emailed from Andrew Morton <akpm@linux-foundation.org>: (11 patches)
  mm: correctly synchronize rss-counters at exit/exec
  btree: catch NULL value before it does harm
  btree: fix tree corruption in btree_get_prev()
  ipc: shm: restore MADV_REMOVE functionality on shared memory segments
  drivers/platform/x86/acerhdf.c: correct Boris' mail address
  c/r: prctl: drop VMA flags test on PR_SET_MM_ stack data assignment
  c/r: prctl: add ability to get clear_tid_address
  c/r: prctl: add minimal address test to PR_SET_MM
  c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal
  MAINTAINERS: whitespace fixes
  shmem: replace_page must flush_dcache and others
commit 46edaedaf3
11 changed files with 118 additions and 69 deletions
drivers/platform/x86/acerhdf.c
@@ -5,7 +5,7 @@
 *
 * (C) 2009 - Peter Feuerer peter (a) piie.net
 * http://piie.net
 * 2009 Borislav Petkov <petkovbb@gmail.com>
 * 2009 Borislav Petkov bp (a) alien8.de
 *
 * Inspired by and many thanks to:
 *  o acerfand - Rachel Greenham
fs/exec.c
@@ -819,7 +819,6 @@ static int exec_mmap(struct mm_struct *mm)
	/* Notify parent that we're no longer interested in the old VM */
	tsk = current;
	old_mm = current->mm;
	sync_mm_rss(old_mm);
	mm_release(tsk, old_mm);

	if (old_mm) {
include/linux/prctl.h
@@ -145,4 +145,6 @@
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39

#define PR_GET_TID_ADDRESS 40

#endif /* _LINUX_PRCTL_H */
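PR_GET_TID_ADDRESS reads back the clear_child_tid pointer registered via set_tid_address()/CLONE_CHILD_CLEARTID, which a checkpoint/restore tool needs in order to re-create a thread faithfully. For illustration only (not part of the commit), a minimal userspace sketch of a call might look like the following; it assumes a kernel built with CONFIG_CHECKPOINT_RESTORE and defines the constant itself in case the installed headers predate this patch:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_TID_ADDRESS
#define PR_GET_TID_ADDRESS 40	/* value taken from the hunk above */
#endif

int main(void)
{
	int *tid_addr = NULL;

	/* The kernel put_user()s task->clear_child_tid into the location
	 * passed as arg2 (an int ** in userspace terms). */
	if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0))
		perror("prctl(PR_GET_TID_ADDRESS)"); /* EINVAL without CONFIG_CHECKPOINT_RESTORE */
	else
		printf("clear_child_tid address: %p\n", (void *)tid_addr);
	return 0;
}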
include/linux/sched.h
@@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
/* leave room for more dump flags */
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */

#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
ipc/shm.c (12 changed lines)
@@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
}

static long shm_fallocate(struct file *file, int mode, loff_t offset,
			  loff_t len)
{
	struct shm_file_data *sfd = shm_file_data(file);

	if (!sfd->file->f_op->fallocate)
		return -EOPNOTSUPP;
	return sfd->file->f_op->fallocate(file, mode, offset, len);
}

static unsigned long shm_get_unmapped_area(struct file *file,
	unsigned long addr, unsigned long len, unsigned long pgoff,
	unsigned long flags)
@@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = {
	.get_unmapped_area = shm_get_unmapped_area,
#endif
	.llseek = noop_llseek,
	.fallocate = shm_fallocate,
};

static const struct file_operations shm_file_operations_huge = {
@@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = {
	.release = shm_release,
	.get_unmapped_area = shm_get_unmapped_area,
	.llseek = noop_llseek,
	.fallocate = shm_fallocate,
};

int is_file_shm_hugepages(struct file *file)
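Hooking ->fallocate into both sets of shm file operations is what lets madvise(MADV_REMOVE) punch holes in System V shared memory again, since madvise_remove() now goes through the fallocate hole-punching path. A hypothetical userspace sketch of the restored behaviour, not taken from the patch, with arbitrary sizes:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1 << 20;	/* 1 MiB segment, page-aligned */
	int id = shmget(IPC_PRIVATE, len, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	if (id < 0 || p == (char *)-1) {
		perror("shmget/shmat");
		return 1;
	}
	memset(p, 0xaa, len);	/* fault the pages in */

	/* Without a ->fallocate hook this failed (typically EOPNOTSUPP);
	 * with the hunks above the backing pages are dropped again. */
	if (madvise(p, len, MADV_REMOVE))
		perror("madvise(MADV_REMOVE)");

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}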
kernel/exit.c
@@ -423,6 +423,7 @@ void daemonize(const char *name, ...)
	 * user space pages. We don't need them, and if we didn't close them
	 * they would be locked into memory.
	 */
	mm_release(current, current->mm);
	exit_mm(current);
	/*
	 * We don't want to get frozen, in case system-wide hibernation
@@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk)
	struct mm_struct *mm = tsk->mm;
	struct core_state *core_state;

	mm_release(tsk, mm);
	if (!mm)
		return;
	/*
@@ -960,9 +960,13 @@ void do_exit(long code)
			preempt_count());

	acct_update_integrals(tsk);
	/* sync mm's RSS info before statistics gathering */
	if (tsk->mm)
		sync_mm_rss(tsk->mm);

	/* Set exit_code before complete_vfork_done() in mm_release() */
	tsk->exit_code = code;

	/* Release mm and sync mm's RSS info before statistics gathering */
	mm_release(tsk, tsk->mm);

	group_dead = atomic_dec_and_test(&tsk->signal->live);
	if (group_dead) {
		hrtimer_cancel(&tsk->signal->real_timer);
@@ -975,7 +979,6 @@ void do_exit(long code)
	tty_audit_exit();
	audit_free(tsk);

	tsk->exit_code = code;
	taskstats_exit(tsk, group_dead);

	exit_mm(tsk);
kernel/fork.c
@@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm)
		module_put(mm->binfmt->module);
	mmdrop(mm);
}

/*
 * Final rss-counter synchronization. After this point there must be
 * no pagefaults into this mm from the current context. Otherwise
 * mm->rss_stat will be inconsistent.
 */
if (mm)
	sync_mm_rss(mm);
}
EXPORT_SYMBOL_GPL(mmput);
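Taken together, the exec.c, exit.c and fork.c hunks move the final sync_mm_rss() flush after the last place that can still fault (mm_release() writes the clear_child_tid word with put_user()). A toy userspace model of the ordering issue, with made-up names and obviously not kernel code, shows why flushing too early leaves the shared counter short:

#include <stdio.h>

struct model_mm   { long rss; };	/* shared, per-mm counter */
struct model_task { long rss_delta; };	/* per-task cached delta */

static void model_fault(struct model_task *t)
{
	t->rss_delta++;			/* a page fault bumps the per-task delta */
}

static void model_sync_rss(struct model_task *t, struct model_mm *mm)
{
	mm->rss += t->rss_delta;	/* flush the delta into the mm */
	t->rss_delta = 0;
}

/* mm_release() may still fault: it put_user()s the clear_child_tid word. */
static void model_mm_release(struct model_task *t)
{
	model_fault(t);
}

int main(void)
{
	struct model_mm mm = { 0 };
	struct model_task t = { 0 };

	/* Old order: sync first, then mm_release() dirties the delta again. */
	model_fault(&t);
	model_sync_rss(&t, &mm);
	model_mm_release(&t);
	printf("old order: mm.rss=%ld, unflushed delta=%ld\n", mm.rss, t.rss_delta);

	/* New order: mm_release() first, sync last, nothing is left behind. */
	mm.rss = 0;
	t.rss_delta = 0;
	model_fault(&t);
	model_mm_release(&t);
	model_sync_rss(&t, &mm);
	printf("new order: mm.rss=%ld, unflushed delta=%ld\n", mm.rss, t.rss_delta);
	return 0;
}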
kernel/sys.c (58 changed lines)
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static bool vma_flags_mismatch(struct vm_area_struct *vma,
			       unsigned long required,
			       unsigned long banned)
{
	return (vma->vm_flags & required) != required ||
		(vma->vm_flags & banned);
}

static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct vm_area_struct *vma;
	struct file *exe_file;
	struct dentry *dentry;
	int err;

	/*
	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
	 * remain. So perform a quick test first.
	 */
	if (mm->num_exe_file_vmas)
		return -EBUSY;

	exe_file = fget(fd);
	if (!exe_file)
		return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
	if (err)
		goto exit;

	down_write(&mm->mmap_sem);

	/*
	 * Forbid mm->exe_file change if there are mapped other files.
	 */
	err = -EBUSY;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
						&exe_file->f_path))
			goto exit_unlock;
	}

	/*
	 * The symlink can be changed only once, just to disallow arbitrary
	 * transitions malicious software might bring in. This means one
	 * could make a snapshot over all processes running and monitor
	 * /proc/pid/exe changes to notice unusual activity if needed.
	 */
	down_write(&mm->mmap_sem);
	if (likely(!mm->exe_file))
	err = -EPERM;
	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
		goto exit_unlock;

	set_mm_exe_file(mm, exe_file);
	else
		err = -EBUSY;
exit_unlock:
	up_write(&mm->mmap_sem);

exit:
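With MMF_EXE_FILE_CHANGED the /proc/<pid>/exe symlink can be re-pointed at most once per mm, and only while no other files are mapped. An illustrative userspace sketch, not from the patch; it needs CAP_SYS_RESOURCE and CONFIG_CHECKPOINT_RESTORE, and the PR_SET_MM/PR_SET_MM_EXE_FILE constants are assumed to come from <linux/prctl.h> (pulled in by <sys/prctl.h>):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/bin/true";
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* First attempt: succeeds only if no other files are mapped
	 * (e.g. in a carefully prepared restore context); an ordinary
	 * process usually gets EBUSY because shared libraries are mapped. */
	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, (unsigned long)fd, 0, 0))
		perror("prctl(PR_SET_MM_EXE_FILE) #1");
	/* Second attempt after a successful first one: MMF_EXE_FILE_CHANGED
	 * is already set, so any further change is refused. */
	if (prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, (unsigned long)fd, 0, 0))
		perror("prctl(PR_SET_MM_EXE_FILE) #2");
	close(fd);
	return 0;
}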
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (addr >= TASK_SIZE)
	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
		error = -EFAULT;
		goto out;
	}
#ifdef CONFIG_STACK_GROWSUP
	if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
#else
	if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
#endif
		goto out;
	if (opt == PR_SET_MM_START_STACK)
		mm->start_stack = addr;
	else if (opt == PR_SET_MM_ARG_START)
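Dropping the flags test means a restorer only has to pass an address that lies in some mapping (and, per the previous hunk, above mmap_min_addr and below TASK_SIZE). An illustrative sketch of such a caller, not from the patch; it needs CAP_SYS_RESOURCE and CONFIG_CHECKPOINT_RESTORE, and the constants are assumed to come from <linux/prctl.h>:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	/* A private mapping standing in for the restored stack area. */
	char *stack = mmap(NULL, len, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (stack == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Any address inside the mapping is acceptable now that the
	 * VM_GROWSDOWN/VM_GROWSUP flags check is gone. */
	unsigned long addr = (unsigned long)(stack + len - sizeof(long));

	if (prctl(PR_SET_MM, PR_SET_MM_START_STACK, addr, 0, 0))
		perror("prctl(PR_SET_MM_START_STACK)");
	return 0;
}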
@@ -1981,12 +1974,22 @@ static int prctl_set_mm(int opt, unsigned long addr,
	up_read(&mm->mmap_sem);
	return error;
}

static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}

#else /* CONFIG_CHECKPOINT_RESTORE */
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	return -EINVAL;
}
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return -EINVAL;
}
#endif

SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		else
			return -EINVAL;
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	default:
		return -EINVAL;
	}
lib/btree.c
@@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,

	if (head->height == 0)
		return NULL;
retry:
	longcpy(key, __key, geo->keylen);
retry:
	dec_key(geo, key);

	node = head->node;
@@ -351,7 +351,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
	}
miss:
	if (retry_key) {
		__key = retry_key;
		longcpy(key, retry_key, geo->keylen);
		retry_key = NULL;
		goto retry;
	}
@@ -509,6 +509,7 @@ static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
int btree_insert(struct btree_head *head, struct btree_geo *geo,
		unsigned long *key, void *val, gfp_t gfp)
{
	BUG_ON(!val);
	return btree_insert_level(head, geo, key, val, 1, gfp);
}
EXPORT_SYMBOL_GPL(btree_insert);
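The btree lookup and visitor code treat a NULL value as "no entry", so storing NULL would make a key indistinguishable from an empty slot; the new BUG_ON(!val) catches that at insert time (the retry/longcpy change above fixes a separate btree_get_prev() corruption). A small kernel-module style sketch of the lib/btree API, assumed usage rather than part of the patch, requiring CONFIG_BTREE:

#include <linux/module.h>
#include <linux/btree.h>

static struct btree_head head;

static int __init btree_demo_init(void)
{
	unsigned long key = 42;
	int err = btree_init(&head);

	if (err)
		return err;

	/* Fine: any non-NULL cookie can be stored under a key. */
	err = btree_insert(&head, &btree_geo64, &key, (void *)0x1234,
			   GFP_KERNEL);

	/* Never do this: it now trips BUG_ON(!val) in btree_insert().
	 * btree_insert(&head, &btree_geo64, &key, NULL, GFP_KERNEL);
	 */
	return err;
}

static void __exit btree_demo_exit(void)
{
	btree_destroy(&head);
}

module_init(btree_demo_init);
module_exit(btree_demo_exit);
MODULE_LICENSE("GPL");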
mm/shmem.c (49 changed lines)
@@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
	mutex_lock(&shmem_swaplist_mutex);
	/*
	 * We needed to drop mutex to make that restrictive page
	 * allocation; but the inode might already be freed by now,
	 * and we cannot refer to inode or mapping or info to check.
	 * However, we do hold page lock on the PageSwapCache page,
	 * so can check if that still has our reference remaining.
	 * allocation, but the inode might have been freed while we
	 * dropped it: although a racing shmem_evict_inode() cannot
	 * complete without emptying the radix_tree, our page lock
	 * on this swapcache page is not enough to prevent that -
	 * free_swap_and_cache() of our swap entry will only
	 * trylock_page(), removing swap from radix_tree whatever.
	 *
	 * We must not proceed to shmem_add_to_page_cache() if the
	 * inode has been freed, but of course we cannot rely on
	 * inode or mapping or info to check that. However, we can
	 * safely check if our swap entry is still in use (and here
	 * it can't have got reused for another page): if it's still
	 * in use, then the inode cannot have been freed yet, and we
	 * can safely proceed (if it's no longer in use, that tells
	 * nothing about the inode, but we don't need to unuse swap).
	 */
	if (!page_swapcount(*pagep))
		error = -ENOENT;
@@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)

	/*
	 * There's a faint possibility that swap page was replaced before
	 * caller locked it: it will come back later with the right page.
	 * caller locked it: caller will come back later with the right page.
	 */
	if (unlikely(!PageSwapCache(page)))
	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
		goto out;

	/*
@@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
	newpage = shmem_alloc_page(gfp, info, index);
	if (!newpage)
		return -ENOMEM;
	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));

	*pagep = newpage;
	page_cache_get(newpage);
	copy_highpage(newpage, oldpage);
	flush_dcache_page(newpage);

	VM_BUG_ON(!PageLocked(oldpage));
	__set_page_locked(newpage);
	VM_BUG_ON(!PageUptodate(oldpage));
	SetPageUptodate(newpage);
	VM_BUG_ON(!PageSwapBacked(oldpage));
	SetPageSwapBacked(newpage);
	VM_BUG_ON(!swap_index);
	set_page_private(newpage, swap_index);
	VM_BUG_ON(!PageSwapCache(oldpage));
	SetPageSwapCache(newpage);

	/*
@@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
	spin_lock_irq(&swap_mapping->tree_lock);
	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
					 newpage);
	if (!error) {
		__inc_zone_page_state(newpage, NR_FILE_PAGES);
		__dec_zone_page_state(oldpage, NR_FILE_PAGES);
	}
	spin_unlock_irq(&swap_mapping->tree_lock);
	BUG_ON(error);

	if (unlikely(error)) {
		/*
		 * Is this possible? I think not, now that our callers check
		 * both PageSwapCache and page_private after getting page lock;
		 * but be defensive. Reverse old to newpage for clear and free.
		 */
		oldpage = newpage;
	} else {
		mem_cgroup_replace_page_cache(oldpage, newpage);
		lru_cache_add_anon(newpage);
		*pagep = newpage;
	}

	ClearPageSwapCache(oldpage);
	set_page_private(oldpage, 0);
@@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
	unlock_page(oldpage);
	page_cache_release(oldpage);
	page_cache_release(oldpage);
	return 0;
	return error;
}

/*
@@ -1107,7 +1123,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,

	/* We have to do this with page locked to prevent races */
	lock_page(page);
	if (!PageSwapCache(page) || page->mapping) {
	if (!PageSwapCache(page) || page_private(page) != swap.val ||
	    page->mapping) {
		error = -EEXIST; /* try again */
		goto failed;
	}