Merge branch 'akpm' (Andrew's fixups)

Merge random fixes from Andrew Morton.

* emailed from Andrew Morton <akpm@linux-foundation.org>: (11 patches)
  mm: correctly synchronize rss-counters at exit/exec
  btree: catch NULL value before it does harm
  btree: fix tree corruption in btree_get_prev()
  ipc: shm: restore MADV_REMOVE functionality on shared memory segments
  drivers/platform/x86/acerhdf.c: correct Boris' mail address
  c/r: prctl: drop VMA flags test on PR_SET_MM_ stack data assignment
  c/r: prctl: add ability to get clear_tid_address
  c/r: prctl: add minimal address test to PR_SET_MM
  c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal
  MAINTAINERS: whitespace fixes
  shmem: replace_page must flush_dcache and others
Linus Torvalds 2012-06-07 15:05:43 -07:00
commit 46edaedaf3
11 changed files with 118 additions and 69 deletions

drivers/platform/x86/acerhdf.c

@@ -5,7 +5,7 @@
  *
  * (C) 2009 - Peter Feuerer     peter (a) piie.net
  *                              http://piie.net
- *         2009 Borislav Petkov <petkovbb@gmail.com>
+ *         2009 Borislav Petkov bp (a) alien8.de
  *
  * Inspired by and many thanks to:
  *  o acerfand   - Rachel Greenham

fs/exec.c

@@ -819,7 +819,6 @@ static int exec_mmap(struct mm_struct *mm)
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
 	old_mm = current->mm;
-	sync_mm_rss(old_mm);
 	mm_release(tsk, old_mm);
 
 	if (old_mm) {

include/linux/prctl.h

@@ -145,4 +145,6 @@
 #define PR_SET_NO_NEW_PRIVS	38
 #define PR_GET_NO_NEW_PRIVS	39
 
+#define PR_GET_TID_ADDRESS	40
+
 #endif /* _LINUX_PRCTL_H */

include/linux/sched.h

@@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
 					/* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)

ipc/shm.c

@@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
 }
 
+static long shm_fallocate(struct file *file, int mode, loff_t offset,
+			  loff_t len)
+{
+	struct shm_file_data *sfd = shm_file_data(file);
+
+	if (!sfd->file->f_op->fallocate)
+		return -EOPNOTSUPP;
+	return sfd->file->f_op->fallocate(sfd->file, mode, offset, len);
+}
+
 static unsigned long shm_get_unmapped_area(struct file *file,
 	unsigned long addr, unsigned long len, unsigned long pgoff,
 	unsigned long flags)
@@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = {
 	.get_unmapped_area	= shm_get_unmapped_area,
 #endif
 	.llseek		= noop_llseek,
+	.fallocate	= shm_fallocate,
 };
 
 static const struct file_operations shm_file_operations_huge = {
@@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = {
 	.release	= shm_release,
 	.get_unmapped_area	= shm_get_unmapped_area,
 	.llseek		= noop_llseek,
+	.fallocate	= shm_fallocate,
 };
 
 int is_file_shm_hugepages(struct file *file)
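Note: what this restores, in userspace terms (a minimal sketch, not code from the commit): with ->fallocate wired through to the underlying tmpfs file, madvise(MADV_REMOVE) punches holes in SysV shared memory again instead of failing.

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/shm.h>

int main(void)
{
	/* One page is enough to demonstrate hole punching. */
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
	char *p = shmat(id, NULL, 0);

	if (id < 0 || p == (void *)-1)
		return 1;
	memset(p, 0xff, 4096);

	/* Failed (EOPNOTSUPP) before this fix; frees the backing page after. */
	if (madvise(p, 4096, MADV_REMOVE))
		perror("madvise(MADV_REMOVE)");

	shmdt(p);
	shmctl(id, IPC_RMID, NULL);
	return 0;
}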

kernel/exit.c

@@ -423,6 +423,7 @@ void daemonize(const char *name, ...)
 	 * user space pages.  We don't need them, and if we didn't close them
 	 * they would be locked into memory.
 	 */
+	mm_release(current, current->mm);
 	exit_mm(current);
 
 	/*
 	 * We don't want to get frozen, in case system-wide hibernation
@@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk)
 	struct mm_struct *mm = tsk->mm;
 	struct core_state *core_state;
 
-	mm_release(tsk, mm);
 	if (!mm)
 		return;
 	/*
@@ -960,9 +960,13 @@ void do_exit(long code)
 			preempt_count());
 
 	acct_update_integrals(tsk);
-	/* sync mm's RSS info before statistics gathering */
-	if (tsk->mm)
-		sync_mm_rss(tsk->mm);
+
+	/* Set exit_code before complete_vfork_done() in mm_release() */
+	tsk->exit_code = code;
+
+	/* Release mm and sync mm's RSS info before statistics gathering */
+	mm_release(tsk, tsk->mm);
+
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
 		hrtimer_cancel(&tsk->signal->real_timer);
@@ -975,7 +979,6 @@ void do_exit(long code)
 		tty_audit_exit();
 	audit_free(tsk);
 
-	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
 	exit_mm(tsk);

kernel/fork.c

@@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm)
 			module_put(mm->binfmt->module);
 		mmdrop(mm);
 	}
+
+	/*
+	 * Final rss-counter synchronization. After this point there must be
+	 * no pagefaults into this mm from the current context. Otherwise
+	 * mm->rss_stat will be inconsistent.
+	 */
+	if (mm)
+		sync_mm_rss(mm);
 }
 EXPORT_SYMBOL_GPL(mmput);

kernel/sys.c

@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
-static bool vma_flags_mismatch(struct vm_area_struct *vma,
-			       unsigned long required,
-			       unsigned long banned)
-{
-	return (vma->vm_flags & required) != required ||
-		(vma->vm_flags & banned);
-}
-
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+	struct vm_area_struct *vma;
 	struct file *exe_file;
 	struct dentry *dentry;
 	int err;
 
-	/*
-	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-	 * remain. So perform a quick test first.
-	 */
-	if (mm->num_exe_file_vmas)
-		return -EBUSY;
-
 	exe_file = fget(fd);
 	if (!exe_file)
 		return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Forbid mm->exe_file change if there are mapped other files.
+	 */
+	err = -EBUSY;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+						&exe_file->f_path))
+			goto exit_unlock;
+	}
 	/*
 	 * The symlink can be changed only once, just to disallow arbitrary
 	 * transitions malicious software might bring in. This means one
 	 * could make a snapshot over all processes running and monitor
 	 * /proc/pid/exe changes to notice unusual activity if needed.
 	 */
-	down_write(&mm->mmap_sem);
-	if (likely(!mm->exe_file))
-		set_mm_exe_file(mm, exe_file);
-	else
-		err = -EBUSY;
+	err = -EPERM;
+	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+		goto exit_unlock;
+
+	err = 0;
+	set_mm_exe_file(mm, exe_file);
+exit_unlock:
 	up_write(&mm->mmap_sem);
 exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	if (opt == PR_SET_MM_EXE_FILE)
 		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
-	if (addr >= TASK_SIZE)
+	if (addr >= TASK_SIZE || addr < mmap_min_addr)
 		return -EINVAL;
 
 	error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
 			error = -EFAULT;
 			goto out;
 		}
-#ifdef CONFIG_STACK_GROWSUP
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
-#else
-		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
-#endif
-			goto out;
 		if (opt == PR_SET_MM_START_STACK)
 			mm->start_stack = addr;
 		else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	up_read(&mm->mmap_sem);
 	return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return put_user(me->clear_child_tid, tid_addr);
+}
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	return -EINVAL;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return -EINVAL;
+}
 #endif
 
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			else
 				return -EINVAL;
 			break;
+		case PR_GET_TID_ADDRESS:
+			error = prctl_get_tid_address(me, (int __user **)arg2);
+			break;
 		default:
 			return -EINVAL;
 		}
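Note: a minimal userspace sketch of the new option (assumes a kernel built with CONFIG_CHECKPOINT_RESTORE; the fallback define only covers pre-3.5 headers). The kernel writes back the clear_child_tid address registered via set_tid_address() or CLONE_CHILD_CLEARTID, which checkpoint/restore needs to re-create threads faithfully.

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_GET_TID_ADDRESS
#define PR_GET_TID_ADDRESS 40	/* matches the new include/linux/prctl.h */
#endif

int main(void)
{
	int *tid_addr = NULL;

	/* The kernel stores current->clear_child_tid through arg2;
	 * returns -EINVAL without CONFIG_CHECKPOINT_RESTORE. */
	if (prctl(PR_GET_TID_ADDRESS, (unsigned long)&tid_addr, 0, 0, 0))
		perror("prctl(PR_GET_TID_ADDRESS)");
	else
		printf("clear_child_tid = %p\n", (void *)tid_addr);
	return 0;
}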

lib/btree.c

@@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
 	if (head->height == 0)
 		return NULL;
-retry:
 	longcpy(key, __key, geo->keylen);
+retry:
 	dec_key(geo, key);
 
 	node = head->node;
@@ -351,7 +351,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
 	}
 miss:
 	if (retry_key) {
-		__key = retry_key;
+		longcpy(key, retry_key, geo->keylen);
 		retry_key = NULL;
 		goto retry;
 	}
@@ -509,6 +509,7 @@ static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
 int btree_insert(struct btree_head *head, struct btree_geo *geo,
 		unsigned long *key, void *val, gfp_t gfp)
 {
+	BUG_ON(!val);
 	return btree_insert_level(head, geo, key, val, 1, gfp);
 }
 EXPORT_SYMBOL_GPL(btree_insert);
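Note: for context, the usage pattern these two fixes harden (a sketch against the lib/btree API, not code from this commit). A descending walk feeds the key returned by btree_last() back into btree_get_prev(), so btree_get_prev() must not scribble on the caller's key when it retries internally; and because a NULL return from lookup means "not found", btree_insert() now rejects NULL values up front.

#include <linux/btree.h>
#include <linux/printk.h>

/* Visit every entry in descending key order. */
static void btree_walk_down(struct btree_head *head)
{
	unsigned long key;
	void *val;

	for (val = btree_last(head, &btree_geo32, &key);
	     val;
	     val = btree_get_prev(head, &btree_geo32, &key))
		pr_info("key %lu -> %p\n", key, val);
}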

mm/shmem.c

@@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	mutex_lock(&shmem_swaplist_mutex);
 	/*
 	 * We needed to drop mutex to make that restrictive page
-	 * allocation; but the inode might already be freed by now,
-	 * and we cannot refer to inode or mapping or info to check.
-	 * However, we do hold page lock on the PageSwapCache page,
-	 * so can check if that still has our reference remaining.
+	 * allocation, but the inode might have been freed while we
+	 * dropped it: although a racing shmem_evict_inode() cannot
+	 * complete without emptying the radix_tree, our page lock
+	 * on this swapcache page is not enough to prevent that -
+	 * free_swap_and_cache() of our swap entry will only
+	 * trylock_page(), removing swap from radix_tree whatever.
+	 *
+	 * We must not proceed to shmem_add_to_page_cache() if the
+	 * inode has been freed, but of course we cannot rely on
+	 * inode or mapping or info to check that. However, we can
+	 * safely check if our swap entry is still in use (and here
+	 * it can't have got reused for another page): if it's still
+	 * in use, then the inode cannot have been freed yet, and we
+	 * can safely proceed (if it's no longer in use, that tells
+	 * nothing about the inode, but we don't need to unuse swap).
 	 */
 	if (!page_swapcount(*pagep))
 		error = -ENOENT;
@@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 
 	/*
 	 * There's a faint possibility that swap page was replaced before
-	 * caller locked it: it will come back later with the right page.
+	 * caller locked it: caller will come back later with the right page.
 	 */
-	if (unlikely(!PageSwapCache(page)))
+	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
 		goto out;
 
 	/*
@@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	newpage = shmem_alloc_page(gfp, info, index);
 	if (!newpage)
 		return -ENOMEM;
-	VM_BUG_ON(shmem_should_replace_page(newpage, gfp));
 
-	*pagep = newpage;
 	page_cache_get(newpage);
 	copy_highpage(newpage, oldpage);
+	flush_dcache_page(newpage);
 
-	VM_BUG_ON(!PageLocked(oldpage));
 	__set_page_locked(newpage);
-	VM_BUG_ON(!PageUptodate(oldpage));
 	SetPageUptodate(newpage);
-	VM_BUG_ON(!PageSwapBacked(oldpage));
 	SetPageSwapBacked(newpage);
-	VM_BUG_ON(!swap_index);
 	set_page_private(newpage, swap_index);
-	VM_BUG_ON(!PageSwapCache(oldpage));
 	SetPageSwapCache(newpage);
 
 	/*
@@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	spin_lock_irq(&swap_mapping->tree_lock);
 	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
 								   newpage);
-	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-	__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	if (!error) {
+		__inc_zone_page_state(newpage, NR_FILE_PAGES);
+		__dec_zone_page_state(oldpage, NR_FILE_PAGES);
+	}
 	spin_unlock_irq(&swap_mapping->tree_lock);
-	BUG_ON(error);
 
-	mem_cgroup_replace_page_cache(oldpage, newpage);
-	lru_cache_add_anon(newpage);
+	if (unlikely(error)) {
+		/*
+		 * Is this possible? I think not, now that our callers check
+		 * both PageSwapCache and page_private after getting page lock;
+		 * but be defensive. Reverse old to newpage for clear and free.
+		 */
+		oldpage = newpage;
+	} else {
+		mem_cgroup_replace_page_cache(oldpage, newpage);
+		lru_cache_add_anon(newpage);
+		*pagep = newpage;
+	}
 
 	ClearPageSwapCache(oldpage);
 	set_page_private(oldpage, 0);
@@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	unlock_page(oldpage);
 	page_cache_release(oldpage);
 	page_cache_release(oldpage);
 
-	return 0;
+	return error;
 }
@@ -1107,7 +1123,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 
 		/* We have to do this with page locked to prevent races */
 		lock_page(page);
-		if (!PageSwapCache(page) || page->mapping) {
+		if (!PageSwapCache(page) || page_private(page) != swap.val ||
+		    page->mapping) {
 			error = -EEXIST;	/* try again */
 			goto failed;
 		}