2006-01-08 02:01:31 -07:00
|
|
|
/*
|
2007-10-16 02:26:54 -06:00
|
|
|
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
2005-04-16 16:20:36 -06:00
|
|
|
* Licensed under the GPL
|
|
|
|
*/
|
|
|
|
|
2007-10-16 02:26:54 -06:00
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/hardirq.h>
|
|
|
|
#include <asm/current.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/tlbflush.h>
|
2007-05-06 15:51:07 -06:00
|
|
|
#include "arch.h"
|
2007-10-16 02:26:54 -06:00
|
|
|
#include "as-layout.h"
|
|
|
|
#include "kern_util.h"
|
2006-01-08 02:01:31 -07:00
|
|
|
#include "os.h"
|
2008-02-04 23:30:47 -07:00
|
|
|
#include "skas.h"
|
2007-10-16 02:26:54 -06:00
|
|
|
#include "sysdep/sigcontext.h"
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2007-10-16 02:26:54 -06:00
|
|
|
/*
|
|
|
|
* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
|
|
|
|
* segv().
|
|
|
|
*/
|
2006-07-10 05:45:13 -06:00
|
|
|
int handle_page_fault(unsigned long address, unsigned long ip,
|
2005-04-16 16:20:36 -06:00
|
|
|
int is_write, int is_user, int *code_out)
|
|
|
|
{
|
|
|
|
struct mm_struct *mm = current->mm;
|
|
|
|
struct vm_area_struct *vma;
|
|
|
|
pgd_t *pgd;
|
|
|
|
pud_t *pud;
|
|
|
|
pmd_t *pmd;
|
|
|
|
pte_t *pte;
|
|
|
|
int err = -EFAULT;
|
|
|
|
|
|
|
|
*code_out = SEGV_MAPERR;
|
2005-09-22 22:44:20 -06:00
|
|
|
|
2007-10-16 02:26:54 -06:00
|
|
|
/*
|
|
|
|
* If the fault was during atomic operation, don't take the fault, just
|
|
|
|
* fail.
|
|
|
|
*/
|
2005-09-22 22:44:20 -06:00
|
|
|
if (in_atomic())
|
|
|
|
goto out_nosemaphore;
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
down_read(&mm->mmap_sem);
|
|
|
|
vma = find_vma(mm, address);
|
2007-10-16 02:26:54 -06:00
|
|
|
if (!vma)
|
2005-04-16 16:20:36 -06:00
|
|
|
goto out;
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (vma->vm_start <= address)
|
2005-04-16 16:20:36 -06:00
|
|
|
goto good_area;
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (!(vma->vm_flags & VM_GROWSDOWN))
|
2005-04-16 16:20:36 -06:00
|
|
|
goto out;
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (is_user && !ARCH_IS_STACKGROW(address))
|
2005-04-16 16:20:36 -06:00
|
|
|
goto out;
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (expand_stack(vma, address))
|
2005-04-16 16:20:36 -06:00
|
|
|
goto out;
|
|
|
|
|
2005-09-03 16:57:26 -06:00
|
|
|
good_area:
|
2005-04-16 16:20:36 -06:00
|
|
|
*code_out = SEGV_ACCERR;
|
2007-10-16 02:26:54 -06:00
|
|
|
if (is_write && !(vma->vm_flags & VM_WRITE))
|
2005-04-16 16:20:36 -06:00
|
|
|
goto out;
|
2005-05-20 14:59:08 -06:00
|
|
|
|
2005-09-10 11:44:57 -06:00
|
|
|
/* Don't require VM_READ|VM_EXEC for write faults! */
|
2007-10-16 02:26:54 -06:00
|
|
|
if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
|
2007-05-06 15:51:24 -06:00
|
|
|
goto out;
|
2005-05-20 14:59:08 -06:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
do {
|
2007-07-19 02:47:05 -06:00
|
|
|
int fault;
|
2009-01-06 15:38:59 -07:00
|
|
|
|
2009-04-10 10:01:23 -06:00
|
|
|
fault = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
|
2007-07-19 02:47:05 -06:00
|
|
|
if (unlikely(fault & VM_FAULT_ERROR)) {
|
|
|
|
if (fault & VM_FAULT_OOM) {
|
|
|
|
goto out_of_memory;
|
|
|
|
} else if (fault & VM_FAULT_SIGBUS) {
|
|
|
|
err = -EACCES;
|
|
|
|
goto out;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
BUG();
|
|
|
|
}
|
2007-07-19 02:47:05 -06:00
|
|
|
if (fault & VM_FAULT_MAJOR)
|
|
|
|
current->maj_flt++;
|
|
|
|
else
|
|
|
|
current->min_flt++;
|
|
|
|
|
2005-09-03 16:57:26 -06:00
|
|
|
pgd = pgd_offset(mm, address);
|
|
|
|
pud = pud_offset(pgd, address);
|
|
|
|
pmd = pmd_offset(pud, address);
|
|
|
|
pte = pte_offset_kernel(pmd, address);
|
2007-10-16 02:26:54 -06:00
|
|
|
} while (!pte_present(*pte));
|
2005-04-16 16:20:36 -06:00
|
|
|
err = 0;
|
2007-10-16 02:26:54 -06:00
|
|
|
/*
|
|
|
|
* The below warning was added in place of
|
[PATCH] uml: remove bogus WARN_ON, triggerable harmlessly on a page fault race
The below warning was added in place of pte_mkyoung(); if (is_write)
pte_mkdirty();
In fact, if the PTE is not marked young/dirty, our dirty/accessed bit
emulation would cause the TLB permission not to be changed, and so we'd loop,
and given we don't support preemption yet, we'd busy-hang here.
However, I've seen this warning trigger without crashes during a loop of
concurrent kernel builds, at random times (i.e. like a race condition), and I
realized that two concurrent faults on the same page, one on read and one on
write, can trigger it. The read fault gets serviced and the PTE gets marked
writable but clean (it's possible on a shared-writable mapping), while the
generic code sees the PTE was already installed and returns without action. In
this case, we'll see another fault and service it normally.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-13 17:07:04 -07:00
|
|
|
* pte_mkyoung(); if (is_write) pte_mkdirty();
|
|
|
|
* If it's triggered, we'd see normally a hang here (a clean pte is
|
|
|
|
* marked read-only to emulate the dirty bit).
|
|
|
|
* However, the generic code can mark a PTE writable but clean on a
|
|
|
|
* concurrent read fault, triggering this harmlessly. So comment it out.
|
|
|
|
*/
|
|
|
|
#if 0
|
2005-09-10 11:44:58 -06:00
|
|
|
WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
|
[PATCH] uml: remove bogus WARN_ON, triggerable harmlessly on a page fault race
The below warning was added in place of pte_mkyoung(); if (is_write)
pte_mkdirty();
In fact, if the PTE is not marked young/dirty, our dirty/accessed bit
emulation would cause the TLB permission not to be changed, and so we'd loop,
and given we don't support preemption yet, we'd busy-hang here.
However, I've seen this warning trigger without crashes during a loop of
concurrent kernel builds, at random times (i.e. like a race condition), and I
realized that two concurrent faults on the same page, one on read and one on
write, can trigger it. The read fault gets serviced and the PTE gets marked
writable but clean (it's possible on a shared-writable mapping), while the
generic code sees the PTE was already installed and returns without action. In
this case, we'll see another fault and service it normally.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Acked-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-11-13 17:07:04 -07:00
|
|
|
#endif
|
2005-09-03 16:57:26 -06:00
|
|
|
flush_tlb_page(vma, address);
|
|
|
|
out:
|
2005-04-16 16:20:36 -06:00
|
|
|
up_read(&mm->mmap_sem);
|
2005-09-22 22:44:20 -06:00
|
|
|
out_nosemaphore:
|
2007-10-16 02:26:54 -06:00
|
|
|
return err;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
out_of_memory:
|
2009-01-06 15:38:59 -07:00
|
|
|
/*
|
|
|
|
* We ran out of memory, call the OOM killer, and return the userspace
|
|
|
|
* (which will retry the fault, or kill us if we got oom-killed).
|
|
|
|
*/
|
|
|
|
up_read(&mm->mmap_sem);
|
|
|
|
pagefault_out_of_memory();
|
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2007-02-10 02:44:14 -07:00
|
|
|
static void bad_segv(struct faultinfo fi, unsigned long ip)
|
|
|
|
{
|
|
|
|
struct siginfo si;
|
|
|
|
|
|
|
|
si.si_signo = SIGSEGV;
|
|
|
|
si.si_code = SEGV_ACCERR;
|
|
|
|
si.si_addr = (void __user *) FAULT_ADDRESS(fi);
|
|
|
|
current->thread.arch.faultinfo = fi;
|
|
|
|
force_sig_info(SIGSEGV, &si, current);
|
|
|
|
}
|
|
|
|
|
2008-02-04 23:30:58 -07:00
|
|
|
void fatal_sigsegv(void)
|
|
|
|
{
|
|
|
|
force_sigsegv(SIGSEGV, current);
|
|
|
|
do_signal();
|
|
|
|
/*
|
|
|
|
* This is to tell gcc that we're not returning - do_signal
|
|
|
|
* can, in general, return, but in this case, it's not, since
|
|
|
|
* we just got a fatal SIGSEGV queued.
|
|
|
|
*/
|
|
|
|
os_dump_core();
|
|
|
|
}
|
|
|
|
|
2008-02-04 23:30:46 -07:00
|
|
|
void segv_handler(int sig, struct uml_pt_regs *regs)
|
2006-01-08 02:01:32 -07:00
|
|
|
{
|
|
|
|
struct faultinfo * fi = UPT_FAULTINFO(regs);
|
|
|
|
|
2007-10-16 02:26:54 -06:00
|
|
|
if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
|
2006-01-08 02:01:32 -07:00
|
|
|
bad_segv(*fi, UPT_IP(regs));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
|
|
|
|
}
|
|
|
|
|
[PATCH] uml: S390 preparation, abstract host page fault data
This patch removes the arch-specific fault/trap-infos from thread and
skas-regs.
It adds a new struct faultinfo, that is arch-specific defined in
sysdep/faultinfo.h.
The structure is inserted in thread.arch and thread.regs.skas and
thread.regs.tt
Now, segv and other trap-handlers can copy the contents from regs.X.faultinfo
to thread.arch.faultinfo with one simple assignment.
Also, the number of macros necessary is reduced to
FAULT_ADDRESS(struct faultinfo)
extracts the faulting address from faultinfo
FAULT_WRITE(struct faultinfo)
extracts the "is_write" flag
SEGV_IS_FIXABLE(struct faultinfo)
is true for the fixable segvs, i.e. (TRAP == 14)
on i386
UPT_FAULTINFO(regs)
result is (struct faultinfo *) to the faultinfo
in regs->skas.faultinfo
GET_FAULTINFO_FROM_SC(struct faultinfo, struct sigcontext *)
copies the relevant parts of the sigcontext to
struct faultinfo.
On SIGSEGV, call user_signal() instead of handle_segv(), if the architecture
provides the information needed in PTRACE_FAULTINFO, or if PTRACE_FAULTINFO is
missing, because segv-stub will provide the info.
The benefit of the change is, that in case of a non-fixable SIGSEGV, we can
give user processes a SIGSEGV, instead of possibly looping on pagefault
handling.
Since handle_segv() skipped arch_fixup() implicitly by passing ip==0 to segv(),
I changed segv() to call arch_fixup() only, if !is_user.
Signed-off-by: Bodo Stroesser <bstroesser@fujitsu-siemens.com>
Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-05-05 17:15:31 -06:00
|
|
|
/*
|
|
|
|
* We give a *copy* of the faultinfo in the regs to segv.
|
|
|
|
* This must be done, since nesting SEGVs could overwrite
|
|
|
|
* the info in the regs. A pointer to the info then would
|
|
|
|
* give us bad data!
|
|
|
|
*/
|
2007-05-06 15:51:24 -06:00
|
|
|
unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
|
2007-10-16 02:26:58 -06:00
|
|
|
struct uml_pt_regs *regs)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
|
|
|
struct siginfo si;
|
2007-10-16 02:27:05 -06:00
|
|
|
jmp_buf *catcher;
|
2005-04-16 16:20:36 -06:00
|
|
|
int err;
|
2007-05-06 15:51:24 -06:00
|
|
|
int is_write = FAULT_WRITE(fi);
|
|
|
|
unsigned long address = FAULT_ADDRESS(fi);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2007-10-16 02:26:54 -06:00
|
|
|
if (!is_user && (address >= start_vm) && (address < end_vm)) {
|
2007-05-06 15:51:24 -06:00
|
|
|
flush_tlb_kernel_vm();
|
|
|
|
return 0;
|
|
|
|
}
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (current->mm == NULL) {
|
2007-05-06 15:51:25 -06:00
|
|
|
show_regs(container_of(regs, struct pt_regs, regs));
|
2007-10-16 02:26:54 -06:00
|
|
|
panic("Segfault with no mm");
|
2007-05-06 15:51:25 -06:00
|
|
|
}
|
2005-09-22 22:44:16 -06:00
|
|
|
|
2005-09-30 12:58:59 -06:00
|
|
|
if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
|
2007-10-16 02:26:54 -06:00
|
|
|
err = handle_page_fault(address, ip, is_write, is_user,
|
|
|
|
&si.si_code);
|
2005-09-22 22:44:16 -06:00
|
|
|
else {
|
|
|
|
err = -EFAULT;
|
2007-10-16 02:26:54 -06:00
|
|
|
/*
|
|
|
|
* A thread accessed NULL, we get a fault, but CR2 is invalid.
|
|
|
|
* This code is used in __do_copy_from_user() of TT mode.
|
|
|
|
* XXX tt mode is gone, so maybe this isn't needed any more
|
|
|
|
*/
|
2005-09-22 22:44:16 -06:00
|
|
|
address = 0;
|
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
catcher = current->thread.fault_catcher;
|
2007-10-16 02:26:54 -06:00
|
|
|
if (!err)
|
2007-05-06 15:51:24 -06:00
|
|
|
return 0;
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (catcher != NULL) {
|
2005-04-16 16:20:36 -06:00
|
|
|
current->thread.fault_addr = (void *) address;
|
2007-10-16 02:27:05 -06:00
|
|
|
UML_LONGJMP(catcher, 1);
|
2006-07-10 05:45:13 -06:00
|
|
|
}
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (current->thread.fault_addr != NULL)
|
2005-04-16 16:20:36 -06:00
|
|
|
panic("fault_addr set but no fault catcher");
|
2007-10-16 02:26:54 -06:00
|
|
|
else if (!is_user && arch_fixup(ip, regs))
|
2007-05-06 15:51:24 -06:00
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2007-10-16 02:26:54 -06:00
|
|
|
if (!is_user) {
|
2007-05-06 15:51:25 -06:00
|
|
|
show_regs(container_of(regs, struct pt_regs, regs));
|
2006-07-10 05:45:13 -06:00
|
|
|
panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
|
2005-04-16 16:20:36 -06:00
|
|
|
address, ip);
|
2007-05-06 15:51:25 -06:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2005-09-03 16:57:26 -06:00
|
|
|
if (err == -EACCES) {
|
2005-04-16 16:20:36 -06:00
|
|
|
si.si_signo = SIGBUS;
|
|
|
|
si.si_errno = 0;
|
|
|
|
si.si_code = BUS_ADRERR;
|
2006-03-31 03:30:15 -07:00
|
|
|
si.si_addr = (void __user *)address;
|
2007-05-06 15:51:24 -06:00
|
|
|
current->thread.arch.faultinfo = fi;
|
2005-04-16 16:20:36 -06:00
|
|
|
force_sig_info(SIGBUS, &si, current);
|
2005-09-03 16:57:26 -06:00
|
|
|
} else {
|
|
|
|
BUG_ON(err != -EFAULT);
|
2005-04-16 16:20:36 -06:00
|
|
|
si.si_signo = SIGSEGV;
|
2006-03-31 03:30:15 -07:00
|
|
|
si.si_addr = (void __user *) address;
|
2007-05-06 15:51:24 -06:00
|
|
|
current->thread.arch.faultinfo = fi;
|
2005-04-16 16:20:36 -06:00
|
|
|
force_sig_info(SIGSEGV, &si, current);
|
|
|
|
}
|
2007-05-06 15:51:24 -06:00
|
|
|
return 0;
|
2005-04-16 16:20:36 -06:00
|
|
|
}
|
|
|
|
|
2007-10-16 02:26:58 -06:00
|
|
|
void relay_signal(int sig, struct uml_pt_regs *regs)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2007-10-16 02:26:54 -06:00
|
|
|
if (!UPT_IS_USER(regs)) {
|
|
|
|
if (sig == SIGBUS)
|
|
|
|
printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
|
|
|
|
"mount likely just ran out of space\n");
|
2005-04-16 16:20:36 -06:00
|
|
|
panic("Kernel mode signal %d", sig);
|
2006-09-26 00:33:03 -06:00
|
|
|
}
|
|
|
|
|
uml: further bugs.c tidying
bugs.c, for both i386 and x86_64, can undergo further cleaning -
The i386 arch_check_bugs only does one thing, so we might as
well inline the cmov checking.
The i386 includes can be trimmed down a bit.
arch_init_thread wasn't used, so it is deleted.
The panics in arch_handle_signal are turned into printks
because the process is about to get segfaulted anyway, so something is
dying no matter what happens here. Also, the return value was always
the same, so it contained no information, so it can be void instead.
The name is changed to arch_examine_signal because it doesn't handle
anything.
The caller of arch_handle_signal, relay_signal, does things in
a different order. The kernel-mode signal check is now first, which
puts everything else together, making things a bit clearer conceptually.
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-04 23:30:40 -07:00
|
|
|
arch_examine_signal(sig, regs);
|
|
|
|
|
2007-05-06 15:51:24 -06:00
|
|
|
current->thread.arch.faultinfo = *UPT_FAULTINFO(regs);
|
2005-04-16 16:20:36 -06:00
|
|
|
force_sig(sig, current);
|
|
|
|
}
|
|
|
|
|
2008-02-04 23:30:46 -07:00
|
|
|
void bus_handler(int sig, struct uml_pt_regs *regs)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2007-10-16 02:26:54 -06:00
|
|
|
if (current->thread.fault_catcher != NULL)
|
2007-10-16 02:27:05 -06:00
|
|
|
UML_LONGJMP(current->thread.fault_catcher, 1);
|
2005-04-16 16:20:36 -06:00
|
|
|
else relay_signal(sig, regs);
|
|
|
|
}
|
|
|
|
|
2008-02-04 23:30:46 -07:00
|
|
|
void winch(int sig, struct uml_pt_regs *regs)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
|
|
|
do_IRQ(WINCH_IRQ, regs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Intentionally empty: there is nothing to set up here. */
void trap_init(void)
{
}
|