kernel-fxtec-pro1x/arch/ppc/mm/fault.c

/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Derived from "arch/i386/mm/fault.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Modified by Cort Dougan and Paul Mackerras.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>

/*
 * Debugger hooks.  These are only declared when xmon or kgdb support is
 * configured; the function pointers themselves are defined elsewhere and
 * are called from do_page_fault() and bad_page_fault() below.
 */
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
extern void (*debugger)(struct pt_regs *);
extern void (*debugger_fault_handler)(struct pt_regs *);
extern int (*debugger_dabr_match)(struct pt_regs *);
/* when non-zero, bad_page_fault() calls debugger() before die() */
int debugger_kernel_faults = 1;
#endif

/* Fault statistics counters. */
unsigned long htab_reloads;	/* updated by hashtable.S:hash_page() */
unsigned long htab_evicts; 	/* updated by hashtable.S:hash_page() */
unsigned long htab_preloads;	/* updated by hashtable.S:add_hash_page() */
unsigned long pte_misses;	/* updated by do_page_fault(): faults resolved by handle_mm_fault() */
unsigned long pte_errors;	/* updated by do_page_fault(): faults that ended at bad_area */
/* NOTE(review): not referenced anywhere in this file; purpose to be confirmed */
unsigned int probingmem;

/*
 * Decide whether the instruction at regs->nip is a store in one of the
 * "update" addressing forms with r1 in its rA field, i.e. a store that
 * also writes the stack pointer.
 *
 * Returns 1 for such an instruction and 0 otherwise.  0 is also returned
 * when get_user() reports that the instruction word could not be read.
 */
static int store_updates_sp(struct pt_regs *regs)
{
	unsigned int insn;
	unsigned int ra_field;
	unsigned int major_op;
	unsigned int minor_op;

	if (get_user(insn, (unsigned int __user *)regs->nip))
		return 0;

	/* the base register (rA) has to be r1 */
	ra_field = (insn >> 16) & 0x1f;
	if (ra_field != 1)
		return 0;

	major_op = insn >> 26;
	if (major_op == 31) {
		/* indexed forms are told apart by the minor opcode */
		minor_op = (insn >> 1) & 0x3ff;
		return minor_op == 183		/* stwux */
		    || minor_op == 247		/* stbux */
		    || minor_op == 439		/* sthux */
		    || minor_op == 695		/* stfsux */
		    || minor_op == 759;		/* stfdux */
	}

	/* displacement forms are identified by the major opcode alone */
	return major_op == 37			/* stwu */
	    || major_op == 39			/* stbu */
	    || major_op == 45			/* sthu */
	    || major_op == 53			/* stfsu */
	    || major_op == 55;			/* stfdu */
}

/*
 * Handle a page fault (data or instruction access) for the current task.
 *
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault. For 400-family processors
 * the error_code parameter is ESR for a data fault, 0 for an instruction
 * fault.
 *
 * regs       - register state at the time of the fault
 * address    - the faulting address
 * error_code - see above
 *
 * Returns 0 when the fault was resolved, was consumed by a debugger hook,
 * or a signal was raised for a fault taken in user mode.  Returns a signal
 * number (SIGSEGV, SIGBUS or SIGKILL) when the fault could not be resolved
 * and either was not taken in user mode or there was no usable context
 * (in_atomic() or no mm) -- presumably so the caller can hand it on to
 * bad_page_fault().
 */
int do_page_fault(struct pt_regs *regs, unsigned long address,
		  unsigned long error_code)
{
	struct vm_area_struct * vma;
	struct mm_struct *mm = current->mm;
	siginfo_t info;
	/* si_code for a SIGSEGV; switched to SEGV_ACCERR once a vma is found */
	int code = SEGV_MAPERR;
#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
	/* on 4xx/Book-E the ESR tells us directly whether this was a store */
	int is_write = error_code & ESR_DST;
#else
	int is_write = 0;

	/*
	 * Fortunately the bit assignments in SRR1 for an instruction
	 * fault and DSISR for a data fault are mostly the same for the
	 * bits we are interested in.  But there are some bits which
	 * indicate errors in DSISR but can validly be set in SRR1.
	 */
	if (TRAP(regs) == 0x400)
		/* instruction fault: keep only the bits that are meaningful in SRR1 */
		error_code &= 0x48200000;
	else
		is_write = error_code & 0x02000000;
#endif /* CONFIG_4xx || CONFIG_BOOKE */

#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
	/*
	 * If a debugger fault handler is installed it takes over every
	 * trap 0x300 fault (presumably the data access exception) and the
	 * fault is reported as handled.
	 */
	if (debugger_fault_handler && TRAP(regs) == 0x300) {
		debugger_fault_handler(regs);
		return 0;
	}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
	if (error_code & 0x00400000) {
		/* DABR match */
		/* NOTE(review): debugger_dabr_match is called without a NULL check */
		if (debugger_dabr_match(regs))
			return 0;
	}
#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
#endif /* CONFIG_XMON || CONFIG_KGDB */

	/*
	 * Without an mm, or in atomic context, we cannot go on to take
	 * mmap_sem and fix up the fault; report SIGSEGV to the caller.
	 */
	if (in_atomic() || mm == NULL)
		return SIGSEGV;

	/* mmap_sem is held for reading from here until each exit path drops it */
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	/* address lies inside the vma: go straight to the permission checks */
	if (vma->vm_start <= address)
		goto good_area;
	/* address is below the vma: only a grows-down vma may be extended... */
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	/* ...and only in response to a write */
	if (!is_write)
                goto bad_area;

	/*
	 * N.B. The rs6000/xcoff ABI allows programs to access up to
	 * a few hundred bytes below the stack pointer.
	 * The kernel signal delivery code writes up to about 1.5kB
	 * below the stack pointer (r1) before decrementing it.
	 * The exec code can write slightly over 640kB to the stack
	 * before setting the user r1.  Thus we allow the stack to
	 * expand to 1MB without further checks.
	 */
	if (address + 0x100000 < vma->vm_end) {
		/* get user regs even if this fault is in kernel mode */
		struct pt_regs *uregs = current->thread.regs;
		if (uregs == NULL)
			goto bad_area;

		/*
		 * A user-mode access to an address a long way below
		 * the stack pointer is only valid if the instruction
		 * is one which would update the stack pointer to the
		 * address accessed if the instruction completed,
		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
		 * (or the byte, halfword, float or double forms).
		 *
		 * If we don't check this then any write to the area
		 * between the last mapped region and the stack will
		 * expand the stack rather than segfaulting.
		 */
		if (address + 2048 < uregs->gpr[1]
		    && (!user_mode(regs) || !store_updates_sp(regs)))
			goto bad_area;
	}
	/* a non-zero return from expand_stack() is treated as failure */
	if (expand_stack(vma, address))
		goto bad_area;

good_area:
	/* a vma covers the address, so any failure from here on is an access error */
	code = SEGV_ACCERR;
#if defined(CONFIG_6xx)
	if (error_code & 0x95700000)
		/* an error such as lwarx to I/O controller space,
		   address matching DABR, eciwx, etc. */
		goto bad_area;
#endif /* CONFIG_6xx */
#if defined(CONFIG_8xx)
        /* The MPC8xx seems to always set 0x80000000, which is
         * "undefined".  Of those that can be set, this is the only
         * one which seems bad.
         */
	if (error_code & 0x10000000)
                /* Guarded storage error. */
		goto bad_area;
#endif /* CONFIG_8xx */

	/* a write */
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
	/* an exec  - 4xx/Book-E allows for per-page execute permission */
	} else if (TRAP(regs) == 0x400) {
		pte_t *ptep;
		pmd_t *pmdp;

#if 0
		/* It would be nice to actually enforce the VM execute
		   permission on CPUs which can do so, but far too
		   much stuff in userspace doesn't get the permissions
		   right, so we let any page be executed for now. */
		if (! (vma->vm_flags & VM_EXEC))
			goto bad_area;
#endif

		/* Since 4xx/Book-E supports per-page execute permission,
		 * we lazily flush dcache to icache. */
		ptep = NULL;
		if (get_pteptr(mm, address, &ptep, &pmdp)) {
			/* hold the PTE lock across the present check and the update */
			spinlock_t *ptl = pte_lockptr(mm, pmdp);
			spin_lock(ptl);
			if (pte_present(*ptep)) {
				struct page *page = pte_page(*ptep);

				/* PG_arch_1 records that this page was already flushed */
				if (!test_bit(PG_arch_1, &page->flags)) {
					flush_dcache_icache_page(page);
					set_bit(PG_arch_1, &page->flags);
				}
				/* set _PAGE_HWEXEC in the PTE and invalidate the TLB entry */
				pte_update(ptep, 0, _PAGE_HWEXEC);
				_tlbie(address);
				pte_unmap_unlock(ptep, ptl);
				up_read(&mm->mmap_sem);
				return 0;
			}
			/* PTE not present: fall through to handle_mm_fault() below */
			pte_unmap_unlock(ptep, ptl);
		}
#endif
	/* a read */
	} else {
		/* protection fault */
		if (error_code & 0x08000000)
			goto bad_area;
		/* a read is allowed if the vma has any of read, exec or write permission */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
 survive:
        switch (handle_mm_fault(mm, vma, address, is_write)) {
        case VM_FAULT_MINOR:
                current->min_flt++;
                break;
        case VM_FAULT_MAJOR:
                current->maj_flt++;
                break;
        case VM_FAULT_SIGBUS:
                goto do_sigbus;
        case VM_FAULT_OOM:
                goto out_of_memory;
	default:
		/* any other return value is unexpected */
		BUG();
	}

	up_read(&mm->mmap_sem);
	/*
	 * keep track of tlb+htab misses that are good addrs but
	 * just need pte's created via handle_mm_fault()
	 * -- Cort
	 */
	pte_misses++;
	return 0;

bad_area:
	up_read(&mm->mmap_sem);
	pte_errors++;

	/* User mode accesses cause a SIGSEGV */
	if (user_mode(regs)) {
		_exception(SIGSEGV, regs, code, address);
		return 0;
	}

	/* not user mode: hand the signal number back to the caller */
	return SIGSEGV;

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	/* pid 1 is never killed here: yield, retake mmap_sem and retry the fault */
	if (current->pid == 1) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	return SIGKILL;

do_sigbus:
	up_read(&mm->mmap_sem);
	/* queue a SIGBUS (BUS_ADRERR) for the current task at the faulting address */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void __user *)address;
	force_sig_info (SIGBUS, &info, current);
	/* for a fault outside user mode the caller is told about it as well */
	if (!user_mode(regs))
		return SIGBUS;
	return 0;
}

/*
 * bad_page_fault deals with a bad access made by the kernel itself.
 * Callers are the DSI and ISI handlers in head.S and some of the
 * procedures in traps.c.
 *
 * When the exception tables hold an entry for regs->nip, execution is
 * redirected to that entry's fixup address.  Otherwise the debugger is
 * entered (if configured and debugger_kernel_faults is set) and die()
 * is called with the signal number given in sig.
 */
void
bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
	const struct exception_table_entry *fixup_entry;

	fixup_entry = search_exception_tables(regs->nip);
	if (fixup_entry == NULL) {
		/* nobody was prepared for this fault: kernel accessed a bad area */
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
		if (debugger_kernel_faults)
			debugger(regs);
#endif
		die("kernel access of bad area", regs, sig);
		return;
	}

	/* resume at the registered fixup code */
	regs->nip = fixup_entry->fixup;
}

#ifdef CONFIG_8xx

/*
 * Walk the init_mm page tables for an address at or above TASK_SIZE and
 * return a pointer to its PTE.  (pgtable.h is said to provide generic
 * functions for this, but the original author could not find them.)
 *
 * Returns NULL for an address below TASK_SIZE, or when any level of the
 * walk is missing or the PTE is not present.
 */
pte_t *va_to_pte(unsigned long address)
{
	pgd_t *pgd_entry;
	pmd_t *pmd_entry;
	pte_t *pte_entry;

	if (address < TASK_SIZE)
		return NULL;

	pgd_entry = pgd_offset(&init_mm, address);
	if (!pgd_entry)
		return NULL;

	pmd_entry = pmd_offset(pgd_entry, address & PAGE_MASK);
	if (!pmd_entry || !pmd_present(*pmd_entry))
		return NULL;

	pte_entry = pte_offset_kernel(pmd_entry, address & PAGE_MASK);
	if (!pte_entry || !pte_present(*pte_entry))
		return NULL;

	return pte_entry;
}

/*
 * Translate an address to a physical address using va_to_pte().
 *
 * The result combines the page-aligned part of the PTE value with the
 * offset of address within its page.  Returns 0 when va_to_pte() finds
 * no PTE for the address.
 */
unsigned long va_to_phys(unsigned long address)
{
	pte_t *pte_entry = va_to_pte(address);
	unsigned long page_base;
	unsigned long page_offset;

	if (!pte_entry)
		return 0;

	page_base = (unsigned long)(pte_val(*pte_entry)) & PAGE_MASK;
	page_offset = address & ~(PAGE_MASK);
	return page_base | page_offset;
}

/*
 * Print the page table entry that maps addr in the given mm, together
 * with a decode of its low-order fields.
 *
 * mm   - address space whose page tables are walked
 * addr - address to look up (masked to its page for the walk)
 *
 * Prints "no pgd", "no pmd" or "no pte" when the corresponding level of
 * the walk yields nothing.
 *
 * The PTE value is handled as unsigned long: with a signed type the
 * right shifts below would sign-extend entries that have the top bit
 * set and the RPN would be printed with spurious leading f's.
 */
void
print_8xx_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long val;

	/* zero-pad the address like every other value printed here */
	printk(" pte @ 0x%08lx: ", addr);

	pgd = pgd_offset(mm, addr & PAGE_MASK);
	if (!pgd) {
		printk("no pgd\n");
		return;
	}

	pmd = pmd_offset(pgd, addr & PAGE_MASK);
	if (!pmd || !pmd_present(*pmd)) {
		printk("no pmd\n");
		return;
	}

	pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
	if (!pte) {
		printk("no pte\n");
		return;
	}

	/* read the entry once so both lines describe the same value */
	val = (unsigned long)pte_val(*pte);

	printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
	       (unsigned long)pgd, (unsigned long)pte, val);
	printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
	       "CI: %lx v: %lx\n",
	       val >> 12,	/* rpn */
	       (val >> 10) & 3,	/* pp */
	       (val >> 3) & 1,	/* small */
	       (val >> 2) & 1,	/* shared */
	       (val >> 1) & 1,	/* cache inhibit */
	       val & 1		/* valid */
	       );
}

/*
 * Look up the page table entry that maps addr in the given mm and return
 * its value converted to int.  Returns 0 when the pgd or pte lookup
 * yields nothing or the pmd is not present.
 */
int
get_8xx_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd_entry;
	pmd_t *pmd_entry;
	pte_t *pte_entry;

	pgd_entry = pgd_offset(mm, addr & PAGE_MASK);
	if (!pgd_entry)
		return 0;

	pmd_entry = pmd_offset(pgd_entry, addr & PAGE_MASK);
	if (!pmd_entry || !pmd_present(*pmd_entry))
		return 0;

	pte_entry = pte_offset_kernel(pmd_entry, addr & PAGE_MASK);
	if (!pte_entry)
		return 0;

	return (int)pte_val(*pte_entry);
}
#endif /* CONFIG_8xx */
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`/*`
			`* PowerPC version`
			`* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)`
			`*`
			`* Derived from "arch/i386/mm/fault.c"`
			`* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds`
			`*`
			`* Modified by Cort Dougan and Paul Mackerras.`
			`*`
			`* This program is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU General Public License`
			`* as published by the Free Software Foundation; either version`
			`* 2 of the License, or (at your option) any later version.`
			`*/`

			`#include <linux/signal.h>`
			`#include <linux/sched.h>`
			`#include <linux/kernel.h>`
			`#include <linux/errno.h>`
			`#include <linux/string.h>`
			`#include <linux/types.h>`
			`#include <linux/ptrace.h>`
			`#include <linux/mman.h>`
			`#include <linux/mm.h>`
			`#include <linux/interrupt.h>`
			`#include <linux/highmem.h>`
			`#include <linux/module.h>`

			`#include <asm/page.h>`
			`#include <asm/pgtable.h>`
			`#include <asm/mmu.h>`
			`#include <asm/mmu_context.h>`
			`#include <asm/system.h>`
			`#include <asm/uaccess.h>`
			`#include <asm/tlbflush.h>`

			`#if defined(CONFIG_XMON) \|\| defined(CONFIG_KGDB)`
			`extern void (debugger)(struct pt_regs );`
			`extern void (debugger_fault_handler)(struct pt_regs );`
			`extern int (debugger_dabr_match)(struct pt_regs );`
			`int debugger_kernel_faults = 1;`
			`#endif`

			`unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */`
			`unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */`
			`unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */`
			`unsigned long pte_misses; /* updated by do_page_fault() */`
			`unsigned long pte_errors; /* updated by do_page_fault() */`
			`unsigned int probingmem;`

			`/*`
			`* Check whether the instruction at regs->nip is a store using`
			`* an update addressing form which will update r1.`
			`*/`
			`static int store_updates_sp(struct pt_regs *regs)`
			`{`
			`unsigned int inst;`

			`if (get_user(inst, (unsigned int __user *)regs->nip))`
			`return 0;`
			`/* check for 1 in the rA field */`
			`if (((inst >> 16) & 0x1f) != 1)`
			`return 0;`
			`/* check major opcode */`
			`switch (inst >> 26) {`
			`case 37: /* stwu */`
			`case 39: /* stbu */`
			`case 45: /* sthu */`
			`case 53: /* stfsu */`
			`case 55: /* stfdu */`
			`return 1;`
			`case 31:`
			`/* check minor opcode */`
			`switch ((inst >> 1) & 0x3ff) {`
			`case 183: /* stwux */`
			`case 247: /* stbux */`
			`case 439: /* sthux */`
			`case 695: /* stfsux */`
			`case 759: /* stfdux */`
			`return 1;`
			`}`
			`}`
			`return 0;`
			`}`

			`/*`
			`* For 600- and 800-family processors, the error_code parameter is DSISR`
			`* for a data fault, SRR1 for an instruction fault. For 400-family processors`
			`* the error_code parameter is ESR for a data fault, 0 for an instruction`
			`* fault.`
			`*/`
			`int do_page_fault(struct pt_regs *regs, unsigned long address,`
			`unsigned long error_code)`
			`{`
			`struct vm_area_struct * vma;`
			`struct mm_struct *mm = current->mm;`
			`siginfo_t info;`
			`int code = SEGV_MAPERR;`
			`#if defined(CONFIG_4xx) \|\| defined (CONFIG_BOOKE)`
			`int is_write = error_code & ESR_DST;`
			`#else`
			`int is_write = 0;`

			`/*`
			`* Fortunately the bit assignments in SRR1 for an instruction`
			`* fault and DSISR for a data fault are mostly the same for the`
			`* bits we are interested in. But there are some bits which`
			`* indicate errors in DSISR but can validly be set in SRR1.`
			`*/`
			`if (TRAP(regs) == 0x400)`
			`error_code &= 0x48200000;`
			`else`
			`is_write = error_code & 0x02000000;`
			`#endif /* CONFIG_4xx \|\| CONFIG_BOOKE */`

			`#if defined(CONFIG_XMON) \|\| defined(CONFIG_KGDB)`
			`if (debugger_fault_handler && TRAP(regs) == 0x300) {`
			`debugger_fault_handler(regs);`
			`return 0;`
			`}`
			`#if !(defined(CONFIG_4xx) \|\| defined(CONFIG_BOOKE))`
			`if (error_code & 0x00400000) {`
			`/* DABR match */`
			`if (debugger_dabr_match(regs))`
			`return 0;`
			`}`
			`#endif /* !(CONFIG_4xx \|\| CONFIG_BOOKE)*/`
			`#endif /* CONFIG_XMON \|\| CONFIG_KGDB */`

			`if (in_atomic() \|\| mm == NULL)`
			`return SIGSEGV;`

			`down_read(&mm->mmap_sem);`
			`vma = find_vma(mm, address);`
			`if (!vma)`
			`goto bad_area;`
			`if (vma->vm_start <= address)`
			`goto good_area;`
			`if (!(vma->vm_flags & VM_GROWSDOWN))`
			`goto bad_area;`
			`if (!is_write)`
			`goto bad_area;`

			`/*`
			`* N.B. The rs6000/xcoff ABI allows programs to access up to`
			`* a few hundred bytes below the stack pointer.`
			`* The kernel signal delivery code writes up to about 1.5kB`
			`* below the stack pointer (r1) before decrementing it.`
			`* The exec code can write slightly over 640kB to the stack`
			`* before setting the user r1. Thus we allow the stack to`
			`* expand to 1MB without further checks.`
			`*/`
			`if (address + 0x100000 < vma->vm_end) {`
			`/* get user regs even if this fault is in kernel mode */`
			`struct pt_regs *uregs = current->thread.regs;`
			`if (uregs == NULL)`
			`goto bad_area;`

			`/*`
			`* A user-mode access to an address a long way below`
			`* the stack pointer is only valid if the instruction`
			`* is one which would update the stack pointer to the`
			`* address accessed if the instruction completed,`
			`* i.e. either stwu rs,n(r1) or stwux rs,r1,rb`
			`* (or the byte, halfword, float or double forms).`
			`*`
			`* If we don't check this then any write to the area`
			`* between the last mapped region and the stack will`
			`* expand the stack rather than segfaulting.`
			`*/`
			`if (address + 2048 < uregs->gpr[1]`
			`&& (!user_mode(regs) \|\| !store_updates_sp(regs)))`
			`goto bad_area;`
			`}`
			`if (expand_stack(vma, address))`
			`goto bad_area;`

			`good_area:`
			`code = SEGV_ACCERR;`
			`#if defined(CONFIG_6xx)`
			`if (error_code & 0x95700000)`
			`/* an error such as lwarx to I/O controller space,`
			`address matching DABR, eciwx, etc. */`
			`goto bad_area;`
			`#endif /* CONFIG_6xx */`
			`#if defined(CONFIG_8xx)`
			`/* The MPC8xx seems to always set 0x80000000, which is`
			`* "undefined". Of those that can be set, this is the only`
			`* one which seems bad.`
			`*/`
			`if (error_code & 0x10000000)`
			`/* Guarded storage error. */`
			`goto bad_area;`
			`#endif /* CONFIG_8xx */`

			`/* a write */`
			`if (is_write) {`
			`if (!(vma->vm_flags & VM_WRITE))`
			`goto bad_area;`
			`#if defined(CONFIG_4xx) \|\| defined(CONFIG_BOOKE)`
			`/* an exec - 4xx/Book-E allows for per-page execute permission */`
			`} else if (TRAP(regs) == 0x400) {`
			`pte_t *ptep;`
[PATCH] lock PTE before updating it in 440/BookE page fault handler Fix 44x and BookE page fault handler to correctly lock PTE before trying to pte_update() it, otherwise this PTE might be swapped out after pte_present() check but before pte_uptdate() call, resulting in corrupted PTE. This can happen with enabled preemption and low memory condition. Signed-off-by: Eugene Surovegin <ebs@ebshome.net> Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-03-28 11:13:12 -07:00			`pmd_t *pmdp;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00
			`#if 0`
			`/* It would be nice to actually enforce the VM execute`
			`permission on CPUs which can do so, but far too`
			`much stuff in userspace doesn't get the permissions`
			`right, so we let any page be executed for now. */`
			`if (! (vma->vm_flags & VM_EXEC))`
			`goto bad_area;`
			`#endif`

			`/* Since 4xx/Book-E supports per-page execute permission,`
			`* we lazily flush dcache to icache. */`
			`ptep = NULL;`
[PATCH] lock PTE before updating it in 440/BookE page fault handler Fix 44x and BookE page fault handler to correctly lock PTE before trying to pte_update() it, otherwise this PTE might be swapped out after pte_present() check but before pte_uptdate() call, resulting in corrupted PTE. This can happen with enabled preemption and low memory condition. Signed-off-by: Eugene Surovegin <ebs@ebshome.net> Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-03-28 11:13:12 -07:00			`if (get_pteptr(mm, address, &ptep, &pmdp)) {`
			`spinlock_t *ptl = pte_lockptr(mm, pmdp);`
			`spin_lock(ptl);`
			`if (pte_present(*ptep)) {`
			`struct page page = pte_page(ptep);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00
[PATCH] lock PTE before updating it in 440/BookE page fault handler Fix 44x and BookE page fault handler to correctly lock PTE before trying to pte_update() it, otherwise this PTE might be swapped out after pte_present() check but before pte_uptdate() call, resulting in corrupted PTE. This can happen with enabled preemption and low memory condition. Signed-off-by: Eugene Surovegin <ebs@ebshome.net> Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-03-28 11:13:12 -07:00			`if (!test_bit(PG_arch_1, &page->flags)) {`
			`flush_dcache_icache_page(page);`
			`set_bit(PG_arch_1, &page->flags);`
			`}`
			`pte_update(ptep, 0, _PAGE_HWEXEC);`
			`_tlbie(address);`
			`pte_unmap_unlock(ptep, ptl);`
			`up_read(&mm->mmap_sem);`
			`return 0;`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`}`
[PATCH] lock PTE before updating it in 440/BookE page fault handler Fix 44x and BookE page fault handler to correctly lock PTE before trying to pte_update() it, otherwise this PTE might be swapped out after pte_present() check but before pte_uptdate() call, resulting in corrupted PTE. This can happen with enabled preemption and low memory condition. Signed-off-by: Eugene Surovegin <ebs@ebshome.net> Signed-off-by: Paul Mackerras <paulus@samba.org> 2006-03-28 11:13:12 -07:00			`pte_unmap_unlock(ptep, ptl);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`}`
			`#endif`
			`/* a read */`
			`} else {`
			`/* protection fault */`
			`if (error_code & 0x08000000)`
			`goto bad_area;`
[PATCH] make PROT_WRITE imply PROT_READ Make PROT_WRITE imply PROT_READ for a number of architectures which don't support write only in hardware. While looking at this, I noticed that some architectures which do not support write only mappings already take the exact same approach. For example, in arch/alpha/mm/fault.c: " if (cause < 0) { if (!(vma->vm_flags & VM_EXEC)) goto bad_area; } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ \| VM_WRITE))) goto bad_area; } else { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; } " Thus, this patch brings other architectures which do not support write only mappings in-line and consistent with the rest. I've verified the patch on ia64, x86_64 and x86. Additional discussion: Several architectures, including x86, can not support write-only mappings. The pte for x86 reserves a single bit for protection and its two states are read only or read/write. Thus, write only is not supported in h/w. Currently, if i 'mmap' a page write-only, the first read attempt on that page creates a page fault and will SEGV. That check is enforced in arch/blah/mm/fault.c. However, if i first write that page it will fault in and the pte will be set to read/write. Thus, any subsequent reads to the page will succeed. It is this inconsistency in behavior that this patch is attempting to address. Furthermore, if the page is swapped out, and then brought back the first read will also cause a SEGV. Thus, any arbitrary read on a page can potentially result in a SEGV. According to the SuSv3 spec, "if the application requests only PROT_WRITE, the implementation may also allow read access." Also as mentioned, some archtectures, such as alpha, shown above already take the approach that i am suggesting. The counter-argument to this raised by Arjan, is that the kernel is enforcing the write only mapping the best it can given the h/w limitations. 
This is true, however Alan Cox, and myself would argue that the inconsitency in behavior, that is applications can sometimes work/sometimes fails is highly undesireable. If you read through the thread, i think people, came to an agreement on the last patch i posted, as nobody has objected to it... Signed-off-by: Jason Baron <jbaron@redhat.com> Cc: Russell King <rmk@arm.linux.org.uk> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Andi Kleen <ak@muc.de> Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: Arjan van de Ven <arjan@linux.intel.com> Acked-by: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Ian Molton <spyro@f2s.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2006-09-29 02:58:58 -06:00			`if (!(vma->vm_flags & (VM_READ \| VM_EXEC \| VM_WRITE)))`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`goto bad_area;`
			`}`

			`/*`
			`* If for any reason at all we couldn't handle the fault,`
			`* make sure we exit gracefully rather than endlessly redo`
			`* the fault.`
			`*/`
			`survive:`
			`switch (handle_mm_fault(mm, vma, address, is_write)) {`
			`case VM_FAULT_MINOR:`
			`current->min_flt++;`
			`break;`
			`case VM_FAULT_MAJOR:`
			`current->maj_flt++;`
			`break;`
			`case VM_FAULT_SIGBUS:`
			`goto do_sigbus;`
			`case VM_FAULT_OOM:`
			`goto out_of_memory;`
			`default:`
			`BUG();`
			`}`

			`up_read(&mm->mmap_sem);`
			`/*`
			`* keep track of tlb+htab misses that are good addrs but`
			`* just need pte's created via handle_mm_fault()`
			`* -- Cort`
			`*/`
			`pte_misses++;`
			`return 0;`

			`bad_area:`
			`up_read(&mm->mmap_sem);`
			`pte_errors++;`

			`/* User mode accesses cause a SIGSEGV */`
			`if (user_mode(regs)) {`
[PATCH] ppc32: Kill init on unhandled synchronous signals This is a patch that I have had in my tree for ages. If init causes an exception that raises a signal, such as a SIGSEGV, SIGILL or SIGFPE, and it hasn't registered a handler for it, we don't deliver the signal, since init doesn't get any signals that it doesn't have a handler for. But that means that we just return to userland and generate the same exception again immediately. With this patch we print a message and kill init in this situation. This is very useful when you have a bug in the kernel that means that init doesn't get as far as executing its first instruction. :) Without this patch the system hangs when it gets to starting the userland init; with it you at least get a message giving you a clue about what has gone wrong. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-09-10 05:13:11 -06:00			`_exception(SIGSEGV, regs, code, address);`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 16:20:36 -06:00			`return 0;`
			`}`

			`return SIGSEGV;`

			`/*`
			`* We ran out of memory, or some other thing happened to us that made`
			`* us unable to handle the page fault gracefully.`
			`*/`
			`out_of_memory:`
			`up_read(&mm->mmap_sem);`
			`if (current->pid == 1) {`
			`yield();`
			`down_read(&mm->mmap_sem);`
			`goto survive;`
			`}`
			`printk("VM: killing process %s\n", current->comm);`
			`if (user_mode(regs))`
			`do_exit(SIGKILL);`
			`return SIGKILL;`

			`do_sigbus:`
			`up_read(&mm->mmap_sem);`
			`info.si_signo = SIGBUS;`
			`info.si_errno = 0;`
			`info.si_code = BUS_ADRERR;`
			`info.si_addr = (void __user *)address;`
			`force_sig_info (SIGBUS, &info, current);`
			`if (!user_mode(regs))`
			`return SIGBUS;`
			`return 0;`
			`}`

			`/*`
			`* bad_page_fault is called when we have a bad access from the kernel.`
			`* It is called from the DSI and ISI handlers in head.S and from some`
			`* of the procedures in traps.c.`
			`*/`
			`void`
			`bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)`
			`{`
			`const struct exception_table_entry *entry;`

			`/* Are we prepared to handle this fault? */`
			`if ((entry = search_exception_tables(regs->nip)) != NULL) {`
			`regs->nip = entry->fixup;`
			`return;`
			`}`

			`/* kernel has accessed a bad area */`
			`#if defined(CONFIG_XMON) \|\| defined(CONFIG_KGDB)`
			`if (debugger_kernel_faults)`
			`debugger(regs);`
			`#endif`
			`die("kernel access of bad area", regs, sig);`
			`}`

			`#ifdef CONFIG_8xx`

			`/* The pgtable.h claims some functions generically exist, but I`
			`* can't find them......`
			`*/`
			`pte_t *va_to_pte(unsigned long address)`
			`{`
			`pgd_t *dir;`
			`pmd_t *pmd;`
			`pte_t *pte;`

			`if (address < TASK_SIZE)`
			`return NULL;`

			`dir = pgd_offset(&init_mm, address);`
			`if (dir) {`
			`pmd = pmd_offset(dir, address & PAGE_MASK);`
			`if (pmd && pmd_present(*pmd)) {`
			`pte = pte_offset_kernel(pmd, address & PAGE_MASK);`
			`if (pte && pte_present(*pte))`
			`return(pte);`
			`}`
			`}`
			`return NULL;`
			`}`

			`unsigned long va_to_phys(unsigned long address)`
			`{`
			`pte_t *pte;`

			`pte = va_to_pte(address);`
			`if (pte)`
			`return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) \| (address & ~(PAGE_MASK)));`
			`return (0);`
			`}`

			`void`
			`print_8xx_pte(struct mm_struct *mm, unsigned long addr)`
			`{`
			`pgd_t * pgd;`
			`pmd_t * pmd;`
			`pte_t * pte;`

			`printk(" pte @ 0x%8lx: ", addr);`
			`pgd = pgd_offset(mm, addr & PAGE_MASK);`
			`if (pgd) {`
			`pmd = pmd_offset(pgd, addr & PAGE_MASK);`
			`if (pmd && pmd_present(*pmd)) {`
			`pte = pte_offset_kernel(pmd, addr & PAGE_MASK);`
			`if (pte) {`
			`printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",`
			`(long)pgd, (long)pte, (long)pte_val(*pte));`
			`#define pp ((long)pte_val(*pte))`
			`printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "`
			`"CI: %lx v: %lx\n",`
			`pp>>12, /* rpn */`
			`(pp>>10)&3, /* pp */`
			`(pp>>3)&1, /* small */`
			`(pp>>2)&1, /* shared */`
			`(pp>>1)&1, /* cache inhibit */`
			`pp&1 /* valid */`
			`);`
			`#undef pp`
			`}`
			`else {`
			`printk("no pte\n");`
			`}`
			`}`
			`else {`
			`printk("no pmd\n");`
			`}`
			`}`
			`else {`
			`printk("no pgd\n");`
			`}`
			`}`

			`int`
			`get_8xx_pte(struct mm_struct *mm, unsigned long addr)`
			`{`
			`pgd_t * pgd;`
			`pmd_t * pmd;`
			`pte_t * pte;`
			`int retval = 0;`

			`pgd = pgd_offset(mm, addr & PAGE_MASK);`
			`if (pgd) {`
			`pmd = pmd_offset(pgd, addr & PAGE_MASK);`
			`if (pmd && pmd_present(*pmd)) {`
			`pte = pte_offset_kernel(pmd, addr & PAGE_MASK);`
			`if (pte) {`
			`retval = (int)pte_val(*pte);`
			`}`
			`}`
			`}`
			`return(retval);`
			`}`
			`#endif /* CONFIG_8xx */`