kernel-fxtec-pro1x/arch/powerpc/platforms/pseries/ras.c
Sukadev Bhattiprolu f400e198b2 [PATCH] pidspace: is_init()
This is an updated version of Eric Biederman's is_init() patch.
(http://lkml.org/lkml/2006/2/6/280).  It applies cleanly to 2.6.18-rc3 and
replaces a few more instances of ->pid == 1 with is_init().

Further, is_init() checks pid and thus removes dependency on Eric's other
patches for now.

Eric's original description:

	There are a lot of places in the kernel where we test for init
	because we give it special properties.  Most  significantly init
	must not die.  This results in code all over the kernel test
	->pid == 1.

	Introduce is_init to capture this case.

	With multiple pid spaces for all of the cases affected we are
	looking for only the first process on the system, not some other
	process that has pid == 1.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Serge Hallyn <serue@us.ibm.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: <lxc-devel@lists.sourceforge.net>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29 09:18:12 -07:00

376 lines
11 KiB
C

/*
* Copyright (C) 2001 Dave Engebretsen IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Change Activity:
* 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
* End Change Activity
*/
#include <linux/errno.h>
#include <linux/threads.h>
#include <linux/kernel_stat.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/random.h>
#include <linux/sysrq.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/cache.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/udbg.h>
#include <asm/firmware.h>
#include "ras.h"
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);
char mce_data_buf[RTAS_ERROR_LOG_MAX];
static int ras_get_sensor_state_token;
static int ras_check_exception_token;
#define EPOW_SENSOR_TOKEN 9
#define EPOW_SENSOR_INDEX 0
#define RAS_VECTOR_OFFSET 0x500
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id,
struct pt_regs * regs);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id,
struct pt_regs * regs);
/* #define DEBUG */
static void request_ras_irqs(struct device_node *np,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
const char *name)
{
int i, index, count = 0;
struct of_irq oirq;
const u32 *opicprop;
unsigned int opicplen;
unsigned int virqs[16];
/* Check for obsolete "open-pic-interrupt" property. If present, then
* map those interrupts using the default interrupt host and default
* trigger
*/
opicprop = get_property(np, "open-pic-interrupt", &opicplen);
if (opicprop) {
opicplen /= sizeof(u32);
for (i = 0; i < opicplen; i++) {
if (count > 15)
break;
virqs[count] = irq_create_mapping(NULL, *(opicprop++));
if (virqs[count] == NO_IRQ)
printk(KERN_ERR "Unable to allocate interrupt "
"number for %s\n", np->full_name);
else
count++;
}
}
/* Else use normal interrupt tree parsing */
else {
/* First try to do a proper OF tree parsing */
for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
index++) {
if (count > 15)
break;
virqs[count] = irq_create_of_mapping(oirq.controller,
oirq.specifier,
oirq.size);
if (virqs[count] == NO_IRQ)
printk(KERN_ERR "Unable to allocate interrupt "
"number for %s\n", np->full_name);
else
count++;
}
}
/* Now request them */
for (i = 0; i < count; i++) {
if (request_irq(virqs[i], handler, 0, name, NULL)) {
printk(KERN_ERR "Unable to request interrupt %d for "
"%s\n", virqs[i], np->full_name);
return;
}
}
}
/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
*/
static int __init init_ras_IRQ(void)
{
struct device_node *np;
ras_get_sensor_state_token = rtas_token("get-sensor-state");
ras_check_exception_token = rtas_token("check-exception");
/* Internal Errors */
np = of_find_node_by_path("/event-sources/internal-errors");
if (np != NULL) {
request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR");
of_node_put(np);
}
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {
request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW");
of_node_put(np);
}
return 0;
}
__initcall(init_ras_IRQ);
/*
* Handle power subsystem events (EPOW).
*
* Presently we just log the event has occurred. This should be fixed
* to examine the type of power failure and take appropriate action where
* the time horizon permits something useful to be done.
*/
static irqreturn_t
ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
int status = 0xdeadbeef;
int state = 0;
int critical;
status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
if (state > 3)
critical = 1; /* Time Critical */
else
critical = 0;
spin_lock(&ras_log_buf_lock);
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RAS_VECTOR_OFFSET,
irq_map[irq].hwirq,
RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
critical, __pa(&ras_log_buf),
rtas_get_error_log_max());
udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
*((unsigned long *)&ras_log_buf), status, state);
printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
*((unsigned long *)&ras_log_buf), status, state);
/* format and print the extended information */
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
spin_unlock(&ras_log_buf_lock);
return IRQ_HANDLED;
}
/*
* Handle hardware error interrupts.
*
* RTAS check-exception is called to collect data on the exception. If
* the error is deemed recoverable, we log a warning and return.
* For nonrecoverable errors, an error is logged and we stop all processing
* as quickly as possible in order to prevent propagation of the failure.
*/
static irqreturn_t
ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
struct rtas_error_log *rtas_elog;
int status = 0xdeadbeef;
int fatal;
spin_lock(&ras_log_buf_lock);
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RAS_VECTOR_OFFSET,
irq_map[irq].hwirq,
RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
__pa(&ras_log_buf),
rtas_get_error_log_max());
rtas_elog = (struct rtas_error_log *)ras_log_buf;
if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC))
fatal = 1;
else
fatal = 0;
/* format and print the extended information */
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
if (fatal) {
udbg_printf("Fatal HW Error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
#ifndef DEBUG
/* Don't actually power off when debugging so we can test
* without actually failing while injecting errors.
* Error data will not be logged to syslog.
*/
ppc_md.power_off();
#endif
} else {
udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
printk(KERN_WARNING
"Warning: Recoverable hardware error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
}
spin_unlock(&ras_log_buf_lock);
return IRQ_HANDLED;
}
/* Get the error information for errors coming through the
* FWNMI vectors. The pt_regs' r3 will be updated to reflect
* the actual r3 if possible, and a ptr to the error log entry
* will be returned if found.
*
* The mce_data_buf does not have any locks or protection around it,
* if a second machine check comes in, or a system reset is done
* before we have logged the error, then we will get corruption in the
* error log. This is preferable over holding off on calling
* ibm,nmi-interlock which would result in us checkstopping if a
* second machine check did come in.
*/
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
unsigned long errdata = regs->gpr[3];
struct rtas_error_log *errhdr = NULL;
unsigned long *savep;
if ((errdata >= 0x7000 && errdata < 0x7fff0) ||
(errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) {
savep = __va(errdata);
regs->gpr[3] = savep[0]; /* restore original r3 */
memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX);
errhdr = (struct rtas_error_log *)mce_data_buf;
} else {
printk("FWNMI: corrupt r3\n");
}
return errhdr;
}
/* Call this when done with the data returned by FWNMI_get_errinfo.
* It will release the saved data area for other CPUs in the
* partition to receive FWNMI errors.
*/
static void fwnmi_release_errinfo(void)
{
int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
if (ret != 0)
printk("FWNMI: nmi-interlock failed: %d\n", ret);
}
int pSeries_system_reset_exception(struct pt_regs *regs)
{
if (fwnmi_active) {
struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
if (errhdr) {
/* XXX Should look at FWNMI information */
}
fwnmi_release_errinfo();
}
return 0; /* need to perform reset */
}
/*
* See if we can recover from a machine check exception.
* This is only called on power4 (or above) and only via
* the Firmware Non-Maskable Interrupts (fwnmi) handler
* which provides the error analysis for us.
*
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
{
int nonfatal = 0;
if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
/* Platform corrected itself */
nonfatal = 1;
} else if ((regs->msr & MSR_RI) &&
user_mode(regs) &&
err->severity == RTAS_SEVERITY_ERROR_SYNC &&
err->disposition == RTAS_DISP_NOT_RECOVERED &&
err->target == RTAS_TARGET_MEMORY &&
err->type == RTAS_TYPE_ECC_UNCORR &&
!(current->pid == 0 || is_init(current))) {
/* Kill off a user process with an ECC error */
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
current->pid);
/* XXX something better for ECC error? */
_exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
nonfatal = 1;
}
log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal);
return nonfatal;
}
/*
* Handle a machine check.
*
* Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
* should be present. If so the handler which called us tells us if the
* error was recovered (never true if RI=0).
*
* On hardware prior to Power 4 these exceptions were asynchronous which
* means we can't tell exactly where it occurred and so we can't recover.
*/
int pSeries_machine_check_exception(struct pt_regs *regs)
{
struct rtas_error_log *errp;
if (fwnmi_active) {
errp = fwnmi_get_errinfo(regs);
fwnmi_release_errinfo();
if (errp && recover_mce(regs, errp))
return 1;
}
return 0;
}