Merge branch 'release' of master.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6
This commit is contained in:
commit
0d0fc3a2d6
8 changed files with 292 additions and 74 deletions
194
Documentation/ia64/mca.txt
Normal file
194
Documentation/ia64/mca.txt
Normal file
|
@ -0,0 +1,194 @@
|
||||||
|
An ad-hoc collection of notes on IA64 MCA and INIT processing. Feel
|
||||||
|
free to update it with notes about any area that is not clear.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
MCA/INIT are completely asynchronous. They can occur at any time, when
|
||||||
|
the OS is in any state. Including when one of the cpus is already
|
||||||
|
holding a spinlock. Trying to get any lock from MCA/INIT state is
|
||||||
|
asking for deadlock. Also the state of structures that are protected
|
||||||
|
by locks is indeterminate, including linked lists.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
The complicated ia64 MCA process. All of this is mandated by Intel's
|
||||||
|
specification for ia64 SAL, error recovery and unwind, it is not as
|
||||||
|
if we have a choice here.
|
||||||
|
|
||||||
|
* MCA occurs on one cpu, usually due to a double bit memory error.
|
||||||
|
This is the monarch cpu.
|
||||||
|
|
||||||
|
* SAL sends an MCA rendezvous interrupt (which is a normal interrupt)
|
||||||
|
to all the other cpus, the slaves.
|
||||||
|
|
||||||
|
* Slave cpus that receive the MCA interrupt call down into SAL, they
|
||||||
|
end up spinning disabled while the MCA is being serviced.
|
||||||
|
|
||||||
|
* If any slave cpu was already spinning disabled when the MCA occurred
|
||||||
|
then it cannot service the MCA interrupt. SAL waits ~20 seconds then
|
||||||
|
sends an unmaskable INIT event to the slave cpus that have not
|
||||||
|
already rendezvoused.
|
||||||
|
|
||||||
|
* Because MCA/INIT can be delivered at any time, including when the cpu
|
||||||
|
is down in PAL in physical mode, the registers at the time of the
|
||||||
|
event are _completely_ undefined. In particular the MCA/INIT
|
||||||
|
handlers cannot rely on the thread pointer, PAL physical mode can
|
||||||
|
(and does) modify TP. It is allowed to do that as long as it resets
|
||||||
|
TP on return. However MCA/INIT events expose us to these PAL
|
||||||
|
internal TP changes. Hence curr_task().
|
||||||
|
|
||||||
|
* If an MCA/INIT event occurs while the kernel was running (not user
|
||||||
|
space) and the kernel has called PAL then the MCA/INIT handler cannot
|
||||||
|
assume that the kernel stack is in a fit state to be used. Mainly
|
||||||
|
because PAL may or may not maintain the stack pointer internally.
|
||||||
|
Because the MCA/INIT handlers cannot trust the kernel stack, they
|
||||||
|
have to use their own, per-cpu stacks. The MCA/INIT stacks are
|
||||||
|
preformatted with just enough task state to let the relevant handlers
|
||||||
|
do their job.
|
||||||
|
|
||||||
|
* Unlike most other architectures, the ia64 struct task is embedded in
|
||||||
|
the kernel stack[1]. So switching to a new kernel stack means that
|
||||||
|
we switch to a new task as well. Because various bits of the kernel
|
||||||
|
assume that current points into the struct task, switching to a new
|
||||||
|
stack also means a new value for current.
|
||||||
|
|
||||||
|
* Once all slaves have rendezvoused and are spinning disabled, the
|
||||||
|
monarch is entered. The monarch now tries to diagnose the problem
|
||||||
|
and decide if it can recover or not.
|
||||||
|
|
||||||
|
* Part of the monarch's job is to look at the state of all the other
|
||||||
|
tasks. The only way to do that on ia64 is to call the unwinder,
|
||||||
|
as mandated by Intel.
|
||||||
|
|
||||||
|
* The starting point for the unwind depends on whether a task is
|
||||||
|
running or not. That is, whether it is on a cpu or is blocked. The
|
||||||
|
monarch has to determine whether or not a task is on a cpu before it
|
||||||
|
knows how to start unwinding it. The tasks that received an MCA or
|
||||||
|
INIT event are no longer running, they have been converted to blocked
|
||||||
|
tasks. But (and it's a big but), the cpus that received the MCA
|
||||||
|
rendezvous interrupt are still running on their normal kernel stacks!
|
||||||
|
|
||||||
|
* To distinguish between these two cases, the monarch must know which
|
||||||
|
tasks are on a cpu and which are not. Hence each slave cpu that
|
||||||
|
switches to an MCA/INIT stack, registers its new stack using
|
||||||
|
set_curr_task(), so the monarch can tell that the _original_ task is
|
||||||
|
no longer running on that cpu. That gives us a decent chance of
|
||||||
|
getting a valid backtrace of the _original_ task.
|
||||||
|
|
||||||
|
* MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a
|
||||||
|
nested error, we want diagnostics on the MCA/INIT handler that
|
||||||
|
failed, not on the task that was originally running. Again this
|
||||||
|
requires set_curr_task() so the MCA/INIT handlers can register their
|
||||||
|
own stack as running on that cpu. Then a recursive error gets a
|
||||||
|
trace of the failing handler's "task".
|
||||||
|
|
||||||
|
[1] My (Keith Owens) original design called for ia64 to separate its
|
||||||
|
struct task and the kernel stacks. Then the MCA/INIT data would be
|
||||||
|
chained stacks like i386 interrupt stacks. But that required
|
||||||
|
radical surgery on the rest of ia64, plus extra hard wired TLB
|
||||||
|
entries with their associated performance degradation. David
|
||||||
|
Mosberger vetoed that approach. Which meant that separate kernel
|
||||||
|
stacks meant separate "tasks" for the MCA/INIT handlers.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
INIT is less complicated than MCA. Pressing the nmi button or using
|
||||||
|
the equivalent command on the management console sends INIT to all
|
||||||
|
cpus. SAL picks one of the cpus as the monarch and the rest are
|
||||||
|
slaves. All the OS INIT handlers are entered at approximately the same
|
||||||
|
time. The OS monarch prints the state of all tasks and returns, after
|
||||||
|
which the slaves return and the system resumes.
|
||||||
|
|
||||||
|
At least that is what is supposed to happen. Alas there are broken
|
||||||
|
versions of SAL out there. Some drive all the cpus as monarchs. Some
|
||||||
|
drive them all as slaves. Some drive one cpu as monarch, wait for that
|
||||||
|
cpu to return from the OS then drive the rest as slaves. Some versions
|
||||||
|
of SAL cannot even cope with returning from the OS, they spin inside
|
||||||
|
SAL on resume. The OS INIT code has workarounds for some of these
|
||||||
|
broken SAL symptoms, but some simply cannot be fixed from the OS side.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer
|
||||||
|
violations. Unfortunately MCA/INIT start off as massive layer
|
||||||
|
violations (can occur at _any_ time) and they build from there.
|
||||||
|
|
||||||
|
At least ia64 makes an attempt at recovering from hardware errors, but
|
||||||
|
it is a difficult problem because of the asynchronous nature of these
|
||||||
|
errors. When processing an unmaskable interrupt we sometimes need
|
||||||
|
special code to cope with our inability to take any locks.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
How is ia64 MCA/INIT different from x86 NMI?
|
||||||
|
|
||||||
|
* x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to
|
||||||
|
all cpus.
|
||||||
|
|
||||||
|
* x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2
|
||||||
|
per cpu.
|
||||||
|
|
||||||
|
* x86 has a separate struct task which points to one of multiple kernel
|
||||||
|
stacks. ia64 has the struct task embedded in the single kernel
|
||||||
|
stack, so switching stack means switching task.
|
||||||
|
|
||||||
|
* x86 does not call the BIOS so the NMI handler does not have to worry
|
||||||
|
about any registers having changed. MCA/INIT can occur while the cpu
|
||||||
|
is in PAL in physical mode, with undefined registers and an undefined
|
||||||
|
kernel stack.
|
||||||
|
|
||||||
|
* i386 backtrace is not very sensitive to whether a process is running
|
||||||
|
or not. ia64 unwind is very, very sensitive to whether a process is
|
||||||
|
running or not.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
What happens when MCA/INIT is delivered while a cpu is running user
|
||||||
|
space code?
|
||||||
|
|
||||||
|
The user mode registers are stored in the RSE area of the MCA/INIT stack on
|
||||||
|
entry to the OS and are restored from there on return to SAL, so user
|
||||||
|
mode registers are preserved across a recoverable MCA/INIT. Since the
|
||||||
|
OS has no idea what unwind data is available for the user space stack,
|
||||||
|
MCA/INIT never tries to backtrace user space. Which means that the OS
|
||||||
|
does not bother making the user space process look like a blocked task,
|
||||||
|
i.e. the OS does not copy pt_regs and switch_stack to the user space
|
||||||
|
stack. Also the OS has no idea how big the user space RSE and memory
|
||||||
|
stacks are, which makes it too risky to copy the saved state to a user
|
||||||
|
mode stack.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
How do we get a backtrace on the tasks that were running when MCA/INIT
|
||||||
|
was delivered?
|
||||||
|
|
||||||
|
mca.c::ia64_mca_modify_original_stack(). That identifies and
|
||||||
|
verifies the original kernel stack, copies the dirty registers from
|
||||||
|
the MCA/INIT stack's RSE to the original stack's RSE, copies the
|
||||||
|
skeleton struct pt_regs and switch_stack to the original stack, fills
|
||||||
|
in the skeleton structures from the PAL minstate area and updates the
|
||||||
|
original stack's thread.ksp. That makes the original stack look
|
||||||
|
exactly like any other blocked task, i.e. it now appears to be
|
||||||
|
sleeping. To get a backtrace, just start with thread.ksp for the
|
||||||
|
original task and unwind like any other sleeping task.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
How do we identify the tasks that were running when MCA/INIT was
|
||||||
|
delivered?
|
||||||
|
|
||||||
|
If the previous task has been verified and converted to a blocked
|
||||||
|
state, then sos->prev_task on the MCA/INIT stack is updated to point to
|
||||||
|
the previous task. You can look at that field in dumps or debuggers.
|
||||||
|
To help distinguish between the handler and the original tasks,
|
||||||
|
handlers have _TIF_MCA_INIT set in thread_info.flags.
|
||||||
|
|
||||||
|
The sos data is always in the MCA/INIT handler stack, at offset
|
||||||
|
MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it
|
||||||
|
as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct
|
||||||
|
ia64_sal_os_state), with 16 byte alignment for all structures.
|
||||||
|
|
||||||
|
Also the comm field of the MCA/INIT task is modified to include the pid
|
||||||
|
of the original task, for humans to use. For example, a comm field of
|
||||||
|
'MCA 12159' means that pid 12159 was running when the MCA was
|
||||||
|
delivered.
|
|
@ -899,7 +899,7 @@ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
|
||||||
if ((err = iosapic_init(phys_addr, gsi_base)))
|
if ((err = iosapic_init(phys_addr, gsi_base)))
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
#if CONFIG_ACPI_NUMA
|
#ifdef CONFIG_ACPI_NUMA
|
||||||
acpi_map_iosapic(handle, 0, NULL, NULL);
|
acpi_map_iosapic(handle, 0, NULL, NULL);
|
||||||
#endif /* CONFIG_ACPI_NUMA */
|
#endif /* CONFIG_ACPI_NUMA */
|
||||||
|
|
||||||
|
|
|
@ -491,7 +491,7 @@ GLOBAL_ENTRY(prefetch_stack)
|
||||||
;;
|
;;
|
||||||
lfetch.fault [r16], 128
|
lfetch.fault [r16], 128
|
||||||
br.ret.sptk.many rp
|
br.ret.sptk.many rp
|
||||||
END(prefetch_switch_stack)
|
END(prefetch_stack)
|
||||||
|
|
||||||
GLOBAL_ENTRY(execve)
|
GLOBAL_ENTRY(execve)
|
||||||
mov r15=__NR_execve // put syscall number in place
|
mov r15=__NR_execve // put syscall number in place
|
||||||
|
|
|
@ -84,23 +84,23 @@ mca_page_isolate(unsigned long paddr)
|
||||||
struct page *p;
|
struct page *p;
|
||||||
|
|
||||||
/* whether physical address is valid or not */
|
/* whether physical address is valid or not */
|
||||||
if ( !ia64_phys_addr_valid(paddr) )
|
if (!ia64_phys_addr_valid(paddr))
|
||||||
return ISOLATE_NG;
|
return ISOLATE_NG;
|
||||||
|
|
||||||
/* convert physical address to physical page number */
|
/* convert physical address to physical page number */
|
||||||
p = pfn_to_page(paddr>>PAGE_SHIFT);
|
p = pfn_to_page(paddr>>PAGE_SHIFT);
|
||||||
|
|
||||||
/* check whether a page number have been already registered or not */
|
/* check whether a page number have been already registered or not */
|
||||||
for( i = 0; i < num_page_isolate; i++ )
|
for (i = 0; i < num_page_isolate; i++)
|
||||||
if( page_isolate[i] == p )
|
if (page_isolate[i] == p)
|
||||||
return ISOLATE_OK; /* already listed */
|
return ISOLATE_OK; /* already listed */
|
||||||
|
|
||||||
/* limitation check */
|
/* limitation check */
|
||||||
if( num_page_isolate == MAX_PAGE_ISOLATE )
|
if (num_page_isolate == MAX_PAGE_ISOLATE)
|
||||||
return ISOLATE_NG;
|
return ISOLATE_NG;
|
||||||
|
|
||||||
/* kick pages having attribute 'SLAB' or 'Reserved' */
|
/* kick pages having attribute 'SLAB' or 'Reserved' */
|
||||||
if( PageSlab(p) || PageReserved(p) )
|
if (PageSlab(p) || PageReserved(p))
|
||||||
return ISOLATE_NG;
|
return ISOLATE_NG;
|
||||||
|
|
||||||
/* add attribute 'Reserved' and register the page */
|
/* add attribute 'Reserved' and register the page */
|
||||||
|
@ -139,10 +139,10 @@ mca_handler_bh(unsigned long paddr)
|
||||||
* @peidx: pointer to index of processor error section
|
* @peidx: pointer to index of processor error section
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
|
mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* calculate the start address of
|
* calculate the start address of
|
||||||
* "struct cpuid_info" and "sal_processor_static_info_t".
|
* "struct cpuid_info" and "sal_processor_static_info_t".
|
||||||
*/
|
*/
|
||||||
|
@ -164,7 +164,7 @@ mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mca_make_slidx - Make index of SAL error record
|
* mca_make_slidx - Make index of SAL error record
|
||||||
* @buffer: pointer to SAL error record
|
* @buffer: pointer to SAL error record
|
||||||
* @slidx: pointer to index of SAL error record
|
* @slidx: pointer to index of SAL error record
|
||||||
*
|
*
|
||||||
|
@ -172,12 +172,12 @@ mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
|
||||||
* 1 if record has platform error / 0 if not
|
* 1 if record has platform error / 0 if not
|
||||||
*/
|
*/
|
||||||
#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
|
#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
|
||||||
{ slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
|
{slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
|
||||||
hl->hdr = ptr; \
|
hl->hdr = ptr; \
|
||||||
list_add(&hl->list, &(sect)); \
|
list_add(&hl->list, &(sect)); \
|
||||||
slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
|
slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
|
||||||
|
|
||||||
static int
|
static int
|
||||||
mca_make_slidx(void *buffer, slidx_table_t *slidx)
|
mca_make_slidx(void *buffer, slidx_table_t *slidx)
|
||||||
{
|
{
|
||||||
int platform_err = 0;
|
int platform_err = 0;
|
||||||
|
@ -214,28 +214,36 @@ mca_make_slidx(void *buffer, slidx_table_t *slidx)
|
||||||
sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
|
sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
|
||||||
if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
|
if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
|
||||||
} else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) {
|
} else if (!efi_guidcmp(sp->guid,
|
||||||
|
SAL_PLAT_BUS_ERR_SECT_GUID)) {
|
||||||
platform_err = 1;
|
platform_err = 1;
|
||||||
LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
|
LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
|
||||||
} else {
|
} else {
|
||||||
|
@ -253,15 +261,16 @@ mca_make_slidx(void *buffer, slidx_table_t *slidx)
|
||||||
* Return value:
|
* Return value:
|
||||||
* 0 on Success / -ENOMEM on Failure
|
* 0 on Success / -ENOMEM on Failure
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
init_record_index_pools(void)
|
init_record_index_pools(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int rec_max_size; /* Maximum size of SAL error records */
|
int rec_max_size; /* Maximum size of SAL error records */
|
||||||
int sect_min_size; /* Minimum size of SAL error sections */
|
int sect_min_size; /* Minimum size of SAL error sections */
|
||||||
/* minimum size table of each section */
|
/* minimum size table of each section */
|
||||||
static int sal_log_sect_min_sizes[] = {
|
static int sal_log_sect_min_sizes[] = {
|
||||||
sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t),
|
sizeof(sal_log_processor_info_t)
|
||||||
|
+ sizeof(sal_processor_static_info_t),
|
||||||
sizeof(sal_log_mem_dev_err_info_t),
|
sizeof(sal_log_mem_dev_err_info_t),
|
||||||
sizeof(sal_log_sel_dev_err_info_t),
|
sizeof(sal_log_sel_dev_err_info_t),
|
||||||
sizeof(sal_log_pci_bus_err_info_t),
|
sizeof(sal_log_pci_bus_err_info_t),
|
||||||
|
@ -294,7 +303,8 @@ init_record_index_pools(void)
|
||||||
|
|
||||||
/* - 3 - */
|
/* - 3 - */
|
||||||
slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
|
slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
|
||||||
slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
|
slidx_pool.buffer = (slidx_list_t *)
|
||||||
|
kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
|
||||||
|
|
||||||
return slidx_pool.buffer ? 0 : -ENOMEM;
|
return slidx_pool.buffer ? 0 : -ENOMEM;
|
||||||
}
|
}
|
||||||
|
@ -308,6 +318,7 @@ init_record_index_pools(void)
|
||||||
* is_mca_global - Check whether this MCA is global or not
|
* is_mca_global - Check whether this MCA is global or not
|
||||||
* @peidx: pointer of index of processor error section
|
* @peidx: pointer of index of processor error section
|
||||||
* @pbci: pointer to pal_bus_check_info_t
|
* @pbci: pointer to pal_bus_check_info_t
|
||||||
|
* @sos: pointer to hand off struct between SAL and OS
|
||||||
*
|
*
|
||||||
* Return value:
|
* Return value:
|
||||||
* MCA_IS_LOCAL / MCA_IS_GLOBAL
|
* MCA_IS_LOCAL / MCA_IS_GLOBAL
|
||||||
|
@ -317,11 +328,12 @@ static mca_type_t
|
||||||
is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
||||||
struct ia64_sal_os_state *sos)
|
struct ia64_sal_os_state *sos)
|
||||||
{
|
{
|
||||||
pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
|
pal_processor_state_info_t *psp =
|
||||||
|
(pal_processor_state_info_t*)peidx_psp(peidx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* PAL can request a rendezvous, if the MCA has a global scope.
|
* PAL can request a rendezvous, if the MCA has a global scope.
|
||||||
* If "rz_always" flag is set, SAL requests MCA rendezvous
|
* If "rz_always" flag is set, SAL requests MCA rendezvous
|
||||||
* in spite of global MCA.
|
* in spite of global MCA.
|
||||||
* Therefore it is local MCA when rendezvous has not been requested.
|
* Therefore it is local MCA when rendezvous has not been requested.
|
||||||
* Failed to rendezvous, the system must be down.
|
* Failed to rendezvous, the system must be down.
|
||||||
|
@ -381,13 +393,15 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
||||||
* @slidx: pointer of index of SAL error record
|
* @slidx: pointer of index of SAL error record
|
||||||
* @peidx: pointer of index of processor error section
|
* @peidx: pointer of index of processor error section
|
||||||
* @pbci: pointer of pal_bus_check_info
|
* @pbci: pointer of pal_bus_check_info
|
||||||
|
* @sos: pointer to hand off struct between SAL and OS
|
||||||
*
|
*
|
||||||
* Return value:
|
* Return value:
|
||||||
* 1 on Success / 0 on Failure
|
* 1 on Success / 0 on Failure
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
recover_from_read_error(slidx_table_t *slidx,
|
||||||
|
peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
||||||
struct ia64_sal_os_state *sos)
|
struct ia64_sal_os_state *sos)
|
||||||
{
|
{
|
||||||
sal_log_mod_error_info_t *smei;
|
sal_log_mod_error_info_t *smei;
|
||||||
|
@ -453,24 +467,28 @@ recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_chec
|
||||||
* @slidx: pointer of index of SAL error record
|
* @slidx: pointer of index of SAL error record
|
||||||
* @peidx: pointer of index of processor error section
|
* @peidx: pointer of index of processor error section
|
||||||
* @pbci: pointer of pal_bus_check_info
|
* @pbci: pointer of pal_bus_check_info
|
||||||
|
* @sos: pointer to hand off struct between SAL and OS
|
||||||
*
|
*
|
||||||
* Return value:
|
* Return value:
|
||||||
* 1 on Success / 0 on Failure
|
* 1 on Success / 0 on Failure
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
|
||||||
|
pal_bus_check_info_t *pbci,
|
||||||
struct ia64_sal_os_state *sos)
|
struct ia64_sal_os_state *sos)
|
||||||
{
|
{
|
||||||
int status = 0;
|
int status = 0;
|
||||||
pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
|
pal_processor_state_info_t *psp =
|
||||||
|
(pal_processor_state_info_t*)peidx_psp(peidx);
|
||||||
|
|
||||||
if (psp->bc && pbci->eb && pbci->bsi == 0) {
|
if (psp->bc && pbci->eb && pbci->bsi == 0) {
|
||||||
switch(pbci->type) {
|
switch(pbci->type) {
|
||||||
case 1: /* partial read */
|
case 1: /* partial read */
|
||||||
case 3: /* full line(cpu) read */
|
case 3: /* full line(cpu) read */
|
||||||
case 9: /* I/O space read */
|
case 9: /* I/O space read */
|
||||||
status = recover_from_read_error(slidx, peidx, pbci, sos);
|
status = recover_from_read_error(slidx, peidx, pbci,
|
||||||
|
sos);
|
||||||
break;
|
break;
|
||||||
case 0: /* unknown */
|
case 0: /* unknown */
|
||||||
case 2: /* partial write */
|
case 2: /* partial write */
|
||||||
|
@ -481,7 +499,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
|
||||||
case 8: /* write coalescing transactions */
|
case 8: /* write coalescing transactions */
|
||||||
case 10: /* I/O space write */
|
case 10: /* I/O space write */
|
||||||
case 11: /* inter-processor interrupt message(IPI) */
|
case 11: /* inter-processor interrupt message(IPI) */
|
||||||
case 12: /* interrupt acknowledge or external task priority cycle */
|
case 12: /* interrupt acknowledge or
|
||||||
|
external task priority cycle */
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -496,6 +515,7 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
|
||||||
* @slidx: pointer of index of SAL error record
|
* @slidx: pointer of index of SAL error record
|
||||||
* @peidx: pointer of index of processor error section
|
* @peidx: pointer of index of processor error section
|
||||||
* @pbci: pointer of pal_bus_check_info
|
* @pbci: pointer of pal_bus_check_info
|
||||||
|
* @sos: pointer to hand off struct between SAL and OS
|
||||||
*
|
*
|
||||||
* Return value:
|
* Return value:
|
||||||
* 1 on Success / 0 on Failure
|
* 1 on Success / 0 on Failure
|
||||||
|
@ -509,15 +529,17 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
recover_from_processor_error(int platform, slidx_table_t *slidx,
|
||||||
|
peidx_table_t *peidx, pal_bus_check_info_t *pbci,
|
||||||
struct ia64_sal_os_state *sos)
|
struct ia64_sal_os_state *sos)
|
||||||
{
|
{
|
||||||
pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
|
pal_processor_state_info_t *psp =
|
||||||
|
(pal_processor_state_info_t*)peidx_psp(peidx);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We cannot recover errors with other than bus_check.
|
* We cannot recover errors with other than bus_check.
|
||||||
*/
|
*/
|
||||||
if (psp->cc || psp->rc || psp->uc)
|
if (psp->cc || psp->rc || psp->uc)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -546,10 +568,10 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
|
||||||
* (e.g. a load from poisoned memory)
|
* (e.g. a load from poisoned memory)
|
||||||
* This means "there are some platform errors".
|
* This means "there are some platform errors".
|
||||||
*/
|
*/
|
||||||
if (platform)
|
if (platform)
|
||||||
return recover_from_platform_error(slidx, peidx, pbci, sos);
|
return recover_from_platform_error(slidx, peidx, pbci, sos);
|
||||||
/*
|
/*
|
||||||
* On account of strange SAL error record, we cannot recover.
|
* On account of strange SAL error record, we cannot recover.
|
||||||
*/
|
*/
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -557,14 +579,14 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *
|
||||||
/**
|
/**
|
||||||
* mca_try_to_recover - Try to recover from MCA
|
* mca_try_to_recover - Try to recover from MCA
|
||||||
* @rec: pointer to a SAL error record
|
* @rec: pointer to a SAL error record
|
||||||
|
* @sos: pointer to hand off struct between SAL and OS
|
||||||
*
|
*
|
||||||
* Return value:
|
* Return value:
|
||||||
* 1 on Success / 0 on Failure
|
* 1 on Success / 0 on Failure
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
mca_try_to_recover(void *rec,
|
mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
|
||||||
struct ia64_sal_os_state *sos)
|
|
||||||
{
|
{
|
||||||
int platform_err;
|
int platform_err;
|
||||||
int n_proc_err;
|
int n_proc_err;
|
||||||
|
@ -588,7 +610,8 @@ mca_try_to_recover(void *rec,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Make index of processor error section */
|
/* Make index of processor error section */
|
||||||
mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
|
mca_make_peidx((sal_log_processor_info_t*)
|
||||||
|
slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
|
||||||
|
|
||||||
/* Extract Processor BUS_CHECK[0] */
|
/* Extract Processor BUS_CHECK[0] */
|
||||||
*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
|
*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
|
||||||
|
@ -598,7 +621,8 @@ mca_try_to_recover(void *rec,
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* Try to recover a processor error */
|
/* Try to recover a processor error */
|
||||||
return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci, sos);
|
return recover_from_processor_error(platform_err, &slidx, &peidx,
|
||||||
|
&pbci, sos);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -611,7 +635,7 @@ int __init mca_external_handler_init(void)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
/* register external mca handlers */
|
/* register external mca handlers */
|
||||||
if (ia64_reg_MCA_extension(mca_try_to_recover)){
|
if (ia64_reg_MCA_extension(mca_try_to_recover)) {
|
||||||
printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
|
printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
|
||||||
kfree(slidx_pool.buffer);
|
kfree(slidx_pool.buffer);
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
|
* Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
* Processor error section:
|
* Processor error section:
|
||||||
*
|
*
|
||||||
* +-sal_log_processor_info_t *info-------------+
|
* +-sal_log_processor_info_t *info-------------+
|
||||||
* | sal_log_section_hdr_t header; |
|
* | sal_log_section_hdr_t header; |
|
||||||
|
|
|
@ -13,45 +13,45 @@
|
||||||
#include <asm/ptrace.h>
|
#include <asm/ptrace.h>
|
||||||
|
|
||||||
GLOBAL_ENTRY(mca_handler_bhhook)
|
GLOBAL_ENTRY(mca_handler_bhhook)
|
||||||
invala // clear RSE ?
|
invala // clear RSE ?
|
||||||
;; //
|
;;
|
||||||
cover //
|
cover
|
||||||
;; //
|
;;
|
||||||
clrrrb //
|
clrrrb
|
||||||
;;
|
;;
|
||||||
alloc r16=ar.pfs,0,2,1,0 // make a new frame
|
alloc r16=ar.pfs,0,2,1,0 // make a new frame
|
||||||
;;
|
;;
|
||||||
mov ar.rsc=0
|
mov ar.rsc=0
|
||||||
;;
|
;;
|
||||||
mov r13=IA64_KR(CURRENT) // current task pointer
|
mov r13=IA64_KR(CURRENT) // current task pointer
|
||||||
;;
|
;;
|
||||||
mov r2=r13
|
mov r2=r13
|
||||||
;;
|
;;
|
||||||
addl r22=IA64_RBS_OFFSET,r2
|
addl r22=IA64_RBS_OFFSET,r2
|
||||||
;;
|
;;
|
||||||
mov ar.bspstore=r22
|
mov ar.bspstore=r22
|
||||||
;;
|
;;
|
||||||
addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
|
addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2
|
||||||
;;
|
;;
|
||||||
adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
|
adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
|
||||||
;;
|
;;
|
||||||
st1 [r2]=r0 // clear current->thread.on_ustack flag
|
st1 [r2]=r0 // clear current->thread.on_ustack flag
|
||||||
mov loc0=r16
|
mov loc0=r16
|
||||||
movl loc1=mca_handler_bh // recovery C function
|
movl loc1=mca_handler_bh // recovery C function
|
||||||
;;
|
;;
|
||||||
mov out0=r8 // poisoned address
|
mov out0=r8 // poisoned address
|
||||||
mov b6=loc1
|
mov b6=loc1
|
||||||
;;
|
;;
|
||||||
mov loc1=rp
|
mov loc1=rp
|
||||||
;;
|
;;
|
||||||
ssm psr.i
|
ssm psr.i
|
||||||
;;
|
;;
|
||||||
br.call.sptk.many rp=b6 // does not return ...
|
br.call.sptk.many rp=b6 // does not return ...
|
||||||
;;
|
;;
|
||||||
mov ar.pfs=loc0
|
mov ar.pfs=loc0
|
||||||
mov rp=loc1
|
mov rp=loc1
|
||||||
;;
|
;;
|
||||||
mov r8=r0
|
mov r8=r0
|
||||||
br.ret.sptk.many rp
|
br.ret.sptk.many rp
|
||||||
;;
|
;;
|
||||||
END(mca_handler_bhhook)
|
END(mca_handler_bhhook)
|
||||||
|
|
|
@ -574,7 +574,7 @@ pfm_protect_ctx_ctxsw(pfm_context_t *x)
|
||||||
return 0UL;
|
return 0UL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned long
|
static inline void
|
||||||
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
|
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
|
||||||
{
|
{
|
||||||
spin_unlock(&(x)->ctx_lock);
|
spin_unlock(&(x)->ctx_lock);
|
||||||
|
|
|
@ -252,7 +252,7 @@ hp_zx1_configure (void)
|
||||||
readl(hp->ioc_regs+HP_ZX1_PDIR_BASE);
|
readl(hp->ioc_regs+HP_ZX1_PDIR_BASE);
|
||||||
writel(hp->io_tlb_ps, hp->ioc_regs+HP_ZX1_TCNFG);
|
writel(hp->io_tlb_ps, hp->ioc_regs+HP_ZX1_TCNFG);
|
||||||
readl(hp->ioc_regs+HP_ZX1_TCNFG);
|
readl(hp->ioc_regs+HP_ZX1_TCNFG);
|
||||||
writel(~(HP_ZX1_IOVA_SIZE-1), hp->ioc_regs+HP_ZX1_IMASK);
|
writel((unsigned int)(~(HP_ZX1_IOVA_SIZE-1)), hp->ioc_regs+HP_ZX1_IMASK);
|
||||||
readl(hp->ioc_regs+HP_ZX1_IMASK);
|
readl(hp->ioc_regs+HP_ZX1_IMASK);
|
||||||
writel(hp->iova_base|1, hp->ioc_regs+HP_ZX1_IBASE);
|
writel(hp->iova_base|1, hp->ioc_regs+HP_ZX1_IBASE);
|
||||||
readl(hp->ioc_regs+HP_ZX1_IBASE);
|
readl(hp->ioc_regs+HP_ZX1_IBASE);
|
||||||
|
|
Loading…
Reference in a new issue