68cb14c7c4
Summary: Asserting INIT on cpu going to be offline will result in unexpected behavior. It will be a real problem in kdump cases where INIT might be asserted to unstable APs going to be offline by returning to SAL. Description: Since psr.mc is cleared when bits in psr are set to SAL_PSR_BITS_TO_SET in ia64_jump_to_sal(), there is a small window (~few msecs) that the cpu can receive INIT even if the cpu enter there via INIT handler. In this window we do restore of registers for SAL, so INIT asserted here will not work properly. It is hard to remove this window by masking INIT (i.e. setting psr.mc) because we have to unmask it later in OS, because we have to use branch instruction (br.ret, not rfi) to return SAL, due to OS_BOOT_RENDEZ to SAL return convention. I suppose this window will not be a real problem on cpu offline if we can educate people not to push INIT button during hotplug operation. However, only exception is a race in kdump and INIT. Now kdump returns APs to SAL before processing dump, but the kernel might receive INIT at that point in time. Such INIT might be asserted by kdump itself if an AP doesn't react IPI soon and kdump decided to use INIT to stop the AP. Or it might be asserted by operator or an external agent to start dump on the unstable system. Such panic+INIT or INIT+INIT cases should be rare, but it will be happy if we can retrieve crashdump even in such cases. How to reproduce: panic+INIT or INIT+INIT, with kdump configured Expected results: crashdump is retrieved anyway Actual results: panic, hang etc. (unexpected) Proposed fix To avoid the window on the way to SAL, this patch stops returning APs to SAL in case of kdump. In other words, this patch makes APs spin in OS instead of spinning in SAL. (* Note: What impact would be there? If a cpu is spinning in SAL, the cpu is in BOOT_RENDEZ loop, as same as offlined cpu. In theory if an INIT is asserted there, cpus in the BOOT_RENDEZ loop should not invoke OS_INIT on it. So in either way, no matter where the cpu is spinning actually in, once cpu starts spin and act as "frozen," INIT on the cpu have no effects. From another point of view, all debug information on the cpu should have stored to memory before the cpu start to be frozen. So no more action on the cpu is required.) I confirmed that the kdump sometime hangs by concurrent INITs (another INIT after an INIT), and it doesn't hang after applying this patch. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Haren Myneni <hbabu@us.ibm.com> Cc: kexec@lists.infradead.org Acked-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
258 lines
5.7 KiB
C
258 lines
5.7 KiB
C
/*
|
|
* arch/ia64/kernel/crash.c
|
|
*
|
|
* Architecture specific (ia64) functions for kexec based crash dumps.
|
|
*
|
|
* Created by: Khalid Aziz <khalid.aziz@hp.com>
|
|
* Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
|
|
* Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
|
|
*
|
|
*/
|
|
#include <linux/smp.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/crash_dump.h>
|
|
#include <linux/bootmem.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/elfcore.h>
|
|
#include <linux/sysctl.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kdebug.h>
|
|
|
|
#include <asm/mca.h>
|
|
|
|
int kdump_status[NR_CPUS];
|
|
static atomic_t kdump_cpu_frozen;
|
|
atomic_t kdump_in_progress;
|
|
static int kdump_on_init = 1;
|
|
static int kdump_on_fatal_mca = 1;
|
|
|
|
static inline Elf64_Word
|
|
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
|
|
size_t data_len)
|
|
{
|
|
struct elf_note *note = (struct elf_note *)buf;
|
|
note->n_namesz = strlen(name) + 1;
|
|
note->n_descsz = data_len;
|
|
note->n_type = type;
|
|
buf += (sizeof(*note) + 3)/4;
|
|
memcpy(buf, name, note->n_namesz);
|
|
buf += (note->n_namesz + 3)/4;
|
|
memcpy(buf, data, data_len);
|
|
buf += (data_len + 3)/4;
|
|
return buf;
|
|
}
|
|
|
|
static void
|
|
final_note(void *buf)
|
|
{
|
|
memset(buf, 0, sizeof(struct elf_note));
|
|
}
|
|
|
|
extern void ia64_dump_cpu_regs(void *);
|
|
|
|
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
|
|
|
|
void
|
|
crash_save_this_cpu(void)
|
|
{
|
|
void *buf;
|
|
unsigned long cfm, sof, sol;
|
|
|
|
int cpu = smp_processor_id();
|
|
struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
|
|
|
|
elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
|
|
memset(prstatus, 0, sizeof(*prstatus));
|
|
prstatus->pr_pid = current->pid;
|
|
|
|
ia64_dump_cpu_regs(dst);
|
|
cfm = dst[43];
|
|
sol = (cfm >> 7) & 0x7f;
|
|
sof = cfm & 0x7f;
|
|
dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
|
|
sof - sol);
|
|
|
|
buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
|
|
if (!buf)
|
|
return;
|
|
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus,
|
|
sizeof(*prstatus));
|
|
final_note(buf);
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
static int
|
|
kdump_wait_cpu_freeze(void)
|
|
{
|
|
int cpu_num = num_online_cpus() - 1;
|
|
int timeout = 1000;
|
|
while(timeout-- > 0) {
|
|
if (atomic_read(&kdump_cpu_frozen) == cpu_num)
|
|
return 0;
|
|
udelay(1000);
|
|
}
|
|
return 1;
|
|
}
|
|
#endif
|
|
|
|
void
|
|
machine_crash_shutdown(struct pt_regs *pt)
|
|
{
|
|
/* This function is only called after the system
|
|
* has paniced or is otherwise in a critical state.
|
|
* The minimum amount of code to allow a kexec'd kernel
|
|
* to run successfully needs to happen here.
|
|
*
|
|
* In practice this means shooting down the other cpus in
|
|
* an SMP system.
|
|
*/
|
|
kexec_disable_iosapic();
|
|
#ifdef CONFIG_SMP
|
|
kdump_smp_send_stop();
|
|
/* not all cpu response to IPI, send INIT to freeze them */
|
|
if (kdump_wait_cpu_freeze() && kdump_on_init) {
|
|
kdump_smp_send_init();
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void
|
|
machine_kdump_on_init(void)
|
|
{
|
|
crash_save_vmcoreinfo();
|
|
local_irq_disable();
|
|
kexec_disable_iosapic();
|
|
machine_kexec(ia64_kimage);
|
|
}
|
|
|
|
void
|
|
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
|
|
{
|
|
int cpuid;
|
|
|
|
local_irq_disable();
|
|
cpuid = smp_processor_id();
|
|
crash_save_this_cpu();
|
|
current->thread.ksp = (__u64)info->sw - 16;
|
|
|
|
ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */
|
|
|
|
atomic_inc(&kdump_cpu_frozen);
|
|
kdump_status[cpuid] = 1;
|
|
mb();
|
|
for (;;)
|
|
cpu_relax();
|
|
}
|
|
|
|
static int
|
|
kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
|
|
{
|
|
struct ia64_mca_notify_die *nd;
|
|
struct die_args *args = data;
|
|
|
|
if (!kdump_on_init && !kdump_on_fatal_mca)
|
|
return NOTIFY_DONE;
|
|
|
|
if (!ia64_kimage) {
|
|
if (val == DIE_INIT_MONARCH_LEAVE)
|
|
ia64_mca_printk(KERN_NOTICE
|
|
"%s: kdump not configured\n",
|
|
__func__);
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
if (val != DIE_INIT_MONARCH_LEAVE &&
|
|
val != DIE_INIT_SLAVE_LEAVE &&
|
|
val != DIE_INIT_MONARCH_PROCESS &&
|
|
val != DIE_MCA_RENDZVOUS_LEAVE &&
|
|
val != DIE_MCA_MONARCH_LEAVE)
|
|
return NOTIFY_DONE;
|
|
|
|
nd = (struct ia64_mca_notify_die *)args->err;
|
|
/* Reason code 1 means machine check rendezvous*/
|
|
if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
|
|
|| val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
|
|
return NOTIFY_DONE;
|
|
|
|
switch (val) {
|
|
case DIE_INIT_MONARCH_PROCESS:
|
|
if (kdump_on_init) {
|
|
atomic_set(&kdump_in_progress, 1);
|
|
*(nd->monarch_cpu) = -1;
|
|
}
|
|
break;
|
|
case DIE_INIT_MONARCH_LEAVE:
|
|
if (kdump_on_init)
|
|
machine_kdump_on_init();
|
|
break;
|
|
case DIE_INIT_SLAVE_LEAVE:
|
|
if (atomic_read(&kdump_in_progress))
|
|
unw_init_running(kdump_cpu_freeze, NULL);
|
|
break;
|
|
case DIE_MCA_RENDZVOUS_LEAVE:
|
|
if (atomic_read(&kdump_in_progress))
|
|
unw_init_running(kdump_cpu_freeze, NULL);
|
|
break;
|
|
case DIE_MCA_MONARCH_LEAVE:
|
|
/* *(nd->data) indicate if MCA is recoverable */
|
|
if (kdump_on_fatal_mca && !(*(nd->data))) {
|
|
atomic_set(&kdump_in_progress, 1);
|
|
*(nd->monarch_cpu) = -1;
|
|
machine_kdump_on_init();
|
|
}
|
|
break;
|
|
}
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
#ifdef CONFIG_SYSCTL
|
|
static ctl_table kdump_ctl_table[] = {
|
|
{
|
|
.ctl_name = CTL_UNNUMBERED,
|
|
.procname = "kdump_on_init",
|
|
.data = &kdump_on_init,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = &proc_dointvec,
|
|
},
|
|
{
|
|
.ctl_name = CTL_UNNUMBERED,
|
|
.procname = "kdump_on_fatal_mca",
|
|
.data = &kdump_on_fatal_mca,
|
|
.maxlen = sizeof(int),
|
|
.mode = 0644,
|
|
.proc_handler = &proc_dointvec,
|
|
},
|
|
{ .ctl_name = 0 }
|
|
};
|
|
|
|
static ctl_table sys_table[] = {
|
|
{
|
|
.ctl_name = CTL_KERN,
|
|
.procname = "kernel",
|
|
.mode = 0555,
|
|
.child = kdump_ctl_table,
|
|
},
|
|
{ .ctl_name = 0 }
|
|
};
|
|
#endif
|
|
|
|
static int
|
|
machine_crash_setup(void)
|
|
{
|
|
/* be notified before default_monarch_init_process */
|
|
static struct notifier_block kdump_init_notifier_nb = {
|
|
.notifier_call = kdump_init_notifier,
|
|
.priority = 1,
|
|
};
|
|
int ret;
|
|
if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
|
|
return ret;
|
|
#ifdef CONFIG_SYSCTL
|
|
register_sysctl_table(sys_table);
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
__initcall(machine_crash_setup);
|
|
|