55537871ef
In many cases of hardlockup reports, it's actually not possible to know why it triggered, because the CPU that got stuck is usually waiting on a resource (with IRQs disabled) in posession of some other CPU is holding. IOW, we are often looking at the stacktrace of the victim and not the actual offender. Introduce sysctl / cmdline parameter that makes it possible to have hardlockup detector perform all-CPU backtrace. Signed-off-by: Jiri Kosina <jkosina@suse.cz> Reviewed-by: Aaron Tomlin <atomlin@redhat.com> Cc: Ulrich Obergfell <uobergfe@redhat.com> Acked-by: Don Zickus <dzickus@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
105 lines
2.7 KiB
C
105 lines
2.7 KiB
C
/*
|
|
* linux/include/linux/nmi.h
|
|
*/
|
|
#ifndef LINUX_NMI_H
|
|
#define LINUX_NMI_H
|
|
|
|
#include <linux/sched.h>
|
|
#include <asm/irq.h>
|
|
|
|
/**
|
|
* touch_nmi_watchdog - restart NMI watchdog timeout.
|
|
*
|
|
* If the architecture supports the NMI watchdog, touch_nmi_watchdog()
|
|
* may be used to reset the timeout - for code which intentionally
|
|
* disables interrupts for a long time. This call is stateless.
|
|
*/
|
|
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
|
|
#include <asm/nmi.h>
|
|
extern void touch_nmi_watchdog(void);
|
|
#else
|
|
static inline void touch_nmi_watchdog(void)
|
|
{
|
|
touch_softlockup_watchdog();
|
|
}
|
|
#endif
|
|
|
|
#if defined(CONFIG_HARDLOCKUP_DETECTOR)
|
|
extern void hardlockup_detector_disable(void);
|
|
#else
|
|
static inline void hardlockup_detector_disable(void) {}
|
|
#endif
|
|
|
|
/*
|
|
* Create trigger_all_cpu_backtrace() out of the arch-provided
|
|
* base function. Return whether such support was available,
|
|
* to allow calling code to fall back to some other mechanism:
|
|
*/
|
|
#ifdef arch_trigger_all_cpu_backtrace
|
|
static inline bool trigger_all_cpu_backtrace(void)
|
|
{
|
|
arch_trigger_all_cpu_backtrace(true);
|
|
|
|
return true;
|
|
}
|
|
static inline bool trigger_allbutself_cpu_backtrace(void)
|
|
{
|
|
arch_trigger_all_cpu_backtrace(false);
|
|
return true;
|
|
}
|
|
|
|
/* generic implementation */
|
|
void nmi_trigger_all_cpu_backtrace(bool include_self,
|
|
void (*raise)(cpumask_t *mask));
|
|
bool nmi_cpu_backtrace(struct pt_regs *regs);
|
|
|
|
#else
|
|
static inline bool trigger_all_cpu_backtrace(void)
|
|
{
|
|
return false;
|
|
}
|
|
static inline bool trigger_allbutself_cpu_backtrace(void)
|
|
{
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_LOCKUP_DETECTOR
|
|
int hw_nmi_is_cpu_stuck(struct pt_regs *);
|
|
u64 hw_nmi_get_sample_period(int watchdog_thresh);
|
|
extern int nmi_watchdog_enabled;
|
|
extern int soft_watchdog_enabled;
|
|
extern int watchdog_user_enabled;
|
|
extern int watchdog_thresh;
|
|
extern unsigned long *watchdog_cpumask_bits;
|
|
extern int sysctl_softlockup_all_cpu_backtrace;
|
|
extern int sysctl_hardlockup_all_cpu_backtrace;
|
|
struct ctl_table;
|
|
extern int proc_watchdog(struct ctl_table *, int ,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_nmi_watchdog(struct ctl_table *, int ,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_soft_watchdog(struct ctl_table *, int ,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_watchdog_thresh(struct ctl_table *, int ,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int proc_watchdog_cpumask(struct ctl_table *, int,
|
|
void __user *, size_t *, loff_t *);
|
|
extern int lockup_detector_suspend(void);
|
|
extern void lockup_detector_resume(void);
|
|
#else
|
|
static inline int lockup_detector_suspend(void)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void lockup_detector_resume(void)
|
|
{
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
|
|
#include <asm/nmi.h>
|
|
#endif
|
|
|
|
#endif
|