ipmi_watchdog: restore settings when BMC reset
If the BMC gets reset, it will return 0x80 response errors. In less than a week # grep "Error 80 on cmd 22" /var/log/kernel |wc -l 378681 In this case, it is probably a good idea to restore the IPMI settings. Signed-off-by: Corey Minyard <cminyard@mvista.com> Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com> Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
ff05b6f7ae
commit
b75d91f7ca
1 changed files with 38 additions and 3 deletions
|
@ -139,6 +139,8 @@
|
|||
#define IPMI_WDOG_SET_TIMER 0x24
|
||||
#define IPMI_WDOG_GET_TIMER 0x25
|
||||
|
||||
#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80
|
||||
|
||||
/* These are here until the real ones get into the watchdog.h interface. */
|
||||
#ifndef WDIOC_GETTIMEOUT
|
||||
#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
|
||||
|
@ -596,6 +598,7 @@ static int ipmi_heartbeat(void)
|
|||
struct kernel_ipmi_msg msg;
|
||||
int rv;
|
||||
struct ipmi_system_interface_addr addr;
|
||||
int timeout_retries = 0;
|
||||
|
||||
if (ipmi_ignore_heartbeat)
|
||||
return 0;
|
||||
|
@ -616,6 +619,7 @@ static int ipmi_heartbeat(void)
|
|||
|
||||
mutex_lock(&heartbeat_lock);
|
||||
|
||||
restart:
|
||||
atomic_set(&heartbeat_tofree, 2);
|
||||
|
||||
/*
|
||||
|
@ -653,7 +657,33 @@ static int ipmi_heartbeat(void)
|
|||
/* Wait for the heartbeat to be sent. */
|
||||
wait_for_completion(&heartbeat_wait);
|
||||
|
||||
if (heartbeat_recv_msg.msg.data[0] != 0) {
|
||||
if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
|
||||
timeout_retries++;
|
||||
if (timeout_retries > 3) {
|
||||
printk(KERN_ERR PFX ": Unable to restore the IPMI"
|
||||
" watchdog's settings, giving up.\n");
|
||||
rv = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* The timer was not initialized, that means the BMC was
|
||||
* probably reset and lost the watchdog information. Attempt
|
||||
* to restore the timer's info. Note that we still hold
|
||||
* the heartbeat lock, to keep a heartbeat from happening
|
||||
* in this process, so must say no heartbeat to avoid a
|
||||
* deadlock on this mutex.
|
||||
*/
|
||||
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
|
||||
if (rv) {
|
||||
printk(KERN_ERR PFX ": Unable to send the command to"
|
||||
" set the watchdog's settings, giving up.\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* We might need a new heartbeat, so do it now */
|
||||
goto restart;
|
||||
} else if (heartbeat_recv_msg.msg.data[0] != 0) {
|
||||
/*
|
||||
* Got an error in the heartbeat response. It was already
|
||||
* reported in ipmi_wdog_msg_handler, but we should return
|
||||
|
@ -662,6 +692,7 @@ static int ipmi_heartbeat(void)
|
|||
rv = -EINVAL;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&heartbeat_lock);
|
||||
|
||||
return rv;
|
||||
|
@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = {
|
|||
static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
|
||||
void *handler_data)
|
||||
{
|
||||
if (msg->msg.data[0] != 0) {
|
||||
if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
|
||||
msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
|
||||
printk(KERN_INFO PFX "response: The IPMI controller appears"
|
||||
" to have been reset, will attempt to reinitialize"
|
||||
" the watchdog timer\n");
|
||||
else if (msg->msg.data[0] != 0)
|
||||
printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
|
||||
msg->msg.data[0],
|
||||
msg->msg.cmd);
|
||||
}
|
||||
|
||||
ipmi_free_recv_msg(msg);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue