ipmi_watchdog: restore settings when BMC reset

If the BMC gets reset, it will return 0x80 response errors.

In less than a week
# grep "Error 80 on cmd 22" /var/log/kernel |wc -l
378681

In this case, it is probably a good idea to restore the IPMI settings.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Reported-by: Arkadiusz Miśkiewicz <a.miskiewicz@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Corey Minyard 2011-12-19 17:12:02 -08:00 committed by Linus Torvalds
parent ff05b6f7ae
commit b75d91f7ca

View file

@ -139,6 +139,8 @@
#define IPMI_WDOG_SET_TIMER 0x24
#define IPMI_WDOG_GET_TIMER 0x25
#define IPMI_WDOG_TIMER_NOT_INIT_RESP 0x80
/* These are here until the real ones get into the watchdog.h interface. */
#ifndef WDIOC_GETTIMEOUT
#define WDIOC_GETTIMEOUT _IOW(WATCHDOG_IOCTL_BASE, 20, int)
@ -596,6 +598,7 @@ static int ipmi_heartbeat(void)
struct kernel_ipmi_msg msg;
int rv;
struct ipmi_system_interface_addr addr;
int timeout_retries = 0;
if (ipmi_ignore_heartbeat)
return 0;
@ -616,6 +619,7 @@ static int ipmi_heartbeat(void)
mutex_lock(&heartbeat_lock);
restart:
atomic_set(&heartbeat_tofree, 2);
/*
@ -653,7 +657,33 @@ static int ipmi_heartbeat(void)
/* Wait for the heartbeat to be sent. */
wait_for_completion(&heartbeat_wait);
if (heartbeat_recv_msg.msg.data[0] != 0) {
if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
timeout_retries++;
if (timeout_retries > 3) {
printk(KERN_ERR PFX ": Unable to restore the IPMI"
" watchdog's settings, giving up.\n");
rv = -EIO;
goto out_unlock;
}
/*
* The timer was not initialized, that means the BMC was
* probably reset and lost the watchdog information. Attempt
* to restore the timer's info. Note that we still hold
* the heartbeat lock, to keep a heartbeat from happening
* in this process, so must say no heartbeat to avoid a
* deadlock on this mutex.
*/
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
if (rv) {
printk(KERN_ERR PFX ": Unable to send the command to"
" set the watchdog's settings, giving up.\n");
goto out_unlock;
}
/* We might need a new heartbeat, so do it now */
goto restart;
} else if (heartbeat_recv_msg.msg.data[0] != 0) {
/*
* Got an error in the heartbeat response. It was already
* reported in ipmi_wdog_msg_handler, but we should return
@ -662,6 +692,7 @@ static int ipmi_heartbeat(void)
rv = -EINVAL;
}
out_unlock:
mutex_unlock(&heartbeat_lock);
return rv;
@ -922,11 +953,15 @@ static struct miscdevice ipmi_wdog_miscdev = {
static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
void *handler_data)
{
if (msg->msg.data[0] != 0) {
if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
printk(KERN_INFO PFX "response: The IPMI controller appears"
" to have been reset, will attempt to reinitialize"
" the watchdog timer\n");
else if (msg->msg.data[0] != 0)
printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
msg->msg.data[0],
msg->msg.cmd);
}
ipmi_free_recv_msg(msg);
}