[SCSI] qla4xxx: Temperature monitoring for ISP82XX core.
During the watchdog pass, monitor the temperature of the ISP82XX core and set the device state to FAILED when the temperature reaches the "Panic" level. Signed-off-by: Mike Hernandez <michael.hernandez@qlogic.com> Signed-off-by: Vikas Chaudhary <vikas.chaudhary@qlogic.com> Reviewed-by: Mike Christie <michaelc@cs.wisc.edu> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
This commit is contained in:
parent
124dd90f65
commit
4f77083ed0
3 changed files with 58 additions and 4 deletions
|
@ -671,6 +671,7 @@ struct scsi_qla_host {
|
|||
uint16_t pri_ddb_idx;
|
||||
uint16_t sec_ddb_idx;
|
||||
int is_reset;
|
||||
uint16_t temperature;
|
||||
};
|
||||
|
||||
struct ql4_task_data {
|
||||
|
|
|
@ -19,12 +19,25 @@
|
|||
#define PHAN_PEG_RCV_INITIALIZED 0xff01
|
||||
|
||||
/*CRB_RELATED*/
|
||||
#define QLA82XX_CRB_BASE QLA82XX_CAM_RAM(0x200)
|
||||
#define QLA82XX_REG(X) (QLA82XX_CRB_BASE+(X))
|
||||
|
||||
#define QLA82XX_CRB_BASE (QLA82XX_CAM_RAM(0x200))
|
||||
#define QLA82XX_REG(X) (QLA82XX_CRB_BASE+(X))
|
||||
#define CRB_CMDPEG_STATE QLA82XX_REG(0x50)
|
||||
#define CRB_RCVPEG_STATE QLA82XX_REG(0x13c)
|
||||
#define CRB_DMA_SHIFT QLA82XX_REG(0xcc)
|
||||
/* Register read by qla4_8xxx_check_temp(); decoded with the helpers below. */
#define CRB_TEMP_STATE QLA82XX_REG(0x1b4)
|
||||
|
||||
/* Temperature value: the bits of x above the low 16 (printed as degrees C). */
#define qla82xx_get_temp_val(x) ((x) >> 16)
|
||||
/* Temperature state: the low 16 bits of x, compared against QLA82XX_TEMP_*. */
#define qla82xx_get_temp_state(x) ((x) & 0xffff)
|
||||
/* Pack val (shifted left by 16) and state (OR-ed in unshifted) into one word. */
#define qla82xx_encode_temp(val, state) (((val) << 16) | (state))
|
||||
|
||||
/*
 * Temperature states of the ISP82XX core.
 *
 * qla4_8xxx_check_temp() compares the value extracted by
 * qla82xx_get_temp_state() against these constants. The enumerators are
 * numbered consecutively starting at 0x1.
 */
|
||||
enum {
|
||||
QLA82XX_TEMP_NORMAL = 0x1, /* Normal operating range */
|
||||
QLA82XX_TEMP_WARN, /* Sound alert, temperature getting high */
|
||||
QLA82XX_TEMP_PANIC /* Fatal error, hardware has shut down. */
|
||||
};
|
||||
|
||||
#define QLA82XX_HW_H0_CH_HUB_ADR 0x05
|
||||
#define QLA82XX_HW_H1_CH_HUB_ADR 0x0E
|
||||
|
|
|
@ -1971,6 +1971,42 @@ static int qla4xxx_mem_alloc(struct scsi_qla_host *ha)
|
|||
return QLA_ERROR;
|
||||
}
|
||||
|
||||
/**
|
||||
* qla4_8xxx_check_temp - Check the ISP82XX temperature.
|
||||
* @ha: adapter block pointer.
|
||||
*
|
||||
* Note: The caller should not hold the idc lock.
|
||||
**/
|
||||
static int qla4_8xxx_check_temp(struct scsi_qla_host *ha)
|
||||
{
|
||||
uint32_t temp, temp_state, temp_val;
|
||||
int status = QLA_SUCCESS;
|
||||
|
||||
temp = qla4_8xxx_rd_32(ha, CRB_TEMP_STATE);
|
||||
|
||||
temp_state = qla82xx_get_temp_state(temp);
|
||||
temp_val = qla82xx_get_temp_val(temp);
|
||||
|
||||
if (temp_state == QLA82XX_TEMP_PANIC) {
|
||||
ql4_printk(KERN_WARNING, ha, "Device temperature %d degrees C"
|
||||
" exceeds maximum allowed. Hardware has been shut"
|
||||
" down.\n", temp_val);
|
||||
status = QLA_ERROR;
|
||||
} else if (temp_state == QLA82XX_TEMP_WARN) {
|
||||
if (ha->temperature == QLA82XX_TEMP_NORMAL)
|
||||
ql4_printk(KERN_WARNING, ha, "Device temperature %d"
|
||||
" degrees C exceeds operating range."
|
||||
" Immediate action needed.\n", temp_val);
|
||||
} else {
|
||||
if (ha->temperature == QLA82XX_TEMP_WARN)
|
||||
ql4_printk(KERN_INFO, ha, "Device temperature is"
|
||||
" now %d degrees C in normal range.\n",
|
||||
temp_val);
|
||||
}
|
||||
ha->temperature = temp_state;
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* qla4_8xxx_check_fw_alive - Check firmware health
|
||||
* @ha: Pointer to host adapter structure.
|
||||
|
@ -2042,7 +2078,11 @@ void qla4_8xxx_watchdog(struct scsi_qla_host *ha)
|
|||
test_bit(DPC_RESET_HA, &ha->dpc_flags) ||
|
||||
test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags))) {
|
||||
dev_state = qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE);
|
||||
if (dev_state == QLA82XX_DEV_NEED_RESET &&
|
||||
|
||||
if (qla4_8xxx_check_temp(ha)) {
|
||||
set_bit(DPC_HA_UNRECOVERABLE, &ha->dpc_flags);
|
||||
qla4xxx_wake_dpc(ha);
|
||||
} else if (dev_state == QLA82XX_DEV_NEED_RESET &&
|
||||
!test_bit(DPC_RESET_HA, &ha->dpc_flags)) {
|
||||
if (!ql4xdontresethba) {
|
||||
ql4_printk(KERN_INFO, ha, "%s: HW State: "
|
||||
|
|
Loading…
Reference in a new issue