From 18e144d32cd3dae6953c385e4b376ef9688b61b0 Mon Sep 17 00:00:00 2001 From: Andrew Vasquez Date: Fri, 27 May 2005 15:04:47 -0700 Subject: [PATCH 1/2] [SCSI] qla2xxx: fix bad locking during eh_abort Correct incorrect locking order in qla2xxx_eh_abort() handler which would case a hang during certain code-paths. With extra pieces to fix the irq state in the locks. Signed-off-by: Andrew Vasquez Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_os.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 579448222d69..3c97aa45772d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -507,6 +507,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) int ret, i; unsigned int id, lun; unsigned long serial; + unsigned long flags; if (!CMD_SP(cmd)) return FAILED; @@ -519,7 +520,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) /* Check active list for command command. */ spin_unlock_irq(ha->host->host_lock); - spin_lock(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); for (i = 1; i < MAX_OUTSTANDING_COMMANDS; i++) { sp = ha->outstanding_cmds[i]; @@ -534,7 +535,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) sp->state)); DEBUG3(qla2x00_print_scsi_cmd(cmd);) - spin_unlock(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); if (qla2x00_abort_command(ha, sp)) { DEBUG2(printk("%s(%ld): abort_command " "mbx failed.\n", __func__, ha->host_no)); @@ -543,20 +544,19 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) "mbx success.\n", __func__, ha->host_no)); ret = SUCCESS; } - spin_lock(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); break; } + spin_unlock_irqrestore(&ha->hardware_lock, flags); /* Wait for the command to be returned. */ if (ret == SUCCESS) { - spin_unlock(&ha->hardware_lock); if (qla2x00_eh_wait_on_command(ha, cmd) != QLA_SUCCESS) { qla_printk(KERN_ERR, ha, "scsi(%ld:%d:%d): Abort handler timed out -- %lx " "%x.\n", ha->host_no, id, lun, serial, ret); } - spin_lock(&ha->hardware_lock); } spin_lock_irq(ha->host->host_lock); @@ -588,6 +588,7 @@ qla2x00_eh_wait_for_pending_target_commands(scsi_qla_host_t *ha, unsigned int t) int status; srb_t *sp; struct scsi_cmnd *cmd; + unsigned long flags; status = 0; @@ -596,11 +597,11 @@ qla2x00_eh_wait_for_pending_target_commands(scsi_qla_host_t *ha, unsigned int t) * array */ for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { - spin_lock(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); sp = ha->outstanding_cmds[cnt]; if (sp) { cmd = sp->cmd; - spin_unlock(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); if (cmd->device->id == t) { if (!qla2x00_eh_wait_on_command(ha, cmd)) { status = 1; @@ -608,7 +609,7 @@ qla2x00_eh_wait_for_pending_target_commands(scsi_qla_host_t *ha, unsigned int t) } } } else { - spin_unlock(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } } return (status); @@ -740,6 +741,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *ha) int status; srb_t *sp; struct scsi_cmnd *cmd; + unsigned long flags; status = 1; @@ -748,17 +750,17 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *ha) * array */ for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { - spin_lock(&ha->hardware_lock); + spin_lock_irqsave(&ha->hardware_lock, flags); sp = ha->outstanding_cmds[cnt]; if (sp) { cmd = sp->cmd; - spin_unlock(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); status = qla2x00_eh_wait_on_command(ha, cmd); if (status == 0) break; } else { - spin_unlock(&ha->hardware_lock); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } } return (status); From c92715b3c22e94105a8fd9e4a23047d05c5077e7 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 2 Jun 2005 17:15:09 -0500 Subject: [PATCH 2/2] [SCSI] fix slab corruption during ipr probe With CONFIG_DEBUG_SLAB=y I see slab corruption messages during boot on pSeries machines with IPR adapters with any 2.6.12-rc kernel. The change which seems to have introduced the problem is "SCSI: revamp target scanning routines" and may be found at: http://marc.theaimsgroup.com/?l=bk-commits-head&m=111093946426333&w=2 In order to revert that in a 2.6.12-rc1 tree, I had to revert "target code updates to support scanned targets" first: http://marc.theaimsgroup.com/?l=bk-commits-head&m=111094132524649&w=2 With both patches reverted, the corruption messages go away. ipr: IBM Power RAID SCSI Device Driver version: 2.0.13 (February 21, 2005) ipr 0001:d0:01.0: Found IOA with IRQ: 167 ipr 0001:d0:01.0: Starting IOA initialization sequence. ipr 0001:d0:01.0: Adapter firmware version: 020A005C ipr 0001:d0:01.0: IOA initialized. scsi0 : IBM 570B Storage Adapter Vendor: IBM Model: VSBPD4E1 U4SCSI Rev: 4770 Type: Enclosure ANSI SCSI revision: 02 Vendor: IBM H0 Model: HUS103036FL3800 Rev: RPQF Type: Direct-Access ANSI SCSI revision: 04 Vendor: IBM H0 Model: HUS103036FL3800 Rev: RPQF Type: Direct-Access ANSI SCSI revision: 04 Vendor: IBM H0 Model: HUS103036FL3800 Rev: RPQF Type: Direct-Access ANSI SCSI revision: 04 Vendor: IBM H0 Model: HUS103036FL3800 Rev: RPQF Type: Direct-Access ANSI SCSI revision: 04 Vendor: IBM Model: VSBPD4E1 U4SCSI Rev: 4770 Type: Enclosure ANSI SCSI revision: 02 Slab corruption: start=c0000001e8de5268, len=512 Redzone: 0x5a2cf071/0x5a2cf071. Last user: [](.scsi_target_dev_release+0x28/0x50) 080: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6a Prev obj: start=c0000001e8de5050, len=512 Redzone: 0x5a2cf071/0x5a2cf071. Last user: [<0000000000000000>](0x0) 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b Next obj: start=c0000001e8de5480, len=512 Redzone: 0x170fc2a5/0x170fc2a5. Last user: [](.as_init_queue+0x5c/0x228) 000: c0 00 00 01 e8 83 26 08 00 00 00 00 00 00 00 00 010: 00 00 00 00 00 00 00 00 c0 00 00 01 e8 de 54 98 Slab corruption: start=c0000001e8de5268, len=512 Redzone: 0x5a2cf071/0x5a2cf071. Last user: [](.scsi_target_dev_release+0x28/0x50) 080: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6a Prev obj: start=c0000001e8de5050, len=512 Redzone: 0x5a2cf071/0x5a2cf071. Last user: [<0000000000000000>](0x0) 000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b Next obj: start=c0000001e8de5480, len=512 Redzone: 0x170fc2a5/0x170fc2a5. Last user: [](.as_init_queue+0x5c/0x228) 000: c0 00 00 01 e8 83 26 08 00 00 00 00 00 00 00 00 010: 00 00 00 00 00 00 00 00 c0 00 00 01 e8 de 54 98 ... I did some digging and the problem seems to be a refcounting issue in __scsi_add_device. The target gets freed in scsi_target_reap, and then __scsi_add_device tries to do another device_put on it. Signed-off-by: Nathan Lynch Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index cca772624ae7..8d0d302844a1 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1197,6 +1197,7 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel, if (!starget) return ERR_PTR(-ENOMEM); + get_device(&starget->dev); down(&shost->scan_mutex); res = scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata); if (res != SCSI_SCAN_LUN_PRESENT)