diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index c28a712fd4db..703eb6a88790 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1919,10 +1919,11 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) { enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; - struct iscsi_task *task = NULL; + struct iscsi_task *task = NULL, *running_task; struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct iscsi_conn *conn; + int i; cls_session = starget_to_session(scsi_target(sc->device)); session = cls_session->dd_data; @@ -1947,8 +1948,15 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) } task = (struct iscsi_task *)sc->SCp.ptr; - if (!task) + if (!task) { + /* + * Raced with completion. Just reset timer, and let it + * complete normally + */ + rc = BLK_EH_RESET_TIMER; goto done; + } + /* * If we have sent (at least queued to the network layer) a pdu or * recvd one for the task since the last timeout ask for @@ -1956,10 +1964,10 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * we can check if it is the task or connection when we send the * nop as a ping. */ - if (time_after_eq(task->last_xfer, task->last_timeout)) { + if (time_after(task->last_xfer, task->last_timeout)) { ISCSI_DBG_EH(session, "Command making progress. Asking " "scsi-ml for more time to complete. " - "Last data recv at %lu. Last timeout was at " + "Last data xfer at %lu. Last timeout was at " "%lu\n.", task->last_xfer, task->last_timeout); task->have_checked_conn = false; rc = BLK_EH_RESET_TIMER; @@ -1977,6 +1985,43 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) goto done; } + for (i = 0; i < conn->session->cmds_max; i++) { + running_task = conn->session->cmds[i]; + if (!running_task->sc || running_task == task || + running_task->state != ISCSI_TASK_RUNNING) + continue; + + /* + * Only check if cmds started before this one have made + * progress, or this could never fail + */ + if (time_after(running_task->sc->jiffies_at_alloc, + task->sc->jiffies_at_alloc)) + continue; + + if (time_after(running_task->last_xfer, task->last_timeout)) { + /* + * This task has not made progress, but a task + * started before us has transferred data since + * we started/last-checked. We could be queueing + * too many tasks or the LU is bad. + * + * If the device is bad the cmds ahead of us on + * other devs will complete, and this loop will + * eventually fail starting the scsi eh. + */ + ISCSI_DBG_EH(session, "Command has not made progress " + "but commands ahead of it have. " + "Asking scsi-ml for more time to " + "complete. Our last xfer vs running task " + "last xfer %lu/%lu. Last check %lu.\n", + task->last_xfer, running_task->last_xfer, + task->last_timeout); + rc = BLK_EH_RESET_TIMER; + goto done; + } + } + /* Assumes nop timeout is shorter than scsi cmd timeout */ if (task->have_checked_conn) goto done;