IB/ipath: Fix SDMA error recovery in absence of link status change

What's fixed:

    in ipath_cancel_sends()

        We need to unconditionally set ABORTING.  So, swap the tests
        so the set_bit() isn't shadowed by the &&.

        If we've disarmed the piobufs, then we need to unconditionally
        set DISARMED.  So, move it out from the overly protective if
        at the bottom.

    in sdma_abort_task()

        Abort_task was written knowing that the SDMA engine would always
        be reset (and restarted) on error.  A recent change broke that
        fundamental assumption by taking the restart portion and making
        it conditional on a link status change.  But, SDMA can go boom
        without a link status change in some conditions.

Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
John Gregor 2008-05-07 11:01:10 -07:00 committed by Roland Dreier
parent e2ab41cae4
commit ab69b3cf12
2 changed files with 29 additions and 10 deletions

View file

@ -1898,8 +1898,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
skip_cancel =
!test_bit(IPATH_SDMA_DISABLED, statp) &&
test_and_set_bit(IPATH_SDMA_ABORTING, statp);
test_and_set_bit(IPATH_SDMA_ABORTING, statp)
&& !test_bit(IPATH_SDMA_DISABLED, statp);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
if (skip_cancel)
goto bail;
@ -1930,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
ipath_disarm_piobufs(dd, 0,
dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
if (restore_sendctrl) {
/* else done by caller later if needed */
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@ -1949,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
/* only wait so long for intr */
dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
dd->ipath_sdma_reset_wait = 200;
__set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);

View file

@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
/*
* Don't restart sdma here. Wait until link is up to ACTIVE.
* VL15 MADs used to bring the link up use PIO, and multiple
* link transitions otherwise cause the sdma engine to be
* Don't restart sdma here (with the exception
* below). Wait until link is up to ACTIVE. VL15 MADs
* used to bring the link up use PIO, and multiple link
* transitions otherwise cause the sdma engine to be
* stopped and started multiple times.
* The disable is done here, including the shadow, so the
* state is kept consistent.
* See ipath_restart_sdma() for the actual starting of sdma.
* The disable is done here, including the shadow,
* so the state is kept consistent.
* See ipath_restart_sdma() for the actual starting
* of sdma.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
/* make sure I see next message */
dd->ipath_sdma_abort_jiffies = 0;
/*
* Not everything that takes SDMA offline is a link
* status change. If the link was up, restart SDMA.
*/
if (dd->ipath_flags & IPATH_LINKACTIVE)
ipath_restart_sdma(dd);
goto done;
}
@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
goto done;
}
dd->ipath_sdma_status = 0;
/*
* Set initial status as if we had been up, then gone down.
* This lets initial start on transition to ACTIVE be the
* same as restart after link flap.
*/
dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
dd->ipath_sdma_abort_jiffies = 0;
dd->ipath_sdma_generation = 0;
dd->ipath_sdma_descq_tail = 0;
@ -618,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
/* notify upper layers */
ipath_ib_piobufavail(dd->verbs_dev);
bail:
return;
}