bonding: add some slack to arp monitoring time limits
Currently, all the time limits in the bonding ARP monitor are in multiples of arp_interval -- the time interval at which the ARP monitor is periodically scheduled. With a fast network round-trip and a little scheduling latency of the ARP monitor work, a limit of n*delta_in_ticks may effectively mean (n-1)*delta_in_ticks. This is fatal in case of n==1 (the link will stay down forever) and makes the behaviour non-deterministic in all the other cases. Add a delta_in_ticks/2 time slack to all the time limits. Signed-off-by: Jiri Bohac <jbohac@suse.cz> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
eb7e057596
commit
da210f5590
1 changed files with 20 additions and 11 deletions
|
@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
|
|||
arp_work.work);
|
||||
struct slave *slave, *oldcurrent;
|
||||
int do_failover = 0;
|
||||
int delta_in_ticks;
|
||||
int delta_in_ticks, extra_ticks;
|
||||
int i;
|
||||
|
||||
read_lock(&bond->lock);
|
||||
|
||||
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
|
||||
extra_ticks = delta_in_ticks / 2;
|
||||
|
||||
if (bond->slave_cnt == 0)
|
||||
goto re_arm;
|
||||
|
@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
|
|||
if (slave->link != BOND_LINK_UP) {
|
||||
if (time_in_range(jiffies,
|
||||
trans_start - delta_in_ticks,
|
||||
trans_start + delta_in_ticks) &&
|
||||
trans_start + delta_in_ticks + extra_ticks) &&
|
||||
time_in_range(jiffies,
|
||||
slave->dev->last_rx - delta_in_ticks,
|
||||
slave->dev->last_rx + delta_in_ticks)) {
|
||||
slave->dev->last_rx + delta_in_ticks + extra_ticks)) {
|
||||
|
||||
slave->link = BOND_LINK_UP;
|
||||
bond_set_active_slave(slave);
|
||||
|
@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
|
|||
*/
|
||||
if (!time_in_range(jiffies,
|
||||
trans_start - delta_in_ticks,
|
||||
trans_start + 2 * delta_in_ticks) ||
|
||||
trans_start + 2 * delta_in_ticks + extra_ticks) ||
|
||||
!time_in_range(jiffies,
|
||||
slave->dev->last_rx - delta_in_ticks,
|
||||
slave->dev->last_rx + 2 * delta_in_ticks)) {
|
||||
slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {
|
||||
|
||||
slave->link = BOND_LINK_DOWN;
|
||||
bond_set_backup_slave(slave);
|
||||
|
@ -2933,6 +2934,14 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
|
|||
struct slave *slave;
|
||||
int i, commit = 0;
|
||||
unsigned long trans_start;
|
||||
int extra_ticks;
|
||||
|
||||
/* All the time comparisons below need some extra time. Otherwise, on
|
||||
* fast networks the ARP probe/reply may arrive within the same jiffy
|
||||
* as it was sent. Then, the next time the ARP monitor is run, one
|
||||
* arp_interval will already have passed in the comparisons.
|
||||
*/
|
||||
extra_ticks = delta_in_ticks / 2;
|
||||
|
||||
bond_for_each_slave(bond, slave, i) {
|
||||
slave->new_link = BOND_LINK_NOCHANGE;
|
||||
|
@ -2940,7 +2949,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
|
|||
if (slave->link != BOND_LINK_UP) {
|
||||
if (time_in_range(jiffies,
|
||||
slave_last_rx(bond, slave) - delta_in_ticks,
|
||||
slave_last_rx(bond, slave) + delta_in_ticks)) {
|
||||
slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {
|
||||
|
||||
slave->new_link = BOND_LINK_UP;
|
||||
commit++;
|
||||
|
@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
|
|||
*/
|
||||
if (time_in_range(jiffies,
|
||||
slave->jiffies - delta_in_ticks,
|
||||
slave->jiffies + 2 * delta_in_ticks))
|
||||
slave->jiffies + 2 * delta_in_ticks + extra_ticks))
|
||||
continue;
|
||||
|
||||
/*
|
||||
|
@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
|
|||
!bond->current_arp_slave &&
|
||||
!time_in_range(jiffies,
|
||||
slave_last_rx(bond, slave) - delta_in_ticks,
|
||||
slave_last_rx(bond, slave) + 3 * delta_in_ticks)) {
|
||||
slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {
|
||||
|
||||
slave->new_link = BOND_LINK_DOWN;
|
||||
commit++;
|
||||
|
@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
|
|||
if (bond_is_active_slave(slave) &&
|
||||
(!time_in_range(jiffies,
|
||||
trans_start - delta_in_ticks,
|
||||
trans_start + 2 * delta_in_ticks) ||
|
||||
trans_start + 2 * delta_in_ticks + extra_ticks) ||
|
||||
!time_in_range(jiffies,
|
||||
slave_last_rx(bond, slave) - delta_in_ticks,
|
||||
slave_last_rx(bond, slave) + 2 * delta_in_ticks))) {
|
||||
slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {
|
||||
|
||||
slave->new_link = BOND_LINK_DOWN;
|
||||
commit++;
|
||||
|
@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
|
|||
if ((!bond->curr_active_slave &&
|
||||
time_in_range(jiffies,
|
||||
trans_start - delta_in_ticks,
|
||||
trans_start + delta_in_ticks)) ||
|
||||
trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
|
||||
bond->curr_active_slave != slave) {
|
||||
slave->link = BOND_LINK_UP;
|
||||
if (bond->current_arp_slave) {
|
||||
|
|
Loading…
Reference in a new issue