net: Introduce generic failover module
The failover module provides a generic interface for paravirtual drivers to register a netdev and a set of ops with a failover instance. The ops are used as event handlers that get called to handle netdev register/unregister/link change/name change events on slave pci ethernet devices with the same mac address as the failover netdev. This enables paravirtual drivers to use a VF as an accelerated low latency datapath. It also allows migration of VMs with direct attached VFs by failing over to the paravirtual datapath when the VF is unplugged. Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
cb1603948a
commit
30c8bd5aa8
7 changed files with 407 additions and 0 deletions
18
Documentation/networking/failover.rst
Normal file
18
Documentation/networking/failover.rst
Normal file
|
@ -0,0 +1,18 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
========
|
||||
FAILOVER
|
||||
========
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
The failover module provides a generic interface for paravirtual drivers
|
||||
to register a netdev and a set of ops with a failover instance. The ops
|
||||
are used as event handlers that get called to handle netdev register/
|
||||
unregister/link change/name change events on slave pci ethernet devices
|
||||
with the same mac address as the failover netdev.
|
||||
|
||||
This enables paravirtual drivers to use a VF as an accelerated low latency
|
||||
datapath. It also allows live migration of VMs with direct attached VFs by
|
||||
failing over to the paravirtual datapath when the VF is unplugged.
|
|
@ -5411,6 +5411,14 @@ S: Maintained
|
|||
F: Documentation/hwmon/f71805f
|
||||
F: drivers/hwmon/f71805f.c
|
||||
|
||||
FAILOVER MODULE
|
||||
M: Sridhar Samudrala <sridhar.samudrala@intel.com>
|
||||
L: netdev@vger.kernel.org
|
||||
S: Supported
|
||||
F: net/core/failover.c
|
||||
F: include/net/failover.h
|
||||
F: Documentation/networking/failover.rst
|
||||
|
||||
FANOTIFY
|
||||
M: Jan Kara <jack@suse.cz>
|
||||
R: Amir Goldstein <amir73il@gmail.com>
|
||||
|
|
|
@ -1425,6 +1425,8 @@ struct net_device_ops {
|
|||
* entity (i.e. the master device for bridged veth)
|
||||
* @IFF_MACSEC: device is a MACsec device
|
||||
* @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook
|
||||
* @IFF_FAILOVER: device is a failover master device
|
||||
* @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
|
||||
*/
|
||||
enum netdev_priv_flags {
|
||||
IFF_802_1Q_VLAN = 1<<0,
|
||||
|
@ -1454,6 +1456,8 @@ enum netdev_priv_flags {
|
|||
IFF_PHONY_HEADROOM = 1<<24,
|
||||
IFF_MACSEC = 1<<25,
|
||||
IFF_NO_RX_HANDLER = 1<<26,
|
||||
IFF_FAILOVER = 1<<27,
|
||||
IFF_FAILOVER_SLAVE = 1<<28,
|
||||
};
|
||||
|
||||
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
|
||||
|
@ -1482,6 +1486,8 @@ enum netdev_priv_flags {
|
|||
#define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
|
||||
#define IFF_MACSEC IFF_MACSEC
|
||||
#define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER
|
||||
#define IFF_FAILOVER IFF_FAILOVER
|
||||
#define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE
|
||||
|
||||
/**
|
||||
* struct net_device - The DEVICE structure.
|
||||
|
@ -4336,6 +4342,16 @@ static inline bool netif_is_rxfh_configured(const struct net_device *dev)
|
|||
return dev->priv_flags & IFF_RXFH_CONFIGURED;
|
||||
}
|
||||
|
||||
static inline bool netif_is_failover(const struct net_device *dev)
|
||||
{
|
||||
return dev->priv_flags & IFF_FAILOVER;
|
||||
}
|
||||
|
||||
static inline bool netif_is_failover_slave(const struct net_device *dev)
|
||||
{
|
||||
return dev->priv_flags & IFF_FAILOVER_SLAVE;
|
||||
}
|
||||
|
||||
/* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
|
||||
static inline void netif_keep_dst(struct net_device *dev)
|
||||
{
|
||||
|
|
36
include/net/failover.h
Normal file
36
include/net/failover.h
Normal file
|
@ -0,0 +1,36 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2018, Intel Corporation. */
|
||||
|
||||
#ifndef _FAILOVER_H
|
||||
#define _FAILOVER_H
|
||||
|
||||
#include <linux/netdevice.h>
|
||||
|
||||
struct failover_ops {
|
||||
int (*slave_pre_register)(struct net_device *slave_dev,
|
||||
struct net_device *failover_dev);
|
||||
int (*slave_register)(struct net_device *slave_dev,
|
||||
struct net_device *failover_dev);
|
||||
int (*slave_pre_unregister)(struct net_device *slave_dev,
|
||||
struct net_device *failover_dev);
|
||||
int (*slave_unregister)(struct net_device *slave_dev,
|
||||
struct net_device *failover_dev);
|
||||
int (*slave_link_change)(struct net_device *slave_dev,
|
||||
struct net_device *failover_dev);
|
||||
int (*slave_name_change)(struct net_device *slave_dev,
|
||||
struct net_device *failover_dev);
|
||||
rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
|
||||
};
|
||||
|
||||
struct failover {
|
||||
struct list_head list;
|
||||
struct net_device __rcu *failover_dev;
|
||||
struct failover_ops __rcu *ops;
|
||||
};
|
||||
|
||||
struct failover *failover_register(struct net_device *dev,
|
||||
struct failover_ops *ops);
|
||||
void failover_unregister(struct failover *failover);
|
||||
int failover_slave_unregister(struct net_device *slave_dev);
|
||||
|
||||
#endif /* _FAILOVER_H */
|
13
net/Kconfig
13
net/Kconfig
|
@ -432,6 +432,19 @@ config MAY_USE_DEVLINK
|
|||
config PAGE_POOL
|
||||
bool
|
||||
|
||||
config FAILOVER
|
||||
tristate "Generic failover module"
|
||||
help
|
||||
The failover module provides a generic interface for paravirtual
|
||||
drivers to register a netdev and a set of ops with a failover
|
||||
instance. The ops are used as event handlers that get called to
|
||||
handle netdev register/unregister/link change/name change events
|
||||
on slave pci ethernet devices with the same mac address as the
|
||||
failover netdev. This enables paravirtual drivers to use a
|
||||
VF as an accelerated low latency datapath. It also allows live
|
||||
migration of VMs with direct attached VFs by failing over to the
|
||||
paravirtual datapath when the VF is unplugged.
|
||||
|
||||
endif # if NET
|
||||
|
||||
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
|
||||
|
|
|
@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o
|
|||
obj-$(CONFIG_HWBM) += hwbm.o
|
||||
obj-$(CONFIG_NET_DEVLINK) += devlink.o
|
||||
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
|
||||
obj-$(CONFIG_FAILOVER) += failover.o
|
||||
|
|
315
net/core/failover.c
Normal file
315
net/core/failover.c
Normal file
|
@ -0,0 +1,315 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2018, Intel Corporation. */
|
||||
|
||||
/* A common module to handle registrations and notifications for paravirtual
|
||||
* drivers to enable accelerated datapath and support VF live migration.
|
||||
*
|
||||
* The notifier and event handling code is based on netvsc driver.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <uapi/linux/if_arp.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/if_vlan.h>
|
||||
#include <net/failover.h>
|
||||
|
||||
static LIST_HEAD(failover_list);
|
||||
static DEFINE_SPINLOCK(failover_lock);
|
||||
|
||||
/* Find the registered failover netdev whose permanent address equals @mac.
 *
 * On a match, *@ops is set to the ops of that instance and the failover
 * netdev is returned. Without a match NULL is returned and *@ops is left
 * untouched. The list is walked under failover_lock.
 */
static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
{
	struct net_device *match = NULL;
	struct failover *entry;

	spin_lock(&failover_lock);
	list_for_each_entry(entry, &failover_list, list) {
		struct net_device *candidate;

		candidate = rtnl_dereference(entry->failover_dev);
		if (!ether_addr_equal(candidate->perm_addr, mac))
			continue;

		*ops = rtnl_dereference(entry->ops);
		match = candidate;
		break;
	}
	spin_unlock(&failover_lock);

	return match;
}
|
||||
|
||||
/**
|
||||
* failover_slave_register - Register a slave netdev
|
||||
*
|
||||
* @slave_dev: slave netdev that is being registered
|
||||
*
|
||||
* Registers a slave device to a failover instance. Only ethernet devices
|
||||
* are supported.
|
||||
*/
|
||||
static int failover_slave_register(struct net_device *slave_dev)
|
||||
{
|
||||
struct netdev_lag_upper_info lag_upper_info;
|
||||
struct net_device *failover_dev;
|
||||
struct failover_ops *fops;
|
||||
int err;
|
||||
|
||||
if (slave_dev->type != ARPHRD_ETHER)
|
||||
goto done;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
|
||||
if (!failover_dev)
|
||||
goto done;
|
||||
|
||||
if (fops && fops->slave_pre_register &&
|
||||
fops->slave_pre_register(slave_dev, failover_dev))
|
||||
goto done;
|
||||
|
||||
err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
|
||||
failover_dev);
|
||||
if (err) {
|
||||
netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
|
||||
err);
|
||||
goto done;
|
||||
}
|
||||
|
||||
lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
|
||||
err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL,
|
||||
&lag_upper_info, NULL);
|
||||
if (err) {
|
||||
netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
|
||||
failover_dev->name, err);
|
||||
goto err_upper_link;
|
||||
}
|
||||
|
||||
slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
|
||||
|
||||
if (fops && fops->slave_register &&
|
||||
!fops->slave_register(slave_dev, failover_dev))
|
||||
return NOTIFY_OK;
|
||||
|
||||
netdev_upper_dev_unlink(slave_dev, failover_dev);
|
||||
slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
|
||||
err_upper_link:
|
||||
netdev_rx_handler_unregister(slave_dev);
|
||||
done:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* failover_slave_unregister - Unregister a slave netdev
|
||||
*
|
||||
* @slave_dev: slave netdev that is being unregistered
|
||||
*
|
||||
* Unregisters a slave device from a failover instance.
|
||||
*/
|
||||
int failover_slave_unregister(struct net_device *slave_dev)
|
||||
{
|
||||
struct net_device *failover_dev;
|
||||
struct failover_ops *fops;
|
||||
|
||||
if (!netif_is_failover_slave(slave_dev))
|
||||
goto done;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
|
||||
if (!failover_dev)
|
||||
goto done;
|
||||
|
||||
if (fops && fops->slave_pre_unregister &&
|
||||
fops->slave_pre_unregister(slave_dev, failover_dev))
|
||||
goto done;
|
||||
|
||||
netdev_rx_handler_unregister(slave_dev);
|
||||
netdev_upper_dev_unlink(slave_dev, failover_dev);
|
||||
slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
|
||||
|
||||
if (fops && fops->slave_unregister &&
|
||||
!fops->slave_unregister(slave_dev, failover_dev))
|
||||
return NOTIFY_OK;
|
||||
|
||||
done:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(failover_slave_unregister);
|
||||
|
||||
static int failover_slave_link_change(struct net_device *slave_dev)
|
||||
{
|
||||
struct net_device *failover_dev;
|
||||
struct failover_ops *fops;
|
||||
|
||||
if (!netif_is_failover_slave(slave_dev))
|
||||
goto done;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
|
||||
if (!failover_dev)
|
||||
goto done;
|
||||
|
||||
if (!netif_running(failover_dev))
|
||||
goto done;
|
||||
|
||||
if (fops && fops->slave_link_change &&
|
||||
!fops->slave_link_change(slave_dev, failover_dev))
|
||||
return NOTIFY_OK;
|
||||
|
||||
done:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static int failover_slave_name_change(struct net_device *slave_dev)
|
||||
{
|
||||
struct net_device *failover_dev;
|
||||
struct failover_ops *fops;
|
||||
|
||||
if (!netif_is_failover_slave(slave_dev))
|
||||
goto done;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
|
||||
if (!failover_dev)
|
||||
goto done;
|
||||
|
||||
if (!netif_running(failover_dev))
|
||||
goto done;
|
||||
|
||||
if (fops && fops->slave_name_change &&
|
||||
!fops->slave_name_change(slave_dev, failover_dev))
|
||||
return NOTIFY_OK;
|
||||
|
||||
done:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static int
|
||||
failover_event(struct notifier_block *this, unsigned long event, void *ptr)
|
||||
{
|
||||
struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
|
||||
|
||||
/* Skip parent events */
|
||||
if (netif_is_failover(event_dev))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
switch (event) {
|
||||
case NETDEV_REGISTER:
|
||||
return failover_slave_register(event_dev);
|
||||
case NETDEV_UNREGISTER:
|
||||
return failover_slave_unregister(event_dev);
|
||||
case NETDEV_UP:
|
||||
case NETDEV_DOWN:
|
||||
case NETDEV_CHANGE:
|
||||
return failover_slave_link_change(event_dev);
|
||||
case NETDEV_CHANGENAME:
|
||||
return failover_slave_name_change(event_dev);
|
||||
default:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
}
|
||||
|
||||
static struct notifier_block failover_notifier = {
|
||||
.notifier_call = failover_event,
|
||||
};
|
||||
|
||||
static void
|
||||
failover_existing_slave_register(struct net_device *failover_dev)
|
||||
{
|
||||
struct net *net = dev_net(failover_dev);
|
||||
struct net_device *dev;
|
||||
|
||||
rtnl_lock();
|
||||
for_each_netdev(net, dev) {
|
||||
if (netif_is_failover(dev))
|
||||
continue;
|
||||
if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
|
||||
failover_slave_register(dev);
|
||||
}
|
||||
rtnl_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* failover_register - Register a failover instance
|
||||
*
|
||||
* @dev: failover netdev
|
||||
* @ops: failover ops
|
||||
*
|
||||
* Allocate and register a failover instance for a failover netdev. ops
|
||||
* provides handlers for slave device register/unregister/link change/
|
||||
* name change events.
|
||||
*
|
||||
* Return: pointer to failover instance
|
||||
*/
|
||||
struct failover *failover_register(struct net_device *dev,
|
||||
struct failover_ops *ops)
|
||||
{
|
||||
struct failover *failover;
|
||||
|
||||
if (dev->type != ARPHRD_ETHER)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
failover = kzalloc(sizeof(*failover), GFP_KERNEL);
|
||||
if (!failover)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
rcu_assign_pointer(failover->ops, ops);
|
||||
dev_hold(dev);
|
||||
dev->priv_flags |= IFF_FAILOVER;
|
||||
rcu_assign_pointer(failover->failover_dev, dev);
|
||||
|
||||
spin_lock(&failover_lock);
|
||||
list_add_tail(&failover->list, &failover_list);
|
||||
spin_unlock(&failover_lock);
|
||||
|
||||
netdev_info(dev, "failover master:%s registered\n", dev->name);
|
||||
|
||||
failover_existing_slave_register(dev);
|
||||
|
||||
return failover;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(failover_register);
|
||||
|
||||
/**
|
||||
* failover_unregister - Unregister a failover instance
|
||||
*
|
||||
* @failover: pointer to failover instance
|
||||
*
|
||||
* Unregisters and frees a failover instance.
|
||||
*/
|
||||
void failover_unregister(struct failover *failover)
|
||||
{
|
||||
struct net_device *failover_dev;
|
||||
|
||||
failover_dev = rcu_dereference(failover->failover_dev);
|
||||
|
||||
netdev_info(failover_dev, "failover master:%s unregistered\n",
|
||||
failover_dev->name);
|
||||
|
||||
failover_dev->priv_flags &= ~IFF_FAILOVER;
|
||||
dev_put(failover_dev);
|
||||
|
||||
spin_lock(&failover_lock);
|
||||
list_del(&failover->list);
|
||||
spin_unlock(&failover_lock);
|
||||
|
||||
kfree(failover);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(failover_unregister);
|
||||
|
||||
static __init int
|
||||
failover_init(void)
|
||||
{
|
||||
register_netdevice_notifier(&failover_notifier);
|
||||
|
||||
return 0;
|
||||
}
|
||||
module_init(failover_init);
|
||||
|
||||
static __exit
|
||||
void failover_exit(void)
|
||||
{
|
||||
unregister_netdevice_notifier(&failover_notifier);
|
||||
}
|
||||
module_exit(failover_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Generic failover infrastructure/interface");
|
||||
MODULE_LICENSE("GPL v2");
|
Loading…
Reference in a new issue