cd354f1ae7
After Al Viro (finally) succeeded in removing the sched.h #include in module.h recently, it makes sense again to remove other superfluous sched.h includes. There are quite a lot of files which include it but don't actually need anything defined in there. Presumably these includes were once needed for macros that used to live in sched.h, but moved to other header files in the course of cleaning it up. To ease the pain, this time I did not fiddle with any header files and only removed #includes from .c-files, which tend to cause less trouble. Compile tested against 2.6.20-rc2 and 2.6.20-rc2-mm2 (with offsets) on alpha, arm, i386, ia64, mips, powerpc, and x86_64 with allnoconfig, defconfig, allmodconfig, and allyesconfig as well as a few randconfigs on x86_64 and all configs in arch/arm/configs on arm. I also checked that no new warnings were introduced by the patch (actually, some warnings are removed that were emitted by unnecessarily included header files). Signed-off-by: Tim Schmielau <tim@physik3.uni-rostock.de> Acked-by: Russell King <rmk+kernel@arm.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
340 lines
8 KiB
C
340 lines
8 KiB
C
/*
|
|
* Weighted random policy for multipath.
|
|
*
|
|
*
|
|
* Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
|
|
*
|
|
* Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/uaccess.h>
|
|
#include <linux/types.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/timer.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/stat.h>
|
|
#include <linux/socket.h>
|
|
#include <linux/in.h>
|
|
#include <linux/inet.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/inetdevice.h>
|
|
#include <linux/igmp.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mroute.h>
|
|
#include <linux/init.h>
|
|
#include <net/ip.h>
|
|
#include <net/protocol.h>
|
|
#include <linux/skbuff.h>
|
|
#include <net/sock.h>
|
|
#include <net/icmp.h>
|
|
#include <net/udp.h>
|
|
#include <net/raw.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/if_arp.h>
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <net/ipip.h>
|
|
#include <net/checksum.h>
|
|
#include <net/ip_fib.h>
|
|
#include <net/ip_mp_alg.h>
|
|
|
|
/* Number of buckets in the state[] table; entries are placed in a bucket
 * by interface index modulo this value.
 */
#define MULTIPATH_STATE_SIZE 15
|
|
|
|
/*
 * Node of the temporary, singly linked candidate list that
 * wrandom_select_route() builds for one selection and frees again
 * before it returns.
 */
struct multipath_candidate {
	/* following candidate; NULL marks the tail of the list */
	struct multipath_candidate *next;
	/* running sum of the scaled weights up to and including this node */
	int power;
	/* cached route this candidate stands for */
	struct rtable *rt;
};
|
|
|
|
struct multipath_dest {
|
|
struct list_head list;
|
|
|
|
const struct fib_nh *nh_info;
|
|
__be32 netmask;
|
|
__be32 network;
|
|
unsigned char prefixlen;
|
|
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
struct multipath_bucket {
|
|
struct list_head head;
|
|
spinlock_t lock;
|
|
};
|
|
|
|
struct multipath_route {
|
|
struct list_head list;
|
|
|
|
int oif;
|
|
__be32 gw;
|
|
struct list_head dests;
|
|
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
/* state: primarily weight per route information */
|
|
/* Bucket table indexed by interface index % MULTIPATH_STATE_SIZE; each
 * bucket is set up by wrandom_init_state() before the algorithm is
 * registered.
 */
static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
|
|
|
|
/* interface to random number generation */
static unsigned int RANDOM_SEED = 93186752;

/*
 * random - step the module-private pseudo random generator
 * @ubound: exclusive upper bound of the returned value
 *
 * Advances RANDOM_SEED by one step of the fixed recurrence below
 * (unsigned arithmetic, so it wraps) and returns the new seed reduced
 * modulo @ubound.
 *
 * A @ubound of 0 yields 0 instead of performing a division by zero; the
 * seed is still advanced in that case.  wrandom_select_route() passes the
 * summed candidate weights, which is 0 when no candidate matched.
 *
 * RANDOM_SEED is updated without any locking.
 */
static inline unsigned int random(unsigned int ubound)
{
	static const unsigned int a = 1588635695,
				  q = 2,
				  r = 1117695901;

	RANDOM_SEED = a * (RANDOM_SEED % q) - r * (RANDOM_SEED / q);

	/* "% 0" is undefined; an empty range can only produce 0 */
	if (!ubound)
		return 0;

	return RANDOM_SEED % ubound;
}
|
|
|
|
static unsigned char __multipath_lookup_weight(const struct flowi *fl,
|
|
const struct rtable *rt)
|
|
{
|
|
const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
|
|
struct multipath_route *r;
|
|
struct multipath_route *target_route = NULL;
|
|
struct multipath_dest *d;
|
|
int weight = 1;
|
|
|
|
/* lookup the weight information for a certain route */
|
|
rcu_read_lock();
|
|
|
|
/* find state entry for gateway or add one if necessary */
|
|
list_for_each_entry_rcu(r, &state[state_idx].head, list) {
|
|
if (r->gw == rt->rt_gateway &&
|
|
r->oif == rt->idev->dev->ifindex) {
|
|
target_route = r;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!target_route) {
|
|
/* this should not happen... but we are prepared */
|
|
printk( KERN_CRIT"%s: missing state for gateway: %u and " \
|
|
"device %d\n", __FUNCTION__, rt->rt_gateway,
|
|
rt->idev->dev->ifindex);
|
|
goto out;
|
|
}
|
|
|
|
/* find state entry for destination */
|
|
list_for_each_entry_rcu(d, &target_route->dests, list) {
|
|
__be32 targetnetwork = fl->fl4_dst &
|
|
inet_make_mask(d->prefixlen);
|
|
|
|
if ((targetnetwork & d->netmask) == d->network) {
|
|
weight = d->nh_info->nh_weight;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
out:
|
|
rcu_read_unlock();
|
|
return weight;
|
|
}
|
|
|
|
static void wrandom_init_state(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
|
|
INIT_LIST_HEAD(&state[i].head);
|
|
spin_lock_init(&state[i].lock);
|
|
}
|
|
}
|
|
|
|
static void wrandom_select_route(const struct flowi *flp,
|
|
struct rtable *first,
|
|
struct rtable **rp)
|
|
{
|
|
struct rtable *rt;
|
|
struct rtable *decision;
|
|
struct multipath_candidate *first_mpc = NULL;
|
|
struct multipath_candidate *mpc, *last_mpc = NULL;
|
|
int power = 0;
|
|
int last_power;
|
|
int selector;
|
|
const size_t size_mpc = sizeof(struct multipath_candidate);
|
|
|
|
/* collect all candidates and identify their weights */
|
|
for (rt = rcu_dereference(first); rt;
|
|
rt = rcu_dereference(rt->u.dst.rt_next)) {
|
|
if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
|
|
multipath_comparekeys(&rt->fl, flp)) {
|
|
struct multipath_candidate* mpc =
|
|
(struct multipath_candidate*)
|
|
kmalloc(size_mpc, GFP_ATOMIC);
|
|
|
|
if (!mpc)
|
|
return;
|
|
|
|
power += __multipath_lookup_weight(flp, rt) * 10000;
|
|
|
|
mpc->power = power;
|
|
mpc->rt = rt;
|
|
mpc->next = NULL;
|
|
|
|
if (!first_mpc)
|
|
first_mpc = mpc;
|
|
else
|
|
last_mpc->next = mpc;
|
|
|
|
last_mpc = mpc;
|
|
}
|
|
}
|
|
|
|
/* choose a weighted random candidate */
|
|
decision = first;
|
|
selector = random(power);
|
|
last_power = 0;
|
|
|
|
/* select candidate, adjust GC data and cleanup local state */
|
|
decision = first;
|
|
last_mpc = NULL;
|
|
for (mpc = first_mpc; mpc; mpc = mpc->next) {
|
|
mpc->rt->u.dst.lastuse = jiffies;
|
|
if (last_power <= selector && selector < mpc->power)
|
|
decision = mpc->rt;
|
|
|
|
last_power = mpc->power;
|
|
kfree(last_mpc);
|
|
last_mpc = mpc;
|
|
}
|
|
|
|
/* concurrent __multipath_flush may lead to !last_mpc */
|
|
kfree(last_mpc);
|
|
|
|
decision->u.dst.__use++;
|
|
*rp = decision;
|
|
}
|
|
|
|
/*
 * wrandom_set_nhinfo - remember which next hop serves a destination
 * @network: network address of the destination
 * @netmask: netmask of the destination
 * @prefixlen: prefix length of the destination
 * @nh: next hop; its nh_weight is read later by the weight lookup
 *
 * Under the bucket lock, finds or creates the multipath_route entry for
 * the gateway/interface pair of @nh, then adds a multipath_dest for @nh
 * unless one referring to the same @nh is already stored.
 *
 * Allocations use GFP_ATOMIC and may fail.  On failure the information
 * is simply not recorded (the function cannot report an error); a route
 * entry that was just created stays in the table with whatever
 * destinations it has.  Lookups that find no matching destination fall
 * back to a weight of 1.
 */
static void wrandom_set_nhinfo(__be32 network,
			       __be32 netmask,
			       unsigned char prefixlen,
			       const struct fib_nh *nh)
{
	const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
	struct multipath_route *r, *target_route = NULL;
	struct multipath_dest *d, *target_dest = NULL;

	/* store the weight information for a certain route */
	spin_lock_bh(&state[state_idx].lock);

	/* find state entry for gateway or add one if necessary */
	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
		if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
			target_route = r;
			break;
		}
	}

	if (!target_route) {
		target_route = kmalloc(sizeof(*target_route), GFP_ATOMIC);
		if (!target_route)
			goto out_unlock;

		target_route->gw = nh->nh_gw;
		target_route->oif = nh->nh_oif;
		memset(&target_route->rcu, 0, sizeof(struct rcu_head));
		INIT_LIST_HEAD(&target_route->dests);

		/* publish only after the entry is fully set up */
		list_add_rcu(&target_route->list, &state[state_idx].head);
	}

	/* find state entry for destination or add one if necessary */
	list_for_each_entry_rcu(d, &target_route->dests, list) {
		if (d->nh_info == nh) {
			target_dest = d;
			break;
		}
	}

	if (!target_dest) {
		target_dest = kmalloc(sizeof(*target_dest), GFP_ATOMIC);
		if (!target_dest)
			goto out_unlock;

		target_dest->nh_info = nh;
		target_dest->network = network;
		target_dest->netmask = netmask;
		target_dest->prefixlen = prefixlen;
		memset(&target_dest->rcu, 0, sizeof(struct rcu_head));

		list_add_rcu(&target_dest->list, &target_route->dests);
	}
	/* else: we already stored this info for another destination =>
	 * we are finished
	 */

out_unlock:
	spin_unlock_bh(&state[state_idx].lock);
}
|
|
|
|
static void __multipath_free(struct rcu_head *head)
|
|
{
|
|
struct multipath_route *rt = container_of(head, struct multipath_route,
|
|
rcu);
|
|
kfree(rt);
|
|
}
|
|
|
|
static void __multipath_free_dst(struct rcu_head *head)
|
|
{
|
|
struct multipath_dest *dst = container_of(head,
|
|
struct multipath_dest,
|
|
rcu);
|
|
kfree(dst);
|
|
}
|
|
|
|
static void wrandom_flush(void)
|
|
{
|
|
int i;
|
|
|
|
/* defere delete to all entries */
|
|
for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
|
|
struct multipath_route *r;
|
|
|
|
spin_lock_bh(&state[i].lock);
|
|
list_for_each_entry_rcu(r, &state[i].head, list) {
|
|
struct multipath_dest *d;
|
|
list_for_each_entry_rcu(d, &r->dests, list) {
|
|
list_del_rcu(&d->list);
|
|
call_rcu(&d->rcu,
|
|
__multipath_free_dst);
|
|
}
|
|
list_del_rcu(&r->list);
|
|
call_rcu(&r->rcu,
|
|
__multipath_free);
|
|
}
|
|
|
|
spin_unlock_bh(&state[i].lock);
|
|
}
|
|
}
|
|
|
|
static struct ip_mp_alg_ops wrandom_ops = {
|
|
.mp_alg_select_route = wrandom_select_route,
|
|
.mp_alg_flush = wrandom_flush,
|
|
.mp_alg_set_nhinfo = wrandom_set_nhinfo,
|
|
};
|
|
|
|
/*
 * wrandom_init - module entry point
 *
 * Initialises the bucket table first, then registers wrandom_ops as the
 * IP_MP_ALG_WRANDOM algorithm.  Returns whatever
 * multipath_alg_register() returns.
 */
static int __init wrandom_init(void)
{
	int err;

	wrandom_init_state();
	err = multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);

	return err;
}
|
|
|
|
/*
 * wrandom_exit - module exit point
 *
 * Unregisters wrandom_ops from the IP_MP_ALG_WRANDOM slot.
 * NOTE(review): the recorded state is not flushed here - confirm that
 * nothing is left allocated when the module is unloaded.
 */
static void __exit wrandom_exit(void)
{
	multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
}
|
|
|
|
module_init(wrandom_init);
|
|
module_exit(wrandom_exit);
|
|
MODULE_LICENSE("GPL");
|