diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 591ea23639ca..691268f3a359 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -630,5 +630,3 @@ config TCP_MD5SIG If unsure, say N. -source "net/ipv4/ipvs/Kconfig" - diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9ebc..80ff87ce43aa 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ -obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f70b4145ffc7..78892cf2b021 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -858,3 +858,5 @@ config NETFILTER_XT_MATCH_U32 endif # NETFILTER_XTABLES endmenu + +source "net/netfilter/ipvs/Kconfig" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8ce67665882d..da3d909e053f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -89,3 +89,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o + +# IPVS +obj-$(CONFIG_IP_VS) += ipvs/ diff --git a/net/ipv4/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig similarity index 100% rename from net/ipv4/ipvs/Kconfig rename to net/netfilter/ipvs/Kconfig diff --git a/net/ipv4/ipvs/Makefile b/net/netfilter/ipvs/Makefile similarity index 100% rename from net/ipv4/ipvs/Makefile rename to net/netfilter/ipvs/Makefile diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_app.c rename to net/netfilter/ipvs/ip_vs_app.c diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_conn.c rename to net/netfilter/ipvs/ip_vs_conn.c diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_core.c rename to net/netfilter/ipvs/ip_vs_core.c diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c similarity index 99% rename from net/ipv4/ipvs/ip_vs_ctl.c rename to net/netfilter/ipvs/ip_vs_ctl.c index 771551d8fba9..0302cf3e5039 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1330,7 +1330,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) out_unlock: write_unlock_bh(&__ip_vs_svc_lock); +#ifdef CONFIG_IP_VS_IPV6 out: +#endif if (old_sched) ip_vs_scheduler_put(old_sched); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c new file mode 100644 index 000000000000..a16943fd72f1 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -0,0 +1,261 @@ +/* + * IPVS: Destination Hashing scheduling module + * + * Authors: Wensong Zhang + * + * Inspired by the consistent hashing scheduler patch from + * Thomas Proell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Changes: + * + */ + +/* + * The dh algorithm is to select server by the hash key of destination IP + * address. The pseudo code is as follows: + * + * n <- servernode[dest_ip]; + * if (n is dead) OR + * (n is overloaded) OR (n.weight <= 0) then + * return NULL; + * + * return n; + * + * Notes that servernode is a 256-bucket hash table that maps the hash + * index derived from packet destination IP address to the current server + * array. If the dh scheduler is used in cache cluster, it is good to + * combine it with cache_bypass feature. When the statically assigned + * server is dead or overloaded, the load balancer can bypass the cache + * server and send requests to the original server directly. + * + */ + +#include +#include +#include +#include + +#include + + +/* + * IPVS DH bucket + */ +struct ip_vs_dh_bucket { + struct ip_vs_dest *dest; /* real server (cache) */ +}; + +/* + * for IPVS DH entry hash table + */ +#ifndef CONFIG_IP_VS_DH_TAB_BITS +#define CONFIG_IP_VS_DH_TAB_BITS 8 +#endif +#define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS +#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) +#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) + + +/* + * Returns hash value for IPVS DH entry + */ +static inline unsigned ip_vs_dh_hashkey(__be32 addr) +{ + return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; +} + + +/* + * Get ip_vs_dest associated with supplied parameters. + */ +static inline struct ip_vs_dest * +ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) +{ + return (tbl[ip_vs_dh_hashkey(addr)]).dest; +} + + +/* + * Assign all the hash buckets of the specified table with the service. + */ +static int +ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) +{ + int i; + struct ip_vs_dh_bucket *b; + struct list_head *p; + struct ip_vs_dest *dest; + + b = tbl; + p = &svc->destinations; + for (i=0; idest = NULL; + } else { + if (p == &svc->destinations) + p = p->next; + + dest = list_entry(p, struct ip_vs_dest, n_list); + atomic_inc(&dest->refcnt); + b->dest = dest; + + p = p->next; + } + b++; + } + return 0; +} + + +/* + * Flush all the hash buckets of the specified table. + */ +static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) +{ + int i; + struct ip_vs_dh_bucket *b; + + b = tbl; + for (i=0; idest) { + atomic_dec(&b->dest->refcnt); + b->dest = NULL; + } + b++; + } +} + + +static int ip_vs_dh_init_svc(struct ip_vs_service *svc) +{ + struct ip_vs_dh_bucket *tbl; + + /* allocate the DH table for this service */ + tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, + GFP_ATOMIC); + if (tbl == NULL) { + IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n"); + return -ENOMEM; + } + svc->sched_data = tbl; + IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " + "current service\n", + sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); + + /* assign the hash buckets with the updated service */ + ip_vs_dh_assign(tbl, svc); + + return 0; +} + + +static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +{ + struct ip_vs_dh_bucket *tbl = svc->sched_data; + + /* got to clean up hash buckets here */ + ip_vs_dh_flush(tbl); + + /* release the table itself */ + kfree(svc->sched_data); + IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", + sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); + + return 0; +} + + +static int ip_vs_dh_update_svc(struct ip_vs_service *svc) +{ + struct ip_vs_dh_bucket *tbl = svc->sched_data; + + /* got to clean up hash buckets here */ + ip_vs_dh_flush(tbl); + + /* assign the hash buckets with the updated service */ + ip_vs_dh_assign(tbl, svc); + + return 0; +} + + +/* + * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, + * consider that the server is overloaded here. + */ +static inline int is_overloaded(struct ip_vs_dest *dest) +{ + return dest->flags & IP_VS_DEST_F_OVERLOAD; +} + + +/* + * Destination hashing scheduling + */ +static struct ip_vs_dest * +ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +{ + struct ip_vs_dest *dest; + struct ip_vs_dh_bucket *tbl; + struct iphdr *iph = ip_hdr(skb); + + IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); + + tbl = (struct ip_vs_dh_bucket *)svc->sched_data; + dest = ip_vs_dh_get(tbl, iph->daddr); + if (!dest + || !(dest->flags & IP_VS_DEST_F_AVAILABLE) + || atomic_read(&dest->weight) <= 0 + || is_overloaded(dest)) { + return NULL; + } + + IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " + "--> server %u.%u.%u.%u:%d\n", + NIPQUAD(iph->daddr), + NIPQUAD(dest->addr.ip), + ntohs(dest->port)); + + return dest; +} + + +/* + * IPVS DH Scheduler structure + */ +static struct ip_vs_scheduler ip_vs_dh_scheduler = +{ + .name = "dh", + .refcnt = ATOMIC_INIT(0), + .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif + .init_service = ip_vs_dh_init_svc, + .done_service = ip_vs_dh_done_svc, + .update_service = ip_vs_dh_update_svc, + .schedule = ip_vs_dh_schedule, +}; + + +static int __init ip_vs_dh_init(void) +{ + return register_ip_vs_scheduler(&ip_vs_dh_scheduler); +} + + +static void __exit ip_vs_dh_cleanup(void) +{ + unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); +} + + +module_init(ip_vs_dh_init); +module_exit(ip_vs_dh_cleanup); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_est.c rename to net/netfilter/ipvs/ip_vs_est.c diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_ftp.c rename to net/netfilter/ipvs/ip_vs_ftp.c diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_lblc.c rename to net/netfilter/ipvs/ip_vs_lblc.c diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_lblcr.c rename to net/netfilter/ipvs/ip_vs_lblcr.c diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_lc.c rename to net/netfilter/ipvs/ip_vs_lc.c diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_nq.c rename to net/netfilter/ipvs/ip_vs_nq.c diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_proto.c rename to net/netfilter/ipvs/ip_vs_proto.c diff --git a/net/ipv4/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_proto_ah_esp.c rename to net/netfilter/ipvs/ip_vs_proto_ah_esp.c diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_proto_tcp.c rename to net/netfilter/ipvs/ip_vs_proto_tcp.c diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_proto_udp.c rename to net/netfilter/ipvs/ip_vs_proto_udp.c diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_rr.c rename to net/netfilter/ipvs/ip_vs_rr.c diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_sched.c rename to net/netfilter/ipvs/ip_vs_sched.c diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_sed.c rename to net/netfilter/ipvs/ip_vs_sed.c diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_sh.c rename to net/netfilter/ipvs/ip_vs_sh.c diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c similarity index 99% rename from net/ipv4/ipvs/ip_vs_sync.c rename to net/netfilter/ipvs/ip_vs_sync.c index 28237a5f62e2..de5e7e118eed 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,7 @@ struct ip_vs_sync_thread_data { */ #define SYNC_MESG_HEADER_LEN 4 +#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ struct ip_vs_sync_mesg { __u8 nr_conns; @@ -516,8 +518,8 @@ static int set_sync_mesg_maxlen(int sync_state) num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = - SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num; + sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_wlc.c rename to net/netfilter/ipvs/ip_vs_wlc.c diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_wrr.c rename to net/netfilter/ipvs/ip_vs_wrr.c diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c similarity index 100% rename from net/ipv4/ipvs/ip_vs_xmit.c rename to net/netfilter/ipvs/ip_vs_xmit.c