14c850212e
To help in reducing the number of include dependencies, several files were touched as they were getting needed headers indirectly for stuff they use. Thanks also to Alan Menegotto for pointing out that net/dccp/proto.c had linux/dccp.h include twice. Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
430 lines
9.9 KiB
C
430 lines
9.9 KiB
C
/*
 * ip_vs_proto_udp.c:	UDP load balancing support for IPVS
 *
 * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
|
|
|
|
#include <linux/in.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <linux/udp.h>
|
|
|
|
#include <net/ip_vs.h>
|
|
|
|
|
|
static struct ip_vs_conn *
|
|
udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
const struct iphdr *iph, unsigned int proto_off, int inverse)
|
|
{
|
|
struct ip_vs_conn *cp;
|
|
__u16 _ports[2], *pptr;
|
|
|
|
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
|
|
if (pptr == NULL)
|
|
return NULL;
|
|
|
|
if (likely(!inverse)) {
|
|
cp = ip_vs_conn_in_get(iph->protocol,
|
|
iph->saddr, pptr[0],
|
|
iph->daddr, pptr[1]);
|
|
} else {
|
|
cp = ip_vs_conn_in_get(iph->protocol,
|
|
iph->daddr, pptr[1],
|
|
iph->saddr, pptr[0]);
|
|
}
|
|
|
|
return cp;
|
|
}
|
|
|
|
|
|
static struct ip_vs_conn *
|
|
udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
const struct iphdr *iph, unsigned int proto_off, int inverse)
|
|
{
|
|
struct ip_vs_conn *cp;
|
|
__u16 _ports[2], *pptr;
|
|
|
|
pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
|
|
sizeof(_ports), _ports);
|
|
if (pptr == NULL)
|
|
return NULL;
|
|
|
|
if (likely(!inverse)) {
|
|
cp = ip_vs_conn_out_get(iph->protocol,
|
|
iph->saddr, pptr[0],
|
|
iph->daddr, pptr[1]);
|
|
} else {
|
|
cp = ip_vs_conn_out_get(iph->protocol,
|
|
iph->daddr, pptr[1],
|
|
iph->saddr, pptr[0]);
|
|
}
|
|
|
|
return cp;
|
|
}
|
|
|
|
|
|
static int
|
|
udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
|
|
int *verdict, struct ip_vs_conn **cpp)
|
|
{
|
|
struct ip_vs_service *svc;
|
|
struct udphdr _udph, *uh;
|
|
|
|
uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
|
|
sizeof(_udph), &_udph);
|
|
if (uh == NULL) {
|
|
*verdict = NF_DROP;
|
|
return 0;
|
|
}
|
|
|
|
if ((svc = ip_vs_service_get(skb->nfmark, skb->nh.iph->protocol,
|
|
skb->nh.iph->daddr, uh->dest))) {
|
|
if (ip_vs_todrop()) {
|
|
/*
|
|
* It seems that we are very loaded.
|
|
* We have to drop this packet :(
|
|
*/
|
|
ip_vs_service_put(svc);
|
|
*verdict = NF_DROP;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Let the virtual server select a real server for the
|
|
* incoming connection, and create a connection entry.
|
|
*/
|
|
*cpp = ip_vs_schedule(svc, skb);
|
|
if (!*cpp) {
|
|
*verdict = ip_vs_leave(svc, skb, pp);
|
|
return 0;
|
|
}
|
|
ip_vs_service_put(svc);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
static inline void
|
|
udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip,
|
|
u16 oldport, u16 newport)
|
|
{
|
|
uhdr->check =
|
|
ip_vs_check_diff(~oldip, newip,
|
|
ip_vs_check_diff(oldport ^ 0xFFFF,
|
|
newport, uhdr->check));
|
|
if (!uhdr->check)
|
|
uhdr->check = 0xFFFF;
|
|
}
|
|
|
|
static int
|
|
udp_snat_handler(struct sk_buff **pskb,
|
|
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
|
|
{
|
|
struct udphdr *udph;
|
|
unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
|
|
|
|
/* csum_check requires unshared skb */
|
|
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
|
|
return 0;
|
|
|
|
if (unlikely(cp->app != NULL)) {
|
|
/* Some checks before mangling */
|
|
if (pp->csum_check && !pp->csum_check(*pskb, pp))
|
|
return 0;
|
|
|
|
/*
|
|
* Call application helper if needed
|
|
*/
|
|
if (!ip_vs_app_pkt_out(cp, pskb))
|
|
return 0;
|
|
}
|
|
|
|
udph = (void *)(*pskb)->nh.iph + udphoff;
|
|
udph->source = cp->vport;
|
|
|
|
/*
|
|
* Adjust UDP checksums
|
|
*/
|
|
if (!cp->app && (udph->check != 0)) {
|
|
/* Only port and addr are changed, do fast csum update */
|
|
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
|
|
cp->dport, cp->vport);
|
|
if ((*pskb)->ip_summed == CHECKSUM_HW)
|
|
(*pskb)->ip_summed = CHECKSUM_NONE;
|
|
} else {
|
|
/* full checksum calculation */
|
|
udph->check = 0;
|
|
(*pskb)->csum = skb_checksum(*pskb, udphoff,
|
|
(*pskb)->len - udphoff, 0);
|
|
udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
|
|
(*pskb)->len - udphoff,
|
|
cp->protocol,
|
|
(*pskb)->csum);
|
|
if (udph->check == 0)
|
|
udph->check = 0xFFFF;
|
|
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
|
|
pp->name, udph->check,
|
|
(char*)&(udph->check) - (char*)udph);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
static int
|
|
udp_dnat_handler(struct sk_buff **pskb,
|
|
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
|
|
{
|
|
struct udphdr *udph;
|
|
unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
|
|
|
|
/* csum_check requires unshared skb */
|
|
if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
|
|
return 0;
|
|
|
|
if (unlikely(cp->app != NULL)) {
|
|
/* Some checks before mangling */
|
|
if (pp->csum_check && !pp->csum_check(*pskb, pp))
|
|
return 0;
|
|
|
|
/*
|
|
* Attempt ip_vs_app call.
|
|
* It will fix ip_vs_conn
|
|
*/
|
|
if (!ip_vs_app_pkt_in(cp, pskb))
|
|
return 0;
|
|
}
|
|
|
|
udph = (void *)(*pskb)->nh.iph + udphoff;
|
|
udph->dest = cp->dport;
|
|
|
|
/*
|
|
* Adjust UDP checksums
|
|
*/
|
|
if (!cp->app && (udph->check != 0)) {
|
|
/* Only port and addr are changed, do fast csum update */
|
|
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
|
|
cp->vport, cp->dport);
|
|
if ((*pskb)->ip_summed == CHECKSUM_HW)
|
|
(*pskb)->ip_summed = CHECKSUM_NONE;
|
|
} else {
|
|
/* full checksum calculation */
|
|
udph->check = 0;
|
|
(*pskb)->csum = skb_checksum(*pskb, udphoff,
|
|
(*pskb)->len - udphoff, 0);
|
|
udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
|
|
(*pskb)->len - udphoff,
|
|
cp->protocol,
|
|
(*pskb)->csum);
|
|
if (udph->check == 0)
|
|
udph->check = 0xFFFF;
|
|
(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
static int
|
|
udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
|
|
{
|
|
struct udphdr _udph, *uh;
|
|
unsigned int udphoff = skb->nh.iph->ihl*4;
|
|
|
|
uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
|
|
if (uh == NULL)
|
|
return 0;
|
|
|
|
if (uh->check != 0) {
|
|
switch (skb->ip_summed) {
|
|
case CHECKSUM_NONE:
|
|
skb->csum = skb_checksum(skb, udphoff,
|
|
skb->len - udphoff, 0);
|
|
case CHECKSUM_HW:
|
|
if (csum_tcpudp_magic(skb->nh.iph->saddr,
|
|
skb->nh.iph->daddr,
|
|
skb->len - udphoff,
|
|
skb->nh.iph->protocol,
|
|
skb->csum)) {
|
|
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
|
|
"Failed checksum for");
|
|
return 0;
|
|
}
|
|
break;
|
|
default:
|
|
/* CHECKSUM_UNNECESSARY */
|
|
break;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
/*
 *	Note: the caller guarantees that only one of register_app,
 *	unregister_app or app_conn_bind is called each time.
 */
|
|
|
|
#define UDP_APP_TAB_BITS 4
|
|
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
|
|
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
|
|
|
|
static struct list_head udp_apps[UDP_APP_TAB_SIZE];
|
|
static DEFINE_SPINLOCK(udp_app_lock);
|
|
|
|
/*
 * Map a port number to a bucket index of udp_apps: the port is XORed
 * with itself shifted right by UDP_APP_TAB_BITS and masked down to
 * the table size.
 */
static inline __u16 udp_app_hashkey(__u16 port)
{
	__u16 folded = port ^ (port >> UDP_APP_TAB_BITS);

	return folded & UDP_APP_TAB_MASK;
}
|
|
|
|
|
|
static int udp_register_app(struct ip_vs_app *inc)
|
|
{
|
|
struct ip_vs_app *i;
|
|
__u16 hash, port = inc->port;
|
|
int ret = 0;
|
|
|
|
hash = udp_app_hashkey(port);
|
|
|
|
|
|
spin_lock_bh(&udp_app_lock);
|
|
list_for_each_entry(i, &udp_apps[hash], p_list) {
|
|
if (i->port == port) {
|
|
ret = -EEXIST;
|
|
goto out;
|
|
}
|
|
}
|
|
list_add(&inc->p_list, &udp_apps[hash]);
|
|
atomic_inc(&ip_vs_protocol_udp.appcnt);
|
|
|
|
out:
|
|
spin_unlock_bh(&udp_app_lock);
|
|
return ret;
|
|
}
|
|
|
|
|
|
static void
|
|
udp_unregister_app(struct ip_vs_app *inc)
|
|
{
|
|
spin_lock_bh(&udp_app_lock);
|
|
atomic_dec(&ip_vs_protocol_udp.appcnt);
|
|
list_del(&inc->p_list);
|
|
spin_unlock_bh(&udp_app_lock);
|
|
}
|
|
|
|
|
|
static int udp_app_conn_bind(struct ip_vs_conn *cp)
|
|
{
|
|
int hash;
|
|
struct ip_vs_app *inc;
|
|
int result = 0;
|
|
|
|
/* Default binding: bind app only for NAT */
|
|
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
|
|
return 0;
|
|
|
|
/* Lookup application incarnations and bind the right one */
|
|
hash = udp_app_hashkey(cp->vport);
|
|
|
|
spin_lock(&udp_app_lock);
|
|
list_for_each_entry(inc, &udp_apps[hash], p_list) {
|
|
if (inc->port == cp->vport) {
|
|
if (unlikely(!ip_vs_app_inc_get(inc)))
|
|
break;
|
|
spin_unlock(&udp_app_lock);
|
|
|
|
IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
|
|
"%u.%u.%u.%u:%u to app %s on port %u\n",
|
|
__FUNCTION__,
|
|
NIPQUAD(cp->caddr), ntohs(cp->cport),
|
|
NIPQUAD(cp->vaddr), ntohs(cp->vport),
|
|
inc->name, ntohs(inc->port));
|
|
cp->app = inc;
|
|
if (inc->init_conn)
|
|
result = inc->init_conn(inc, cp);
|
|
goto out;
|
|
}
|
|
}
|
|
spin_unlock(&udp_app_lock);
|
|
|
|
out:
|
|
return result;
|
|
}
|
|
|
|
|
|
static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
|
|
[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
|
|
[IP_VS_UDP_S_LAST] = 2*HZ,
|
|
};
|
|
|
|
static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
|
|
[IP_VS_UDP_S_NORMAL] = "UDP",
|
|
[IP_VS_UDP_S_LAST] = "BUG!",
|
|
};
|
|
|
|
|
|
static int
|
|
udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
|
|
{
|
|
return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
|
|
udp_state_name_table, sname, to);
|
|
}
|
|
|
|
static const char * udp_state_name(int state)
|
|
{
|
|
if (state >= IP_VS_UDP_S_LAST)
|
|
return "ERR!";
|
|
return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
|
|
}
|
|
|
|
static int
|
|
udp_state_transition(struct ip_vs_conn *cp, int direction,
|
|
const struct sk_buff *skb,
|
|
struct ip_vs_protocol *pp)
|
|
{
|
|
cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
|
|
return 1;
|
|
}
|
|
|
|
static void udp_init(struct ip_vs_protocol *pp)
|
|
{
|
|
IP_VS_INIT_HASH_TABLE(udp_apps);
|
|
pp->timeout_table = udp_timeouts;
|
|
}
|
|
|
|
/* Protocol exit hook: intentionally empty, nothing is torn down here. */
static void udp_exit(struct ip_vs_protocol *pp)
{
	/* nothing to do */
}
|
|
|
|
|
|
struct ip_vs_protocol ip_vs_protocol_udp = {
|
|
.name = "UDP",
|
|
.protocol = IPPROTO_UDP,
|
|
.dont_defrag = 0,
|
|
.init = udp_init,
|
|
.exit = udp_exit,
|
|
.conn_schedule = udp_conn_schedule,
|
|
.conn_in_get = udp_conn_in_get,
|
|
.conn_out_get = udp_conn_out_get,
|
|
.snat_handler = udp_snat_handler,
|
|
.dnat_handler = udp_dnat_handler,
|
|
.csum_check = udp_csum_check,
|
|
.state_transition = udp_state_transition,
|
|
.state_name = udp_state_name,
|
|
.register_app = udp_register_app,
|
|
.unregister_app = udp_unregister_app,
|
|
.app_conn_bind = udp_app_conn_bind,
|
|
.debug_packet = ip_vs_tcpudp_debug_packet,
|
|
.timeout_change = NULL,
|
|
.set_state_timeout = udp_set_state_timeout,
|
|
};
|