f87c10a8aa
While forwarding we should not use the protocol path mtu to calculate the mtu for a forwarded packet but instead use the interface mtu. We mark forwarded skbs in ip_forward with IPSKB_FORWARDED, which was introduced for multicast forwarding. As it does not conflict with our usage in the unicast code path, it is perfect for reuse. I moved the functions ip_sk_accept_pmtu, ip_sk_use_pmtu and ip_skb_dst_mtu along with the new ip_dst_mtu_maybe_forward to net/ip.h to fix circular dependencies because of IPSKB_FORWARDED. Because someone might have written software that probes destinations manually and expects the kernel to honour those path mtus, I introduced a new per-namespace "ip_forward_use_pmtu" knob so that this new behaviour can be disabled. We also still use mtus which are locked on a route for forwarding. The reason for this change is that path mtu information can be injected into the kernel via e.g. the icmp_err protocol handler without verification of local sockets. As such, this could cause the IPv4 forwarding path to wrongfully emit fragmentation needed notifications or start to fragment packets along a path. Tunnel and ipsec output paths clear IPCB again, thus IPSKB_FORWARDED won't be set and further fragmentation logic will use the path mtu to determine the fragmentation size. They also recheck the packet size with the help of path mtu discovery and report appropriate errors. Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: David Miller <davem@davemloft.net> Cc: John Heffner <johnwheffner@gmail.com> Cc: Steffen Klassert <steffen.klassert@secunet.com> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> Signed-off-by: David S. Miller <davem@davemloft.net>
136 lines
3.4 KiB
C
136 lines
3.4 KiB
C
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The IP forwarding functionality.
 *
 * Authors:	see ip.c
 *
 * Fixes:
 *		Many		:	Split from ip.c , see ip_input.c for
 *					history.
 *		Dave Gregorich	:	NULL ip_rt_put fix for multicast
 *					routing.
 *		Jos Vos		:	Add call_out_firewall before sending,
 *					use output device for accounting.
 *		Jos Vos		:	Call forward firewall after routing
 *					(always use output device).
 *		Mike McLagan	:	Routing by source
 */
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/icmp.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/slab.h>
|
|
#include <net/sock.h>
|
|
#include <net/ip.h>
|
|
#include <net/tcp.h>
|
|
#include <net/udp.h>
|
|
#include <net/icmp.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/udp.h>
|
|
#include <linux/netfilter_ipv4.h>
|
|
#include <net/checksum.h>
|
|
#include <linux/route.h>
|
|
#include <net/route.h>
|
|
#include <net/xfrm.h>
|
|
|
|
static int ip_forward_finish(struct sk_buff *skb)
|
|
{
|
|
struct ip_options *opt = &(IPCB(skb)->opt);
|
|
|
|
IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
|
|
IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
|
|
|
|
if (unlikely(opt->optlen))
|
|
ip_forward_options(skb);
|
|
|
|
return dst_output(skb);
|
|
}
|
|
|
|
int ip_forward(struct sk_buff *skb)
|
|
{
|
|
u32 mtu;
|
|
struct iphdr *iph; /* Our header */
|
|
struct rtable *rt; /* Route we use */
|
|
struct ip_options *opt = &(IPCB(skb)->opt);
|
|
|
|
if (skb_warn_if_lro(skb))
|
|
goto drop;
|
|
|
|
if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
|
|
goto drop;
|
|
|
|
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
|
|
return NET_RX_SUCCESS;
|
|
|
|
if (skb->pkt_type != PACKET_HOST)
|
|
goto drop;
|
|
|
|
skb_forward_csum(skb);
|
|
|
|
/*
|
|
* According to the RFC, we must first decrease the TTL field. If
|
|
* that reaches zero, we must reply an ICMP control message telling
|
|
* that the packet's lifetime expired.
|
|
*/
|
|
if (ip_hdr(skb)->ttl <= 1)
|
|
goto too_many_hops;
|
|
|
|
if (!xfrm4_route_forward(skb))
|
|
goto drop;
|
|
|
|
rt = skb_rtable(skb);
|
|
|
|
if (opt->is_strictroute && rt->rt_uses_gateway)
|
|
goto sr_failed;
|
|
|
|
IPCB(skb)->flags |= IPSKB_FORWARDED;
|
|
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
|
|
if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
|
|
(ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
|
|
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
|
|
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
|
|
htonl(mtu));
|
|
goto drop;
|
|
}
|
|
|
|
/* We are about to mangle packet. Copy it! */
|
|
if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
|
|
goto drop;
|
|
iph = ip_hdr(skb);
|
|
|
|
/* Decrease ttl after skb cow done */
|
|
ip_decrease_ttl(iph);
|
|
|
|
/*
|
|
* We now generate an ICMP HOST REDIRECT giving the route
|
|
* we calculated.
|
|
*/
|
|
if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb))
|
|
ip_rt_send_redirect(skb);
|
|
|
|
skb->priority = rt_tos2priority(iph->tos);
|
|
|
|
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
|
|
rt->dst.dev, ip_forward_finish);
|
|
|
|
sr_failed:
|
|
/*
|
|
* Strict routing permits no gatewaying
|
|
*/
|
|
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_SR_FAILED, 0);
|
|
goto drop;
|
|
|
|
too_many_hops:
|
|
/* Tell the sender its packet died... */
|
|
IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS);
|
|
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
|
|
drop:
|
|
kfree_skb(skb);
|
|
return NET_RX_DROP;
|
|
}
|