tunneling: Add generic Tunnel segmentation.

Adds generic tunneling offloading support for IPv4-UDP based
tunnels.
GSO type is added to request this offload for a skb.
netdev feature NETIF_F_UDP_TUNNEL is added for hardware offloaded
udp-tunnel support. Currently no device supports this feature,
software offload is used.

This can be used by tunneling protocols like VXLAN.

CC: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Pravin B Shelar 2013-03-07 13:21:51 +00:00 committed by David S. Miller
parent aefbd2b3c2
commit 7313626745
8 changed files with 111 additions and 30 deletions

View file

@ -42,9 +42,9 @@ enum {
NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */
NETIF_F_FSO_BIT, /* ... FCoE segmentation */
NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */
/**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */
NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */
= NETIF_F_GSO_LAST,
NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
NETIF_F_GSO_UDP_TUNNEL_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */
@ -103,6 +103,7 @@ enum {
#define NETIF_F_RXFCS __NETIF_F(RXFCS)
#define NETIF_F_RXALL __NETIF_F(RXALL)
#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE)
#define NETIF_F_UDP_TUNNEL __NETIF_F(UDP_TUNNEL)
/* Features valid for ethtool to change */
/* = all defined minus driver/device-class-related */

View file

@ -316,6 +316,8 @@ enum {
SKB_GSO_FCOE = 1 << 5,
SKB_GSO_GRE = 1 << 6,
SKB_GSO_UDP_TUNNEL = 1 << 7,
};
#if BITS_PER_LONG > 32

View file

@ -78,6 +78,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",

View file

@ -1283,6 +1283,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
int ihl;
int id;
unsigned int offset = 0;
bool tunnel;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_TCPV4 |
@ -1290,6 +1291,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_UDP_TUNNEL |
0)))
goto out;
@ -1304,6 +1306,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, ihl)))
goto out;
tunnel = !!skb->encapsulation;
__skb_pull(skb, ihl);
skb_reset_transport_header(skb);
iph = ip_hdr(skb);
@ -1323,7 +1327,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
skb = segs;
do {
iph = ip_hdr(skb);
if (proto == IPPROTO_UDP) {
if (!tunnel && proto == IPPROTO_UDP) {
iph->id = htons(id);
iph->frag_off = htons(offset >> 3);
if (skb->next != NULL)

View file

@ -3044,6 +3044,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_TCPV6 |
SKB_GSO_GRE |
SKB_GSO_UDP_TUNNEL |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;

View file

@ -2272,31 +2272,88 @@ void __init udp_init(void)
int udp4_ufo_send_check(struct sk_buff *skb)
{
const struct iphdr *iph;
struct udphdr *uh;
if (!pskb_may_pull(skb, sizeof(*uh)))
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
return -EINVAL;
iph = ip_hdr(skb);
uh = udp_hdr(skb);
if (likely(!skb->encapsulation)) {
const struct iphdr *iph;
struct udphdr *uh;
uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
IPPROTO_UDP, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
iph = ip_hdr(skb);
uh = udp_hdr(skb);
uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
IPPROTO_UDP, 0);
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
}
return 0;
}
static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
int mac_len = skb->mac_len;
int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
int outer_hlen;
netdev_features_t enc_features;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
skb->encapsulation = 0;
__skb_pull(skb, tnl_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
segs = skb_mac_gso_segment(skb, enc_features);
if (!segs || IS_ERR(segs))
goto out;
outer_hlen = skb_tnl_header_len(skb);
skb = segs;
do {
struct udphdr *uh;
int udp_offset = outer_hlen - tnl_hlen;
skb->mac_len = mac_len;
skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
skb_set_transport_header(skb, udp_offset);
uh = udp_hdr(skb);
uh->len = htons(skb->len - udp_offset);
/* csum segment if tunnel sets skb with csum. */
if (unlikely(uh->check)) {
struct iphdr *iph = ip_hdr(skb);
uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
skb->len - udp_offset,
IPPROTO_UDP, 0);
uh->check = csum_fold(skb_checksum(skb, udp_offset,
skb->len - udp_offset, 0));
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
skb->ip_summed = CHECKSUM_NONE;
} while ((skb = skb->next));
out:
return segs;
}
struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
int offset;
__wsum csum;
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
goto out;
@ -2306,6 +2363,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
int type = skb_shinfo(skb)->gso_type;
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_GRE) ||
!(type & (SKB_GSO_UDP))))
goto out;
@ -2316,20 +2374,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
* do checksum of UDP packets sent as multiple IP fragments.
*/
offset = skb_checksum_start_offset(skb);
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
*(__sum16 *)(skb->data + offset) = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
/* Fragment the skb. IP headers of the fragments are updated in
* inet_gso_segment()
*/
segs = skb_segment(skb, features);
if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
segs = skb_udp_tunnel_segment(skb, features);
else {
int offset;
__wsum csum;
/* Do software UFO. Complete and fill in the UDP checksum as
* HW cannot do checksum of UDP packets sent as multiple
* IP fragments.
*/
offset = skb_checksum_start_offset(skb);
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
*(__sum16 *)(skb->data + offset) = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
segs = skb_segment(skb, features);
}
out:
return segs;
}

View file

@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_TCPV6 |
0)))
goto out;

View file

@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
/* UDP Tunnel offload on ipv6 is not yet supported. */
if (skb->encapsulation)
return -EINVAL;
if (!pskb_may_pull(skb, sizeof(*uh)))
return -EINVAL;
@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
/* Packet is from an untrusted source, reset gso_segs. */
int type = skb_shinfo(skb)->gso_type;
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
if (unlikely(type & ~(SKB_GSO_UDP |
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_GRE) ||
!(type & (SKB_GSO_UDP))))
goto out;