v4 GRE: Add TCP segmentation offload for GRE
The following patch adds a GRE protocol offload handler so that skb_gso_segment() can segment GRE packets. An SKB GSO CB is added to keep track of the total header length so that skb_segment() can push the entire header; e.g. in the case of GRE, skb_segment() needs to push the inner and outer headers to every segment. A new NETIF_F_GRE_GSO feature is added for devices which support HW GRE TSO offload. Currently no devices support it, therefore GRE GSO always falls back to software GSO. [ Compute pkt_len before ip_local_out() invocation. -DaveM ] Signed-off-by: Pravin B Shelar <pshelar@nicira.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
05e8ef4ab2
commit
68c3316311
12 changed files with 226 additions and 11 deletions
|
@ -41,7 +41,7 @@ enum {
|
|||
NETIF_F_TSO_ECN_BIT, /* ... TCP ECN support */
|
||||
NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */
|
||||
NETIF_F_FSO_BIT, /* ... FCoE segmentation */
|
||||
NETIF_F_GSO_RESERVED1, /* ... free (fill GSO_MASK to 8 bits) */
|
||||
NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */
|
||||
/**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */
|
||||
NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */
|
||||
= NETIF_F_GSO_LAST,
|
||||
|
@ -102,6 +102,7 @@ enum {
|
|||
#define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED)
|
||||
#define NETIF_F_RXFCS __NETIF_F(RXFCS)
|
||||
#define NETIF_F_RXALL __NETIF_F(RXALL)
|
||||
#define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE)
|
||||
|
||||
/* Features valid for ethtool to change */
|
||||
/* = all defined minus driver/device-class-related */
|
||||
|
|
|
@ -314,6 +314,8 @@ enum {
|
|||
SKB_GSO_TCPV6 = 1 << 4,
|
||||
|
||||
SKB_GSO_FCOE = 1 << 5,
|
||||
|
||||
SKB_GSO_GRE = 1 << 6,
|
||||
};
|
||||
|
||||
#if BITS_PER_LONG > 32
|
||||
|
@ -2732,6 +2734,21 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Keeps track of mac header offset relative to skb->head.
|
||||
* It is useful for TSO of Tunneling protocol. e.g. GRE.
|
||||
* For non-tunnel skb it points to skb_mac_header() and for
|
||||
* tunnel skb it points to outer mac header. */
|
||||
struct skb_gso_cb {
|
||||
int mac_offset;
|
||||
};
|
||||
#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
|
||||
|
||||
static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
|
||||
{
|
||||
return (skb_mac_header(inner_skb) - inner_skb->head) -
|
||||
SKB_GSO_CB(inner_skb)->mac_offset;
|
||||
}
|
||||
|
||||
static inline bool skb_is_gso(const struct sk_buff *skb)
|
||||
{
|
||||
return skb_shinfo(skb)->gso_size;
|
||||
|
|
|
@ -2413,6 +2413,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
|
|||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
|
||||
skb_reset_mac_header(skb);
|
||||
skb_reset_mac_len(skb);
|
||||
|
||||
|
|
|
@ -77,6 +77,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
|
|||
[NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
|
||||
[NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
|
||||
[NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
|
||||
[NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
|
||||
|
||||
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
|
||||
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
|
||||
|
|
|
@ -2738,6 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
|
|||
unsigned int mss = skb_shinfo(skb)->gso_size;
|
||||
unsigned int doffset = skb->data - skb_mac_header(skb);
|
||||
unsigned int offset = doffset;
|
||||
unsigned int tnl_hlen = skb_tnl_header_len(skb);
|
||||
unsigned int headroom;
|
||||
unsigned int len;
|
||||
int sg = !!(features & NETIF_F_SG);
|
||||
|
@ -2814,7 +2815,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
|
|||
skb_set_network_header(nskb, skb->mac_len);
|
||||
nskb->transport_header = (nskb->network_header +
|
||||
skb_network_header_len(skb));
|
||||
skb_copy_from_linear_data(skb, nskb->data, doffset);
|
||||
|
||||
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
|
||||
nskb->data - tnl_hlen,
|
||||
doffset + tnl_hlen);
|
||||
|
||||
if (fskb != skb_shinfo(skb)->frag_list)
|
||||
continue;
|
||||
|
|
|
@ -1287,6 +1287,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
|
|||
SKB_GSO_UDP |
|
||||
SKB_GSO_DODGY |
|
||||
SKB_GSO_TCP_ECN |
|
||||
SKB_GSO_GRE |
|
||||
0)))
|
||||
goto out;
|
||||
|
||||
|
|
118
net/ipv4/gre.c
118
net/ipv4/gre.c
|
@ -19,6 +19,7 @@
|
|||
#include <linux/in.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/if_tunnel.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <net/protocol.h>
|
||||
#include <net/gre.h>
|
||||
|
@ -26,6 +27,11 @@
|
|||
|
||||
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
|
||||
static DEFINE_SPINLOCK(gre_proto_lock);
|
||||
struct gre_base_hdr {
|
||||
__be16 flags;
|
||||
__be16 protocol;
|
||||
};
|
||||
#define GRE_HEADER_SECTION 4
|
||||
|
||||
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
|
||||
{
|
||||
|
@ -112,12 +118,117 @@ static void gre_err(struct sk_buff *skb, u32 info)
|
|||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
|
||||
netdev_features_t features)
|
||||
{
|
||||
struct sk_buff *segs = ERR_PTR(-EINVAL);
|
||||
netdev_features_t enc_features;
|
||||
int ghl = GRE_HEADER_SECTION;
|
||||
struct gre_base_hdr *greh;
|
||||
int mac_len = skb->mac_len;
|
||||
int tnl_hlen;
|
||||
bool csum;
|
||||
|
||||
if (unlikely(skb_shinfo(skb)->gso_type &
|
||||
~(SKB_GSO_TCPV4 |
|
||||
SKB_GSO_TCPV6 |
|
||||
SKB_GSO_UDP |
|
||||
SKB_GSO_DODGY |
|
||||
SKB_GSO_TCP_ECN |
|
||||
SKB_GSO_GRE)))
|
||||
goto out;
|
||||
|
||||
if (unlikely(!pskb_may_pull(skb, sizeof(*greh))))
|
||||
goto out;
|
||||
|
||||
greh = (struct gre_base_hdr *)skb_transport_header(skb);
|
||||
|
||||
if (greh->flags & GRE_KEY)
|
||||
ghl += GRE_HEADER_SECTION;
|
||||
if (greh->flags & GRE_SEQ)
|
||||
ghl += GRE_HEADER_SECTION;
|
||||
if (greh->flags & GRE_CSUM) {
|
||||
ghl += GRE_HEADER_SECTION;
|
||||
csum = true;
|
||||
} else
|
||||
csum = false;
|
||||
|
||||
/* setup inner skb. */
|
||||
if (greh->protocol == htons(ETH_P_TEB)) {
|
||||
struct ethhdr *eth = eth_hdr(skb);
|
||||
skb->protocol = eth->h_proto;
|
||||
} else {
|
||||
skb->protocol = greh->protocol;
|
||||
}
|
||||
|
||||
skb->encapsulation = 0;
|
||||
|
||||
if (unlikely(!pskb_may_pull(skb, ghl)))
|
||||
goto out;
|
||||
__skb_pull(skb, ghl);
|
||||
skb_reset_mac_header(skb);
|
||||
skb_set_network_header(skb, skb_inner_network_offset(skb));
|
||||
skb->mac_len = skb_inner_network_offset(skb);
|
||||
|
||||
/* segment inner packet. */
|
||||
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
|
||||
segs = skb_mac_gso_segment(skb, enc_features);
|
||||
if (!segs || IS_ERR(segs))
|
||||
goto out;
|
||||
|
||||
skb = segs;
|
||||
tnl_hlen = skb_tnl_header_len(skb);
|
||||
do {
|
||||
__skb_push(skb, ghl);
|
||||
if (csum) {
|
||||
__be32 *pcsum;
|
||||
|
||||
if (skb_has_shared_frag(skb)) {
|
||||
int err;
|
||||
|
||||
err = __skb_linearize(skb);
|
||||
if (err) {
|
||||
kfree_skb(segs);
|
||||
segs = ERR_PTR(err);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
greh = (struct gre_base_hdr *)(skb->data);
|
||||
pcsum = (__be32 *)(greh + 1);
|
||||
*pcsum = 0;
|
||||
*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
|
||||
}
|
||||
__skb_push(skb, tnl_hlen - ghl);
|
||||
|
||||
skb_reset_mac_header(skb);
|
||||
skb_set_network_header(skb, mac_len);
|
||||
skb->mac_len = mac_len;
|
||||
} while ((skb = skb->next));
|
||||
out:
|
||||
return segs;
|
||||
}
|
||||
|
||||
static int gre_gso_send_check(struct sk_buff *skb)
|
||||
{
|
||||
if (!skb->encapsulation)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct net_protocol net_gre_protocol = {
|
||||
.handler = gre_rcv,
|
||||
.err_handler = gre_err,
|
||||
.netns_ok = 1,
|
||||
};
|
||||
|
||||
static const struct net_offload gre_offload = {
|
||||
.callbacks = {
|
||||
.gso_send_check = gre_gso_send_check,
|
||||
.gso_segment = gre_gso_segment,
|
||||
},
|
||||
};
|
||||
|
||||
static int __init gre_init(void)
|
||||
{
|
||||
pr_info("GRE over IPv4 demultiplexor driver\n");
|
||||
|
@ -127,11 +238,18 @@ static int __init gre_init(void)
|
|||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (inet_add_offload(&gre_offload, IPPROTO_GRE)) {
|
||||
pr_err("can't add protocol offload\n");
|
||||
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __exit gre_exit(void)
|
||||
{
|
||||
inet_del_offload(&gre_offload, IPPROTO_GRE);
|
||||
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
|
||||
}
|
||||
|
||||
|
|
|
@ -735,8 +735,33 @@ static int ipgre_rcv(struct sk_buff *skb)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct sk_buff *handle_offloads(struct sk_buff *skb)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (skb_is_gso(skb)) {
|
||||
err = skb_unclone(skb, GFP_ATOMIC);
|
||||
if (unlikely(err))
|
||||
goto error;
|
||||
skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
|
||||
return skb;
|
||||
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
||||
err = skb_checksum_help(skb);
|
||||
if (unlikely(err))
|
||||
goto error;
|
||||
}
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
|
||||
return skb;
|
||||
|
||||
error:
|
||||
kfree_skb(skb);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);
|
||||
struct ip_tunnel *tunnel = netdev_priv(dev);
|
||||
const struct iphdr *old_iph;
|
||||
const struct iphdr *tiph;
|
||||
|
@ -751,10 +776,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
|
|||
__be32 dst;
|
||||
int mtu;
|
||||
u8 ttl;
|
||||
int err;
|
||||
int pkt_len;
|
||||
|
||||
if (skb->ip_summed == CHECKSUM_PARTIAL &&
|
||||
skb_checksum_help(skb))
|
||||
goto tx_error;
|
||||
skb = handle_offloads(skb);
|
||||
if (IS_ERR(skb)) {
|
||||
dev->stats.tx_dropped++;
|
||||
return NETDEV_TX_OK;
|
||||
}
|
||||
|
||||
if (!skb->encapsulation) {
|
||||
skb_reset_inner_headers(skb);
|
||||
skb->encapsulation = 1;
|
||||
}
|
||||
|
||||
old_iph = ip_hdr(skb);
|
||||
|
||||
|
@ -855,7 +889,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
|
|||
if (skb->protocol == htons(ETH_P_IP)) {
|
||||
df |= (old_iph->frag_off&htons(IP_DF));
|
||||
|
||||
if ((old_iph->frag_off&htons(IP_DF)) &&
|
||||
if (!skb_is_gso(skb) &&
|
||||
(old_iph->frag_off&htons(IP_DF)) &&
|
||||
mtu < ntohs(old_iph->tot_len)) {
|
||||
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
|
||||
ip_rt_put(rt);
|
||||
|
@ -875,7 +910,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
|
|||
}
|
||||
}
|
||||
|
||||
if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
|
||||
if (!skb_is_gso(skb) &&
|
||||
mtu >= IPV6_MIN_MTU &&
|
||||
mtu < skb->len - tunnel->hlen + gre_hlen) {
|
||||
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
|
||||
ip_rt_put(rt);
|
||||
goto tx_error;
|
||||
|
@ -936,6 +973,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
|
|||
iph->daddr = fl4.daddr;
|
||||
iph->saddr = fl4.saddr;
|
||||
iph->ttl = ttl;
|
||||
iph->id = 0;
|
||||
|
||||
if (ttl == 0) {
|
||||
if (skb->protocol == htons(ETH_P_IP))
|
||||
|
@ -964,9 +1002,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
|
|||
*ptr = tunnel->parms.o_key;
|
||||
ptr--;
|
||||
}
|
||||
if (tunnel->parms.o_flags&GRE_CSUM) {
|
||||
/* Skip GRE checksum if skb is getting offloaded. */
|
||||
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
|
||||
(tunnel->parms.o_flags&GRE_CSUM)) {
|
||||
int offset = skb_transport_offset(skb);
|
||||
|
||||
if (skb_has_shared_frag(skb)) {
|
||||
err = __skb_linearize(skb);
|
||||
if (err) {
|
||||
ip_rt_put(rt);
|
||||
goto tx_error;
|
||||
}
|
||||
}
|
||||
|
||||
*ptr = 0;
|
||||
*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
|
||||
skb->len - offset,
|
||||
|
@ -974,7 +1022,19 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
|
|||
}
|
||||
}
|
||||
|
||||
iptunnel_xmit(skb, dev);
|
||||
nf_reset(skb);
|
||||
|
||||
pkt_len = skb->len - skb_transport_offset(skb);
|
||||
err = ip_local_out(skb);
|
||||
if (likely(net_xmit_eval(err) == 0)) {
|
||||
u64_stats_update_begin(&tstats->syncp);
|
||||
tstats->tx_bytes += pkt_len;
|
||||
tstats->tx_packets++;
|
||||
u64_stats_update_end(&tstats->syncp);
|
||||
} else {
|
||||
dev->stats.tx_errors++;
|
||||
dev->stats.tx_aborted_errors++;
|
||||
}
|
||||
return NETDEV_TX_OK;
|
||||
|
||||
#if IS_ENABLED(CONFIG_IPV6)
|
||||
|
@ -1044,6 +1104,11 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
|
|||
mtu = 68;
|
||||
|
||||
tunnel->hlen = addend;
|
||||
/* TCP offload with GRE SEQ is not supported. */
|
||||
if (!(tunnel->parms.o_flags & GRE_SEQ)) {
|
||||
dev->features |= NETIF_F_GSO_SOFTWARE;
|
||||
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
|
||||
}
|
||||
|
||||
return mtu;
|
||||
}
|
||||
|
@ -1593,6 +1658,9 @@ static void ipgre_tap_setup(struct net_device *dev)
|
|||
|
||||
dev->iflink = 0;
|
||||
dev->features |= NETIF_F_NETNS_LOCAL;
|
||||
|
||||
dev->features |= GRE_FEATURES;
|
||||
dev->hw_features |= GRE_FEATURES;
|
||||
}
|
||||
|
||||
static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
|
||||
|
|
|
@ -3043,6 +3043,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
|
|||
SKB_GSO_DODGY |
|
||||
SKB_GSO_TCP_ECN |
|
||||
SKB_GSO_TCPV6 |
|
||||
SKB_GSO_GRE |
|
||||
0) ||
|
||||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
|
||||
goto out;
|
||||
|
|
|
@ -2305,7 +2305,8 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
|
|||
/* Packet is from an untrusted source, reset gso_segs. */
|
||||
int type = skb_shinfo(skb)->gso_type;
|
||||
|
||||
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
|
||||
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
|
||||
SKB_GSO_GRE) ||
|
||||
!(type & (SKB_GSO_UDP))))
|
||||
goto out;
|
||||
|
||||
|
|
|
@ -99,6 +99,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
|
|||
~(SKB_GSO_UDP |
|
||||
SKB_GSO_DODGY |
|
||||
SKB_GSO_TCP_ECN |
|
||||
SKB_GSO_GRE |
|
||||
SKB_GSO_TCPV6 |
|
||||
0)))
|
||||
goto out;
|
||||
|
|
|
@ -56,7 +56,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
|
|||
/* Packet is from an untrusted source, reset gso_segs. */
|
||||
int type = skb_shinfo(skb)->gso_type;
|
||||
|
||||
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
|
||||
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
|
||||
SKB_GSO_GRE) ||
|
||||
!(type & (SKB_GSO_UDP))))
|
||||
goto out;
|
||||
|
||||
|
|
Loading…
Reference in a new issue