0d5501c1c8
Currently the functionality to untag traffic on input resides as part of the vlan module and is built only when VLAN support is enabled in the kernel. When VLAN is disabled, the function vlan_untag() turns into a stub and doesn't really untag the packets. This seems to create an interesting interaction between VMs supporting checksum offloading and some network drivers. There are some drivers that do not allow the user to change the tx-vlan-offload feature of the driver. These drivers also seem to assume that any VLAN-tagged traffic they transmit will have the vlan information in the vlan_tci and not in the vlan header already in the skb. When transmitting skbs that already have tagged data with partial checksum set, the checksum doesn't appear to be updated correctly by the card, thus resulting in a failure to establish TCP connections. The following is a packet trace taken on the receiver where the sender is a VM with a VLAN configured. The host the VM is running on does not have VLAN support and the outgoing interface on the host is tg3: 10:12:43.503055 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27243, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x48d9), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294837885 ecr 0,nop,wscale 7], length 0 10:12:44.505556 52:54:00:ae:42:3f > 28:d2:44:7d:c2:de, ethertype 802.1Q (0x8100), length 78: vlan 100, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 27244, offset 0, flags [DF], proto TCP (6), length 60) 10.0.100.1.58545 > 10.0.100.10.ircu-2: Flags [S], cksum 0xdc39 (incorrect -> 0x44ee), seq 1069378582, win 29200, options [mss 1460,sackOK,TS val 4294838888 ecr 0,nop,wscale 7], length 0 This connection finally times out. I only have access to the TG3 hardware in this configuration, thus have only tested this with the TG3 driver.
There are a lot of other drivers that do not permit user changes to vlan acceleration features, and I don't know if they all suffer from a similar issue. This patch attempts to fix this another way. It moves the vlan header stripping code out of the vlan module and always builds it into the kernel network core. This way, even if vlan is not supported on a virtualization host, the virtual machines running on top of such a host will still work with VLANs enabled. CC: Patrick McHardy <kaber@trash.net> CC: Nithin Nayak Sujir <nsujir@broadcom.com> CC: Michael Chan <mchan@broadcom.com> CC: Jiri Pirko <jiri@resnulli.us> Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com> Acked-by: Jiri Pirko <jiri@resnulli.us> Signed-off-by: David S. Miller <davem@davemloft.net>
493 lines
13 KiB
C
493 lines
13 KiB
C
/*
|
|
* VLAN An implementation of 802.1Q VLAN tagging.
|
|
*
|
|
* Authors: Ben Greear <greearb@candelatech.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*
|
|
*/
|
|
#ifndef _LINUX_IF_VLAN_H_
|
|
#define _LINUX_IF_VLAN_H_
|
|
|
|
#include <linux/netdevice.h>
|
|
#include <linux/etherdevice.h>
|
|
#include <linux/rtnetlink.h>
|
|
#include <linux/bug.h>
|
|
#include <uapi/linux/if_vlan.h>
|
|
|
|
/* The additional bytes required by VLAN (in addition to the Ethernet header). */
#define VLAN_HLEN		4
/* Total octets in header. */
#define VLAN_ETH_HLEN		18
/* Min. octets in frame sans FCS. */
#define VLAN_ETH_ZLEN		64

/*
 * According to 802.3ac, the packet can be 4 bytes longer. --Klika Jan
 */
/* Max. octets in payload. */
#define VLAN_ETH_DATA_LEN	1500
/* Max. octets in frame sans FCS. */
#define VLAN_ETH_FRAME_LEN	1518
|
|
|
|
/*
|
|
* struct vlan_hdr - vlan header
|
|
* @h_vlan_TCI: priority and VLAN ID
|
|
* @h_vlan_encapsulated_proto: packet type ID or len
|
|
*/
|
|
struct vlan_hdr {
|
|
__be16 h_vlan_TCI;
|
|
__be16 h_vlan_encapsulated_proto;
|
|
};
|
|
|
|
/**
|
|
* struct vlan_ethhdr - vlan ethernet header (ethhdr + vlan_hdr)
|
|
* @h_dest: destination ethernet address
|
|
* @h_source: source ethernet address
|
|
* @h_vlan_proto: ethernet protocol
|
|
* @h_vlan_TCI: priority and VLAN ID
|
|
* @h_vlan_encapsulated_proto: packet type ID or len
|
|
*/
|
|
struct vlan_ethhdr {
|
|
unsigned char h_dest[ETH_ALEN];
|
|
unsigned char h_source[ETH_ALEN];
|
|
__be16 h_vlan_proto;
|
|
__be16 h_vlan_TCI;
|
|
__be16 h_vlan_encapsulated_proto;
|
|
};
|
|
|
|
#include <linux/skbuff.h>
|
|
|
|
/*
 * View the MAC header of @skb, as reported by skb_mac_header(), as a
 * struct vlan_ethhdr. No check is made that the frame is VLAN tagged.
 */
static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
{
	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);

	return veth;
}
|
|
|
|
/* Priority Code Point */
#define VLAN_PRIO_MASK		0xe000
#define VLAN_PRIO_SHIFT		13
/* Canonical Format Indicator */
#define VLAN_CFI_MASK		0x1000
/* The CFI bit doubles as the "tag present" marker. */
#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
/* VLAN Identifier */
#define VLAN_VID_MASK		0x0fff
#define VLAN_N_VID		4096
|
|
|
|
/* found in socket.c */
|
|
extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
|
|
|
|
static inline bool is_vlan_dev(struct net_device *dev)
|
|
{
|
|
return dev->priv_flags & IFF_802_1Q_VLAN;
|
|
}
|
|
|
|
/* Non-zero when the VLAN_TAG_PRESENT bit is set in (__skb)->vlan_tci. */
#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
/* (__skb)->vlan_tci with the VLAN_TAG_PRESENT bit masked off. */
#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
/* Only the VLAN_VID_MASK bits of (__skb)->vlan_tci. */
#define vlan_tx_tag_get_id(__skb)	((__skb)->vlan_tci & VLAN_VID_MASK)
|
|
|
|
/**
|
|
* struct vlan_pcpu_stats - VLAN percpu rx/tx stats
|
|
* @rx_packets: number of received packets
|
|
* @rx_bytes: number of received bytes
|
|
* @rx_multicast: number of received multicast packets
|
|
* @tx_packets: number of transmitted packets
|
|
* @tx_bytes: number of transmitted bytes
|
|
* @syncp: synchronization point for 64bit counters
|
|
* @rx_errors: number of rx errors
|
|
* @tx_dropped: number of tx drops
|
|
*/
|
|
struct vlan_pcpu_stats {
|
|
u64 rx_packets;
|
|
u64 rx_bytes;
|
|
u64 rx_multicast;
|
|
u64 tx_packets;
|
|
u64 tx_bytes;
|
|
struct u64_stats_sync syncp;
|
|
u32 rx_errors;
|
|
u32 tx_dropped;
|
|
};
|
|
|
|
#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
|
|
|
|
extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev,
|
|
__be16 vlan_proto, u16 vlan_id);
|
|
extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
|
|
extern u16 vlan_dev_vlan_id(const struct net_device *dev);
|
|
extern __be16 vlan_dev_vlan_proto(const struct net_device *dev);
|
|
|
|
/**
|
|
* struct vlan_priority_tci_mapping - vlan egress priority mappings
|
|
* @priority: skb priority
|
|
* @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
|
|
* @next: pointer to next struct
|
|
*/
|
|
struct vlan_priority_tci_mapping {
|
|
u32 priority;
|
|
u16 vlan_qos;
|
|
struct vlan_priority_tci_mapping *next;
|
|
};
|
|
|
|
struct proc_dir_entry;
|
|
struct netpoll;
|
|
|
|
/**
|
|
* struct vlan_dev_priv - VLAN private device data
|
|
* @nr_ingress_mappings: number of ingress priority mappings
|
|
* @ingress_priority_map: ingress priority mappings
|
|
* @nr_egress_mappings: number of egress priority mappings
|
|
* @egress_priority_map: hash of egress priority mappings
|
|
* @vlan_proto: VLAN encapsulation protocol
|
|
* @vlan_id: VLAN identifier
|
|
* @flags: device flags
|
|
* @real_dev: underlying netdevice
|
|
* @real_dev_addr: address of underlying netdevice
|
|
* @dent: proc dir entry
|
|
* @vlan_pcpu_stats: ptr to percpu rx stats
|
|
*/
|
|
struct vlan_dev_priv {
|
|
unsigned int nr_ingress_mappings;
|
|
u32 ingress_priority_map[8];
|
|
unsigned int nr_egress_mappings;
|
|
struct vlan_priority_tci_mapping *egress_priority_map[16];
|
|
|
|
__be16 vlan_proto;
|
|
u16 vlan_id;
|
|
u16 flags;
|
|
|
|
struct net_device *real_dev;
|
|
unsigned char real_dev_addr[ETH_ALEN];
|
|
|
|
struct proc_dir_entry *dent;
|
|
struct vlan_pcpu_stats __percpu *vlan_pcpu_stats;
|
|
#ifdef CONFIG_NET_POLL_CONTROLLER
|
|
struct netpoll *netpoll;
|
|
#endif
|
|
unsigned int nest_level;
|
|
};
|
|
|
|
/*
 * Return the netdev_priv() area of @dev typed as struct vlan_dev_priv.
 * No check is made here that @dev actually is a VLAN device.
 */
static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
{
	struct vlan_dev_priv *priv = netdev_priv(dev);

	return priv;
}
|
|
|
|
static inline u16
|
|
vlan_dev_get_egress_qos_mask(struct net_device *dev, u32 skprio)
|
|
{
|
|
struct vlan_priority_tci_mapping *mp;
|
|
|
|
smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
|
|
|
|
mp = vlan_dev_priv(dev)->egress_priority_map[(skprio & 0xF)];
|
|
while (mp) {
|
|
if (mp->priority == skprio) {
|
|
return mp->vlan_qos; /* This should already be shifted
|
|
* to mask correctly with the
|
|
* VLAN's TCI */
|
|
}
|
|
mp = mp->next;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
extern bool vlan_do_receive(struct sk_buff **skb);
|
|
|
|
extern int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid);
|
|
extern void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid);
|
|
|
|
extern int vlan_vids_add_by_dev(struct net_device *dev,
|
|
const struct net_device *by_dev);
|
|
extern void vlan_vids_del_by_dev(struct net_device *dev,
|
|
const struct net_device *by_dev);
|
|
|
|
extern bool vlan_uses_dev(const struct net_device *dev);
|
|
|
|
static inline int vlan_get_encap_level(struct net_device *dev)
|
|
{
|
|
BUG_ON(!is_vlan_dev(dev));
|
|
return vlan_dev_priv(dev)->nest_level;
|
|
}
|
|
#else
|
|
static inline struct net_device *
|
|
__vlan_find_dev_deep_rcu(struct net_device *real_dev,
|
|
__be16 vlan_proto, u16 vlan_id)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev)
|
|
{
|
|
BUG();
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Stub for builds without CONFIG_VLAN_8021Q: calling it is a bug, so it
 * hits BUG(); the 0 return only satisfies the function's return type.
 */
static inline u16
vlan_dev_vlan_id(const struct net_device *dev)
{
	BUG();
	return 0;
}
|
|
|
|
/*
 * Stub for builds without CONFIG_VLAN_8021Q: calling it is a bug, so it
 * hits BUG(); the 0 return only satisfies the function's return type.
 */
static inline __be16
vlan_dev_vlan_proto(const struct net_device *dev)
{
	BUG();
	return 0;
}
|
|
|
|
static inline u16 vlan_dev_get_egress_qos_mask(struct net_device *dev,
|
|
u32 skprio)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline bool vlan_do_receive(struct sk_buff **skb)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* Stub for builds without CONFIG_VLAN_8021Q: does nothing, returns 0. */
static inline int
vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
{
	return 0;
}
|
|
|
|
/* Stub for builds without CONFIG_VLAN_8021Q: intentionally empty. */
static inline void
vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid)
{
}
|
|
|
|
/* Stub for builds without CONFIG_VLAN_8021Q: does nothing, returns 0. */
static inline int
vlan_vids_add_by_dev(struct net_device *dev, const struct net_device *by_dev)
{
	return 0;
}
|
|
|
|
/* Stub for builds without CONFIG_VLAN_8021Q: intentionally empty. */
static inline void
vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev)
{
}
|
|
|
|
static inline bool vlan_uses_dev(const struct net_device *dev)
|
|
{
|
|
return false;
|
|
}
|
|
/*
 * Stub for builds without CONFIG_VLAN_8021Q: calling it is a bug, so it
 * hits BUG(); the 0 return only satisfies the function's return type.
 */
static inline int
vlan_get_encap_level(struct net_device *dev)
{
	BUG();
	return 0;
}
|
|
#endif
|
|
|
|
static inline bool vlan_hw_offload_capable(netdev_features_t features,
|
|
__be16 proto)
|
|
{
|
|
if (proto == htons(ETH_P_8021Q) && features & NETIF_F_HW_VLAN_CTAG_TX)
|
|
return true;
|
|
if (proto == htons(ETH_P_8021AD) && features & NETIF_F_HW_VLAN_STAG_TX)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* vlan_insert_tag - regular VLAN tag inserting
|
|
* @skb: skbuff to tag
|
|
* @vlan_proto: VLAN encapsulation protocol
|
|
* @vlan_tci: VLAN TCI to insert
|
|
*
|
|
* Inserts the VLAN tag into @skb as part of the payload
|
|
* Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
|
|
*
|
|
* Following the skb_unshare() example, in case of error, the calling function
|
|
* doesn't have to worry about freeing the original skb.
|
|
*
|
|
* Does not change skb->protocol so this function can be used during receive.
|
|
*/
|
|
static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
|
|
__be16 vlan_proto, u16 vlan_tci)
|
|
{
|
|
struct vlan_ethhdr *veth;
|
|
|
|
if (skb_cow_head(skb, VLAN_HLEN) < 0) {
|
|
dev_kfree_skb_any(skb);
|
|
return NULL;
|
|
}
|
|
veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN);
|
|
|
|
/* Move the mac addresses to the beginning of the new header. */
|
|
memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN);
|
|
skb->mac_header -= VLAN_HLEN;
|
|
|
|
/* first, the ethernet type */
|
|
veth->h_vlan_proto = vlan_proto;
|
|
|
|
/* now, the TCI */
|
|
veth->h_vlan_TCI = htons(vlan_tci);
|
|
|
|
return skb;
|
|
}
|
|
|
|
/**
|
|
* __vlan_put_tag - regular VLAN tag inserting
|
|
* @skb: skbuff to tag
|
|
* @vlan_tci: VLAN TCI to insert
|
|
*
|
|
* Inserts the VLAN tag into @skb as part of the payload
|
|
* Returns a VLAN tagged skb. If a new skb is created, @skb is freed.
|
|
*
|
|
* Following the skb_unshare() example, in case of error, the calling function
|
|
* doesn't have to worry about freeing the original skb.
|
|
*/
|
|
static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb,
|
|
__be16 vlan_proto, u16 vlan_tci)
|
|
{
|
|
skb = vlan_insert_tag(skb, vlan_proto, vlan_tci);
|
|
if (skb)
|
|
skb->protocol = vlan_proto;
|
|
return skb;
|
|
}
|
|
|
|
/**
|
|
* __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting
|
|
* @skb: skbuff to tag
|
|
* @vlan_proto: VLAN encapsulation protocol
|
|
* @vlan_tci: VLAN TCI to insert
|
|
*
|
|
* Puts the VLAN TCI in @skb->vlan_tci and lets the device do the rest
|
|
*/
|
|
static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb,
|
|
__be16 vlan_proto,
|
|
u16 vlan_tci)
|
|
{
|
|
skb->vlan_proto = vlan_proto;
|
|
skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
|
|
return skb;
|
|
}
|
|
|
|
/**
|
|
* vlan_put_tag - inserts VLAN tag according to device features
|
|
* @skb: skbuff to tag
|
|
* @vlan_tci: VLAN TCI to insert
|
|
*
|
|
* Assumes skb->dev is the target that will xmit this frame.
|
|
* Returns a VLAN tagged skb.
|
|
*/
|
|
static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb,
|
|
__be16 vlan_proto, u16 vlan_tci)
|
|
{
|
|
if (vlan_hw_offload_capable(skb->dev->features, vlan_proto)) {
|
|
return __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
|
|
} else {
|
|
return __vlan_put_tag(skb, vlan_proto, vlan_tci);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* __vlan_get_tag - get the VLAN ID that is part of the payload
|
|
* @skb: skbuff to query
|
|
* @vlan_tci: buffer to store vlaue
|
|
*
|
|
* Returns error if the skb is not of VLAN type
|
|
*/
|
|
static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
|
|
{
|
|
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
|
|
|
|
if (veth->h_vlan_proto != htons(ETH_P_8021Q) &&
|
|
veth->h_vlan_proto != htons(ETH_P_8021AD))
|
|
return -EINVAL;
|
|
|
|
*vlan_tci = ntohs(veth->h_vlan_TCI);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[]
|
|
* @skb: skbuff to query
|
|
* @vlan_tci: buffer to store vlaue
|
|
*
|
|
* Returns error if @skb->vlan_tci is not set correctly
|
|
*/
|
|
static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
|
|
u16 *vlan_tci)
|
|
{
|
|
if (vlan_tx_tag_present(skb)) {
|
|
*vlan_tci = vlan_tx_tag_get(skb);
|
|
return 0;
|
|
} else {
|
|
*vlan_tci = 0;
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
#define HAVE_VLAN_GET_TAG
|
|
|
|
/**
|
|
* vlan_get_tag - get the VLAN ID from the skb
|
|
* @skb: skbuff to query
|
|
* @vlan_tci: buffer to store vlaue
|
|
*
|
|
* Returns error if the skb is not VLAN tagged
|
|
*/
|
|
static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
|
|
{
|
|
if (skb->dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
|
|
return __vlan_hwaccel_get_tag(skb, vlan_tci);
|
|
} else {
|
|
return __vlan_get_tag(skb, vlan_tci);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* vlan_get_protocol - get protocol EtherType.
|
|
* @skb: skbuff to query
|
|
*
|
|
* Returns the EtherType of the packet, regardless of whether it is
|
|
* vlan encapsulated (normal or hardware accelerated) or not.
|
|
*/
|
|
static inline __be16 vlan_get_protocol(const struct sk_buff *skb)
|
|
{
|
|
__be16 protocol = 0;
|
|
|
|
if (vlan_tx_tag_present(skb) ||
|
|
skb->protocol != cpu_to_be16(ETH_P_8021Q))
|
|
protocol = skb->protocol;
|
|
else {
|
|
__be16 proto, *protop;
|
|
protop = skb_header_pointer(skb, offsetof(struct vlan_ethhdr,
|
|
h_vlan_encapsulated_proto),
|
|
sizeof(proto), &proto);
|
|
if (likely(protop))
|
|
protocol = *protop;
|
|
}
|
|
|
|
return protocol;
|
|
}
|
|
|
|
static inline void vlan_set_encap_proto(struct sk_buff *skb,
|
|
struct vlan_hdr *vhdr)
|
|
{
|
|
__be16 proto;
|
|
unsigned short *rawp;
|
|
|
|
/*
|
|
* Was a VLAN packet, grab the encapsulated protocol, which the layer
|
|
* three protocols care about.
|
|
*/
|
|
|
|
proto = vhdr->h_vlan_encapsulated_proto;
|
|
if (ntohs(proto) >= ETH_P_802_3_MIN) {
|
|
skb->protocol = proto;
|
|
return;
|
|
}
|
|
|
|
rawp = (unsigned short *)(vhdr + 1);
|
|
if (*rawp == 0xFFFF)
|
|
/*
|
|
* This is a magic hack to spot IPX packets. Older Novell
|
|
* breaks the protocol design and runs IPX over 802.3 without
|
|
* an 802.2 LLC layer. We look for FFFF which isn't a used
|
|
* 802.2 SSAP/DSAP. This won't work for fault tolerant netware
|
|
* but does for the rest.
|
|
*/
|
|
skb->protocol = htons(ETH_P_802_3);
|
|
else
|
|
/*
|
|
* Real 802.2 LLC
|
|
*/
|
|
skb->protocol = htons(ETH_P_802_2);
|
|
}
|
|
|
|
#endif /* !(_LINUX_IF_VLAN_H_) */
|