packet: vnet_hdr support for tpacket_rcv

Support socket option PACKET_VNET_HDR together with PACKET_RX_RING.
When enabled, a struct virtio_net_hdr precedes the packet data in
each ring slot. PACKET_TX_RING is still rejected with -EINVAL on a
socket that has PACKET_VNET_HDR set.

Verified with the test program at
github.com/wdebruij/kerneltools/blob/master/tests/psock_rxring_vnet.c
Sample output for one received packet:

  pkt: 1454269209.798420 len=5066
  vnet: gso_type=tcpv4 gso_size=1448 hlen=66 ecn=off
  csum: start=34 off=16
  eth: proto=0x800
  ip: src=<masked> dst=<masked> proto=6 len=5052

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Willem de Bruijn 2016-02-03 18:02:15 -05:00 committed by David S. Miller
parent 16cc140045
commit 58d19b19cd

View file

@ -2206,7 +2206,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
unsigned int maclen = skb_network_offset(skb); unsigned int maclen = skb_network_offset(skb);
netoff = TPACKET_ALIGN(po->tp_hdrlen + netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) + (maclen < 16 ? 16 : maclen)) +
po->tp_reserve; po->tp_reserve;
if (po->has_vnet_hdr)
netoff += sizeof(struct virtio_net_hdr);
macoff = netoff - maclen; macoff = netoff - maclen;
} }
if (po->tp_version <= TPACKET_V2) { if (po->tp_version <= TPACKET_V2) {
@ -2243,7 +2245,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
h.raw = packet_current_rx_frame(po, skb, h.raw = packet_current_rx_frame(po, skb,
TP_STATUS_KERNEL, (macoff+snaplen)); TP_STATUS_KERNEL, (macoff+snaplen));
if (!h.raw) if (!h.raw)
goto ring_is_full; goto drop_n_account;
if (po->tp_version <= TPACKET_V2) { if (po->tp_version <= TPACKET_V2) {
packet_increment_rx_head(po, &po->rx_ring); packet_increment_rx_head(po, &po->rx_ring);
/* /*
@ -2262,6 +2264,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
} }
spin_unlock(&sk->sk_receive_queue.lock); spin_unlock(&sk->sk_receive_queue.lock);
if (po->has_vnet_hdr) {
if (__packet_rcv_vnet(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr))) {
spin_lock(&sk->sk_receive_queue.lock);
goto drop_n_account;
}
}
skb_copy_bits(skb, 0, h.raw + macoff, snaplen); skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
@ -2357,7 +2367,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
kfree_skb(skb); kfree_skb(skb);
return 0; return 0;
ring_is_full: drop_n_account:
po->stats.stats1.tp_drops++; po->stats.stats1.tp_drops++;
spin_unlock(&sk->sk_receive_queue.lock); spin_unlock(&sk->sk_receive_queue.lock);
@ -3587,7 +3597,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
} }
if (optlen < len) if (optlen < len)
return -EINVAL; return -EINVAL;
if (pkt_sk(sk)->has_vnet_hdr) if (pkt_sk(sk)->has_vnet_hdr &&
optname == PACKET_TX_RING)
return -EINVAL; return -EINVAL;
if (copy_from_user(&req_u.req, optval, len)) if (copy_from_user(&req_u.req, optval, len))
return -EFAULT; return -EFAULT;