Merge branch 'xmit_list'

David Miller says:

====================
net: Make dev_hard_start_xmit() work fundamentally on lists

After this patch set, dev_hard_start_xmit() will work fundemantally on
any and all SKB lists.

This opens the path for a clean implementation of pulling multiple
packets out during qdisc_restart(), and then passing that blob in one
shot to dev_hard_start_xmit().

There were two main architectural blockers to this:

1) The GSO handling, we kept the original GSO head SKB around simply
   because dev_hard_start_xmit() had no way to communicate to the
   caller how far into the segmented list it was able to go.  Now it
   can, so the head GSO can be liberated immediately.

   All of the special GSO head SKB destructor et al. handling goes
   away too.

2) Validate of VLAN, CSUM, and segmentation characteristics was being
   performed inside of dev_hard_start_xmit().  If want to truly batch,
   we have to let the higher levels to this.  In particular, this is
   now dequeue_skb()'s job.

And with those two issues out of the way, it should now be trivial to
build experiments on top of this patch set, all of the framework
should be there now.  You could do something as simple as:

	skb = q->dequeue(q);
	if (skb)
		skb = validate_xmit_skb(skb, qdisc_dev(q));
	if (skb) {
		struct sk_buff *new, *head = skb;
		int limit = 5;

		do {
			new = q->dequeue(q);
			if (new)
				new = validate_xmit_skb(new, qdisc_dev(q));
			if (new) {
				skb->next = new;
				skb = new;
			}
		} while (new && --limit);
		skb = head;
	}

inside of the else branch of dequeue_skb().

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-09-01 17:40:01 -07:00
commit 53fda7f7f9
9 changed files with 144 additions and 176 deletions

View file

@ -192,8 +192,10 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev)
{
struct dlci_local *dlp = netdev_priv(dev);
if (skb)
netdev_start_xmit(skb, dlp->slave);
if (skb) {
struct netdev_queue *txq = skb_get_tx_queue(dev, skb);
netdev_start_xmit(skb, dlp->slave, txq, false);
}
return NETDEV_TX_OK;
}

View file

@ -2827,8 +2827,9 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
int dev_change_carrier(struct net_device *, bool new_carrier);
int dev_get_phys_port_id(struct net_device *dev,
struct netdev_phys_port_id *ppid);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq);
struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, int *ret);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
@ -3431,17 +3432,24 @@ int __init dev_proc_init(void);
#endif
static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops,
struct sk_buff *skb, struct net_device *dev)
struct sk_buff *skb, struct net_device *dev,
bool more)
{
skb->xmit_more = 0;
skb->xmit_more = more ? 1 : 0;
return ops->ndo_start_xmit(skb, dev);
}
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev)
static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, bool more)
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc;
return __netdev_start_xmit(ops, skb, dev);
rc = __netdev_start_xmit(ops, skb, dev, more);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
}
int netdev_class_create_file_ns(struct class_attribute *class_attr,

View file

@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
}
non_ip:
return __netdev_start_xmit(mpc->old_ops, skb, dev);
return __netdev_start_xmit(mpc->old_ops, skb, dev, false);
}
static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)

View file

@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
return 0;
}
struct dev_gso_cb {
void (*destructor)(struct sk_buff *skb);
};
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
struct dev_gso_cb *cb;
kfree_skb_list(skb->next);
skb->next = NULL;
cb = DEV_GSO_CB(skb);
if (cb->destructor)
cb->destructor(skb);
}
/**
* dev_gso_segment - Perform emulated hardware segmentation on skb.
* @skb: buffer to segment
* @features: device features as applicable to this skb
*
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
/* Verifying header integrity only. */
if (!segs)
return 0;
if (IS_ERR(segs))
return PTR_ERR(segs);
skb->next = segs;
DEV_GSO_CB(skb)->destructor = skb->destructor;
skb->destructor = dev_gso_skb_destructor;
return 0;
}
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
@ -2599,118 +2553,125 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
}
EXPORT_SYMBOL(netif_skb_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
static int xmit_one(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq, bool more)
{
int rc = NETDEV_TX_OK;
unsigned int skb_len;
unsigned int len;
int rc;
if (likely(!skb->next)) {
netdev_features_t features;
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
/*
* If device doesn't need skb->dst, release it right now while
* its hot in this cpu cache
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
len = skb->len;
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev, txq, more);
trace_net_dev_xmit(skb, rc, dev, len);
features = netif_skb_features(skb);
if (vlan_tx_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto)) {
skb = __vlan_put_tag(skb, skb->vlan_proto,
vlan_tx_tag_get(skb));
if (unlikely(!skb))
goto out;
skb->vlan_tci = 0;
}
/* If encapsulation offload request, verify we are testing
* hardware encapsulation features instead of standard
* features for the netdev
*/
if (skb->encapsulation)
features &= dev->hw_enc_features;
if (netif_needs_gso(skb, features)) {
if (unlikely(dev_gso_segment(skb, features)))
goto out_kfree_skb;
if (skb->next)
goto gso;
} else {
if (skb_needs_linearize(skb, features) &&
__skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (skb->encapsulation)
skb_set_inner_transport_header(skb,
skb_checksum_start_offset(skb));
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
}
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
skb_len = skb->len;
trace_net_dev_start_xmit(skb, dev);
rc = netdev_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
}
gso:
do {
struct sk_buff *nskb = skb->next;
skb->next = nskb->next;
nskb->next = NULL;
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(nskb, dev);
skb_len = nskb->len;
trace_net_dev_start_xmit(nskb, dev);
rc = netdev_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
nskb->next = skb->next;
skb->next = nskb;
return rc;
}
txq_trans_update(txq);
if (unlikely(netif_xmit_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
out_kfree_gso_skb:
if (likely(skb->next == NULL)) {
skb->destructor = DEV_GSO_CB(skb)->destructor;
consume_skb(skb);
return rc;
}
out_kfree_skb:
kfree_skb(skb);
out:
return rc;
}
EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
struct netdev_queue *txq, int *ret)
{
struct sk_buff *skb = first;
int rc = NETDEV_TX_OK;
while (skb) {
struct sk_buff *next = skb->next;
skb->next = NULL;
rc = xmit_one(skb, dev, txq, next != NULL);
if (unlikely(!dev_xmit_complete(rc))) {
skb->next = next;
goto out;
}
skb = next;
if (netif_xmit_stopped(txq) && skb) {
rc = NETDEV_TX_BUSY;
break;
}
}
out:
*ret = rc;
return skb;
}
struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features)
{
if (vlan_tx_tag_present(skb) &&
!vlan_hw_offload_capable(features, skb->vlan_proto)) {
skb = __vlan_put_tag(skb, skb->vlan_proto,
vlan_tx_tag_get(skb));
if (skb)
skb->vlan_tci = 0;
}
return skb;
}
struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
{
netdev_features_t features;
if (skb->next)
return skb;
/* If device doesn't need skb->dst, release it right now while
* its hot in this cpu cache
*/
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
features = netif_skb_features(skb);
skb = validate_xmit_vlan(skb, features);
if (unlikely(!skb))
goto out_null;
/* If encapsulation offload request, verify we are testing
* hardware encapsulation features instead of standard
* features for the netdev
*/
if (skb->encapsulation)
features &= dev->hw_enc_features;
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
kfree_skb(skb);
if (IS_ERR(segs))
segs = NULL;
skb = segs;
} else {
if (skb_needs_linearize(skb, features) &&
__skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not
* support checksumming for this protocol, complete
* checksumming here.
*/
if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (skb->encapsulation)
skb_set_inner_transport_header(skb,
skb_checksum_start_offset(skb));
else
skb_set_transport_header(skb,
skb_checksum_start_offset(skb));
if (!(features & NETIF_F_ALL_CSUM) &&
skb_checksum_help(skb))
goto out_kfree_skb;
}
}
return skb;
out_kfree_skb:
kfree_skb(skb);
out_null:
return NULL;
}
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
@ -2922,7 +2883,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
if (!netif_xmit_stopped(txq)) {
__this_cpu_inc(xmit_recursion);
rc = dev_hard_start_xmit(skb, dev, txq);
skb = dev_hard_start_xmit(skb, dev, txq, &rc);
__this_cpu_dec(xmit_recursion);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);

View file

@ -91,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
skb->vlan_tci = 0;
}
status = netdev_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
status = netdev_start_xmit(skb, dev, txq, false);
out:
return status;

View file

@ -3335,11 +3335,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
goto unlock;
}
atomic_inc(&(pkt_dev->skb->users));
ret = netdev_start_xmit(pkt_dev->skb, odev);
ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false);
switch (ret) {
case NETDEV_TX_OK:
txq_trans_update(txq);
pkt_dev->last_ok = 1;
pkt_dev->sofar++;
pkt_dev->seq_num++;

View file

@ -258,11 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb)
local_bh_disable();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq)) {
ret = netdev_start_xmit(skb, dev);
if (ret == NETDEV_TX_OK)
txq_trans_update(txq);
}
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
local_bh_enable();

View file

@ -70,8 +70,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
} else
skb = NULL;
} else {
if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq))
if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) {
skb = q->dequeue(q);
if (skb)
skb = validate_xmit_skb(skb, qdisc_dev(q));
}
}
return skb;
@ -126,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
ret = dev_hard_start_xmit(skb, dev, txq);
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);

View file

@ -316,8 +316,8 @@ static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int length = qdisc_pkt_len(skb);
if (!netif_xmit_frozen_or_stopped(slave_txq) &&
netdev_start_xmit(skb, slave) == NETDEV_TX_OK) {
txq_trans_update(slave_txq);
netdev_start_xmit(skb, slave, slave_txq, false) ==
NETDEV_TX_OK) {
__netif_tx_unlock(slave_txq);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);