diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index 65f5a7b2646b..d075725bf444 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -365,8 +365,11 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, * powers of 2 writes until it reaches sufficient alignment). * * Based on this we disable the IP header alignment in network drivers. + * We also modify NET_SKB_PAD to be a cacheline in size, thus maintaining + * cacheline alignment of buffers. */ -#define NET_IP_ALIGN 0 +#define NET_IP_ALIGN 0 +#define NET_SKB_PAD L1_CACHE_BYTES #endif #define arch_align_stack(x) (x) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 950dc55e5192..40ccf8cc4239 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -598,20 +598,7 @@ DECLARE_PER_CPU(struct softnet_data,softnet_data); #define HAVE_NETIF_QUEUE -static inline void __netif_schedule(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { - unsigned long flags; - struct softnet_data *sd; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - dev->next_sched = sd->output_queue; - sd->output_queue = dev; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} +extern void __netif_schedule(struct net_device *dev); static inline void netif_schedule(struct net_device *dev) { @@ -675,13 +662,7 @@ static inline void dev_kfree_skb_irq(struct sk_buff *skb) /* Use this variant in places where it could be invoked * either from interrupt or non-interrupt context. */ -static inline void dev_kfree_skb_any(struct sk_buff *skb) -{ - if (in_irq() || irqs_disabled()) - dev_kfree_skb_irq(skb); - else - dev_kfree_skb(skb); -} +extern void dev_kfree_skb_any(struct sk_buff *skb); #define HAVE_NETIF_RX 1 extern int netif_rx(struct sk_buff *skb); @@ -768,22 +749,9 @@ static inline int netif_device_present(struct net_device *dev) return test_bit(__LINK_STATE_PRESENT, &dev->state); } -static inline void netif_device_detach(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_stop_queue(dev); - } -} +extern void netif_device_detach(struct net_device *dev); -static inline void netif_device_attach(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && - netif_running(dev)) { - netif_wake_queue(dev); - __netdev_watchdog_up(dev); - } -} +extern void netif_device_attach(struct net_device *dev); /* * Network interface message level settings @@ -851,20 +819,7 @@ static inline int netif_rx_schedule_prep(struct net_device *dev) * already been called and returned 1. */ -static inline void __netif_rx_schedule(struct net_device *dev) -{ - unsigned long flags; - - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); -} +extern void __netif_rx_schedule(struct net_device *dev); /* Try to reschedule poll. Called by irq handler. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 613b9513f8b9..c4619a428d9b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -941,6 +941,25 @@ static inline void skb_reserve(struct sk_buff *skb, int len) #define NET_IP_ALIGN 2 #endif +/* + * The networking layer reserves some headroom in skb data (via + * dev_alloc_skb). This is used to avoid having to reallocate skb data when + * the header has to grow. In the default case, if the header has to grow + * 16 bytes or less we avoid the reallocation. + * + * Unfortunately this headroom changes the DMA alignment of the resulting + * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive + * on some architectures. An architecture can override this value, + * perhaps setting it to a cacheline in size (since that will maintain + * cacheline alignment of the DMA). It must be a power of 2. + * + * Various parts of the networking layer expect at least 16 bytes of + * headroom, you should not reduce this. + */ +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif + extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); static inline void __skb_trim(struct sk_buff *skb, unsigned int len) @@ -1030,9 +1049,9 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { - struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); + struct sk_buff *skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); if (likely(skb)) - skb_reserve(skb, 16); + skb_reserve(skb, NET_SKB_PAD); return skb; } #else @@ -1070,13 +1089,15 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); + int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) - + skb_headroom(skb); if (delta < 0) delta = 0; if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); + return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) & + ~(NET_SKB_PAD-1), 0, GFP_ATOMIC); return 0; } diff --git a/include/net/tcp.h b/include/net/tcp.h index 9418f4d1afbb..3c989db8a7aa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -405,9 +405,6 @@ extern int tcp_disconnect(struct sock *sk, int flags); extern void tcp_unhash(struct sock *sk); -extern int tcp_v4_hash_connecting(struct sock *sk); - - /* From syncookies.c */ extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt); diff --git a/net/core/dev.c b/net/core/dev.c index a3ab11f34153..434220d093aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1080,6 +1080,70 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_unlock(); } + +void __netif_schedule(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { + unsigned long flags; + struct softnet_data *sd; + + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + dev->next_sched = sd->output_queue; + sd->output_queue = dev; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } +} +EXPORT_SYMBOL(__netif_schedule); + +void __netif_rx_schedule(struct net_device *dev) +{ + unsigned long flags; + + local_irq_save(flags); + dev_hold(dev); + list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); + if (dev->quota < 0) + dev->quota += dev->weight; + else + dev->quota = dev->weight; + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__netif_rx_schedule); + +void dev_kfree_skb_any(struct sk_buff *skb) +{ + if (in_irq() || irqs_disabled()) + dev_kfree_skb_irq(skb); + else + dev_kfree_skb(skb); +} +EXPORT_SYMBOL(dev_kfree_skb_any); + + +/* Hot-plugging. */ +void netif_device_detach(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_stop_queue(dev); + } +} +EXPORT_SYMBOL(netif_device_detach); + +void netif_device_attach(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && + netif_running(dev)) { + netif_wake_queue(dev); + __netdev_watchdog_up(dev); + } +} +EXPORT_SYMBOL(netif_device_attach); + + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. diff --git a/net/core/sock.c b/net/core/sock.c index a96ea7dd0fc1..ed2afdb9ea2d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -385,7 +385,21 @@ int sock_setsockopt(struct socket *sock, int level, int optname, val = sysctl_rmem_max; set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - /* FIXME: is this lower bound the right one? */ + /* + * We double it on the way in to account for + * "struct sk_buff" etc. overhead. Applications + * assume that the SO_RCVBUF setting they make will + * allow that much actual data to be received on that + * socket. + * + * Applications are unaware that "struct sk_buff" and + * other overheads allocate from the receive buffer + * during socket buffer allocation. + * + * And after considering the possible alternatives, + * returning the value we actually used in getsockopt + * is the most desirable behavior. + */ if ((val * 2) < SOCK_MIN_RCVBUF) sk->sk_rcvbuf = SOCK_MIN_RCVBUF; else diff --git a/net/dccp/feat.c b/net/dccp/feat.c index e3dd30d36c8a..b39e2a597889 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -204,7 +204,7 @@ static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt, if (rc) { kfree(opt->dccpop_sc->dccpoc_val); kfree(opt->dccpop_sc); - opt->dccpop_sc = 0; + opt->dccpop_sc = NULL; return rc; } @@ -322,7 +322,7 @@ static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk, opt->dccpop_type = type == DCCPO_CHANGE_L ? DCCPO_CONFIRM_R : DCCPO_CONFIRM_L; opt->dccpop_feat = feature; - opt->dccpop_val = 0; + opt->dccpop_val = NULL; opt->dccpop_len = 0; /* change feature */ @@ -523,7 +523,7 @@ int dccp_feat_clone(struct sock *oldsk, struct sock *newsk) * once... */ /* the master socket no longer needs to worry about confirms */ - opt->dccpop_sc = 0; /* it's not a memleak---new socket has it */ + opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */ /* reset state for a new socket */ opt->dccpop_conf = 0; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d2ae9893ca17..a26ff9f44576 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -620,7 +620,7 @@ int dn_dev_set_default(struct net_device *dev, int force) } write_unlock(&dndev_lock); if (old) - dev_put(dev); + dev_put(old); return rv; }