From 247df4548fdbb0f263aaa0386bbaf52bc359a972 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 18 Mar 2008 17:15:58 -0700 Subject: [PATCH 01/13] [RT2X00] drivers/net/wireless/rt2x00/rt2x00dev.c: remove dead code, fix warning Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/net/wireless/rt2x00/rt2x00dev.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index bc846ba12f63..bd305f7f3efd 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1145,9 +1145,6 @@ static int rt2x00lib_initialize(struct rt2x00_dev *rt2x00dev) return 0; -exit_unitialize: - rt2x00lib_uninitialize(rt2x00dev); - exit: rt2x00lib_free_ring_entries(rt2x00dev); From 6aebb9b280e5662ece41cf570e25e61795443985 Mon Sep 17 00:00:00 2001 From: Roel Kluin <12o3l@tiscali.nl> Date: Thu, 20 Mar 2008 15:06:23 -0700 Subject: [PATCH 02/13] [NETFILTER]: nf_conntrack_h323: logical-bitwise & confusion in process_setup() logical-bitwise & confusion Signed-off-by: Roel Kluin <12o3l@tiscali.nl> Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_h323_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 62137879e6aa..898f1922b5b8 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -842,7 +842,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, set_h225_addr = rcu_dereference(set_h225_addr_hook); if ((setup->options & eSetup_UUIE_destCallSignalAddress) && - (set_h225_addr) && ct->status && IPS_NAT_MASK && + (set_h225_addr) && ct->status & IPS_NAT_MASK && get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { From d0ebf133590abdc035af6e19a6568667af0ab3b0 Mon Sep 17 00:00:00 2001 From: Daniel Hokka Zakrisson Date: Thu, 20 Mar 2008 15:07:10 -0700 Subject: [PATCH 03/13] [NETFILTER]: ipt_recent: sanity check hit count If a rule using ipt_recent is created with a hit count greater than ip_pkt_list_tot, the rule will never match as it cannot keep track of enough timestamps. This patch makes ipt_recent refuse to create such rules. With ip_pkt_list_tot's default value of 20, the following can be used to reproduce the problem. nc -u -l 0.0.0.0 1234 & for i in `seq 1 100`; do echo $i | nc -w 1 -u 127.0.0.1 1234; done This limits it to 20 packets: iptables -A OUTPUT -p udp --dport 1234 -m recent --set --name test \ --rsource iptables -A OUTPUT -p udp --dport 1234 -m recent --update --seconds \ 60 --hitcount 20 --name test --rsource -j DROP While this is unlimited: iptables -A OUTPUT -p udp --dport 1234 -m recent --set --name test \ --rsource iptables -A OUTPUT -p udp --dport 1234 -m recent --update --seconds \ 60 --hitcount 21 --name test --rsource -j DROP With the patch the second rule-set will throw an EINVAL. Reported-by: Sean Kennedy Signed-off-by: Daniel Hokka Zakrisson Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_recent.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 68cbe3ca01ce..8e8f0425a8ed 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -252,6 +252,8 @@ recent_mt_check(const char *tablename, const void *ip, if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) return false; + if (info->hit_count > ip_pkt_list_tot) + return false; if (info->name[0] == '\0' || strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) return false; From 270637abff0cdf848b910b9f96ad342e1da61c66 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 20 Mar 2008 15:17:14 -0700 Subject: [PATCH 04/13] [SCTP]: Fix a race between module load and protosw access There is a race is SCTP between the loading of the module and the access by the socket layer to the protocol functions. In particular, a list of addresss that SCTP maintains is not initialized prior to the registration with the protosw. Thus it is possible for a user application to gain access to SCTP functions before everything has been initialized. The problem shows up as odd crashes during connection initializtion when we try to access the SCTP address list. The solution is to refactor how we do registration and initialize the lists prior to registering with the protosw. Care must be taken since the address list initialization depends on some other pieces of SCTP initialization. Also the clean-up in case of failure now also needs to be refactored. Signed-off-by: Vlad Yasevich Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 12 ++-- net/sctp/ipv6.c | 32 ++++++---- net/sctp/protocol.c | 129 ++++++++++++++++++++++++++-------------- 3 files changed, 112 insertions(+), 61 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 57df27f19588..57ed3e323d97 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -380,15 +380,19 @@ static inline int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -int sctp_v6_init(void); -void sctp_v6_exit(void); +void sctp_v6_pf_init(void); +void sctp_v6_pf_exit(void); +int sctp_v6_protosw_init(void); +void sctp_v6_protosw_exit(void); int sctp_v6_add_protocol(void); void sctp_v6_del_protocol(void); #else /* #ifdef defined(CONFIG_IPV6) */ -static inline int sctp_v6_init(void) { return 0; } -static inline void sctp_v6_exit(void) { return; } +static inline void sctp_v6_pf_init(void) { return 0; } +static inline void sctp_v6_pf_exit(void) { return; } +static inline int sctp_v6_protosw_init(void) { return 0; } +static inline void sctp_v6_protosw_exit(void) { return; } static inline int sctp_v6_add_protocol(void) { return 0; } static inline void sctp_v6_del_protocol(void) { return; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 9aa0733aee87..b1e05d719f9b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1014,15 +1014,24 @@ static struct sctp_pf sctp_pf_inet6 = { }; /* Initialize IPv6 support and register with socket layer. */ -int sctp_v6_init(void) +void sctp_v6_pf_init(void) { - int rc; - /* Register the SCTP specific PF_INET6 functions. */ sctp_register_pf(&sctp_pf_inet6, PF_INET6); /* Register the SCTP specific AF_INET6 functions. */ sctp_register_af(&sctp_af_inet6); +} + +void sctp_v6_pf_exit(void) +{ + list_del(&sctp_af_inet6.list); +} + +/* Initialize IPv6 support and register with socket layer. */ +int sctp_v6_protosw_init(void) +{ + int rc; rc = proto_register(&sctpv6_prot, 1); if (rc) @@ -1035,6 +1044,14 @@ int sctp_v6_init(void) return 0; } +void sctp_v6_protosw_exit(void) +{ + inet6_unregister_protosw(&sctpv6_seqpacket_protosw); + inet6_unregister_protosw(&sctpv6_stream_protosw); + proto_unregister(&sctpv6_prot); +} + + /* Register with inet6 layer. */ int sctp_v6_add_protocol(void) { @@ -1047,15 +1064,6 @@ int sctp_v6_add_protocol(void) return 0; } -/* IPv6 specific exit support. */ -void sctp_v6_exit(void) -{ - inet6_unregister_protosw(&sctpv6_seqpacket_protosw); - inet6_unregister_protosw(&sctpv6_stream_protosw); - proto_unregister(&sctpv6_prot); - list_del(&sctp_af_inet6.list); -} - /* Unregister with inet6 layer. */ void sctp_v6_del_protocol(void) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7a7646a9565c..f90091a1b9ce 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -992,6 +992,58 @@ static void cleanup_sctp_mibs(void) free_percpu(sctp_statistics[1]); } +static void sctp_v4_pf_init(void) +{ + /* Initialize the SCTP specific PF functions. */ + sctp_register_pf(&sctp_pf_inet, PF_INET); + sctp_register_af(&sctp_af_inet); +} + +static void sctp_v4_pf_exit(void) +{ + list_del(&sctp_af_inet.list); +} + +static int sctp_v4_protosw_init(void) +{ + int rc; + + rc = proto_register(&sctp_prot, 1); + if (rc) + return rc; + + /* Register SCTP(UDP and TCP style) with socket layer. */ + inet_register_protosw(&sctp_seqpacket_protosw); + inet_register_protosw(&sctp_stream_protosw); + + return 0; +} + +static void sctp_v4_protosw_exit(void) +{ + inet_unregister_protosw(&sctp_stream_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + proto_unregister(&sctp_prot); +} + +static int sctp_v4_add_protocol(void) +{ + /* Register notifier for inet address additions/deletions. */ + register_inetaddr_notifier(&sctp_inetaddr_notifier); + + /* Register SCTP with inet layer. */ + if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) + return -EAGAIN; + + return 0; +} + +static void sctp_v4_del_protocol(void) +{ + inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); +} + /* Initialize the universe into something sensible. */ SCTP_STATIC __init int sctp_init(void) { @@ -1035,8 +1087,6 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize object count debugging. */ sctp_dbg_objcnt_init(); - /* Initialize the SCTP specific PF functions. */ - sctp_register_pf(&sctp_pf_inet, PF_INET); /* * 14. Suggested SCTP Protocol Parameter Values */ @@ -1194,19 +1244,22 @@ SCTP_STATIC __init int sctp_init(void) sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); - sctp_register_af(&sctp_af_inet); + sctp_v4_pf_init(); + sctp_v6_pf_init(); + + /* Initialize the local address list. */ + INIT_LIST_HEAD(&sctp_local_addr_list); + spin_lock_init(&sctp_local_addr_lock); + sctp_get_local_addr_list(); + + status = sctp_v4_protosw_init(); - status = proto_register(&sctp_prot, 1); if (status) - goto err_proto_register; + goto err_protosw_init; - /* Register SCTP(UDP and TCP style) with socket layer. */ - inet_register_protosw(&sctp_seqpacket_protosw); - inet_register_protosw(&sctp_stream_protosw); - - status = sctp_v6_init(); + status = sctp_v6_protosw_init(); if (status) - goto err_v6_init; + goto err_v6_protosw_init; /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init())) { @@ -1215,19 +1268,9 @@ SCTP_STATIC __init int sctp_init(void) goto err_ctl_sock_init; } - /* Initialize the local address list. */ - INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); - sctp_get_local_addr_list(); - - /* Register notifier for inet address additions/deletions. */ - register_inetaddr_notifier(&sctp_inetaddr_notifier); - - /* Register SCTP with inet layer. */ - if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) { - status = -EAGAIN; + status = sctp_v4_add_protocol(); + if (status) goto err_add_protocol; - } /* Register SCTP with inet6 layer. */ status = sctp_v6_add_protocol(); @@ -1238,18 +1281,18 @@ SCTP_STATIC __init int sctp_init(void) out: return status; err_v6_add_protocol: - inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + sctp_v6_del_protocol(); err_add_protocol: - sctp_free_local_addr_list(); + sctp_v4_del_protocol(); sock_release(sctp_ctl_socket); err_ctl_sock_init: - sctp_v6_exit(); -err_v6_init: - inet_unregister_protosw(&sctp_stream_protosw); - inet_unregister_protosw(&sctp_seqpacket_protosw); - proto_unregister(&sctp_prot); -err_proto_register: + sctp_v6_protosw_exit(); +err_v6_protosw_init: + sctp_v4_protosw_exit(); +err_protosw_init: + sctp_free_local_addr_list(); + sctp_v4_pf_exit(); + sctp_v6_pf_exit(); sctp_sysctl_unregister(); list_del(&sctp_af_inet.list); free_pages((unsigned long)sctp_port_hashtable, @@ -1282,23 +1325,21 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); - inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); - - /* Free the local address list. */ - sctp_free_local_addr_list(); + sctp_v4_del_protocol(); /* Free the control endpoint. */ sock_release(sctp_ctl_socket); - /* Cleanup v6 initializations. */ - sctp_v6_exit(); + /* Free protosw registrations */ + sctp_v6_protosw_exit(); + sctp_v4_protosw_exit(); + + /* Free the local address list. */ + sctp_free_local_addr_list(); /* Unregister with socket layer. */ - inet_unregister_protosw(&sctp_stream_protosw); - inet_unregister_protosw(&sctp_seqpacket_protosw); + sctp_v6_pf_exit(); + sctp_v4_pf_exit(); sctp_sysctl_unregister(); list_del(&sctp_af_inet.list); @@ -1317,8 +1358,6 @@ SCTP_STATIC __exit void sctp_exit(void) kmem_cache_destroy(sctp_chunk_cachep); kmem_cache_destroy(sctp_bucket_cachep); - - proto_unregister(&sctp_prot); } module_init(sctp_init); From 4f42c288e66a3395e94158badbd182b2dae8eccb Mon Sep 17 00:00:00 2001 From: Andre Noll Date: Thu, 20 Mar 2008 15:27:28 -0700 Subject: [PATCH 05/13] [NET]: Fix permissions of /proc/net commit e9720ac ([NET]: Make /proc/net a symlink on /proc/self/net (v3)) broke ganglia and probably other applications that read /proc/net/dev. This is due to the change of permissions of /proc/net that was introduced in that commit. Before: dr-xr-xr-x 5 root root 0 Mar 19 11:30 /proc/net After: dr-xr--r-- 5 root root 0 Mar 19 11:29 /proc/self/net This patch restores the permissions to the old value which makes ganglia happy again. Pavel Emelyanov says: This also broke the postfix, as it was reported in bug #10286 and described in detail by Benjamin. Signed-off-by: Andre Noll Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9a4da0aae02e..770de444eba0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2270,7 +2270,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, fd), DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), #ifdef CONFIG_NET - DIR("net", S_IRUGO|S_IXUSR, net), + DIR("net", S_IRUGO|S_IXUGO, net), #endif REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), From 75c0371a2d385ecbd6e1f854d9dce20889f06736 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 20 Mar 2008 15:39:41 -0700 Subject: [PATCH 06/13] audit: netlink socket can be auto-bound to pid other than current->pid (v2) From: Pavel Emelyanov This patch is based on the one from Thomas. The kauditd_thread() calls the netlink_unicast() and passes the audit_pid to it. The audit_pid, in turn, is received from the user space and the tool (I've checked the audit v1.6.9) uses getpid() to pass one in the kernel. Besides, this tool doesn't bind the netlink socket to this id, but simply creates it allowing the kernel to auto-bind one. That's the preamble. The problem is that netlink_autobind() _does_not_ guarantees that the socket will be auto-bound to the current pid. Instead it uses the current pid as a hint to start looking for a free id. So, in case of conflict, the audit messages can be sent to a wrong socket. This can happen (it's unlikely, but can be) in case some task opens more than one netlink sockets and then the audit one starts - in this case the audit's pid can be busy and its socket will be bound to another id. The proposal is to introduce an audit_nlk_pid in audit subsys, that will point to the netlink socket to send packets to. It will most often be equal to audit_pid. The socket id can be got from the skb's netlink CB right in the audit_receive_msg. The audit_nlk_pid reset to 0 is not required, since all the decisions are taken based on audit_pid value only. Later, if the audit tools will bind the socket themselves, the kernel will have to provide a way to setup the audit_nlk_pid as well. A good side effect of this patch is that audit_pid can later be converted to struct pid, as it is not longer safe to use pid_t-s in the presence of pid namespaces. But audit code still uses the tgid from task_struct in the audit_signal_info and in the audit_filter_syscall. Signed-off-by: Thomas Graf Signed-off-by: Pavel Emelyanov Acked-by: Eric Paris Signed-off-by: David S. Miller --- kernel/audit.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 10c4930c2bbf..be55cb503633 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -78,9 +78,13 @@ static int audit_default; /* If auditing cannot proceed, audit_failure selects what happens. */ static int audit_failure = AUDIT_FAIL_PRINTK; -/* If audit records are to be written to the netlink socket, audit_pid - * contains the (non-zero) pid. */ +/* + * If audit records are to be written to the netlink socket, audit_pid + * contains the pid of the auditd process and audit_nlk_pid contains + * the pid to use to send netlink messages to that process. + */ int audit_pid; +static int audit_nlk_pid; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -350,7 +354,7 @@ static int kauditd_thread(void *dummy) wake_up(&audit_backlog_wait); if (skb) { if (audit_pid) { - int err = netlink_unicast(audit_sock, skb, audit_pid, 0); + int err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); @@ -626,6 +630,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sid, 1); audit_pid = new_pid; + audit_nlk_pid = NETLINK_CB(skb).pid; } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) err = audit_set_rate_limit(status_get->rate_limit, From 781c2844845cdc80b19eed3d6e451e65f046b58b Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 20 Mar 2008 15:41:02 -0700 Subject: [PATCH 07/13] MAINTAINERS: bluez-devel is subscribers-only Signed-off-by: Pavel Machek Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6d628fb6ee01..76843d011f49 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -880,7 +880,7 @@ P: Marcel Holtmann M: marcel@holtmann.org P: Maxim Krasnyansky M: maxk@qualcomm.com -L: bluez-devel@lists.sf.net +L: linux-bluetooth@vger.kernel.org W: http://bluez.sf.net W: http://www.bluez.org W: http://www.holtmann.org/linux/bluetooth/ From 7582a33557cc6dc42b4c6918c6e7f8e465b72a70 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 20 Mar 2008 15:53:15 -0700 Subject: [PATCH 08/13] [TG3]: Fix build warning on sparc32. Sparc MAC address support should be protected consistently with CONFIG_SPARC, but there was a stray CONFIG_SPARC64 case. Bump driver version and release date. Reported by Andrew Morton. Signed-off-by: David S. Miller --- drivers/net/tg3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 26ffb67f1da2..f9ef8bd8b11e 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -64,8 +64,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.87" -#define DRV_MODULE_RELDATE "December 20, 2007" +#define DRV_MODULE_VERSION "3.88" +#define DRV_MODULE_RELDATE "March 20, 2008" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -11841,7 +11841,7 @@ static int __devinit tg3_get_device_address(struct tg3 *tp) } if (!is_valid_ether_addr(&dev->dev_addr[0])) { -#ifdef CONFIG_SPARC64 +#ifdef CONFIG_SPARC if (!tg3_get_default_macaddr_sparc(tp)) return 0; #endif From 2bec008ca9fd009aa503b75344d1c22da9256141 Mon Sep 17 00:00:00 2001 From: Fabio Checconi Date: Thu, 20 Mar 2008 15:54:58 -0700 Subject: [PATCH 09/13] bridge: use time_before() in br_fdb_cleanup() In br_fdb_cleanup() next_timer and this_timer are in jiffies, so they should be compared using the time_after() macro. Signed-off-by: Fabio Checconi Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index bc40377136a2..9326c377822e 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -136,7 +136,7 @@ void br_fdb_cleanup(unsigned long _data) this_timer = f->ageing_timer + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(f); - else if (this_timer < next_timer) + else if (time_before(this_timer, next_timer)) next_timer = this_timer; } } From 8a455b087c9629b3ae3b521b4f1ed16672f978cc Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 20 Mar 2008 16:07:27 -0700 Subject: [PATCH 10/13] netpoll: zap_completion_queue: adjust skb->users counter zap_completion_queue() retrieves skbs from completion_queue where they have zero skb->users counter. Before dev_kfree_skb_any() it should be non-zero yet, so it's increased now. Reported-and-tested-by: Andrew Morton Signed-off-by: Jarek Poplawski Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/core/netpoll.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4b7e756181c9..c635de52526c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -215,10 +215,12 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - if (skb->destructor) + if (skb->destructor) { + atomic_inc(&skb->users); dev_kfree_skb_any(skb); /* put this one back */ - else + } else { __kfree_skb(skb); + } } } From 607bfbf2d55dd1cfe5368b41c2a81a8c9ccf4723 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Mar 2008 16:11:27 -0700 Subject: [PATCH 11/13] [TCP]: Fix shrinking windows with window scaling When selecting a new window, tcp_select_window() tries not to shrink the offered window by using the maximum of the remaining offered window size and the newly calculated window size. The newly calculated window size is always a multiple of the window scaling factor, the remaining window size however might not be since it depends on rcv_wup/rcv_nxt. This means we're effectively shrinking the window when scaling it down. The dump below shows the problem (scaling factor 2^7): - Window size of 557 (71296) is advertised, up to 3111907257: IP 172.2.2.3.33000 > 172.2.2.2.33000: . ack 3111835961 win 557 <...> - New window size of 514 (65792) is advertised, up to 3111907217, 40 bytes below the last end: IP 172.2.2.3.33000 > 172.2.2.2.33000: . 3113575668:3113577116(1448) ack 3111841425 win 514 <...> The number 40 results from downscaling the remaining window: 3111907257 - 3111841425 = 65832 65832 / 2^7 = 514 65832 % 2^7 = 40 If the sender uses up the entire window before it is shrunk, this can have chaotic effects on the connection. When sending ACKs, tcp_acceptable_seq() will notice that the window has been shrunk since tcp_wnd_end() is before tp->snd_nxt, which makes it choose tcp_wnd_end() as sequence number. This will fail the receivers checks in tcp_sequence() however since it is before it's tp->rcv_wup, making it respond with a dupack. If both sides are in this condition, this leads to a constant flood of ACKs until the connection times out. Make sure the window is never shrunk by aligning the remaining window to the window scaling factor. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 01578f544ad6..72b9350006fe 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -255,7 +255,7 @@ static u16 tcp_select_window(struct sock *sk) * * Relax Will Robinson. */ - new_win = cur_win; + new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); } tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; From 38fe999e2286139cccdaa500a81bd49a16a81158 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 20 Mar 2008 16:13:58 -0700 Subject: [PATCH 12/13] [IPV6] KCONFIG: Fix description about IPV6_TUNNEL. Based on notice from "Colin" . Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 58219dfffef8..47263e45bacb 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -179,11 +179,12 @@ config IPV6_SIT Saying M here will produce a module called sit.ko. If unsure, say Y. config IPV6_TUNNEL - tristate "IPv6: IPv6-in-IPv6 tunnel" + tristate "IPv6: IP-in-IPv6 tunnel (RFC2473)" select INET6_TUNNEL depends on IPV6 ---help--- - Support for IPv6-in-IPv6 tunnels described in RFC 2473. + Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in + RFC 2473. If unsure, say N. From 94833dfb8c98ed4ca1944dd2c1339d88a2d1c758 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 20 Mar 2008 17:05:13 -0700 Subject: [PATCH 13/13] [NET] ifb: set separate lockdep classes for queue locks [ 10.536424] ======================================================= [ 10.536424] [ INFO: possible circular locking dependency detected ] [ 10.536424] 2.6.25-rc3-devel #3 [ 10.536424] ------------------------------------------------------- [ 10.536424] swapper/0 is trying to acquire lock: [ 10.536424] (&dev->queue_lock){-+..}, at: [] dev_queue_xmit+0x175/0x2f3 [ 10.536424] [ 10.536424] but task is already holding lock: [ 10.536424] (&p->tcfc_lock){-+..}, at: [] tcf_mirred+0x20/0x178 [act_mirred] [ 10.536424] [ 10.536424] which lock already depends on the new lock. lockdep warns of locking order while using ifb with sch_ingress and act_mirred: ingress_lock, tcfc_lock, queue_lock (usually queue_lock is at the beginning). This patch is only to tell lockdep that ifb is a different device (e.g. from eth) and has its own pair of queue locks. (This warning is a false-positive in common scenario of using ifb; yet there are possible situations, when this order could be dangerous; lockdep should warn in such a case.) (With suggestions by David S. Miller) Reported-and-tested-by: Denys Fedoryshchenko Signed-off-by: Jarek Poplawski Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- drivers/net/ifb.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 15949d3df17e..af233b591534 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -35,6 +35,7 @@ #include #include #include +#include #define TX_TIMEOUT (2*HZ) @@ -227,6 +228,16 @@ static struct rtnl_link_ops ifb_link_ops __read_mostly = { module_param(numifbs, int, 0); MODULE_PARM_DESC(numifbs, "Number of ifb devices"); +/* + * dev_ifb->queue_lock is usually taken after dev->ingress_lock, + * reversely to e.g. qdisc_lock_tree(). It should be safe until + * ifb doesn't take dev->queue_lock with dev_ifb->ingress_lock. + * But lockdep should know that ifb has different locks from dev. + */ +static struct lock_class_key ifb_queue_lock_key; +static struct lock_class_key ifb_ingress_lock_key; + + static int __init ifb_init_one(int index) { struct net_device *dev_ifb; @@ -246,6 +257,10 @@ static int __init ifb_init_one(int index) err = register_netdevice(dev_ifb); if (err < 0) goto err; + + lockdep_set_class(&dev_ifb->queue_lock, &ifb_queue_lock_key); + lockdep_set_class(&dev_ifb->ingress_lock, &ifb_ingress_lock_key); + return 0; err: