From 39f390167e9ca73c009d3c8e2d6c3b4286b02ab6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 1 Sep 2014 11:09:35 +0200 Subject: [PATCH 01/32] netfilter: nft_hash: no need for rcu in the hash set destroy path The sets are released from the rcu callback, after the rule is removed from the chain list, which implies that nfnetlink cannot update the hashes (thus, no resizing may occur) and no packets are walking on the set anymore. This resolves a lockdep splat in the nft_hash_destroy() path since the nfnl mutex is not held there. =============================== [ INFO: suspicious RCU usage. ] 3.16.0-rc2+ #168 Not tainted ------------------------------- net/netfilter/nft_hash.c:362 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by ksoftirqd/0/3: #0: (rcu_callback){......}, at: [] rcu_process_callbacks+0x27e/0x4c7 stack backtrace: CPU: 0 PID: 3 Comm: ksoftirqd/0 Not tainted 3.16.0-rc2+ #168 Hardware name: LENOVO 23259H1/23259H1, BIOS G2ET32WW (1.12 ) 05/30/2012 0000000000000001 ffff88011769bb98 ffffffff8142c922 0000000000000006 ffff880117694090 ffff88011769bbc8 ffffffff8107c3ff ffff8800cba52400 ffff8800c476bea8 ffff8800c476bea8 ffff8800cba52400 ffff88011769bc08 Call Trace: [] dump_stack+0x4e/0x68 [] lockdep_rcu_suspicious+0xfa/0x103 [] nft_hash_destroy+0x50/0x137 [nft_hash] [] nft_set_destroy+0x11/0x2a [nf_tables] Signed-off-by: Pablo Neira Ayuso Acked-by: Thomas Graf --- net/netfilter/nft_hash.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 28fb8f38e6ba..8892b7b6184a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -180,15 +180,17 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl; + const struct bucket_table *tbl = priv->tbl; struct nft_hash_elem *he, *next; unsigned int i; - tbl = rht_dereference(priv->tbl, priv); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node) + for (i = 0; i < tbl->size; i++) { + for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); + he != NULL; he = next) { + next = rht_entry(he->node.next, struct nft_hash_elem, node); nft_hash_elem_destroy(set, he); - + } + } rhashtable_destroy(priv); } From d99407f42f05843ae9e23696ea6d91529d9600db Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Sep 2014 11:29:56 +0200 Subject: [PATCH 02/32] netfilter: nft_rbtree: no need for spinlock from set destroy path The sets are released from the rcu callback, after the rule is removed from the chain list, which implies that nfnetlink cannot update the rbtree and no packets are walking on the set anymore. Thus, we can get rid of the spinlock in the set destroy path there. Signed-off-by: Pablo Neira Ayuso Reviewied-by: Thomas Graf --- net/netfilter/nft_rbtree.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index e1836ff88199..46214f245665 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set) struct nft_rbtree_elem *rbe; struct rb_node *node; - spin_lock_bh(&nft_rbtree_lock); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); nft_rbtree_elem_destroy(set, rbe); } - spin_unlock_bh(&nft_rbtree_lock); } static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, From ae82ddcf8e8239bdd06e6830d450cf9e785b8024 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Sep 2014 00:26:05 +0200 Subject: [PATCH 03/32] rhashtable: fix lockdep splat in rhashtable_destroy() No need for rht_dereference() from rhashtable_destroy() since the existing callers don't hold the mutex when invoking this function from: 1) Netlink, this is called in case of memory allocation errors in the initialization path, no nl_sk_hash_lock is held. 2) Netfilter, this is called from the rcu callback, no nfnl_lock is held either. I think it's reasonable to assume that the caller has to make sure that no hash resizing may happen before releasing the bucket array. Therefore, the caller should be responsible for releasing this in a safe way, document this to make people aware of it. This resolves a rcu lockdep splat in nft_hash: =============================== [ INFO: suspicious RCU usage. ] 3.16.0+ #178 Not tainted ------------------------------- lib/rhashtable.c:596 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by ksoftirqd/2/18: #0: (rcu_callback){......}, at: [] rcu_process_callbacks+0x27e/0x4c7 stack backtrace: CPU: 2 PID: 18 Comm: ksoftirqd/2 Not tainted 3.16.0+ #178 Hardware name: LENOVO 23259H1/23259H1, BIOS G2ET32WW (1.12 ) 05/30/2012 0000000000000001 ffff88011706bb68 ffffffff8143debc 0000000000000000 ffff880117062610 ffff88011706bb98 ffffffff81077515 ffff8800ca041a50 0000000000000004 ffff8800ca386480 ffff8800ca041a00 ffff88011706bbb8 Call Trace: [] dump_stack+0x4e/0x68 [] lockdep_rcu_suspicious+0xfa/0x103 [] rhashtable_destroy+0x46/0x52 [] nft_hash_destroy+0x73/0x82 [nft_hash] Signed-off-by: Pablo Neira Ayuso Acked-by: Thomas Graf --- lib/rhashtable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a2c78810ebc1..fc0dd8ee5c35 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -589,13 +589,13 @@ EXPORT_SYMBOL_GPL(rhashtable_init); * rhashtable_destroy - destroy hash table * @ht: the hash table to destroy * - * Frees the bucket array. + * Frees the bucket array. This function is not rcu safe, therefore the caller + * has to make sure that no resizing may happen by unpublishing the hashtable + * and waiting for the quiescent cycle before releasing the bucket array. */ void rhashtable_destroy(const struct rhashtable *ht) { - const struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - - bucket_table_free(tbl); + bucket_table_free(ht->tbl); } EXPORT_SYMBOL_GPL(rhashtable_destroy); From cbb8125eb40b05f96d557b2705ee641873eb30b0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 Sep 2014 18:04:53 +0200 Subject: [PATCH 04/32] netfilter: nfnetlink: deliver netlink errors on batch completion We have to wait until the full batch has been processed to deliver the netlink error messages to userspace. Otherwise, we may deliver duplicated errors to userspace in case that we need to abort and replay the transaction if any of the required modules needs to be autoloaded. A simple way to reproduce this (assumming nft_meta is not loaded) with the following test file: add table filter add chain filter test add chain bad test # intentional wrong unexistent table add rule filter test meta mark 0 Then, when trying to load the batch: # nft -f test test:4:1-19: Error: Could not process rule: No such file or directory add chain bad test ^^^^^^^^^^^^^^^^^^^ test:4:1-19: Error: Could not process rule: No such file or directory add chain bad test ^^^^^^^^^^^^^^^^^^^ The error is reported twice, once when the batch is aborted due to missing nft_meta and another when it is fully processed. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 64 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c138b8fbe280..f37f0716a9fc 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -222,6 +222,51 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } } +struct nfnl_err { + struct list_head head; + struct nlmsghdr *nlh; + int err; +}; + +static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err) +{ + struct nfnl_err *nfnl_err; + + nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL); + if (nfnl_err == NULL) + return -ENOMEM; + + nfnl_err->nlh = nlh; + nfnl_err->err = err; + list_add_tail(&nfnl_err->head, list); + + return 0; +} + +static void nfnl_err_del(struct nfnl_err *nfnl_err) +{ + list_del(&nfnl_err->head); + kfree(nfnl_err); +} + +static void nfnl_err_reset(struct list_head *err_list) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) + nfnl_err_del(nfnl_err); +} + +static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) +{ + struct nfnl_err *nfnl_err, *next; + + list_for_each_entry_safe(nfnl_err, next, err_list, head) { + netlink_ack(skb, nfnl_err->nlh, nfnl_err->err); + nfnl_err_del(nfnl_err); + } +} + static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u_int16_t subsys_id) { @@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; bool success = true, done = false; + static LIST_HEAD(err_list); int err; if (subsys_id >= NFNL_SUBSYS_COUNT) @@ -287,6 +333,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, type = nlh->nlmsg_type; if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ + nfnl_err_reset(&err_list); success = false; goto done; } else if (type == NFNL_MSG_BATCH_END) { @@ -333,6 +380,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, * original skb. */ if (err == -EAGAIN) { + nfnl_err_reset(&err_list); ss->abort(skb); nfnl_unlock(subsys_id); kfree_skb(nskb); @@ -341,11 +389,24 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, } ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) { + /* Errors are delivered once the full batch has been + * processed, this avoids that the same error is + * reported several times when replaying the batch. + */ + if (nfnl_err_add(&err_list, nlh, err) < 0) { + /* We failed to enqueue an error, reset the + * list of errors and send OOM to userspace + * pointing to the batch header. + */ + nfnl_err_reset(&err_list); + netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); + success = false; + goto done; + } /* We don't stop processing the batch on errors, thus, * userspace gets all the errors that the batch * triggers. */ - netlink_ack(skb, nlh, err); if (err) success = false; } @@ -361,6 +422,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, else ss->abort(skb); + nfnl_err_deliver(&err_list, oskb); nfnl_unlock(subsys_id); kfree_skb(nskb); } From 679ab4ddbdfab8af39104e63819db71f428aefd9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 7 Sep 2014 17:20:27 +0200 Subject: [PATCH 05/32] netfilter: xt_TPROXY: undefined reference to `udp6_lib_lookup' CONFIG_IPV6=m CONFIG_NETFILTER_XT_TARGET_TPROXY=y net/built-in.o: In function `nf_tproxy_get_sock_v6.constprop.11': >> xt_TPROXY.c:(.text+0x583a1): undefined reference to `udp6_lib_lookup' net/built-in.o: In function `tproxy_tg_init': >> xt_TPROXY.c:(.init.text+0x1dc3): undefined reference to `nf_defrag_ipv6_enable' This fix is similar to 1a5bbfc ("netfilter: Fix build errors with xt_socket.c"). Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4bef6eb7c40d..831f9603c0ed 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -839,6 +839,7 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on (IPV6 || IPV6=n) depends on IP_NF_MANGLE select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES From d61746b2e71bf612fb397b00242de5df5ba7f29a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Sep 2014 09:11:08 +0200 Subject: [PATCH 06/32] ip_tunnel: Don't allow to add the same tunnel multiple times. When we try to add an already existing tunnel, we don't return an error. Instead we continue and call ip_tunnel_update(). This means that we can change existing tunnels by adding the same tunnel multiple times. It is even possible to change the tunnel endpoints of the fallback device. We fix this by returning an error if we try to add an existing tunnel. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index bd41dd1948b6..bda4bb8ae260 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -764,9 +764,14 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); - if (!t && (cmd == SIOCADDTUNNEL)) { - t = ip_tunnel_create(net, itn, p); - err = PTR_ERR_OR_ZERO(t); + if (cmd == SIOCADDTUNNEL) { + if (!t) { + t = ip_tunnel_create(net, itn, p); + err = PTR_ERR_OR_ZERO(t); + break; + } + + err = -EEXIST; break; } if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { From 4324414f8ccae4997dd1e91646683679a4436959 Mon Sep 17 00:00:00 2001 From: Sony Chacko Date: Mon, 22 Sep 2014 05:51:50 -0400 Subject: [PATCH 07/32] qlcnic: Use qlcnic_83xx_flash_read32() API instead of lockless version of the API. In qlcnic_83xx_setup_idc_parameters() routine use qlcnic_83xx_flash_read32() API which takes flash lock internally instead of the lockless version qlcnic_83xx_lockless_flash_read32(). Signed-off-by: Sony Chacko Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index 86783e1afcf7..3172cdf591fe 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -1177,9 +1177,8 @@ static void qlcnic_83xx_setup_idc_parameters(struct qlcnic_adapter *adapter) { u32 idc_params, val; - if (qlcnic_83xx_lockless_flash_read32(adapter, - QLC_83XX_IDC_FLASH_PARAM_ADDR, - (u8 *)&idc_params, 1)) { + if (qlcnic_83xx_flash_read32(adapter, QLC_83XX_IDC_FLASH_PARAM_ADDR, + (u8 *)&idc_params, 1)) { dev_info(&adapter->pdev->dev, "%s:failed to get IDC params from flash\n", __func__); adapter->dev_init_timeo = QLC_83XX_IDC_INIT_TIMEOUT_SECS; From c023030198b2b0fb87f5d9bcb388c41bfaf3c3c0 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 22 Sep 2014 05:51:51 -0400 Subject: [PATCH 08/32] qlcnic: Fix memory corruption while reading stats using ethtool. o Driver is doing memset with zero for total number of stats bytes when it has already filled some data in the stats buffer, which can overwrite memory area beyond the length of stats buffer. o Fix this by initializing stats buffer with zero before filling any data in it. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 141f116eb868..2d77b7694d16 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1333,12 +1333,11 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_esw_statistics port_stats; struct qlcnic_mac_statistics mac_stats; - int index, ret, length, size, tx_size, ring; + int index, ret, length, size, ring; char *p; - tx_size = adapter->drv_tx_rings * QLCNIC_TX_STATS_LEN; + memset(data, 0, stats->n_stats * sizeof(u64)); - memset(data, 0, tx_size * sizeof(u64)); for (ring = 0, index = 0; ring < adapter->drv_tx_rings; ring++) { if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) { tx_ring = &adapter->tx_ring[ring]; @@ -1347,7 +1346,6 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, } } - memset(data, 0, stats->n_stats * sizeof(u64)); length = QLCNIC_STATS_LEN; for (index = 0; index < length; index++) { p = (char *)adapter + qlcnic_gstrings_stats[index].stat_offset; From 6c0fd0df0c44d1129ab65fc28d17f23a1c006a0c Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 22 Sep 2014 05:51:52 -0400 Subject: [PATCH 09/32] qlcnic: Remove __QLCNIC_DEV_UP bit check to read TX queues statistics. o TX queues stats must be read when queues are allocated regardless of interface is up or not. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 2d77b7694d16..863c4456d2d1 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1339,7 +1339,7 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, memset(data, 0, stats->n_stats * sizeof(u64)); for (ring = 0, index = 0; ring < adapter->drv_tx_rings; ring++) { - if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) { + if (adapter->is_up == QLCNIC_ADAPTER_UP_MAGIC) { tx_ring = &adapter->tx_ring[ring]; data = qlcnic_fill_tx_queue_stats(data, tx_ring); qlcnic_update_stats(adapter); From 3456399b031c4bd71b90521301c646b62dfd0caa Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 22 Sep 2014 05:51:53 -0400 Subject: [PATCH 10/32] qlcnic: Fix ordering of stats in stats buffer. o When TX queues are not allocated, driver does not fill TX queues stats in the buffer. However, it is also not advancing data pointer by TX queue stats length, which would misplace all successive stats data in the buffer and will result in mismatch between stats strings and it's values. o Fix this by advancing data pointer by TX queue stats length when queues are not allocated. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 863c4456d2d1..494e8105adee 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1343,6 +1343,8 @@ static void qlcnic_get_ethtool_stats(struct net_device *dev, tx_ring = &adapter->tx_ring[ring]; data = qlcnic_fill_tx_queue_stats(data, tx_ring); qlcnic_update_stats(adapter); + } else { + data += QLCNIC_TX_STATS_LEN; } } From 40b8fe45d1f094e3babe7b2dc2b71557ab71401d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 22 Sep 2014 16:34:17 -0400 Subject: [PATCH 11/32] macvtap: Fix race between device delete and open. In macvtap device delete and open calls can race and this causes a list curruption of the vlan queue_list. The race intself is triggered by the idr accessors that located the vlan device. The device is stored into and removed from the idr under both an rtnl and a mutex. However, when attempting to locate the device in idr, only a mutex is taken. As a result, once cpu perfoming a delete may take an rtnl and wait for the mutex, while another cput doing an open() will take the idr mutex first to fetch the device pointer and later take an rtnl to add a queue for the device which may have just gotten deleted. With this patch, we now hold the rtnl for the duration of the macvtap_open() call thus making sure that open will not race with delete. CC: Michael S. Tsirkin CC: Jason Wang Signed-off-by: Vladislav Yasevich Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3381c4f91a8c..0c6adaaf898c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -112,17 +112,15 @@ static int macvtap_enable_queue(struct net_device *dev, struct file *file, return err; } +/* Requires RTNL */ static int macvtap_set_queue(struct net_device *dev, struct file *file, struct macvtap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); - int err = -EBUSY; - rtnl_lock(); if (vlan->numqueues == MAX_MACVTAP_QUEUES) - goto out; + return -EBUSY; - err = 0; rcu_assign_pointer(q->vlan, vlan); rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); sock_hold(&q->sk); @@ -136,9 +134,7 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, vlan->numvtaps++; vlan->numqueues++; -out: - rtnl_unlock(); - return err; + return 0; } static int macvtap_disable_queue(struct macvtap_queue *q) @@ -454,11 +450,12 @@ static void macvtap_sock_destruct(struct sock *sk) static int macvtap_open(struct inode *inode, struct file *file) { struct net *net = current->nsproxy->net_ns; - struct net_device *dev = dev_get_by_macvtap_minor(iminor(inode)); + struct net_device *dev; struct macvtap_queue *q; - int err; + int err = -ENODEV; - err = -ENODEV; + rtnl_lock(); + dev = dev_get_by_macvtap_minor(iminor(inode)); if (!dev) goto out; @@ -498,6 +495,7 @@ static int macvtap_open(struct inode *inode, struct file *file) if (dev) dev_put(dev); + rtnl_unlock(); return err; } From 9026968abe7ad102f4ac5c6d96d733643f75399c Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Mon, 22 Sep 2014 16:49:08 -0700 Subject: [PATCH 12/32] Revert "net/macb: add pinctrl consumer support" This reverts commit 8ef29f8aae524bd51298fb10ac6a5ce6c4c5a3d8. The driver core already calls pinctrl_get() and claims the default state. There is no need to replicate this in the driver. Acked-by: Nicolas Ferre Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index ca5d7798b265..e1e02fba4fcc 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -30,7 +30,6 @@ #include #include #include -#include #include "macb.h" @@ -2071,7 +2070,6 @@ static int __init macb_probe(struct platform_device *pdev) struct phy_device *phydev; u32 config; int err = -ENXIO; - struct pinctrl *pinctrl; const char *mac; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -2080,15 +2078,6 @@ static int __init macb_probe(struct platform_device *pdev) goto err_out; } - pinctrl = devm_pinctrl_get_select_default(&pdev->dev); - if (IS_ERR(pinctrl)) { - err = PTR_ERR(pinctrl); - if (err == -EPROBE_DEFER) - goto err_out; - - dev_warn(&pdev->dev, "No pinctrl provided\n"); - } - err = -ENOMEM; dev = alloc_etherdev(sizeof(*bp)); if (!dev) From 445f7f4d62628cb2971db884084d162ecb622ec7 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 23 Sep 2014 16:31:47 +0800 Subject: [PATCH 13/32] r8152: fix the carrier off when autoresuming netif_carrier_off would be called when autoresuming, even though the cable is plugged. This causes some applications do relative actions when detecting the carrier off. Keep the status of the carrier, and let it be modified when the linking change occurs. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 52 +++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 74760e8143e3..e0394427e372 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1949,10 +1949,34 @@ static void rxdy_gated_en(struct r8152 *tp, bool enable) ocp_write_word(tp, MCU_TYPE_PLA, PLA_MISC_1, ocp_data); } +static int rtl_start_rx(struct r8152 *tp) +{ + int i, ret = 0; + + INIT_LIST_HEAD(&tp->rx_done); + for (i = 0; i < RTL8152_MAX_RX; i++) { + INIT_LIST_HEAD(&tp->rx_info[i].list); + ret = r8152_submit_rx(tp, &tp->rx_info[i], GFP_KERNEL); + if (ret) + break; + } + + return ret; +} + +static int rtl_stop_rx(struct r8152 *tp) +{ + int i; + + for (i = 0; i < RTL8152_MAX_RX; i++) + usb_kill_urb(tp->rx_info[i].urb); + + return 0; +} + static int rtl_enable(struct r8152 *tp) { u32 ocp_data; - int i, ret; r8152b_reset_packet_filter(tp); @@ -1962,14 +1986,7 @@ static int rtl_enable(struct r8152 *tp) rxdy_gated_en(tp, false); - INIT_LIST_HEAD(&tp->rx_done); - ret = 0; - for (i = 0; i < RTL8152_MAX_RX; i++) { - INIT_LIST_HEAD(&tp->rx_info[i].list); - ret |= r8152_submit_rx(tp, &tp->rx_info[i], GFP_KERNEL); - } - - return ret; + return rtl_start_rx(tp); } static int rtl8152_enable(struct r8152 *tp) @@ -2053,8 +2070,7 @@ static void rtl_disable(struct r8152 *tp) mdelay(1); } - for (i = 0; i < RTL8152_MAX_RX; i++) - usb_kill_urb(tp->rx_info[i].urb); + rtl_stop_rx(tp); rtl8152_nic_reset(tp); } @@ -3083,13 +3099,14 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); + tasklet_disable(&tp->tl); if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { + rtl_stop_rx(tp); rtl_runtime_suspend_enable(tp, true); } else { - tasklet_disable(&tp->tl); tp->rtl_ops.down(tp); - tasklet_enable(&tp->tl); } + tasklet_enable(&tp->tl); } return 0; @@ -3108,17 +3125,18 @@ static int rtl8152_resume(struct usb_interface *intf) if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_runtime_suspend_enable(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); + set_bit(WORK_ENABLE, &tp->flags); if (tp->speed & LINK_STATUS) - tp->rtl_ops.disable(tp); + rtl_start_rx(tp); } else { tp->rtl_ops.up(tp); rtl8152_set_speed(tp, AUTONEG_ENABLE, tp->mii.supports_gmii ? SPEED_1000 : SPEED_100, DUPLEX_FULL); + tp->speed = 0; + netif_carrier_off(tp->netdev); + set_bit(WORK_ENABLE, &tp->flags); } - tp->speed = 0; - netif_carrier_off(tp->netdev); - set_bit(WORK_ENABLE, &tp->flags); usb_submit_urb(tp->intr_urb, GFP_KERNEL); } From effa4bc4e75a265105f4ccb55857057e5ad231ed Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Tue, 23 Sep 2014 16:05:59 +0300 Subject: [PATCH 14/32] net/mlx4_core: Allow not to specify probe_vf in SRIOV IB mode When the HCA is configured in SRIOV IB mode (that is, at least one of the ports is IB) and the probe_vf module param isn't specified, mlx4_init_one() failed because of the following condition: if (ib_ports && (num_vfs_argc > 1 || probe_vfs_argc > 1)) { ..... } The root cause for that is a mistake in the initialization of num_vfs_argc and probe_vfs_argc. When num_vfs / probe_vf aren't given, their argument count counterpart should be 0, fix that. Fixes: dd41cc3bb90e ('net/mlx4: Adapt num_vfs/probed_vf params for single port VF') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7e2d5d57c598..871e3a5bda38 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -78,13 +78,13 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero"); #endif /* CONFIG_PCI_MSI */ static uint8_t num_vfs[3] = {0, 0, 0}; -static int num_vfs_argc = 3; +static int num_vfs_argc; module_param_array(num_vfs, byte , &num_vfs_argc, 0444); MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n" "num_vfs=port1,port2,port1+2"); static uint8_t probe_vf[3] = {0, 0, 0}; -static int probe_vfs_argc = 3; +static int probe_vfs_argc; module_param_array(probe_vf, byte, &probe_vfs_argc, 0444); MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n" "probe_vf=port1,port2,port1+2"); From 5a4ee9a9a066b1600509d968e1e9eab37c8501d8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 24 Sep 2014 11:03:00 +0200 Subject: [PATCH 15/32] ip6gre: add a rtnl link alias for ip6gretap With this alias, we don't need to load manually the module before adding an ip6gretap interface with iproute2. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 5f19dfbc4c6a..d172ec4ec9d3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1724,4 +1724,5 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); MODULE_ALIAS_RTNL_LINK("ip6gre"); +MODULE_ALIAS_RTNL_LINK("ip6gretap"); MODULE_ALIAS_NETDEV("ip6gre0"); From 2b0bb01b6edb3e13c7f71e43bf3a173a795b7b66 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Sep 2014 10:07:24 +0200 Subject: [PATCH 16/32] ip6_tunnel: Return an error when adding an existing tunnel. ip6_tnl_locate() should not return an existing tunnel if create is true. Otherwise it is possible to add the same tunnel multiple times without getting an error. So return NULL if the tunnel that should be created already exists. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index f9de5a695072..69a84b464009 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -364,8 +364,12 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net, (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) + ipv6_addr_equal(remote, &t->parms.raddr)) { + if (create) + return NULL; + return t; + } } if (!create) return NULL; From d814b847be7b47575a1cc2194760d3461e1c43c8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Sep 2014 10:07:25 +0200 Subject: [PATCH 17/32] ip6_vti: Return an error when adding an existing tunnel. vti6_locate() should not return an existing tunnel if create is true. Otherwise it is possible to add the same tunnel multiple times without getting an error. So return NULL if the tunnel that should be created already exists. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 7f52fd9fa7b0..5833a2244467 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -253,8 +253,12 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, (t = rtnl_dereference(*tp)) != NULL; tp = &t->next) { if (ipv6_addr_equal(local, &t->parms.laddr) && - ipv6_addr_equal(remote, &t->parms.raddr)) + ipv6_addr_equal(remote, &t->parms.raddr)) { + if (create) + return NULL; + return t; + } } if (!create) return NULL; From cd0a0bd9b8e157b19aa38eeac30c60f1a0d010bd Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Sep 2014 10:07:26 +0200 Subject: [PATCH 18/32] ip6_gre: Return an error when adding an existing tunnel. ip6gre_tunnel_locate() should not return an existing tunnel if create is true. Otherwise it is possible to add the same tunnel multiple times without getting an error. So return NULL if the tunnel that should be created already exists. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d172ec4ec9d3..f304471477dc 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -314,6 +314,8 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t && create) + return NULL; if (t || !create) return t; From 2c1a4311b61072afe2309d4152a7993e92caa41c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 24 Sep 2014 17:07:53 -0700 Subject: [PATCH 19/32] neigh: check error pointer instead of NULL for ipv4_neigh_lookup() Fixes: commit f187bc6efb7250afee0e2009b6106 ("ipv4: No need to set generic neighbour pointer") Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 173e7ea54c70..cbadb942c332 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -746,7 +746,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow } n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); - if (n) { + if (!IS_ERR(n)) { if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { From 73d3fe6d1c6d840763ceafa9afae0aaafa18c4b5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Sep 2014 10:34:29 -0700 Subject: [PATCH 20/32] gro: fix aggregation for skb using frag_list In commit 8a29111c7ca6 ("net: gro: allow to build full sized skb") I added a regression for linear skb that traditionally force GRO to use the frag_list fallback. Erez Shitrit found that at most two segments were aggregated and the "if (skb_gro_len(p) != pinfo->gso_size)" test was failing. This is because pinfo at this spot still points to the last skb in the chain, instead of the first one, where we find the correct gso_size information. Signed-off-by: Eric Dumazet Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") Reported-by: Erez Shitrit Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da1378a3e2c7..8d289697cc7a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3152,6 +3152,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } + /* switch back to head shinfo */ + pinfo = skb_shinfo(p); + if (pinfo->frag_list) goto merge; if (skb_gro_len(p) != pinfo->gso_size) From 17c9c8232663a47f074b7452b9b034efda868ca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Wed, 24 Sep 2014 18:38:39 +0200 Subject: [PATCH 21/32] ematch: Fix matching of inverted containers. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Negated expressions and sub-expressions need to have their flags checked for TCF_EM_INVERT and their result negated accordingly. Signed-off-by: Ignacy Gawędzki Signed-off-by: David S. Miller --- net/sched/ematch.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 3a633debb6df..ad57f4444b9c 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -526,9 +526,11 @@ int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree, match_idx = stack[--stackp]; cur_match = tcf_em_get_match(tree, match_idx); - if (tcf_em_early_end(cur_match, res)) + if (tcf_em_early_end(cur_match, res)) { + if (tcf_em_is_inverted(cur_match)) + res = !res; goto pop_stack; - else { + } else { match_idx++; goto proceed; } From c5bb86c3848174aad59ea6cf5748e210fbb8f744 Mon Sep 17 00:00:00 2001 From: "Kweh, Hock Leong" Date: Fri, 26 Sep 2014 21:42:55 +0800 Subject: [PATCH 22/32] net: stmmac: fix stmmac_pci_probe failed when CONFIG_HAVE_CLK is selected When the CONFIG_HAVE_CLK is selected for the system, the stmmac_pci_probe will fail with dmesg: [ 2.167225] stmmaceth 0000:00:14.6: enabling device (0000 -> 0002) [ 2.178267] stmmaceth 0000:00:14.6: enabling bus mastering [ 2.178436] stmmaceth 0000:00:14.6: irq 24 for MSI/MSI-X [ 2.178703] stmmaceth 0000:00:14.6: stmmac_dvr_probe: warning: cannot get CSR clock [ 2.186503] stmmac_pci_probe: main driver probe failed [ 2.194003] stmmaceth 0000:00:14.6: disabling bus mastering [ 2.196473] stmmaceth: probe of 0000:00:14.6 failed with error -2 This patch fix the issue by breaking the dependency to devm_clk_get() as the CSR clock can be obtained at priv->plat->clk_csr from pci driver. Reported-by: Tobias Klausmann Signed-off-by: Kweh, Hock Leong Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6e6ee226de04..b0c1521e08a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2786,8 +2786,15 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device, if (IS_ERR(priv->stmmac_clk)) { dev_warn(priv->device, "%s: warning: cannot get CSR clock\n", __func__); - ret = PTR_ERR(priv->stmmac_clk); - goto error_clk_get; + /* If failed to obtain stmmac_clk and specific clk_csr value + * is NOT passed from the platform, probe fail. + */ + if (!priv->plat->clk_csr) { + ret = PTR_ERR(priv->stmmac_clk); + goto error_clk_get; + } else { + priv->stmmac_clk = NULL; + } } clk_prepare_enable(priv->stmmac_clk); From dedb845ded56ded1c62f5398a94ffa8615d4592d Mon Sep 17 00:00:00 2001 From: KY Srinivasan Date: Sun, 28 Sep 2014 22:16:43 -0700 Subject: [PATCH 23/32] hyperv: Fix a bug in netvsc_start_xmit() After the packet is successfully sent, we should not touch the skb as it may have been freed. This patch is based on the work done by Long Li . In this version of the patch I have fixed issues pointed out by David. David, please queue this up for stable. Signed-off-by: K. Y. Srinivasan Tested-by: Long Li Tested-by: Sitsofe Wheeler Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a9c5eaadc426..0fcb5e7eb073 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -387,6 +387,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) int hdr_offset; u32 net_trans_info; u32 hash; + u32 skb_length = skb->len; /* We will atmost need two pages to describe the rndis @@ -562,7 +563,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) drop: if (ret == 0) { - net->stats.tx_bytes += skb->len; + net->stats.tx_bytes += skb_length; net->stats.tx_packets++; } else { kfree(packet); From 705f1c869d577c8055736dd02501f26a2507dd5b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 28 Sep 2014 00:46:06 +0200 Subject: [PATCH 24/32] ipv6: remove rt6i_genid Eric Dumazet noticed that all no-nonexthop or no-gateway routes which are already marked DST_HOST (e.g. input routes routes) will always be invalidated during sk_dst_check. Thus per-socket dst caching absolutely had no effect and early demuxing had no effect. Thus this patch removes rt6i_genid: fn_sernum already gets modified during add operations, so we only must ensure we mutate fn_sernum during ipv6 address remove operations. This is a fairly cost extensive operations, but address removal should not happen that often. Also our mtu update functions do the same and we heard no complains so far. xfrm policy changes also cause a call into fib6_flush_trees. Also plug a hole in rt6_info (no cacheline changes). I verified via tracing that this change has effect. Cc: Eric Dumazet Cc: YOSHIFUJI Hideaki Cc: Vlad Yasevich Cc: Nicolas Dichtel Cc: Martin Lau Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +---- include/net/net_namespace.h | 20 +++----------------- net/ipv6/addrconf.c | 3 ++- net/ipv6/addrconf_core.c | 7 +++++++ net/ipv6/ip6_fib.c | 20 ++++++++++++++++++++ net/ipv6/route.c | 4 ---- 6 files changed, 33 insertions(+), 26 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9bcb220bd4ad..cf485f9aa563 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -114,16 +114,13 @@ struct rt6_info { u32 rt6i_flags; struct rt6key rt6i_src; struct rt6key rt6i_prefsrc; - u32 rt6i_metric; struct inet6_dev *rt6i_idev; unsigned long _rt6i_peer; - u32 rt6i_genid; - + u32 rt6i_metric; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; - u8 rt6i_protocol; }; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 361d26077196..e0d64667a4b3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -352,26 +352,12 @@ static inline void rt_genid_bump_ipv4(struct net *net) atomic_inc(&net->ipv4.rt_genid); } -#if IS_ENABLED(CONFIG_IPV6) -static inline int rt_genid_ipv6(struct net *net) -{ - return atomic_read(&net->ipv6.rt_genid); -} - +extern void (*__fib6_flush_trees)(struct net *net); static inline void rt_genid_bump_ipv6(struct net *net) { - atomic_inc(&net->ipv6.rt_genid); + if (__fib6_flush_trees) + __fib6_flush_trees(net); } -#else -static inline int rt_genid_ipv6(struct net *net) -{ - return 0; -} - -static inline void rt_genid_bump_ipv6(struct net *net) -{ -} -#endif #if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN) static inline struct netns_ieee802154_lowpan * diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3342ee64f2e3..3e118dfddd02 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4780,10 +4780,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->dst); + + rt_genid_bump_ipv6(net); break; } atomic_inc(&net->ipv6.dev_addr_genid); - rt_genid_bump_ipv6(net); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index e6960457f625..98cc4cd570e2 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -8,6 +8,13 @@ #include #include +/* if ipv6 module registers this function is used by xfrm to force all + * sockets to relookup their nodes - this is fairly expensive, be + * careful + */ +void (*__fib6_flush_trees)(struct net *); +EXPORT_SYMBOL(__fib6_flush_trees); + #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) static inline unsigned int ipv6_addr_scope2type(unsigned int scope) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 76b7f5ee8f4c..97b9fa8de377 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1605,6 +1605,24 @@ static void fib6_prune_clones(struct net *net, struct fib6_node *fn) fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); } +static int fib6_update_sernum(struct rt6_info *rt, void *arg) +{ + __u32 sernum = *(__u32 *)arg; + + if (rt->rt6i_node && + rt->rt6i_node->fn_sernum != sernum) + rt->rt6i_node->fn_sernum = sernum; + + return 0; +} + +static void fib6_flush_trees(struct net *net) +{ + __u32 new_sernum = fib6_new_sernum(); + + fib6_clean_all(net, fib6_update_sernum, &new_sernum); +} + /* * Garbage collection */ @@ -1788,6 +1806,8 @@ int __init fib6_init(void) NULL); if (ret) goto out_unregister_subsys; + + __fib6_flush_trees = fib6_flush_trees; out: return ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f23fbd28a501..bafde82324c5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -314,7 +314,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); - rt->rt6i_genid = rt_genid_ipv6(net); INIT_LIST_HEAD(&rt->rt6i_siblings); } return rt; @@ -1098,9 +1097,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ - if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) - return NULL; - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) return NULL; From 0d36882013e61dc47aa835c4ec98b1fe776c9db8 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 30 Sep 2014 03:56:35 -0400 Subject: [PATCH 25/32] netxen: Fix BUG "sleeping function called from invalid context" o __netxen_nic_down() function might sleep while holding spinlock_t(tx_clean_lock). Acquire this lock for only releasing TX buffers instead of taking it for whole down path. Reported-by: Mike Galbraith Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 2 ++ drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 32058614151a..ae4ec7b3bfd3 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -135,6 +135,7 @@ void netxen_release_tx_buffers(struct netxen_adapter *adapter) int i, j; struct nx_host_tx_ring *tx_ring = adapter->tx_ring; + spin_lock(&adapter->tx_clean_lock); cmd_buf = tx_ring->cmd_buf_arr; for (i = 0; i < tx_ring->num_desc; i++) { buffrag = cmd_buf->frag_array; @@ -158,6 +159,7 @@ void netxen_release_tx_buffers(struct netxen_adapter *adapter) } cmd_buf++; } + spin_unlock(&adapter->tx_clean_lock); } void netxen_free_sw_resources(struct netxen_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 1159031f885b..5ec5a2b0e989 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1186,7 +1186,6 @@ __netxen_nic_down(struct netxen_adapter *adapter, struct net_device *netdev) return; smp_mb(); - spin_lock(&adapter->tx_clean_lock); netif_carrier_off(netdev); netif_tx_disable(netdev); @@ -1204,7 +1203,6 @@ __netxen_nic_down(struct netxen_adapter *adapter, struct net_device *netdev) netxen_napi_disable(adapter); netxen_release_tx_buffers(adapter); - spin_unlock(&adapter->tx_clean_lock); } /* Usage: During suspend and firmware recovery module */ From 9295f940fb677b343980cd67f3c91a4eb48d2fae Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 30 Sep 2014 03:56:36 -0400 Subject: [PATCH 26/32] netxen: Fix bug in Tx completion path. o Driver is not updating sw_consumer while processing Tx completion when interface is going down. Due to this interface down path gets stuck forever waiting for NAPI to complete. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index ae4ec7b3bfd3..5c4068353f66 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -1794,9 +1794,9 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter) break; } - if (count && netif_running(netdev)) { - tx_ring->sw_consumer = sw_consumer; + tx_ring->sw_consumer = sw_consumer; + if (count && netif_running(netdev)) { smp_mb(); if (netif_queue_stopped(netdev) && netif_carrier_ok(netdev)) From f561de33d63aefb97fb0c3653a36fb32d4e8c74a Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 30 Sep 2014 16:48:01 +0800 Subject: [PATCH 27/32] r8152: fix setting RTL8152_UNPLUG The flag of RTL8152_UNPLUG should only be set when the device is unplugged, not each time the rtl8152_disconnect() is called. Otherwise, the device wouldn't be stopped normally. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e0394427e372..f68a4e6f45be 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3576,7 +3576,11 @@ static void rtl8152_disconnect(struct usb_interface *intf) usb_set_intfdata(intf, NULL); if (tp) { - set_bit(RTL8152_UNPLUG, &tp->flags); + struct usb_device *udev = tp->udev; + + if (udev->state == USB_STATE_NOTATTACHED) + set_bit(RTL8152_UNPLUG, &tp->flags); + tasklet_kill(&tp->tl); unregister_netdev(tp->netdev); tp->rtl_ops.unload(tp); From 7d3083ee36b51e425b6abd76778a2046906b0fd3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 30 Sep 2014 19:39:36 -0400 Subject: [PATCH 28/32] tg3: Allow for recieve of full-size 8021AD frames When receiving a vlan-tagged frame that still contains a vlan header, the length of the packet will be greater then MTU+ETH_HLEN since it will account of the extra vlan header. TG3 checks this for the case for 802.1Q, but not for 802.1ad. As a result, full sized 802.1ad frames get dropped by the card. Add a check for 802.1ad protocol when receving full sized frames. Suggested-by: Prashant Sreedharan CC: Prashant Sreedharan CC: Michael Chan Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index e7d3a620d96a..ba499489969a 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6918,7 +6918,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) skb->protocol = eth_type_trans(skb, tp->dev); if (len > (tp->dev->mtu + ETH_HLEN) && - skb->protocol != htons(ETH_P_8021Q)) { + skb->protocol != htons(ETH_P_8021Q) && + skb->protocol != htons(ETH_P_8021AD)) { dev_kfree_skb_any(skb); goto drop_it_no_recycle; } From 1b0ecb28b0cc216535ce6477d39aa610c3ff68a1 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 30 Sep 2014 19:39:37 -0400 Subject: [PATCH 29/32] bnx2: Correctly receive full sized 802.1ad fragmes This driver, similar to tg3, has a check that will cause full sized 802.1ad frames to be dropped. The frame will be larger then the standard mtu due to the presense of vlan header that has not been stripped. The driver should not drop this frame and should process it just like it does for 802.1q. CC: Sony Chacko CC: Dept-HSGLinuxNICDev@qlogic.com Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 2fee73b878c2..823d01c5684c 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3236,8 +3236,9 @@ bnx2_rx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) skb->protocol = eth_type_trans(skb, bp->dev); - if ((len > (bp->dev->mtu + ETH_HLEN)) && - (ntohs(skb->protocol) != 0x8100)) { + if (len > (bp->dev->mtu + ETH_HLEN) && + skb->protocol != htons(0x8100) && + skb->protocol != htons(ETH_P_8021AD)) { dev_kfree_skb(skb); goto next_rx; From 204c8704128943bf3f8b605f4b40bdc2b6bd89dc Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 1 Oct 2014 13:25:10 +0800 Subject: [PATCH 30/32] r8152: remove clearing bp The xxx_clear_bp() is used to halt the firmware. It only necessary for updating the new firmware. Besides, depend on the version of the current firmware, it may have problem to halt the firmware directly. Finally, halt the firmware would let the firmware code useless, and the bugs which are fixed by the firmware would occur. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f68a4e6f45be..18b9e0d7d6bb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -24,7 +24,7 @@ #include /* Version Information */ -#define DRIVER_VERSION "v1.06.0 (2014/03/03)" +#define DRIVER_VERSION "v1.06.1 (2014/10/01)" #define DRIVER_AUTHOR "Realtek linux nic maintainers " #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters" #define MODULENAME "r8152" @@ -2201,28 +2201,6 @@ static void rtl_phy_reset(struct r8152 *tp) } } -static void rtl_clear_bp(struct r8152 *tp) -{ - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_BP_0, 0); - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_BP_2, 0); - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_BP_4, 0); - ocp_write_dword(tp, MCU_TYPE_PLA, PLA_BP_6, 0); - ocp_write_dword(tp, MCU_TYPE_USB, USB_BP_0, 0); - ocp_write_dword(tp, MCU_TYPE_USB, USB_BP_2, 0); - ocp_write_dword(tp, MCU_TYPE_USB, USB_BP_4, 0); - ocp_write_dword(tp, MCU_TYPE_USB, USB_BP_6, 0); - mdelay(3); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_BP_BA, 0); - ocp_write_word(tp, MCU_TYPE_USB, USB_BP_BA, 0); -} - -static void r8153_clear_bp(struct r8152 *tp) -{ - ocp_write_byte(tp, MCU_TYPE_PLA, PLA_BP_EN, 0); - ocp_write_byte(tp, MCU_TYPE_USB, USB_BP_EN, 0); - rtl_clear_bp(tp); -} - static void r8153_teredo_off(struct r8152 *tp) { u32 ocp_data; @@ -2265,8 +2243,6 @@ static void r8152b_hw_phy_cfg(struct r8152 *tp) r8152_mdio_write(tp, MII_BMCR, data); } - rtl_clear_bp(tp); - set_bit(PHY_RESET, &tp->flags); } @@ -2417,8 +2393,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) r8152_mdio_write(tp, MII_BMCR, data); } - r8153_clear_bp(tp); - if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; From 49be17235c0acd96f2ff0fe282867fe3a83f554c Mon Sep 17 00:00:00 2001 From: hayeswang Date: Wed, 1 Oct 2014 13:25:11 +0800 Subject: [PATCH 31/32] r8152: disable power cut for RTL8153 The firmware would be clear when the power cut is enabled for RTL8153. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 18b9e0d7d6bb..604ef210a4de 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3397,7 +3397,7 @@ static void rtl8153_unload(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; - r8153_power_cut_en(tp, true); + r8153_power_cut_en(tp, false); } static int rtl_ops_init(struct r8152 *tp, const struct usb_device_id *id) From 439e9575e777bdad1d9da15941e02adf34f4c392 Mon Sep 17 00:00:00 2001 From: Rasesh Mody Date: Wed, 1 Oct 2014 17:20:41 -0400 Subject: [PATCH 32/32] bna: Update Maintainer Email Update the maintainer email for BNA driver. Signed-off-by: Rasesh Mody Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 670b3dcce2de..20c818677955 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2098,7 +2098,7 @@ S: Supported F: drivers/scsi/bfa/ BROCADE BNA 10 GIGABIT ETHERNET DRIVER -M: Rasesh Mody +M: Rasesh Mody L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/brocade/bna/