kernel-fxtec-pro1x/include/linux/rculist_nulls.h
Paul E. McKenney 60a4491705 rcu: Use WRITE_ONCE() for assignments to ->pprev for hlist_nulls
[ Upstream commit 860c8802ace14c646864795e057349c9fb2d60ad ]

Eric Dumazet supplied a KCSAN report of a bug that forces use
of hlist_unhashed_lockless() from sk_unhashed():

------------------------------------------------------------------------

BUG: KCSAN: data-race in inet_unhash / inet_unhash

write to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 1:
 __hlist_nulls_del include/linux/list_nulls.h:88 [inline]
 hlist_nulls_del_init_rcu include/linux/rculist_nulls.h:36 [inline]
 __sk_nulls_del_node_init_rcu include/net/sock.h:676 [inline]
 inet_unhash+0x38f/0x4a0 net/ipv4/inet_hashtables.c:612
 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249
 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854
 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56
 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479
 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599
 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619
 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404
 expire_timers kernel/time/timer.c:1449 [inline]
 __run_timers kernel/time/timer.c:1773 [inline]
 __run_timers kernel/time/timer.c:1740 [inline]
 run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786
 __do_softirq+0x115/0x33f kernel/softirq.c:292
 invoke_softirq kernel/softirq.c:373 [inline]
 irq_exit+0xbb/0xe0 kernel/softirq.c:413
 exiting_irq arch/x86/include/asm/apic.h:536 [inline]
 smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830
 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71
 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571
 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94
 cpuidle_idle_call kernel/sched/idle.c:154 [inline]
 do_idle+0x1af/0x280 kernel/sched/idle.c:263
 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355
 start_secondary+0x208/0x260 arch/x86/kernel/smpboot.c:264
 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241

read to 0xffff8880a69a0170 of 8 bytes by interrupt on cpu 0:
 sk_unhashed include/net/sock.h:607 [inline]
 inet_unhash+0x3d/0x4a0 net/ipv4/inet_hashtables.c:592
 tcp_set_state+0xfa/0x3e0 net/ipv4/tcp.c:2249
 tcp_done+0x93/0x1e0 net/ipv4/tcp.c:3854
 tcp_write_err+0x7e/0xc0 net/ipv4/tcp_timer.c:56
 tcp_retransmit_timer+0x9b8/0x16d0 net/ipv4/tcp_timer.c:479
 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:599
 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:619
 call_timer_fn+0x5f/0x2f0 kernel/time/timer.c:1404
 expire_timers kernel/time/timer.c:1449 [inline]
 __run_timers kernel/time/timer.c:1773 [inline]
 __run_timers kernel/time/timer.c:1740 [inline]
 run_timer_softirq+0xc0c/0xcd0 kernel/time/timer.c:1786
 __do_softirq+0x115/0x33f kernel/softirq.c:292
 invoke_softirq kernel/softirq.c:373 [inline]
 irq_exit+0xbb/0xe0 kernel/softirq.c:413
 exiting_irq arch/x86/include/asm/apic.h:536 [inline]
 smp_apic_timer_interrupt+0xe6/0x280 arch/x86/kernel/apic/apic.c:1137
 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:830
 native_safe_halt+0xe/0x10 arch/x86/kernel/paravirt.c:71
 arch_cpu_idle+0x1f/0x30 arch/x86/kernel/process.c:571
 default_idle_call+0x1e/0x40 kernel/sched/idle.c:94
 cpuidle_idle_call kernel/sched/idle.c:154 [inline]
 do_idle+0x1af/0x280 kernel/sched/idle.c:263
 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:355
 rest_init+0xec/0xf6 init/main.c:452
 arch_call_rest_init+0x17/0x37
 start_kernel+0x838/0x85e init/main.c:786
 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490
 x86_64_start_kernel+0x72/0x76 arch/x86/kernel/head64.c:471
 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:241

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc6+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011

------------------------------------------------------------------------

This commit therefore replaces C-language assignments with WRITE_ONCE()
in include/linux/list_nulls.h and include/linux/rculist_nulls.h.

Reported-by: Eric Dumazet <edumazet@google.com> # For KCSAN
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
2020-02-24 08:34:46 +01:00

174 lines
6.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RCULIST_NULLS_H
#define _LINUX_RCULIST_NULLS_H
#ifdef __KERNEL__
/*
* RCU-protected list version
*/
#include <linux/list_nulls.h>
#include <linux/rcupdate.h>
/**
* hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
* @n: the element to delete from the hash list.
*
* Note: hlist_nulls_unhashed() on the node return true after this. It is
* useful for RCU based read lockfree traversal if the writer side
* must know if the list entry is still hashed or already unhashed.
*
* In particular, it means that we can not poison the forward pointers
* that may still be used for walking the hash list and we can only
* zero the pprev pointer so list_unhashed() will return true after
* this.
*
* The caller must take whatever precautions are necessary (such as
* holding appropriate locks) to avoid racing with another
* list-mutation primitive, such as hlist_nulls_add_head_rcu() or
* hlist_nulls_del_rcu(), running on this same list. However, it is
* perfectly legal to run concurrently with the _rcu list-traversal
* primitives, such as hlist_nulls_for_each_entry_rcu().
*/
static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
{
if (!hlist_nulls_unhashed(n)) {
__hlist_nulls_del(n);
WRITE_ONCE(n->pprev, NULL);
}
}
#define hlist_nulls_first_rcu(head) \
(*((struct hlist_nulls_node __rcu __force **)&(head)->first))
#define hlist_nulls_next_rcu(node) \
(*((struct hlist_nulls_node __rcu __force **)&(node)->next))
/**
* hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
* @n: the element to delete from the hash list.
*
* Note: hlist_nulls_unhashed() on entry does not return true after this,
* the entry is in an undefined state. It is useful for RCU based
* lockfree traversal.
*
* In particular, it means that we can not poison the forward
* pointers that may still be used for walking the hash list.
*
* The caller must take whatever precautions are necessary
* (such as holding appropriate locks) to avoid racing
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
* or hlist_nulls_del_rcu(), running on this same list.
* However, it is perfectly legal to run concurrently with
* the _rcu list-traversal primitives, such as
* hlist_nulls_for_each_entry().
*/
static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
{
__hlist_nulls_del(n);
WRITE_ONCE(n->pprev, LIST_POISON2);
}
/**
* hlist_nulls_add_head_rcu
* @n: the element to add to the hash list.
* @h: the list to add to.
*
* Description:
* Adds the specified element to the specified hlist_nulls,
* while permitting racing traversals.
*
* The caller must take whatever precautions are necessary
* (such as holding appropriate locks) to avoid racing
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
* or hlist_nulls_del_rcu(), running on this same list.
* However, it is perfectly legal to run concurrently with
* the _rcu list-traversal primitives, such as
* hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs. Regardless of the type of CPU, the
* list-traversal primitive must be guarded by rcu_read_lock().
*/
static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
struct hlist_nulls_head *h)
{
struct hlist_nulls_node *first = h->first;
n->next = first;
WRITE_ONCE(n->pprev, &h->first);
rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
if (!is_a_nulls(first))
WRITE_ONCE(first->pprev, &n->next);
}
/**
* hlist_nulls_add_tail_rcu
* @n: the element to add to the hash list.
* @h: the list to add to.
*
* Description:
* Adds the specified element to the specified hlist_nulls,
* while permitting racing traversals.
*
* The caller must take whatever precautions are necessary
* (such as holding appropriate locks) to avoid racing
* with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
* or hlist_nulls_del_rcu(), running on this same list.
* However, it is perfectly legal to run concurrently with
* the _rcu list-traversal primitives, such as
* hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
* problems on Alpha CPUs. Regardless of the type of CPU, the
* list-traversal primitive must be guarded by rcu_read_lock().
*/
static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
struct hlist_nulls_head *h)
{
struct hlist_nulls_node *i, *last = NULL;
/* Note: write side code, so rcu accessors are not needed. */
for (i = h->first; !is_a_nulls(i); i = i->next)
last = i;
if (last) {
n->next = last->next;
n->pprev = &last->next;
rcu_assign_pointer(hlist_next_rcu(last), n);
} else {
hlist_nulls_add_head_rcu(n, h);
}
}
/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_nulls_node within the struct.
*
* The barrier() is needed to make sure compiler doesn't cache first element [1],
* as this loop can be restarted [2]
* [1] Documentation/core-api/atomic_ops.rst around line 114
* [2] Documentation/RCU/rculist_nulls.txt around line 146
*/
#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
for (({barrier();}), \
pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
(!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
/**
* hlist_nulls_for_each_entry_safe -
* iterate over list of given type safe against removal of list entry
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
* @head: the head for your list.
* @member: the name of the hlist_nulls_node within the struct.
*/
#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \
for (({barrier();}), \
pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
(!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif