af_unix: fix a fatal race with bit fields
Using bit fields is dangerous on ppc64/sparc64, as the compiler [1] uses 64bit instructions to manipulate them. If the 64bit word includes any atomic_t or spinlock_t, we can lose critical concurrent changes. This is happening in af_unix, where unix_sk(sk)->gc_candidate/ gc_maybe_cycle/lock share the same 64bit word. This leads to fatal deadlock, as one/several cpus spin forever on a spinlock that will never be available again. A safer way would be to use a long to store flags. This way we are sure compiler/arch wont do bad things. As we own unix_gc_lock spinlock when clearing or setting bits, we can use the non atomic __set_bit()/__clear_bit(). recursion_level can share the same 64bit location with the spinlock, as it is set only with this spinlock held. [1] bug fixed in gcc-4.8.0 : http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080 Reported-by: Ambrose Feinstein <ambrose@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
c3b28ea369
commit
60bc851ae5
2 changed files with 9 additions and 8 deletions
|
@ -57,9 +57,10 @@ struct unix_sock {
|
|||
struct list_head link;
|
||||
atomic_long_t inflight;
|
||||
spinlock_t lock;
|
||||
unsigned int gc_candidate : 1;
|
||||
unsigned int gc_maybe_cycle : 1;
|
||||
unsigned char recursion_level;
|
||||
unsigned long gc_flags;
|
||||
#define UNIX_GC_CANDIDATE 0
|
||||
#define UNIX_GC_MAYBE_CYCLE 1
|
||||
struct socket_wq peer_wq;
|
||||
};
|
||||
#define unix_sk(__sk) ((struct unix_sock *)__sk)
|
||||
|
|
|
@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
|
|||
* have been added to the queues after
|
||||
* starting the garbage collection
|
||||
*/
|
||||
if (u->gc_candidate) {
|
||||
if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
|
||||
hit = true;
|
||||
func(u);
|
||||
}
|
||||
|
@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
|
|||
* of the list, so that it's checked even if it was already
|
||||
* passed over
|
||||
*/
|
||||
if (u->gc_maybe_cycle)
|
||||
if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
|
||||
list_move_tail(&u->link, &gc_candidates);
|
||||
}
|
||||
|
||||
|
@ -315,8 +315,8 @@ void unix_gc(void)
|
|||
BUG_ON(total_refs < inflight_refs);
|
||||
if (total_refs == inflight_refs) {
|
||||
list_move_tail(&u->link, &gc_candidates);
|
||||
u->gc_candidate = 1;
|
||||
u->gc_maybe_cycle = 1;
|
||||
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
|
||||
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -344,7 +344,7 @@ void unix_gc(void)
|
|||
|
||||
if (atomic_long_read(&u->inflight) > 0) {
|
||||
list_move_tail(&u->link, ¬_cycle_list);
|
||||
u->gc_maybe_cycle = 0;
|
||||
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
|
||||
scan_children(&u->sk, inc_inflight_move_tail, NULL);
|
||||
}
|
||||
}
|
||||
|
@ -356,7 +356,7 @@ void unix_gc(void)
|
|||
*/
|
||||
while (!list_empty(¬_cycle_list)) {
|
||||
u = list_entry(not_cycle_list.next, struct unix_sock, link);
|
||||
u->gc_candidate = 0;
|
||||
__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
|
||||
list_move_tail(&u->link, &gc_inflight_list);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue