73f97b1bb5
commit 61e713bdca3678e84815f2427f7a063fc353a1fc upstream. Christof Meerwald <cmeerw@cmeerw.org> writes: > Hi, > > this is probably related to commit > 7a0cf094944e2540758b7f957eb6846d5126f535 (signal: Correct namespace > fixups of si_pid and si_uid). > > With a 5.6.5 kernel I am seeing SIGCHLD signals that don't include a > properly set si_pid field - this seems to happen for multi-threaded > child processes. > > A simple test program (based on the sample from the signalfd man page): > > #include <sys/signalfd.h> > #include <signal.h> > #include <unistd.h> > #include <spawn.h> > #include <stdlib.h> > #include <stdio.h> > > #define handle_error(msg) \ > do { perror(msg); exit(EXIT_FAILURE); } while (0) > > int main(int argc, char *argv[]) > { > sigset_t mask; > int sfd; > struct signalfd_siginfo fdsi; > ssize_t s; > > sigemptyset(&mask); > sigaddset(&mask, SIGCHLD); > > if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) > handle_error("sigprocmask"); > > pid_t chldpid; > char *chldargv[] = { "./sfdclient", NULL }; > posix_spawn(&chldpid, "./sfdclient", NULL, NULL, chldargv, NULL); > > sfd = signalfd(-1, &mask, 0); > if (sfd == -1) > handle_error("signalfd"); > > for (;;) { > s = read(sfd, &fdsi, sizeof(struct signalfd_siginfo)); > if (s != sizeof(struct signalfd_siginfo)) > handle_error("read"); > > if (fdsi.ssi_signo == SIGCHLD) { > printf("Got SIGCHLD %d %d %d %d\n", > fdsi.ssi_status, fdsi.ssi_code, > fdsi.ssi_uid, fdsi.ssi_pid); > return 0; > } else { > printf("Read unexpected signal\n"); > } > } > } > > > and a multi-threaded client to test with: > > #include <unistd.h> > #include <pthread.h> > > void *f(void *arg) > { > sleep(100); > } > > int main() > { > pthread_t t[8]; > > for (int i = 0; i != 8; ++i) > { > pthread_create(&t[i], NULL, f, NULL); > } > } > > I tried to do a bit of debugging and what seems to be happening is > that > > /* From an ancestor pid namespace? */ > if (!task_pid_nr_ns(current, task_active_pid_ns(t))) { > > fails inside task_pid_nr_ns because the check for "pid_alive" fails. > > This code seems to be called from do_notify_parent and there we > actually have "tsk != current" (I am assuming both are threads of the > current process?) I instrumented the code with a warning and received the following backtrace: > WARNING: CPU: 0 PID: 777 at kernel/pid.c:501 __task_pid_nr_ns.cold.6+0xc/0x15 > Modules linked in: > CPU: 0 PID: 777 Comm: sfdclient Not tainted 5.7.0-rc1userns+ #2924 > Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > RIP: 0010:__task_pid_nr_ns.cold.6+0xc/0x15 > Code: ff 66 90 48 83 ec 08 89 7c 24 04 48 8d 7e 08 48 8d 74 24 04 e8 9a b6 44 00 48 83 c4 08 c3 48 c7 c7 59 9f ac 82 e8 c2 c4 04 00 <0f> 0b e9 3fd > RSP: 0018:ffffc9000042fbf8 EFLAGS: 00010046 > RAX: 000000000000000c RBX: 0000000000000000 RCX: ffffc9000042faf4 > RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff81193d29 > RBP: ffffc9000042fc18 R08: 0000000000000000 R09: 0000000000000001 > R10: 000000100f938416 R11: 0000000000000309 R12: ffff8880b941c140 > R13: 0000000000000000 R14: 0000000000000000 R15: ffff8880b941c140 > FS: 0000000000000000(0000) GS:ffff8880bca00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f2e8c0a32e0 CR3: 0000000002e10000 CR4: 00000000000006f0 > Call Trace: > send_signal+0x1c8/0x310 > do_notify_parent+0x50f/0x550 > release_task.part.21+0x4fd/0x620 > do_exit+0x6f6/0xaf0 > do_group_exit+0x42/0xb0 > get_signal+0x13b/0xbb0 > do_signal+0x2b/0x670 > ? __audit_syscall_exit+0x24d/0x2b0 > ? rcu_read_lock_sched_held+0x4d/0x60 > ? kfree+0x24c/0x2b0 > do_syscall_64+0x176/0x640 > ? trace_hardirqs_off_thunk+0x1a/0x1c > entry_SYSCALL_64_after_hwframe+0x49/0xb3 The immediate problem is as Christof noticed that "pid_alive(current) == false". This happens because do_notify_parent is called from the last thread to exit in a process after that thread has been reaped. The bigger issue is that do_notify_parent can be called from any process that manages to wait on a thread of a multi-threaded process from wait_task_zombie. So any logic based upon current for do_notify_parent is just nonsense, as current can be pretty much anything. So change do_notify_parent to call __send_signal directly. Inspecting the code it appears this problem has existed since the pid namespace support started handling this case in 2.6.30. This fix only backports to 7a0cf094944e ("signal: Correct namespace fixups of si_pid and si_uid") where the problem logic was moved out of __send_signal and into send_signal. Change-Id: I21808c7826456b57e38a7637d8a955797db8d1b1 Cc: stable@vger.kernel.org Fixes: |
||
---|---|---|
.. | ||
bpf | ||
cgroup | ||
configs | ||
debug | ||
dma | ||
events | ||
gcov | ||
irq | ||
livepatch | ||
locking | ||
power | ||
printk | ||
rcu | ||
sched | ||
time | ||
trace | ||
.gitignore | ||
acct.c | ||
async.c | ||
audit.c | ||
audit.h | ||
audit_fsnotify.c | ||
audit_tree.c | ||
audit_watch.c | ||
auditfilter.c | ||
auditsc.c | ||
backtracetest.c | ||
bounds.c | ||
capability.c | ||
cfi.c | ||
compat.c | ||
configs.c | ||
context_tracking.c | ||
cpu.c | ||
cpu_pm.c | ||
crash_core.c | ||
crash_dump.c | ||
cred.c | ||
delayacct.c | ||
dma.c | ||
elfcore.c | ||
exec_domain.c | ||
exit.c | ||
extable.c | ||
fail_function.c | ||
fork.c | ||
freezer.c | ||
futex.c | ||
gen_kheaders.sh | ||
groups.c | ||
hung_task.c | ||
iomem.c | ||
irq_work.c | ||
jump_label.c | ||
kallsyms.c | ||
kcmp.c | ||
Kconfig.freezer | ||
Kconfig.hz | ||
Kconfig.locks | ||
Kconfig.preempt | ||
kcov.c | ||
kexec.c | ||
kexec_core.c | ||
kexec_file.c | ||
kexec_internal.h | ||
kheaders.c | ||
kmod.c | ||
kprobes.c | ||
ksysfs.c | ||
kthread.c | ||
latencytop.c | ||
Makefile | ||
memremap.c | ||
module-internal.h | ||
module.c | ||
module_signing.c | ||
notifier.c | ||
nsproxy.c | ||
padata.c | ||
panic.c | ||
params.c | ||
pid.c | ||
pid_namespace.c | ||
profile.c | ||
ptrace.c | ||
range.c | ||
reboot.c | ||
relay.c | ||
resource.c | ||
rseq.c | ||
scs.c | ||
seccomp.c | ||
signal.c | ||
smp.c | ||
smpboot.c | ||
smpboot.h | ||
softirq.c | ||
stacktrace.c | ||
stop_machine.c | ||
sys.c | ||
sys_ni.c | ||
sysctl.c | ||
sysctl_binary.c | ||
task_work.c | ||
taskstats.c | ||
test_kprobes.c | ||
torture.c | ||
tracepoint.c | ||
tsacct.c | ||
ucount.c | ||
uid16.c | ||
uid16.h | ||
umh.c | ||
up.c | ||
user-return-notifier.c | ||
user.c | ||
user_namespace.c | ||
utsname.c | ||
utsname_sysctl.c | ||
watchdog.c | ||
watchdog_hld.c | ||
workqueue.c | ||
workqueue_internal.h |