UPSTREAM: pid: add pidfd_open()
This adds the pidfd_open() syscall. It allows a caller to retrieve pollable pidfds for a process which did not get created via CLONE_PIDFD, i.e. for a process that is created via traditional fork()/clone() calls that is only referenced by a PID: int pidfd = pidfd_open(1234, 0); ret = pidfd_send_signal(pidfd, SIGSTOP, NULL, 0); With the introduction of pidfds through CLONE_PIDFD it is possible to create pidfds at process creation time. However, a lot of processes get created with traditional PID-based calls such as fork() or clone() (without CLONE_PIDFD). For these processes a caller cannot currently create a pollable pidfd. This is a problem for Android's low memory killer (LMK) and service managers such as systemd. Both are examples of tools that want to make use of pidfds to get reliable notification of process exit for non-parents (pidfd polling) and race-free signal sending (pidfd_send_signal()). They intend to switch to this API for process supervision/management as soon as possible. Having no way to get pollable pidfds from PID-only processes is one of the biggest blockers for them in adopting this API. With pidfd_open() making it possible to retrieve pidfds for PID-based processes we enable them to adopt this API. In line with Arnd's recent changes to consolidate syscall numbers across architectures, I have added the pidfd_open() syscall to all architectures at the same time. Signed-off-by: Christian Brauner <christian@brauner.io> Reviewed-by: David Howells <dhowells@redhat.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Cc: "Eric W. 
Biederman" <ebiederm@xmission.com> Cc: Kees Cook <keescook@chromium.org> Cc: Joel Fernandes (Google) <joel@joelfernandes.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Jann Horn <jannh@google.com> Cc: Andy Lutomirsky <luto@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Aleksa Sarai <cyphar@cyphar.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-api@vger.kernel.org (cherry picked from commit 32fcb426ec001cb6d5a4a195091a8486ea77e2df) Bug: 135608568 Test: test program using syscall(__NR_sys_pidfd_open,..) and poll() Change-Id: I97583cfa441a08585cbedf9a44f982c0d0ee5583 Signed-off-by: Suren Baghdasaryan <surenb@google.com>
This commit is contained in:
parent
b3481301f4
commit
abd43bb345
2 changed files with 70 additions and 0 deletions
|
@ -850,6 +850,7 @@ asmlinkage long sys_clock_adjtime(clockid_t which_clock,
|
||||||
struct timex __user *tx);
|
struct timex __user *tx);
|
||||||
asmlinkage long sys_syncfs(int fd);
|
asmlinkage long sys_syncfs(int fd);
|
||||||
asmlinkage long sys_setns(int fd, int nstype);
|
asmlinkage long sys_setns(int fd, int nstype);
|
||||||
|
asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags);
|
||||||
asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
|
asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
|
||||||
unsigned int vlen, unsigned flags);
|
unsigned int vlen, unsigned flags);
|
||||||
asmlinkage long sys_process_vm_readv(pid_t pid,
|
asmlinkage long sys_process_vm_readv(pid_t pid,
|
||||||
|
|
69
kernel/pid.c
69
kernel/pid.c
|
@ -38,6 +38,8 @@
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/proc_ns.h>
|
#include <linux/proc_ns.h>
|
||||||
#include <linux/proc_fs.h>
|
#include <linux/proc_fs.h>
|
||||||
|
#include <linux/anon_inodes.h>
|
||||||
|
#include <linux/sched/signal.h>
|
||||||
#include <linux/sched/task.h>
|
#include <linux/sched/task.h>
|
||||||
#include <linux/idr.h>
|
#include <linux/idr.h>
|
||||||
|
|
||||||
|
@ -453,6 +455,73 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
|
||||||
return idr_get_next(&ns->idr, &nr);
|
return idr_get_next(&ns->idr, &nr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* pidfd_create() - Create a new pid file descriptor.
|
||||||
|
*
|
||||||
|
* @pid: struct pid that the pidfd will reference
|
||||||
|
*
|
||||||
|
* This creates a new pid file descriptor with the O_CLOEXEC flag set.
|
||||||
|
*
|
||||||
|
* Note, that this function can only be called after the fd table has
|
||||||
|
* been unshared to avoid leaking the pidfd to the new process.
|
||||||
|
*
|
||||||
|
* Return: On success, a cloexec pidfd is returned.
|
||||||
|
* On error, a negative errno number will be returned.
|
||||||
|
*/
|
||||||
|
static int pidfd_create(struct pid *pid)
|
||||||
|
{
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
|
||||||
|
O_RDWR | O_CLOEXEC);
|
||||||
|
if (fd < 0)
|
||||||
|
put_pid(pid);
|
||||||
|
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* pidfd_open() - Open new pid file descriptor.
|
||||||
|
*
|
||||||
|
* @pid: pid for which to retrieve a pidfd
|
||||||
|
* @flags: flags to pass
|
||||||
|
*
|
||||||
|
* This creates a new pid file descriptor with the O_CLOEXEC flag set for
|
||||||
|
* the process identified by @pid. Currently, the process identified by
|
||||||
|
* @pid must be a thread-group leader. This restriction currently exists
|
||||||
|
* for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
|
||||||
|
* be used with CLONE_THREAD) and pidfd polling (only supports thread group
|
||||||
|
* leaders).
|
||||||
|
*
|
||||||
|
* Return: On success, a cloexec pidfd is returned.
|
||||||
|
* On error, a negative errno number will be returned.
|
||||||
|
*/
|
||||||
|
SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
|
||||||
|
{
|
||||||
|
int fd, ret;
|
||||||
|
struct pid *p;
|
||||||
|
|
||||||
|
if (flags)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (pid <= 0)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
p = find_get_pid(pid);
|
||||||
|
if (!p)
|
||||||
|
return -ESRCH;
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
rcu_read_lock();
|
||||||
|
if (!pid_task(p, PIDTYPE_TGID))
|
||||||
|
ret = -EINVAL;
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
fd = ret ?: pidfd_create(p);
|
||||||
|
put_pid(p);
|
||||||
|
return fd;
|
||||||
|
}
|
||||||
|
|
||||||
void __init pid_idr_init(void)
|
void __init pid_idr_init(void)
|
||||||
{
|
{
|
||||||
/* Verify no one has done anything silly: */
|
/* Verify no one has done anything silly: */
|
||||||
|
|
Loading…
Reference in a new issue