2017-09-08 17:17:00 -06:00
|
|
|
/*
 * umh - the kernel usermode helper
 */
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/sched/task.h>
|
|
|
|
#include <linux/binfmts.h>
|
|
|
|
#include <linux/syscalls.h>
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
#include <linux/kmod.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/completion.h>
|
|
|
|
#include <linux/cred.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/fdtable.h>
|
2020-10-05 11:56:22 -06:00
|
|
|
#include <linux/fs_struct.h>
|
2017-09-08 17:17:00 -06:00
|
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <linux/security.h>
|
|
|
|
#include <linux/mount.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/resource.h>
|
|
|
|
#include <linux/notifier.h>
|
|
|
|
#include <linux/suspend.h>
|
|
|
|
#include <linux/rwsem.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
#include <linux/async.h>
|
|
|
|
#include <linux/uaccess.h>
|
umh: introduce fork_usermode_blob() helper
Introduce helper:
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
struct umh_info {
struct file *pipe_to_umh;
struct file *pipe_from_umh;
pid_t pid;
};
that GPLed kernel modules (signed or unsigned) can use it to execute part
of its own data as swappable user mode process.
The kernel will do:
- allocate a unique file in tmpfs
- populate that file with [data, data + len] bytes
- user-mode-helper code will do_execve that file and, before the process
starts, the kernel will create two unix pipes for bidirectional
communication between kernel module and umh
- close tmpfs file, effectively deleting it
- the fork_usermode_blob will return zero on success and populate
'struct umh_info' with two unix pipes and the pid of the user process
As the first step in the development of the bpfilter project
the fork_usermode_blob() helper is introduced to allow user mode code
to be invoked from a kernel module. The idea is that user mode code plus
normal kernel module code are built as part of the kernel build
and installed as traditional kernel module into distro specified location,
such that from a distribution point of view, there is
no difference between regular kernel modules and kernel modules + umh code.
Such modules can be signed, modprobed, rmmod, etc. The use of this new helper
by a kernel module doesn't make it any special from kernel and user space
tooling point of view.
Such approach enables kernel to delegate functionality traditionally done
by the kernel modules into the user space processes (either root or !root) and
reduces security attack surface of the new code. The buggy umh code would crash
the user process, but not the kernel. Another advantage is that umh code
of the kernel module can be debugged and tested out of user space
(e.g. opening the possibility to run clang sanitizers, fuzzers or
user space test suites on the umh code).
In case of the bpfilter project such architecture allows complex control plane
to be done in the user space while bpf based data plane stays in the kernel.
Since umh can crash, can be oom-ed by the kernel, killed by the admin,
the kernel module that uses them (like bpfilter) needs to manage life
time of umh on its own via two unix pipes and the pid of umh.
The exit code of such kernel module should kill the umh it started,
so that rmmod of the kernel module will cleanup the corresponding umh.
Just like if the kernel module does kmalloc() it should kfree() it
in the exit code.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-21 20:22:29 -06:00
|
|
|
#include <linux/shmem_fs.h>
|
|
|
|
#include <linux/pipe_fs_i.h>
|
2017-09-08 17:17:00 -06:00
|
|
|
|
|
|
|
#include <trace/events/module.h>
|
|
|
|
|
|
|
|
#define CAP_BSET (void *)1
|
|
|
|
#define CAP_PI (void *)2
|
|
|
|
|
|
|
|
static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
|
|
|
|
static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
|
|
|
|
static DEFINE_SPINLOCK(umh_sysctl_lock);
|
|
|
|
static DECLARE_RWSEM(umhelper_sem);
|
|
|
|
|
|
|
|
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
|
|
|
|
{
|
|
|
|
if (info->cleanup)
|
|
|
|
(*info->cleanup)(info);
|
|
|
|
kfree(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void umh_complete(struct subprocess_info *sub_info)
|
|
|
|
{
|
|
|
|
struct completion *comp = xchg(&sub_info->complete, NULL);
|
|
|
|
/*
|
|
|
|
* See call_usermodehelper_exec(). If xchg() returns NULL
|
|
|
|
* we own sub_info, the UMH_KILLABLE caller has gone away
|
|
|
|
* or the caller used UMH_NO_WAIT.
|
|
|
|
*/
|
|
|
|
if (comp)
|
|
|
|
complete(comp);
|
|
|
|
else
|
|
|
|
call_usermodehelper_freeinfo(sub_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the task which runs the usermode application
|
|
|
|
*/
|
|
|
|
static int call_usermodehelper_exec_async(void *data)
|
|
|
|
{
|
|
|
|
struct subprocess_info *sub_info = data;
|
|
|
|
struct cred *new;
|
|
|
|
int retval;
|
|
|
|
|
|
|
|
spin_lock_irq(¤t->sighand->siglock);
|
|
|
|
flush_signal_handlers(current, 1);
|
|
|
|
spin_unlock_irq(¤t->sighand->siglock);
|
|
|
|
|
2020-10-05 11:56:22 -06:00
|
|
|
/*
|
|
|
|
* Initial kernel threads share ther FS with init, in order to
|
|
|
|
* get the init root directory. But we've now created a new
|
|
|
|
* thread that is going to execve a user process and has its own
|
|
|
|
* 'struct fs_struct'. Reset umask to the default.
|
|
|
|
*/
|
|
|
|
current->fs->umask = 0022;
|
|
|
|
|
2017-09-08 17:17:00 -06:00
|
|
|
/*
|
|
|
|
* Our parent (unbound workqueue) runs with elevated scheduling
|
|
|
|
* priority. Avoid propagating that into the userspace child.
|
|
|
|
*/
|
|
|
|
set_user_nice(current, 0);
|
|
|
|
|
|
|
|
retval = -ENOMEM;
|
|
|
|
new = prepare_kernel_cred(current);
|
|
|
|
if (!new)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
spin_lock(&umh_sysctl_lock);
|
|
|
|
new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
|
|
|
|
new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
|
|
|
|
new->cap_inheritable);
|
|
|
|
spin_unlock(&umh_sysctl_lock);
|
|
|
|
|
|
|
|
if (sub_info->init) {
|
|
|
|
retval = sub_info->init(sub_info, new);
|
|
|
|
if (retval) {
|
|
|
|
abort_creds(new);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
commit_creds(new);
|
|
|
|
|
2018-06-07 11:23:10 -06:00
|
|
|
sub_info->pid = task_pid_nr(current);
|
umh: introduce fork_usermode_blob() helper
Introduce helper:
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
struct umh_info {
struct file *pipe_to_umh;
struct file *pipe_from_umh;
pid_t pid;
};
that GPLed kernel modules (signed or unsigned) can use it to execute part
of its own data as swappable user mode process.
The kernel will do:
- allocate a unique file in tmpfs
- populate that file with [data, data + len] bytes
- user-mode-helper code will do_execve that file and, before the process
starts, the kernel will create two unix pipes for bidirectional
communication between kernel module and umh
- close tmpfs file, effectively deleting it
- the fork_usermode_blob will return zero on success and populate
'struct umh_info' with two unix pipes and the pid of the user process
As the first step in the development of the bpfilter project
the fork_usermode_blob() helper is introduced to allow user mode code
to be invoked from a kernel module. The idea is that user mode code plus
normal kernel module code are built as part of the kernel build
and installed as traditional kernel module into distro specified location,
such that from a distribution point of view, there is
no difference between regular kernel modules and kernel modules + umh code.
Such modules can be signed, modprobed, rmmod, etc. The use of this new helper
by a kernel module doesn't make it any special from kernel and user space
tooling point of view.
Such approach enables kernel to delegate functionality traditionally done
by the kernel modules into the user space processes (either root or !root) and
reduces security attack surface of the new code. The buggy umh code would crash
the user process, but not the kernel. Another advantage is that umh code
of the kernel module can be debugged and tested out of user space
(e.g. opening the possibility to run clang sanitizers, fuzzers or
user space test suites on the umh code).
In case of the bpfilter project such architecture allows complex control plane
to be done in the user space while bpf based data plane stays in the kernel.
Since umh can crash, can be oom-ed by the kernel, killed by the admin,
the kernel module that uses them (like bpfilter) needs to manage life
time of umh on its own via two unix pipes and the pid of umh.
The exit code of such kernel module should kill the umh it started,
so that rmmod of the kernel module will cleanup the corresponding umh.
Just like if the kernel module does kmalloc() it should kfree() it
in the exit code.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-21 20:22:29 -06:00
|
|
|
if (sub_info->file)
|
|
|
|
retval = do_execve_file(sub_info->file,
|
|
|
|
sub_info->argv, sub_info->envp);
|
|
|
|
else
|
|
|
|
retval = do_execve(getname_kernel(sub_info->path),
|
|
|
|
(const char __user *const __user *)sub_info->argv,
|
|
|
|
(const char __user *const __user *)sub_info->envp);
|
2017-09-08 17:17:00 -06:00
|
|
|
out:
|
|
|
|
sub_info->retval = retval;
|
|
|
|
/*
|
|
|
|
* call_usermodehelper_exec_sync() will call umh_complete
|
|
|
|
* if UHM_WAIT_PROC.
|
|
|
|
*/
|
|
|
|
if (!(sub_info->wait & UMH_WAIT_PROC))
|
|
|
|
umh_complete(sub_info);
|
|
|
|
if (!retval)
|
|
|
|
return 0;
|
|
|
|
do_exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Handles UMH_WAIT_PROC. */
|
|
|
|
static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
|
|
|
|
{
|
|
|
|
pid_t pid;
|
|
|
|
|
2018-03-11 04:34:26 -06:00
|
|
|
/* If SIGCLD is ignored kernel_wait4 won't populate the status. */
|
2017-09-08 17:17:00 -06:00
|
|
|
kernel_sigaction(SIGCHLD, SIG_DFL);
|
|
|
|
pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
|
|
|
|
if (pid < 0) {
|
|
|
|
sub_info->retval = pid;
|
|
|
|
} else {
|
|
|
|
int ret = -ECHILD;
|
|
|
|
/*
|
|
|
|
* Normally it is bogus to call wait4() from in-kernel because
|
|
|
|
* wait4() wants to write the exit code to a userspace address.
|
|
|
|
* But call_usermodehelper_exec_sync() always runs as kernel
|
|
|
|
* thread (workqueue) and put_user() to a kernel address works
|
|
|
|
* OK for kernel threads, due to their having an mm_segment_t
|
|
|
|
* which spans the entire address space.
|
|
|
|
*
|
|
|
|
* Thus the __user pointer cast is valid here.
|
|
|
|
*/
|
2018-03-11 04:34:26 -06:00
|
|
|
kernel_wait4(pid, (int __user *)&ret, 0, NULL);
|
2017-09-08 17:17:00 -06:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If ret is 0, either call_usermodehelper_exec_async failed and
|
|
|
|
* the real error code is already in sub_info->retval or
|
|
|
|
* sub_info->retval is 0 anyway, so don't mess with it then.
|
|
|
|
*/
|
|
|
|
if (ret)
|
|
|
|
sub_info->retval = ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Restore default kernel sig handler */
|
|
|
|
kernel_sigaction(SIGCHLD, SIG_IGN);
|
|
|
|
|
|
|
|
umh_complete(sub_info);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need to create the usermodehelper kernel thread from a task that is affine
|
|
|
|
* to an optimized set of CPUs (or nohz housekeeping ones) such that they
|
|
|
|
* inherit a widest affinity irrespective of call_usermodehelper() callers with
|
|
|
|
* possibly reduced affinity (eg: per-cpu workqueues). We don't want
|
|
|
|
* usermodehelper targets to contend a busy CPU.
|
|
|
|
*
|
|
|
|
* Unbound workqueues provide such wide affinity and allow to block on
|
|
|
|
* UMH_WAIT_PROC requests without blocking pending request (up to some limit).
|
|
|
|
*
|
|
|
|
* Besides, workqueues provide the privilege level that caller might not have
|
|
|
|
* to perform the usermodehelper request.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static void call_usermodehelper_exec_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct subprocess_info *sub_info =
|
|
|
|
container_of(work, struct subprocess_info, work);
|
|
|
|
|
|
|
|
if (sub_info->wait & UMH_WAIT_PROC) {
|
|
|
|
call_usermodehelper_exec_sync(sub_info);
|
|
|
|
} else {
|
|
|
|
pid_t pid;
|
|
|
|
/*
|
|
|
|
* Use CLONE_PARENT to reparent it to kthreadd; we do not
|
|
|
|
* want to pollute current->children, and we need a parent
|
|
|
|
* that always ignores SIGCHLD to ensure auto-reaping.
|
|
|
|
*/
|
|
|
|
pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
|
|
|
|
CLONE_PARENT | SIGCHLD);
|
|
|
|
if (pid < 0) {
|
|
|
|
sub_info->retval = pid;
|
|
|
|
umh_complete(sub_info);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
|
|
|
|
* (used for preventing user land processes from being created after the user
|
|
|
|
* land has been frozen during a system-wide hibernation or suspend operation).
|
|
|
|
* Should always be manipulated under umhelper_sem acquired for write.
|
|
|
|
*/
|
|
|
|
static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED;
|
|
|
|
|
|
|
|
/* Number of helpers running */
|
|
|
|
static atomic_t running_helpers = ATOMIC_INIT(0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wait queue head used by usermodehelper_disable() to wait for all running
|
|
|
|
* helpers to finish.
|
|
|
|
*/
|
|
|
|
static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled
|
|
|
|
* to become 'false'.
|
|
|
|
*/
|
|
|
|
static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Time to wait for running_helpers to become zero before the setting of
|
|
|
|
* usermodehelper_disabled in usermodehelper_disable() fails
|
|
|
|
*/
|
|
|
|
#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
|
|
|
|
|
|
|
|
int usermodehelper_read_trylock(void)
|
|
|
|
{
|
|
|
|
DEFINE_WAIT(wait);
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
down_read(&umhelper_sem);
|
|
|
|
for (;;) {
|
|
|
|
prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
|
|
|
|
TASK_INTERRUPTIBLE);
|
|
|
|
if (!usermodehelper_disabled)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (usermodehelper_disabled == UMH_DISABLED)
|
|
|
|
ret = -EAGAIN;
|
|
|
|
|
|
|
|
up_read(&umhelper_sem);
|
|
|
|
|
|
|
|
if (ret)
|
|
|
|
break;
|
|
|
|
|
|
|
|
schedule();
|
|
|
|
try_to_freeze();
|
|
|
|
|
|
|
|
down_read(&umhelper_sem);
|
|
|
|
}
|
|
|
|
finish_wait(&usermodehelper_disabled_waitq, &wait);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(usermodehelper_read_trylock);
|
|
|
|
|
|
|
|
long usermodehelper_read_lock_wait(long timeout)
|
|
|
|
{
|
|
|
|
DEFINE_WAIT(wait);
|
|
|
|
|
|
|
|
if (timeout < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
down_read(&umhelper_sem);
|
|
|
|
for (;;) {
|
|
|
|
prepare_to_wait(&usermodehelper_disabled_waitq, &wait,
|
|
|
|
TASK_UNINTERRUPTIBLE);
|
|
|
|
if (!usermodehelper_disabled)
|
|
|
|
break;
|
|
|
|
|
|
|
|
up_read(&umhelper_sem);
|
|
|
|
|
|
|
|
timeout = schedule_timeout(timeout);
|
|
|
|
if (!timeout)
|
|
|
|
break;
|
|
|
|
|
|
|
|
down_read(&umhelper_sem);
|
|
|
|
}
|
|
|
|
finish_wait(&usermodehelper_disabled_waitq, &wait);
|
|
|
|
return timeout;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait);
|
|
|
|
|
|
|
|
void usermodehelper_read_unlock(void)
|
|
|
|
{
|
|
|
|
up_read(&umhelper_sem);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
|
|
|
|
* @depth: New value to assign to usermodehelper_disabled.
|
|
|
|
*
|
|
|
|
* Change the value of usermodehelper_disabled (under umhelper_sem locked for
|
|
|
|
* writing) and wakeup tasks waiting for it to change.
|
|
|
|
*/
|
|
|
|
void __usermodehelper_set_disable_depth(enum umh_disable_depth depth)
|
|
|
|
{
|
|
|
|
down_write(&umhelper_sem);
|
|
|
|
usermodehelper_disabled = depth;
|
|
|
|
wake_up(&usermodehelper_disabled_waitq);
|
|
|
|
up_write(&umhelper_sem);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* __usermodehelper_disable - Prevent new helpers from being started.
|
|
|
|
* @depth: New value to assign to usermodehelper_disabled.
|
|
|
|
*
|
|
|
|
* Set usermodehelper_disabled to @depth and wait for running helpers to exit.
|
|
|
|
*/
|
|
|
|
int __usermodehelper_disable(enum umh_disable_depth depth)
|
|
|
|
{
|
|
|
|
long retval;
|
|
|
|
|
|
|
|
if (!depth)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
down_write(&umhelper_sem);
|
|
|
|
usermodehelper_disabled = depth;
|
|
|
|
up_write(&umhelper_sem);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* From now on call_usermodehelper_exec() won't start any new
|
|
|
|
* helpers, so it is sufficient if running_helpers turns out to
|
|
|
|
* be zero at one point (it may be increased later, but that
|
|
|
|
* doesn't matter).
|
|
|
|
*/
|
|
|
|
retval = wait_event_timeout(running_helpers_waitq,
|
|
|
|
atomic_read(&running_helpers) == 0,
|
|
|
|
RUNNING_HELPERS_TIMEOUT);
|
|
|
|
if (retval)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
__usermodehelper_set_disable_depth(UMH_ENABLED);
|
|
|
|
return -EAGAIN;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void helper_lock(void)
|
|
|
|
{
|
|
|
|
atomic_inc(&running_helpers);
|
|
|
|
smp_mb__after_atomic();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void helper_unlock(void)
|
|
|
|
{
|
|
|
|
if (atomic_dec_and_test(&running_helpers))
|
|
|
|
wake_up(&running_helpers_waitq);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* call_usermodehelper_setup - prepare to call a usermode helper
|
|
|
|
* @path: path to usermode executable
|
|
|
|
* @argv: arg vector for process
|
|
|
|
* @envp: environment for process
|
|
|
|
* @gfp_mask: gfp mask for memory allocation
|
|
|
|
* @cleanup: a cleanup function
|
|
|
|
* @init: an init function
|
|
|
|
* @data: arbitrary context sensitive data
|
|
|
|
*
|
|
|
|
* Returns either %NULL on allocation failure, or a subprocess_info
|
|
|
|
* structure. This should be passed to call_usermodehelper_exec to
|
|
|
|
* exec the process and free the structure.
|
|
|
|
*
|
|
|
|
* The init function is used to customize the helper process prior to
|
|
|
|
* exec. A non-zero return code causes the process to error out, exit,
|
|
|
|
* and return the failure to the calling process
|
|
|
|
*
|
|
|
|
* The cleanup function is just before ethe subprocess_info is about to
|
|
|
|
* be freed. This can be used for freeing the argv and envp. The
|
|
|
|
* Function must be runnable in either a process context or the
|
|
|
|
* context in which call_usermodehelper_exec is called.
|
|
|
|
*/
|
|
|
|
struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
|
|
|
|
char **envp, gfp_t gfp_mask,
|
|
|
|
int (*init)(struct subprocess_info *info, struct cred *new),
|
|
|
|
void (*cleanup)(struct subprocess_info *info),
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
struct subprocess_info *sub_info;
|
|
|
|
sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
|
|
|
|
if (!sub_info)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
|
|
|
|
|
|
|
|
#ifdef CONFIG_STATIC_USERMODEHELPER
|
|
|
|
sub_info->path = CONFIG_STATIC_USERMODEHELPER_PATH;
|
|
|
|
#else
|
|
|
|
sub_info->path = path;
|
|
|
|
#endif
|
|
|
|
sub_info->argv = argv;
|
|
|
|
sub_info->envp = envp;
|
|
|
|
|
|
|
|
sub_info->cleanup = cleanup;
|
|
|
|
sub_info->init = init;
|
|
|
|
sub_info->data = data;
|
|
|
|
out:
|
|
|
|
return sub_info;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(call_usermodehelper_setup);
|
|
|
|
|
umh: introduce fork_usermode_blob() helper
Introduce helper:
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
struct umh_info {
struct file *pipe_to_umh;
struct file *pipe_from_umh;
pid_t pid;
};
that GPLed kernel modules (signed or unsigned) can use it to execute part
of its own data as swappable user mode process.
The kernel will do:
- allocate a unique file in tmpfs
- populate that file with [data, data + len] bytes
- user-mode-helper code will do_execve that file and, before the process
starts, the kernel will create two unix pipes for bidirectional
communication between kernel module and umh
- close tmpfs file, effectively deleting it
- the fork_usermode_blob will return zero on success and populate
'struct umh_info' with two unix pipes and the pid of the user process
As the first step in the development of the bpfilter project
the fork_usermode_blob() helper is introduced to allow user mode code
to be invoked from a kernel module. The idea is that user mode code plus
normal kernel module code are built as part of the kernel build
and installed as traditional kernel module into distro specified location,
such that from a distribution point of view, there is
no difference between regular kernel modules and kernel modules + umh code.
Such modules can be signed, modprobed, rmmod, etc. The use of this new helper
by a kernel module doesn't make it any special from kernel and user space
tooling point of view.
Such approach enables kernel to delegate functionality traditionally done
by the kernel modules into the user space processes (either root or !root) and
reduces security attack surface of the new code. The buggy umh code would crash
the user process, but not the kernel. Another advantage is that umh code
of the kernel module can be debugged and tested out of user space
(e.g. opening the possibility to run clang sanitizers, fuzzers or
user space test suites on the umh code).
In case of the bpfilter project such architecture allows complex control plane
to be done in the user space while bpf based data plane stays in the kernel.
Since umh can crash, can be oom-ed by the kernel, killed by the admin,
the kernel module that uses them (like bpfilter) needs to manage life
time of umh on its own via two unix pipes and the pid of umh.
The exit code of such kernel module should kill the umh it started,
so that rmmod of the kernel module will cleanup the corresponding umh.
Just like if the kernel module does kmalloc() it should kfree() it
in the exit code.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-05-21 20:22:29 -06:00
|
|
|
struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
|
|
|
|
int (*init)(struct subprocess_info *info, struct cred *new),
|
|
|
|
void (*cleanup)(struct subprocess_info *info), void *data)
|
|
|
|
{
|
|
|
|
struct subprocess_info *sub_info;
|
|
|
|
|
|
|
|
sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
|
|
|
|
if (!sub_info)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
|
|
|
|
sub_info->path = "none";
|
|
|
|
sub_info->file = file;
|
|
|
|
sub_info->init = init;
|
|
|
|
sub_info->cleanup = cleanup;
|
|
|
|
sub_info->data = data;
|
|
|
|
return sub_info;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
|
|
|
|
{
|
|
|
|
struct umh_info *umh_info = info->data;
|
|
|
|
struct file *from_umh[2];
|
|
|
|
struct file *to_umh[2];
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* create pipe to send data to umh */
|
|
|
|
err = create_pipe_files(to_umh, 0);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
err = replace_fd(0, to_umh[0], 0);
|
|
|
|
fput(to_umh[0]);
|
|
|
|
if (err < 0) {
|
|
|
|
fput(to_umh[1]);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* create pipe to receive data from umh */
|
|
|
|
err = create_pipe_files(from_umh, 0);
|
|
|
|
if (err) {
|
|
|
|
fput(to_umh[1]);
|
|
|
|
replace_fd(0, NULL, 0);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
err = replace_fd(1, from_umh[1], 0);
|
|
|
|
fput(from_umh[1]);
|
|
|
|
if (err < 0) {
|
|
|
|
fput(to_umh[1]);
|
|
|
|
replace_fd(0, NULL, 0);
|
|
|
|
fput(from_umh[0]);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
umh_info->pipe_to_umh = to_umh[1];
|
|
|
|
umh_info->pipe_from_umh = from_umh[0];
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void umh_save_pid(struct subprocess_info *info)
|
|
|
|
{
|
|
|
|
struct umh_info *umh_info = info->data;
|
|
|
|
|
|
|
|
umh_info->pid = info->pid;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fork_usermode_blob - fork a blob of bytes as a usermode process
|
|
|
|
* @data: a blob of bytes that can be do_execv-ed as a file
|
|
|
|
* @len: length of the blob
|
|
|
|
* @info: information about usermode process (shouldn't be NULL)
|
|
|
|
*
|
|
|
|
* Returns either negative error or zero which indicates success
|
|
|
|
* in executing a blob of bytes as a usermode process. In such
|
|
|
|
* case 'struct umh_info *info' is populated with two pipes
|
|
|
|
* and a pid of the process. The caller is responsible for health
|
|
|
|
* check of the user process, killing it via pid, and closing the
|
|
|
|
* pipes when user process is no longer needed.
|
|
|
|
*/
|
|
|
|
int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
|
|
|
|
{
|
|
|
|
struct subprocess_info *sub_info;
|
|
|
|
struct file *file;
|
|
|
|
ssize_t written;
|
|
|
|
loff_t pos = 0;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
file = shmem_kernel_file_setup("", len, 0);
|
|
|
|
if (IS_ERR(file))
|
|
|
|
return PTR_ERR(file);
|
|
|
|
|
|
|
|
written = kernel_write(file, data, len, &pos);
|
|
|
|
if (written != len) {
|
|
|
|
err = written;
|
|
|
|
if (err >= 0)
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = -ENOMEM;
|
|
|
|
sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
|
|
|
|
umh_save_pid, info);
|
|
|
|
if (!sub_info)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
|
|
|
|
out:
|
|
|
|
fput(file);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(fork_usermode_blob);
|
|
|
|
|
2017-09-08 17:17:00 -06:00
|
|
|
/**
|
|
|
|
* call_usermodehelper_exec - start a usermode application
|
|
|
|
* @sub_info: information about the subprocessa
|
|
|
|
* @wait: wait for the application to finish and return status.
|
|
|
|
* when UMH_NO_WAIT don't wait at all, but you get no useful error back
|
|
|
|
* when the program couldn't be exec'ed. This makes it safe to call
|
|
|
|
* from interrupt context.
|
|
|
|
*
|
|
|
|
* Runs a user-space application. The application is started
|
|
|
|
* asynchronously if wait is not set, and runs as a child of system workqueues.
|
|
|
|
* (ie. it runs with full root capabilities and optimized affinity).
|
2020-04-16 10:28:59 -06:00
|
|
|
*
|
|
|
|
* Note: successful return value does not guarantee the helper was called at
|
|
|
|
* all. You can't rely on sub_info->{init,cleanup} being called even for
|
|
|
|
* UMH_WAIT_* wait modes as STATIC_USERMODEHELPER_PATH="" turns all helpers
|
|
|
|
* into a successful no-op.
|
2017-09-08 17:17:00 -06:00
|
|
|
*/
|
|
|
|
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
|
|
|
|
{
|
|
|
|
DECLARE_COMPLETION_ONSTACK(done);
|
|
|
|
int retval = 0;
|
|
|
|
|
|
|
|
if (!sub_info->path) {
|
|
|
|
call_usermodehelper_freeinfo(sub_info);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
helper_lock();
|
|
|
|
if (usermodehelper_disabled) {
|
|
|
|
retval = -EBUSY;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there is no binary for us to call, then just return and get out of
|
|
|
|
* here. This allows us to set STATIC_USERMODEHELPER_PATH to "" and
|
|
|
|
* disable all call_usermodehelper() calls.
|
|
|
|
*/
|
|
|
|
if (strlen(sub_info->path) == 0)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the completion pointer only if there is a waiter.
|
|
|
|
* This makes it possible to use umh_complete to free
|
|
|
|
* the data structure in case of UMH_NO_WAIT.
|
|
|
|
*/
|
|
|
|
sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
|
|
|
|
sub_info->wait = wait;
|
|
|
|
|
|
|
|
queue_work(system_unbound_wq, &sub_info->work);
|
|
|
|
if (wait == UMH_NO_WAIT) /* task has freed sub_info */
|
|
|
|
goto unlock;
|
|
|
|
|
|
|
|
if (wait & UMH_KILLABLE) {
|
|
|
|
retval = wait_for_completion_killable(&done);
|
|
|
|
if (!retval)
|
|
|
|
goto wait_done;
|
|
|
|
|
|
|
|
/* umh_complete() will see NULL and free sub_info */
|
|
|
|
if (xchg(&sub_info->complete, NULL))
|
|
|
|
goto unlock;
|
|
|
|
/* fallthrough, umh_complete() was already called */
|
|
|
|
}
|
|
|
|
|
|
|
|
wait_for_completion(&done);
|
|
|
|
wait_done:
|
|
|
|
retval = sub_info->retval;
|
|
|
|
out:
|
|
|
|
call_usermodehelper_freeinfo(sub_info);
|
|
|
|
unlock:
|
|
|
|
helper_unlock();
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(call_usermodehelper_exec);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* call_usermodehelper() - prepare and start a usermode application
|
|
|
|
* @path: path to usermode executable
|
|
|
|
* @argv: arg vector for process
|
|
|
|
* @envp: environment for process
|
|
|
|
* @wait: wait for the application to finish and return status.
|
|
|
|
* when UMH_NO_WAIT don't wait at all, but you get no useful error back
|
|
|
|
* when the program couldn't be exec'ed. This makes it safe to call
|
|
|
|
* from interrupt context.
|
|
|
|
*
|
|
|
|
* This function is the equivalent to use call_usermodehelper_setup() and
|
|
|
|
* call_usermodehelper_exec().
|
|
|
|
*/
|
|
|
|
int call_usermodehelper(const char *path, char **argv, char **envp, int wait)
|
|
|
|
{
|
|
|
|
struct subprocess_info *info;
|
|
|
|
gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
|
|
|
|
|
|
|
|
info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
|
|
|
|
NULL, NULL, NULL);
|
|
|
|
if (info == NULL)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
return call_usermodehelper_exec(info, wait);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(call_usermodehelper);
|
|
|
|
|
|
|
|
static int proc_cap_handler(struct ctl_table *table, int write,
|
|
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
|
|
{
|
|
|
|
struct ctl_table t;
|
|
|
|
unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
|
|
|
|
kernel_cap_t new_cap;
|
|
|
|
int err, i;
|
|
|
|
|
|
|
|
if (write && (!capable(CAP_SETPCAP) ||
|
|
|
|
!capable(CAP_SYS_MODULE)))
|
|
|
|
return -EPERM;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* convert from the global kernel_cap_t to the ulong array to print to
|
|
|
|
* userspace if this is a read.
|
|
|
|
*/
|
|
|
|
spin_lock(&umh_sysctl_lock);
|
|
|
|
for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) {
|
|
|
|
if (table->data == CAP_BSET)
|
|
|
|
cap_array[i] = usermodehelper_bset.cap[i];
|
|
|
|
else if (table->data == CAP_PI)
|
|
|
|
cap_array[i] = usermodehelper_inheritable.cap[i];
|
|
|
|
else
|
|
|
|
BUG();
|
|
|
|
}
|
|
|
|
spin_unlock(&umh_sysctl_lock);
|
|
|
|
|
|
|
|
t = *table;
|
|
|
|
t.data = &cap_array;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* actually read or write and array of ulongs from userspace. Remember
|
|
|
|
* these are least significant 32 bits first
|
|
|
|
*/
|
|
|
|
err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
|
|
|
|
if (err < 0)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* convert from the sysctl array of ulongs to the kernel_cap_t
|
|
|
|
* internal representation
|
|
|
|
*/
|
|
|
|
for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
|
|
|
|
new_cap.cap[i] = cap_array[i];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Drop everything not in the new_cap (but don't add things)
|
|
|
|
*/
|
|
|
|
if (write) {
|
2017-11-17 16:27:32 -07:00
|
|
|
spin_lock(&umh_sysctl_lock);
|
2017-09-08 17:17:00 -06:00
|
|
|
if (table->data == CAP_BSET)
|
|
|
|
usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
|
|
|
|
if (table->data == CAP_PI)
|
|
|
|
usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
|
2017-11-17 16:27:32 -07:00
|
|
|
spin_unlock(&umh_sysctl_lock);
|
2017-09-08 17:17:00 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ctl_table usermodehelper_table[] = {
|
|
|
|
{
|
|
|
|
.procname = "bset",
|
|
|
|
.data = CAP_BSET,
|
|
|
|
.maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
|
|
|
|
.mode = 0600,
|
|
|
|
.proc_handler = proc_cap_handler,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.procname = "inheritable",
|
|
|
|
.data = CAP_PI,
|
|
|
|
.maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
|
|
|
|
.mode = 0600,
|
|
|
|
.proc_handler = proc_cap_handler,
|
|
|
|
},
|
|
|
|
{ }
|
|
|
|
};
|