18b6e0414e
The user_ns is moved from nsproxy to user_struct, so that a struct cred by itself is sufficient to determine access (which it otherwise would not be). Corresponding ecryptfs fixes (by David Howells) are here as well. Fix refcounting. The following rules now apply: 1. The task pins the user struct. 2. The user struct pins its user namespace. 3. The user namespace pins the struct user which created it. User namespaces are cloned during copy_creds(). Unsharing a new user_ns is no longer possible. (We could re-add that, but it'll cause code duplication and doesn't seem useful if PAM doesn't need to clone user namespaces). When a user namespace is created, its first user (uid 0) gets empty keyrings and a clean group_info. This incorporates a previous patch by David Howells. Here is his original patch description: >I suggest adding the attached incremental patch. It makes the following >changes: > > (1) Provides a current_user_ns() macro to wrap accesses to current's user > namespace. > > (2) Fixes eCryptFS. > > (3) Renames create_new_userns() to create_user_ns() to be more consistent > with the other associated functions and because the 'new' in the name is > superfluous. > > (4) Moves the argument and permission checks made for CLONE_NEWUSER to the > beginning of do_fork() so that they're done prior to making any attempts > at allocation. > > (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds > to fill in rather than have it return the new root user. I don't imagine > the new root user being used for anything other than filling in a cred > struct. > > This also permits me to get rid of a get_uid() and a free_uid(), as the > reference the creds were holding on the old user_struct can just be > transferred to the new namespace's creator pointer. > > (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under > preparation rather than doing it in copy_creds(). > >David >Signed-off-by: David Howells <dhowells@redhat.com> Changelog: Oct 20: integrate dhowells comments 1. leave thread_keyring alone 2. use current_user_ns() in set_user() Signed-off-by: Serge Hallyn <serue@us.ibm.com>
192 lines
5.2 KiB
C
192 lines
5.2 KiB
C
#ifndef _LINUX__INIT_TASK_H
|
|
#define _LINUX__INIT_TASK_H
|
|
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/irqflags.h>
|
|
#include <linux/utsname.h>
|
|
#include <linux/lockdep.h>
|
|
#include <linux/ipc.h>
|
|
#include <linux/pid_namespace.h>
|
|
#include <linux/user_namespace.h>
|
|
#include <linux/securebits.h>
|
|
#include <net/net_namespace.h>
|
|
|
|
extern struct files_struct init_files;
|
|
|
|
#define INIT_KIOCTX(name, which_mm) \
|
|
{ \
|
|
.users = ATOMIC_INIT(1), \
|
|
.dead = 0, \
|
|
.mm = &which_mm, \
|
|
.user_id = 0, \
|
|
.next = NULL, \
|
|
.wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \
|
|
.ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \
|
|
.reqs_active = 0U, \
|
|
.max_reqs = ~0U, \
|
|
}
|
|
|
|
#define INIT_MM(name) \
|
|
{ \
|
|
.mm_rb = RB_ROOT, \
|
|
.pgd = swapper_pg_dir, \
|
|
.mm_users = ATOMIC_INIT(2), \
|
|
.mm_count = ATOMIC_INIT(1), \
|
|
.mmap_sem = __RWSEM_INITIALIZER(name.mmap_sem), \
|
|
.page_table_lock = __SPIN_LOCK_UNLOCKED(name.page_table_lock), \
|
|
.mmlist = LIST_HEAD_INIT(name.mmlist), \
|
|
.cpu_vm_mask = CPU_MASK_ALL, \
|
|
}
|
|
|
|
#define INIT_SIGNALS(sig) { \
|
|
.count = ATOMIC_INIT(1), \
|
|
.wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\
|
|
.shared_pending = { \
|
|
.list = LIST_HEAD_INIT(sig.shared_pending.list), \
|
|
.signal = {{0}}}, \
|
|
.posix_timers = LIST_HEAD_INIT(sig.posix_timers), \
|
|
.cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \
|
|
.rlim = INIT_RLIMITS, \
|
|
}
|
|
|
|
extern struct nsproxy init_nsproxy;
|
|
#define INIT_NSPROXY(nsproxy) { \
|
|
.pid_ns = &init_pid_ns, \
|
|
.count = ATOMIC_INIT(1), \
|
|
.uts_ns = &init_uts_ns, \
|
|
.mnt_ns = NULL, \
|
|
INIT_NET_NS(net_ns) \
|
|
INIT_IPC_NS(ipc_ns) \
|
|
}
|
|
|
|
#define INIT_SIGHAND(sighand) { \
|
|
.count = ATOMIC_INIT(1), \
|
|
.action = { { { .sa_handler = NULL, } }, }, \
|
|
.siglock = __SPIN_LOCK_UNLOCKED(sighand.siglock), \
|
|
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \
|
|
}
|
|
|
|
extern struct group_info init_groups;
|
|
|
|
#define INIT_STRUCT_PID { \
|
|
.count = ATOMIC_INIT(1), \
|
|
.tasks = { \
|
|
{ .first = &init_task.pids[PIDTYPE_PID].node }, \
|
|
{ .first = &init_task.pids[PIDTYPE_PGID].node }, \
|
|
{ .first = &init_task.pids[PIDTYPE_SID].node }, \
|
|
}, \
|
|
.rcu = RCU_HEAD_INIT, \
|
|
.level = 0, \
|
|
.numbers = { { \
|
|
.nr = 0, \
|
|
.ns = &init_pid_ns, \
|
|
.pid_chain = { .next = NULL, .pprev = NULL }, \
|
|
}, } \
|
|
}
|
|
|
|
#define INIT_PID_LINK(type) \
|
|
{ \
|
|
.node = { \
|
|
.next = NULL, \
|
|
.pprev = &init_struct_pid.tasks[type].first, \
|
|
}, \
|
|
.pid = &init_struct_pid, \
|
|
}
|
|
|
|
#ifdef CONFIG_AUDITSYSCALL
|
|
#define INIT_IDS \
|
|
.loginuid = -1, \
|
|
.sessionid = -1,
|
|
#else
|
|
#define INIT_IDS
|
|
#endif
|
|
|
|
#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
|
|
/*
|
|
* Because of the reduced scope of CAP_SETPCAP when filesystem
|
|
* capabilities are in effect, it is safe to allow CAP_SETPCAP to
|
|
* be available in the default configuration.
|
|
*/
|
|
# define CAP_INIT_BSET CAP_FULL_SET
|
|
#else
|
|
# define CAP_INIT_BSET CAP_INIT_EFF_SET
|
|
#endif
|
|
|
|
extern struct cred init_cred;
|
|
|
|
/*
|
|
* INIT_TASK is used to set up the first task table, touch at
|
|
* your own risk!. Base=0, limit=0x1fffff (=2MB)
|
|
*/
|
|
#define INIT_TASK(tsk) \
|
|
{ \
|
|
.state = 0, \
|
|
.stack = &init_thread_info, \
|
|
.usage = ATOMIC_INIT(2), \
|
|
.flags = PF_KTHREAD, \
|
|
.lock_depth = -1, \
|
|
.prio = MAX_PRIO-20, \
|
|
.static_prio = MAX_PRIO-20, \
|
|
.normal_prio = MAX_PRIO-20, \
|
|
.policy = SCHED_NORMAL, \
|
|
.cpus_allowed = CPU_MASK_ALL, \
|
|
.mm = NULL, \
|
|
.active_mm = &init_mm, \
|
|
.se = { \
|
|
.group_node = LIST_HEAD_INIT(tsk.se.group_node), \
|
|
}, \
|
|
.rt = { \
|
|
.run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
|
|
.time_slice = HZ, \
|
|
.nr_cpus_allowed = NR_CPUS, \
|
|
}, \
|
|
.tasks = LIST_HEAD_INIT(tsk.tasks), \
|
|
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
|
|
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
|
|
.real_parent = &tsk, \
|
|
.parent = &tsk, \
|
|
.children = LIST_HEAD_INIT(tsk.children), \
|
|
.sibling = LIST_HEAD_INIT(tsk.sibling), \
|
|
.group_leader = &tsk, \
|
|
.real_cred = &init_cred, \
|
|
.cred = &init_cred, \
|
|
.cred_exec_mutex = \
|
|
__MUTEX_INITIALIZER(tsk.cred_exec_mutex), \
|
|
.comm = "swapper", \
|
|
.thread = INIT_THREAD, \
|
|
.fs = &init_fs, \
|
|
.files = &init_files, \
|
|
.signal = &init_signals, \
|
|
.sighand = &init_sighand, \
|
|
.nsproxy = &init_nsproxy, \
|
|
.pending = { \
|
|
.list = LIST_HEAD_INIT(tsk.pending.list), \
|
|
.signal = {{0}}}, \
|
|
.blocked = {{0}}, \
|
|
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
|
|
.journal_info = NULL, \
|
|
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
|
|
.fs_excl = ATOMIC_INIT(0), \
|
|
.pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
|
|
.timer_slack_ns = 50000, /* 50 usec default slack */ \
|
|
.pids = { \
|
|
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
|
|
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
|
|
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
|
|
}, \
|
|
.dirties = INIT_PROP_LOCAL_SINGLE(dirties), \
|
|
INIT_IDS \
|
|
INIT_TRACE_IRQFLAGS \
|
|
INIT_LOCKDEP \
|
|
}
|
|
|
|
|
|
#define INIT_CPU_TIMERS(cpu_timers) \
|
|
{ \
|
|
LIST_HEAD_INIT(cpu_timers[0]), \
|
|
LIST_HEAD_INIT(cpu_timers[1]), \
|
|
LIST_HEAD_INIT(cpu_timers[2]), \
|
|
}
|
|
|
|
|
|
#endif
|