From e38447ee1f487eaccdbef4a61dc064f4ae94e2fa Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:03 +0300 Subject: [PATCH 1/9] x86/vdso: Unmap vdso blob on vvar mapping failure If remapping of vDSO blob failed on vvar mapping, we need to unmap previously mapped vDSO blob. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: oleg@redhat.com Cc: linux-mm@kvack.org Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-2-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/entry/vdso/vma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f840766659a8..3bab6ba3ffc5 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -238,12 +238,14 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto up_fail; + do_munmap(mm, text_start, image->size); } up_fail: - if (ret) + if (ret) { current->mm->context.vdso = NULL; + current->mm->context.vdso_image = NULL; + } up_write(&mm->mmap_sem); return ret; From 576ebfefd37bd41e965787f60684c8e4b7f79457 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:04 +0300 Subject: [PATCH 2/9] x86/vdso: Replace calculate_addr in map_vdso() with addr That will allow to specify address where to map vDSO blob. For the randomized vDSO mappings introduce map_vdso_randomized() which will simplify calls to map_vdso. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: oleg@redhat.com Cc: linux-mm@kvack.org Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-3-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/entry/vdso/vma.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 3bab6ba3ffc5..5bcb25a9e573 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -176,11 +176,16 @@ static int vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; } -static int map_vdso(const struct vdso_image *image, bool calculate_addr) +/* + * Add vdso and vvar mappings to current process. + * @image - blob to map + * @addr - request a specific address (zero to map at free addr) + */ +static int map_vdso(const struct vdso_image *image, unsigned long addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long addr, text_start; + unsigned long text_start; int ret = 0; static const struct vm_special_mapping vdso_mapping = { @@ -193,13 +198,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) .fault = vvar_fault, }; - if (calculate_addr) { - addr = vdso_addr(current->mm->start_stack, - image->size - image->sym_vvar_start); - } else { - addr = 0; - } - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -251,13 +249,20 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr) return ret; } +static int map_vdso_randomized(const struct vdso_image *image) +{ + unsigned long addr = vdso_addr(current->mm->start_stack, + image->size - image->sym_vvar_start); + return map_vdso(image, addr); +} + #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static int load_vdso32(void) { if (vdso32_enabled != 1) /* Other values all mean "disabled" */ return 0; - return map_vdso(&vdso_image_32, false); + return map_vdso(&vdso_image_32, 0); } #endif @@ -267,7 +272,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso64_enabled) return 0; - return map_vdso(&vdso_image_64, true); + return map_vdso_randomized(&vdso_image_64); } #ifdef CONFIG_COMPAT @@ -278,8 +283,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm, if (test_thread_flag(TIF_X32)) { if (!vdso64_enabled) return 0; - - return map_vdso(&vdso_image_x32, true); + return map_vdso_randomized(&vdso_image_x32); } #endif #ifdef CONFIG_IA32_EMULATION From 2eefd8789698e89c4a5d610921dc3c1b66e3bd0d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:05 +0300 Subject: [PATCH 3/9] x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_* Add API to change vdso blob type with arch_prctl. As this is usefull only by needs of CRIU, expose this interface under CONFIG_CHECKPOINT_RESTORE. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: oleg@redhat.com Cc: linux-mm@kvack.org Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-4-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/entry/vdso/vma.c | 45 ++++++++++++++++++++++++------- arch/x86/include/asm/vdso.h | 2 ++ arch/x86/include/uapi/asm/prctl.h | 6 +++++ arch/x86/kernel/process_64.c | 25 +++++++++++++++++ include/linux/mm.h | 2 ++ mm/mmap.c | 8 ++++++ 6 files changed, 78 insertions(+), 10 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 5bcb25a9e573..4459e73e234d 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -176,6 +176,16 @@ static int vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; } +static const struct vm_special_mapping vdso_mapping = { + .name = "[vdso]", + .fault = vdso_fault, + .mremap = vdso_mremap, +}; +static const struct vm_special_mapping vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + /* * Add vdso and vvar mappings to current process. * @image - blob to map @@ -188,16 +198,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) unsigned long text_start; int ret = 0; - static const struct vm_special_mapping vdso_mapping = { - .name = "[vdso]", - .fault = vdso_fault, - .mremap = vdso_mremap, - }; - static const struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, - }; - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -256,6 +256,31 @@ static int map_vdso_randomized(const struct vdso_image *image) return map_vdso(image, addr); } +int map_vdso_once(const struct vdso_image *image, unsigned long addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + down_write(&mm->mmap_sem); + /* + * Check if we have already mapped vdso blob - fail to prevent + * abusing from userspace install_speciall_mapping, which may + * not do accounting and rlimit right. + * We could search vma near context.vdso, but it's a slowpath, + * so let's explicitely check all VMAs to be completely sure. + */ + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma_is_special_mapping(vma, &vdso_mapping) || + vma_is_special_mapping(vma, &vvar_mapping)) { + up_write(&mm->mmap_sem); + return -EEXIST; + } + } + up_write(&mm->mmap_sem); + + return map_vdso(image, addr); +} + #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) static int load_vdso32(void) { diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 43dc55be524e..2444189cbe28 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -41,6 +41,8 @@ extern const struct vdso_image vdso_image_32; extern void __init init_vdso_image(const struct vdso_image *image); +extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); + #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index 3ac5032fae09..ae135de547f5 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -6,4 +6,10 @@ #define ARCH_GET_FS 0x1003 #define ARCH_GET_GS 0x1004 +#ifdef CONFIG_CHECKPOINT_RESTORE +# define ARCH_MAP_VDSO_X32 0x2001 +# define ARCH_MAP_VDSO_32 0x2002 +# define ARCH_MAP_VDSO_64 0x2003 +#endif + #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63236d8f84bf..f240a465920b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -49,6 +49,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); @@ -524,6 +525,17 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) +{ + int ret; + + ret = map_vdso_once(image, addr); + if (ret) + return ret; + + return (long)image->size; +} + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { int ret = 0; @@ -577,6 +589,19 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) break; } +#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_X86_X32 + case ARCH_MAP_VDSO_X32: + return prctl_map_vdso(&vdso_image_x32, addr); +#endif +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + case ARCH_MAP_VDSO_32: + return prctl_map_vdso(&vdso_image_32, addr); +#endif + case ARCH_MAP_VDSO_64: + return prctl_map_vdso(&vdso_image_64, addr); +#endif + default: ret = -EINVAL; break; diff --git a/include/linux/mm.h b/include/linux/mm.h index ef815b9cd426..5f14534f0c90 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2019,6 +2019,8 @@ extern struct file *get_task_exe_file(struct task_struct *task); extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages); extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages); +extern bool vma_is_special_mapping(const struct vm_area_struct *vma, + const struct vm_special_mapping *sm); extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, unsigned long flags, diff --git a/mm/mmap.c b/mm/mmap.c index ca9d91bca0d6..6373ebd358c0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3063,6 +3063,14 @@ static struct vm_area_struct *__install_special_mapping( return ERR_PTR(ret); } +bool vma_is_special_mapping(const struct vm_area_struct *vma, + const struct vm_special_mapping *sm) +{ + return vma->vm_private_data == sm && + (vma->vm_ops == &special_mapping_vmops || + vma->vm_ops == &legacy_special_mapping_vmops); +} + /* * Called with mm->mmap_sem held for writing. * Insert a new vma covering the given region, with the given flags. From 90954e7b940778478754452f1ec8b23ea9a9ad42 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:06 +0300 Subject: [PATCH 4/9] x86/coredump: Use pr_reg size, rather that TIF_IA32 flag Killed PR_REG_SIZE and PR_REG_PTR macro as we can get regset size from regset view. I wish I could also kill PRSTATUS_SIZE nicely. Suggested-by: Oleg Nesterov Signed-off-by: Dmitry Safonov Cc: 0x7f454c46@gmail.com Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-5-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/compat.h | 8 ++++---- fs/binfmt_elf.c | 23 ++++++++--------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index a18806165fe4..03d269bed941 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -275,10 +275,10 @@ struct compat_shmid64_ds { #ifdef CONFIG_X86_X32_ABI typedef struct user_regs_struct compat_elf_gregset_t; -#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216) -#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296) -#define SET_PR_FPVALID(S,V) \ - do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \ +/* Full regset -- prstatus on x32, otherwise on ia32 */ +#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296) +#define SET_PR_FPVALID(S, V, R) \ + do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \ while (0) #define COMPAT_USE_64BIT_TIME \ diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e5495f37c6ed..2472af2798c7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1624,20 +1624,12 @@ static void do_thread_regset_writeback(struct task_struct *task, regset->writeback(task, regset, 1); } -#ifndef PR_REG_SIZE -#define PR_REG_SIZE(S) sizeof(S) -#endif - #ifndef PRSTATUS_SIZE -#define PRSTATUS_SIZE(S) sizeof(S) -#endif - -#ifndef PR_REG_PTR -#define PR_REG_PTR(S) (&((S)->pr_reg)) +#define PRSTATUS_SIZE(S, R) sizeof(S) #endif #ifndef SET_PR_FPVALID -#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V)) +#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V)) #endif static int fill_thread_core_info(struct elf_thread_core_info *t, @@ -1645,6 +1637,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, long signr, size_t *total) { unsigned int i; + unsigned int regset_size = view->regsets[0].n * view->regsets[0].size; /* * NT_PRSTATUS is the one special case, because the regset data @@ -1653,12 +1646,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, * We assume that regset 0 is NT_PRSTATUS. */ fill_prstatus(&t->prstatus, t->task, signr); - (void) view->regsets[0].get(t->task, &view->regsets[0], - 0, PR_REG_SIZE(t->prstatus.pr_reg), - PR_REG_PTR(&t->prstatus), NULL); + (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size, + &t->prstatus.pr_reg, NULL); fill_note(&t->notes[0], "CORE", NT_PRSTATUS, - PRSTATUS_SIZE(t->prstatus), &t->prstatus); + PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus); *total += notesize(&t->notes[0]); do_thread_regset_writeback(t->task, &view->regsets[0]); @@ -1688,7 +1680,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, regset->core_note_type, size, data); else { - SET_PR_FPVALID(&t->prstatus, 1); + SET_PR_FPVALID(&t->prstatus, + 1, regset_size); fill_note(&t->notes[i], "CORE", NT_PRFPREG, size, data); } From cc87324b3dbb9bdf6916c7f479230db24c4aa309 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:07 +0300 Subject: [PATCH 5/9] x86/ptrace: Down with test_thread_flag(TIF_IA32) As the task isn't executing at the moment of {GET,SET}REGS, return regset that corresponds to code selector, rather than value of TIF_IA32 flag. I.e. if we ptrace i386 elf binary that has just changed it's code selector to __USER_CS, than GET_REGS will return full x86_64 register set. Note, that this will work only if application has changed it's CS. If the application does 32-bit syscall with __USER_CS, ptrace will still return 64-bit register set. Which might be still confusing for tools that expect TS_COMPACT to be exposed [1, 2]. So this this change should make PTRACE_GETREGSET more reliable and this will be another step to drop TIF_{IA32,X32} flags. [1]: https://sourceforge.net/p/strace/mailman/message/30471411/ [2]: https://lkml.org/lkml/2012/1/18/320 Signed-off-by: Dmitry Safonov Cc: 0x7f454c46@gmail.com Cc: oleg@redhat.com Cc: linux-mm@kvack.org Cc: luto@kernel.org Cc: Pedro Alves Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-6-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f79576a541ff..ad0bab8fc594 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1358,7 +1358,7 @@ void update_regset_xstate_info(unsigned int size, u64 xstate_mask) const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) + if (!user_64bit_mode(task_pt_regs(task))) #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION return &user_x86_32_view; From 6846351052e685c2d1428e80ead2d7ca3d7ed913 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 5 Sep 2016 16:33:08 +0300 Subject: [PATCH 6/9] x86/signal: Add SA_{X32,IA32}_ABI sa_flags Introduce new flags that defines which ABI to use on creating sigframe. Those flags kernel will set according to sigaction syscall ABI, which set handler for the signal being delivered. So that will drop the dependency on TIF_IA32/TIF_X32 flags on signal deliver. Those flags will be used only under CONFIG_COMPAT. Similar way ARM uses sa_flags to differ in which mode deliver signal for 26-bit applications (look at SA_THIRYTWO). Signed-off-by: Dmitry Safonov Reviewed-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: oleg@redhat.com Cc: linux-mm@kvack.org Cc: gorcunov@openvz.org Cc: xemul@virtuozzo.com Link: http://lkml.kernel.org/r/20160905133308.28234-7-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/include/asm/fpu/signal.h | 6 ++++++ arch/x86/include/asm/signal.h | 4 ++++ arch/x86/kernel/signal.c | 20 ++++++++++-------- arch/x86/kernel/signal_compat.c | 34 ++++++++++++++++++++++++++++--- kernel/signal.c | 7 +++++++ 6 files changed, 60 insertions(+), 13 deletions(-) diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 2f29f4e407c3..cb13c0564ea7 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -378,7 +378,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); - err |= copy_siginfo_to_user32(&frame->info, &ksig->info); + err |= __copy_siginfo_to_user32(&frame->info, &ksig->info, false); err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 0e970d00dfcd..20a1fbf7fe4e 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -19,6 +19,12 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, # define ia32_setup_rt_frame __setup_rt_frame #endif +#ifdef CONFIG_COMPAT +int __copy_siginfo_to_user32(compat_siginfo_t __user *to, + const siginfo_t *from, bool x32_ABI); +#endif + + extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); extern void convert_to_fxsr(struct task_struct *tsk, diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index dd1e7d6387ab..8af22be0fe61 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -23,6 +23,10 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +/* non-uapi in-kernel SA_FLAGS for those indicates ABI for a signal frame */ +#define SA_IA32_ABI 0x02000000u +#define SA_X32_ABI 0x01000000u + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 04cb3212db2d..b1a5d252d482 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -42,6 +42,7 @@ #include #include +#include #define COPY(x) do { \ get_user_ex(regs->x, &sc->x); \ @@ -547,7 +548,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, return -EFAULT; if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) + if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true)) return -EFAULT; } @@ -660,20 +661,21 @@ asmlinkage long sys_rt_sigreturn(void) return 0; } -static inline int is_ia32_compat_frame(void) +static inline int is_ia32_compat_frame(struct ksignal *ksig) { return IS_ENABLED(CONFIG_IA32_EMULATION) && - test_thread_flag(TIF_IA32); + ksig->ka.sa.sa_flags & SA_IA32_ABI; } -static inline int is_ia32_frame(void) +static inline int is_ia32_frame(struct ksignal *ksig) { - return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(); + return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig); } -static inline int is_x32_frame(void) +static inline int is_x32_frame(struct ksignal *ksig) { - return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); + return IS_ENABLED(CONFIG_X86_X32_ABI) && + ksig->ka.sa.sa_flags & SA_X32_ABI; } static int @@ -684,12 +686,12 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ - if (is_ia32_frame()) { + if (is_ia32_frame(ksig)) { if (ksig->ka.sa.sa_flags & SA_SIGINFO) return ia32_setup_rt_frame(usig, ksig, cset, regs); else return ia32_setup_frame(usig, ksig, cset, regs); - } else if (is_x32_frame()) { + } else if (is_x32_frame(ksig)) { return x32_setup_rt_frame(ksig, cset, regs); } else { return __setup_rt_frame(ksig->sig, ksig, set, regs); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index b44564bf86a8..40df33753bae 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,5 +1,6 @@ #include #include +#include /* * The compat_siginfo_t structure and handing code is very easy @@ -92,10 +93,31 @@ static inline void signal_compat_build_tests(void) /* any new si_fields should be added here */ } -int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) +{ + /* Don't leak in-kernel non-uapi flags to user-space */ + if (oact) + oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); + + if (!act) + return; + + /* Don't let flags to be set from userspace */ + act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); + + if (user_64bit_mode(current_pt_regs())) + return; + + if (in_ia32_syscall()) + act->sa.sa_flags |= SA_IA32_ABI; + if (in_x32_syscall()) + act->sa.sa_flags |= SA_X32_ABI; +} + +int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, + bool x32_ABI) { int err = 0; - bool ia32 = test_thread_flag(TIF_IA32); signal_compat_build_tests(); @@ -146,7 +168,7 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) put_user_ex(from->si_arch, &to->si_arch); break; case __SI_CHLD >> 16: - if (ia32) { + if (!x32_ABI) { put_user_ex(from->si_utime, &to->si_utime); put_user_ex(from->si_stime, &to->si_stime); } else { @@ -180,6 +202,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) return err; } +/* from syscall's path, where we know the ABI */ +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +{ + return __copy_siginfo_to_user32(to, from, in_x32_syscall()); +} + int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) { int err = 0; diff --git a/kernel/signal.c b/kernel/signal.c index af21afc00d08..75761acc77cf 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3044,6 +3044,11 @@ void kernel_sigaction(int sig, __sighandler_t action) } EXPORT_SYMBOL(kernel_sigaction); +void __weak sigaction_compat_abi(struct k_sigaction *act, + struct k_sigaction *oact) +{ +} + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *p = current, *t; @@ -3059,6 +3064,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) if (oact) *oact = *k; + sigaction_compat_abi(act, oact); + if (act) { sigdelsetmask(&act->sa.sa_mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); From 91b7bd39e62e190700aa1398e451a6dfa6d24465 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Sep 2016 08:42:51 +0200 Subject: [PATCH 7/9] x86/vdso: Only define prctl_map_vdso() if CONFIG_CHECKPOINT_RESTORE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... otherwise the compiler complains: arch/x86/kernel/process_64.c:528:13: warning: ‘prctl_map_vdso’ defined but not used [-Wunused-function] Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Safonov Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: gorcunov@openvz.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: oleg@redhat.com Cc: xemul@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f240a465920b..b26a0092a01d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -525,6 +525,7 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +#ifdef CONFIG_CHECKPOINT_RESTORE static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) { int ret; @@ -535,6 +536,7 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) return (long)image->size; } +#endif long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { @@ -590,14 +592,14 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) } #ifdef CONFIG_CHECKPOINT_RESTORE -#ifdef CONFIG_X86_X32 +# ifdef CONFIG_X86_X32 case ARCH_MAP_VDSO_X32: return prctl_map_vdso(&vdso_image_x32, addr); -#endif -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION +# endif +# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case ARCH_MAP_VDSO_32: return prctl_map_vdso(&vdso_image_32, addr); -#endif +# endif case ARCH_MAP_VDSO_64: return prctl_map_vdso(&vdso_image_64, addr); #endif From 3947f49302e4d1576ee58addd8d20b477faef5ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Sep 2016 08:56:21 +0200 Subject: [PATCH 8/9] x86/vdso: Only define map_vdso_randomized() if CONFIG_X86_64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... otherwise the compiler complains: arch/x86/entry/vdso/vma.c:252:12: warning: ‘map_vdso_randomized’ defined but not used [-Wunused-function] But the #ifdeffery here is getting pretty ugly, so move around vdso_addr() as well to cluster the dependencies a bit more. It's still not particulary pretty though ... Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Safonov Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: gorcunov@openvz.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: oleg@redhat.com Cc: xemul@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vma.c | 98 +++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 4459e73e234d..23c881caabd1 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -37,54 +37,6 @@ void __init init_vdso_image(const struct vdso_image *image) struct linux_binprm; -/* - * Put the vdso above the (randomized) stack with another randomized - * offset. This way there is no hole in the middle of address space. - * To save memory make sure it is still in the same PTE as the stack - * top. This doesn't give that many random bits. - * - * Note that this algorithm is imperfect: the distribution of the vdso - * start address within a PMD is biased toward the end. - * - * Only used for the 64-bit and x32 vdsos. - */ -static unsigned long vdso_addr(unsigned long start, unsigned len) -{ -#ifdef CONFIG_X86_32 - return 0; -#else - unsigned long addr, end; - unsigned offset; - - /* - * Round up the start address. It can start out unaligned as a result - * of stack start randomization. - */ - start = PAGE_ALIGN(start); - - /* Round the lowest possible end address up to a PMD boundary. */ - end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE_MAX) - end = TASK_SIZE_MAX; - end -= len; - - if (end > start) { - offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); - addr = start + (offset << PAGE_SHIFT); - } else { - addr = start; - } - - /* - * Forcibly align the final address in case we have a hardware - * issue that requires alignment for performance reasons. - */ - addr = align_vdso_addr(addr); - - return addr; -#endif -} - static int vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -249,12 +201,58 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) return ret; } +#ifdef CONFIG_X86_64 +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ +static unsigned long vdso_addr(unsigned long start, unsigned len) +{ + unsigned long addr, end; + unsigned offset; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; + if (end >= TASK_SIZE_MAX) + end = TASK_SIZE_MAX; + end -= len; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } + + /* + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. + */ + addr = align_vdso_addr(addr); + + return addr; +} + static int map_vdso_randomized(const struct vdso_image *image) { - unsigned long addr = vdso_addr(current->mm->start_stack, - image->size - image->sym_vvar_start); + unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start); + return map_vdso(image, addr); } +#endif int map_vdso_once(const struct vdso_image *image, unsigned long addr) { From 6e68b08728ce3365c713f8663c6b05a79e2bbca1 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 17 Sep 2016 00:51:53 +0000 Subject: [PATCH 9/9] x86/vdso: Use CONFIG_X86_X32_ABI to enable vdso prctl The prctl code which references vdso_image_x32 is built when CONFIG_X86_X32 is set. This results in the following build failure: LD init/built-in.o arch/x86/built-in.o: In function `do_arch_prctl': (.text+0x27466): undefined reference to `vdso_image_x32' vdso_image_x32 depends on CONFIG_X86_X32_ABI. So we need to make the prctl depend on that as well. [ tglx: Massaged changelog ] Fixes: 2eefd8789698 ("x86/arch_prctl/vdso: Add ARCH_MAP_VDSO_*") Signed-off-by: Vinson Lee Reviewed-by: Dmitry Safonov Cc: Andy Lutomirski Cc: Brian Gerst Cc: Borislav Petkov Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/1474073513-6656-1-git-send-email-vlee@freedesktop.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b26a0092a01d..b4603b71a659 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -592,7 +592,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) } #ifdef CONFIG_CHECKPOINT_RESTORE -# ifdef CONFIG_X86_X32 +# ifdef CONFIG_X86_X32_ABI case ARCH_MAP_VDSO_X32: return prctl_map_vdso(&vdso_image_x32, addr); # endif