Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, vm86: fix preemption bug
  x86, olpc: fix model detection without OFW
  x86, hpet: fix for LS21 + HPET = boot hang
  x86: CPA avoid repeated lazy mmu flush
  x86: warn if arch_flush_lazy_mmu_cpu is called in preemptible context
  x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption
  x86, pat: fix warn_on_once() while mapping 0-1MB range with /dev/mem
  x86/cpa: make sure cpa is safe to call in lazy mmu mode
  x86, ptrace, mm: fix double-free on race
This commit is contained in:
Linus Torvalds 2009-02-17 14:27:39 -08:00
commit 35010334aa
12 changed files with 116 additions and 83 deletions

View file

@ -57,7 +57,6 @@ typedef struct { pgdval_t pgd; } pgd_t;
typedef struct { pgprotval_t pgprot; } pgprot_t; typedef struct { pgprotval_t pgprot; } pgprot_t;
extern int page_is_ram(unsigned long pagenr); extern int page_is_ram(unsigned long pagenr);
extern int pagerange_is_ram(unsigned long start, unsigned long end);
extern int devmem_is_allowed(unsigned long pagenr); extern int devmem_is_allowed(unsigned long pagenr);
extern void map_devmem(unsigned long pfn, unsigned long size, extern void map_devmem(unsigned long pfn, unsigned long size,
pgprot_t vma_prot); pgprot_t vma_prot);

View file

@ -1352,14 +1352,7 @@ static inline void arch_leave_lazy_cpu_mode(void)
PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
} }
static inline void arch_flush_lazy_cpu_mode(void) void arch_flush_lazy_cpu_mode(void);
{
if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
arch_leave_lazy_cpu_mode();
arch_enter_lazy_cpu_mode();
}
}
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void) static inline void arch_enter_lazy_mmu_mode(void)
@ -1372,13 +1365,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave); PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
} }
static inline void arch_flush_lazy_mmu_mode(void) void arch_flush_lazy_mmu_mode(void);
{
if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
arch_leave_lazy_mmu_mode();
arch_enter_lazy_mmu_mode();
}
}
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
unsigned long phys, pgprot_t flags) unsigned long phys, pgprot_t flags)

View file

@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
now = hpet_readl(HPET_COUNTER); now = hpet_readl(HPET_COUNTER);
cmp = now + (unsigned long) delta; cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT; HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer)); hpet_writel(cfg, HPET_Tn_CFG(timer));

View file

@ -203,7 +203,7 @@ static void __init platform_detect(void)
static void __init platform_detect(void) static void __init platform_detect(void)
{ {
/* stopgap until OFW support is added to the kernel */ /* stopgap until OFW support is added to the kernel */
olpc_platform_info.boardrev = 0xc2; olpc_platform_info.boardrev = olpc_board(0xc2);
} }
#endif #endif

View file

@ -268,6 +268,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return __get_cpu_var(paravirt_lazy_mode); return __get_cpu_var(paravirt_lazy_mode);
} }
/*
 * If this CPU is currently in lazy MMU mode, leave it and immediately
 * re-enter it so that pending lazy updates get pushed out.  When the CPU
 * is in any other mode this is a no-op apart from the preempt
 * disable/enable pair.
 *
 * Preemption is held off for the whole check-and-flush sequence.
 */
void arch_flush_lazy_mmu_mode(void)
{
	preempt_disable();

	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
		goto out;

	/*
	 * A count of exactly 1 here is just our own preempt_disable()
	 * above, i.e. the caller was running preemptible while in lazy
	 * mode -- complain about that.
	 */
	WARN_ON(preempt_count() == 1);
	arch_leave_lazy_mmu_mode();
	arch_enter_lazy_mmu_mode();
out:
	preempt_enable();
}
/*
 * If this CPU is currently in lazy CPU mode, leave it and immediately
 * re-enter it (presumably forcing out whatever was batched -- the
 * leave/enter hooks are not visible here).  When the CPU is in any other
 * mode this is a no-op apart from the preempt disable/enable pair.
 *
 * Preemption is held off for the whole check-and-flush sequence.
 */
void arch_flush_lazy_cpu_mode(void)
{
	preempt_disable();

	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_CPU)
		goto out;

	/*
	 * A count of exactly 1 here is just our own preempt_disable()
	 * above, i.e. the caller was running preemptible while in lazy
	 * mode -- complain about that.
	 */
	WARN_ON(preempt_count() == 1);
	arch_leave_lazy_cpu_mode();
	arch_enter_lazy_cpu_mode();
out:
	preempt_enable();
}
struct pv_info pv_info = { struct pv_info pv_info = {
.name = "bare hardware", .name = "bare hardware",
.paravirt_enabled = 0, .paravirt_enabled = 0,

View file

@ -810,12 +810,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
static void ptrace_bts_detach(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child)
{ {
if (unlikely(child->bts)) { /*
ds_release_bts(child->bts); * Ptrace_detach() races with ptrace_untrace() in case
child->bts = NULL; * the child dies and is reaped by another thread.
*
ptrace_bts_free_buffer(child); * We only do the memory accounting at this point and
} * leave the buffer deallocation and the bts tracer
* release to ptrace_bts_untrace() which will be called
* later on with tasklist_lock held.
*/
release_locked_buffer(child->bts_buffer, child->bts_size);
} }
#else #else
static inline void ptrace_bts_fork(struct task_struct *tsk) {} static inline void ptrace_bts_fork(struct task_struct *tsk) {}

View file

@ -99,6 +99,12 @@ static inline void preempt_conditional_sti(struct pt_regs *regs)
local_irq_enable(); local_irq_enable();
} }
/*
 * Disable local interrupts, but only when the saved flags in @regs have
 * the IF bit set.  The preempt count is not touched here.
 */
static inline void conditional_cli(struct pt_regs *regs)
{
	/* IF clear in the saved flags: nothing to do. */
	if (!(regs->flags & X86_EFLAGS_IF))
		return;

	local_irq_disable();
}
static inline void preempt_conditional_cli(struct pt_regs *regs) static inline void preempt_conditional_cli(struct pt_regs *regs)
{ {
if (regs->flags & X86_EFLAGS_IF) if (regs->flags & X86_EFLAGS_IF)
@ -626,8 +632,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
debug_vm86: debug_vm86:
/* reenable preemption: handle_vm86_trap() might sleep */
dec_preempt_count();
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
preempt_conditional_cli(regs); conditional_cli(regs);
return; return;
#endif #endif

View file

@ -134,25 +134,6 @@ int page_is_ram(unsigned long pagenr)
return 0; return 0;
} }
/*
 * Classify the page frames covering [start, end) using page_is_ram().
 *
 * Returns 1 if every page frame examined is RAM, 0 if none is (this
 * includes a range that contains no whole page frame), and -1 as soon as
 * the range turns out to contain both kinds.
 */
int pagerange_is_ram(unsigned long start, unsigned long end)
{
	const unsigned long first_pfn = start >> PAGE_SHIFT;
	const unsigned long end_pfn = end >> PAGE_SHIFT;
	unsigned long pfn;
	int seen_ram = 0, seen_other = 0;

	for (pfn = first_pfn; pfn < end_pfn; pfn++) {
		if (page_is_ram(pfn))
			seen_ram = 1;
		else
			seen_other = 1;

		/* Mixed range: stop scanning at the first mismatch. */
		if (seen_ram && seen_other)
			return -1;
	}

	return seen_ram;
}
/* /*
* Fix up the linear direct mapping of the kernel to avoid cache attribute * Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts. * conflicts.

View file

@ -575,7 +575,6 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
address = cpa->vaddr[cpa->curpage]; address = cpa->vaddr[cpa->curpage];
else else
address = *cpa->vaddr; address = *cpa->vaddr;
repeat: repeat:
kpte = lookup_address(address, &level); kpte = lookup_address(address, &level);
if (!kpte) if (!kpte)
@ -812,6 +811,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
vm_unmap_aliases(); vm_unmap_aliases();
/*
* If we're called with lazy mmu updates enabled, the
* in-memory pte state may be stale. Flush pending updates to
* bring them up to date.
*/
arch_flush_lazy_mmu_mode();
cpa.vaddr = addr; cpa.vaddr = addr;
cpa.numpages = numpages; cpa.numpages = numpages;
cpa.mask_set = mask_set; cpa.mask_set = mask_set;
@ -854,6 +860,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
} else } else
cpa_flush_all(cache); cpa_flush_all(cache);
/*
* If we've been called with lazy mmu updates enabled, then
* make sure that everything gets flushed out before we
* return.
*/
arch_flush_lazy_mmu_mode();
out: out:
return ret; return ret;
} }

View file

@ -211,6 +211,33 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
static struct memtype *cached_entry; static struct memtype *cached_entry;
static u64 cached_start; static u64 cached_start;
/*
 * Classify the page frames covering [start, end) as RAM or non-RAM.
 *
 * Returns 1 if every page frame examined counts as RAM, 0 if none does
 * (this includes a range that contains no whole page frame), and -1 as
 * soon as the range turns out to contain both kinds.
 */
static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
{
	const unsigned long isa_end_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
	const unsigned long end_pfn = end >> PAGE_SHIFT;
	unsigned long pfn;
	int seen_ram = 0, seen_other = 0;

	for (pfn = start >> PAGE_SHIFT; pfn < end_pfn; pfn++) {
		/*
		 * For legacy reasons, the legacy ISA region is always
		 * tracked as non-RAM, whatever e820 type (RAM, reserved,
		 * ...) its pieces are listed with, and even if they are
		 * not listed at all.  This lets /dev/mem users map
		 * portions of that region.  page_is_ram() is therefore
		 * only consulted for page frames at or above the end of
		 * the ISA region.
		 */
		if (pfn >= isa_end_pfn && page_is_ram(pfn))
			seen_ram = 1;
		else
			seen_other = 1;

		/* Mixed range: stop scanning at the first mismatch. */
		if (seen_ram && seen_other)
			return -1;
	}

	return seen_ram;
}
/* /*
* For RAM pages, mark the pages as non WB memory type using * For RAM pages, mark the pages as non WB memory type using
* PageNonWB (PG_arch_1). We allow only one set_memory_uc() or * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
@ -336,20 +363,12 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (new_type) if (new_type)
*new_type = actual_type; *new_type = actual_type;
/* is_range_ram = pat_pagerange_is_ram(start, end);
* For legacy reasons, some parts of the physical address range in the if (is_range_ram == 1)
* legacy 1MB region is treated as non-RAM (even when listed as RAM in return reserve_ram_pages_type(start, end, req_type,
* the e820 tables). So we will track the memory attributes of this new_type);
* legacy 1MB region using the linear memtype_list always. else if (is_range_ram < 0)
*/ return -EINVAL;
if (end >= ISA_END_ADDRESS) {
is_range_ram = pagerange_is_ram(start, end);
if (is_range_ram == 1)
return reserve_ram_pages_type(start, end, req_type,
new_type);
else if (is_range_ram < 0)
return -EINVAL;
}
new = kmalloc(sizeof(struct memtype), GFP_KERNEL); new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new) if (!new)
@ -446,19 +465,11 @@ int free_memtype(u64 start, u64 end)
if (is_ISA_range(start, end - 1)) if (is_ISA_range(start, end - 1))
return 0; return 0;
/* is_range_ram = pat_pagerange_is_ram(start, end);
* For legacy reasons, some parts of the physical address range in the if (is_range_ram == 1)
* legacy 1MB region is treated as non-RAM (even when listed as RAM in return free_ram_pages_type(start, end);
* the e820 tables). So we will track the memory attributes of this else if (is_range_ram < 0)
* legacy 1MB region using the linear memtype_list always. return -EINVAL;
*/
if (end >= ISA_END_ADDRESS) {
is_range_ram = pagerange_is_ram(start, end);
if (is_range_ram == 1)
return free_ram_pages_type(start, end);
else if (is_range_ram < 0)
return -EINVAL;
}
spin_lock(&memtype_lock); spin_lock(&memtype_lock);
list_for_each_entry(entry, &memtype_list, nd) { list_for_each_entry(entry, &memtype_list, nd) {
@ -626,17 +637,13 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
unsigned long flags; unsigned long flags;
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
is_ram = pagerange_is_ram(paddr, paddr + size); is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram != 0) { /*
/* * reserve_pfn_range() doesn't support RAM pages.
* For mapping RAM pages, drivers need to call */
* set_memory_[uc|wc|wb] directly, for reserve and free, before if (is_ram != 0)
* setting up the PTE. return -EINVAL;
*/
WARN_ON_ONCE(1);
return 0;
}
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
if (ret) if (ret)
@ -693,7 +700,7 @@ static void free_pfn_range(u64 paddr, unsigned long size)
{ {
int is_ram; int is_ram;
is_ram = pagerange_is_ram(paddr, paddr + size); is_ram = pat_pagerange_is_ram(paddr, paddr + size);
if (is_ram == 0) if (is_ram == 0)
free_memtype(paddr, paddr + size); free_memtype(paddr, paddr + size);
} }

View file

@ -1304,5 +1304,6 @@ void vmemmap_populate_print_last(void);
extern void *alloc_locked_buffer(size_t size); extern void *alloc_locked_buffer(size_t size);
extern void free_locked_buffer(void *buffer, size_t size); extern void free_locked_buffer(void *buffer, size_t size);
extern void release_locked_buffer(void *buffer, size_t size);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */ #endif /* _LINUX_MM_H */

View file

@ -660,7 +660,7 @@ void *alloc_locked_buffer(size_t size)
return buffer; return buffer;
} }
void free_locked_buffer(void *buffer, size_t size) void release_locked_buffer(void *buffer, size_t size)
{ {
unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
@ -670,6 +670,11 @@ void free_locked_buffer(void *buffer, size_t size)
current->mm->locked_vm -= pgsz; current->mm->locked_vm -= pgsz;
up_write(&current->mm->mmap_sem); up_write(&current->mm->mmap_sem);
}
void free_locked_buffer(void *buffer, size_t size)
{
release_locked_buffer(buffer, size);
kfree(buffer); kfree(buffer);
} }