2005-04-16 16:20:36 -06:00
|
|
|
/*
 * Copyright (C) 1995 Linus Torvalds
 */
|
|
|
|
|
|
|
|
/*
 * This file handles the architecture-dependent parts of initialization.
 */
|
|
|
|
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/stddef.h>
|
|
|
|
#include <linux/unistd.h>
|
|
|
|
#include <linux/ptrace.h>
|
|
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/user.h>
|
2006-07-10 05:44:13 -06:00
|
|
|
#include <linux/screen_info.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/ioport.h>
|
|
|
|
#include <linux/delay.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/initrd.h>
|
|
|
|
#include <linux/highmem.h>
|
|
|
|
#include <linux/bootmem.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <asm/processor.h>
|
|
|
|
#include <linux/console.h>
|
|
|
|
#include <linux/seq_file.h>
|
2006-01-09 21:51:47 -07:00
|
|
|
#include <linux/crash_dump.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/root_dev.h>
|
|
|
|
#include <linux/pci.h>
|
2008-01-30 05:31:19 -07:00
|
|
|
#include <linux/efi.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <linux/acpi.h>
|
|
|
|
#include <linux/kallsyms.h>
|
|
|
|
#include <linux/edd.h>
|
2008-04-09 20:50:41 -06:00
|
|
|
#include <linux/iscsi_ibft.h>
|
2005-06-23 01:08:07 -06:00
|
|
|
#include <linux/mmzone.h>
|
2005-06-25 15:58:04 -06:00
|
|
|
#include <linux/kexec.h>
|
2005-12-02 11:43:20 -07:00
|
|
|
#include <linux/cpufreq.h>
|
2006-01-11 14:43:33 -07:00
|
|
|
#include <linux/dmi.h>
|
2006-01-11 14:44:42 -07:00
|
|
|
#include <linux/dma-mapping.h>
|
2006-03-25 08:29:46 -07:00
|
|
|
#include <linux/ctype.h>
|
2008-01-30 05:31:11 -07:00
|
|
|
#include <linux/uaccess.h>
|
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide whether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues in ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
parameter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can even be enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
gdb to talk to kgdb using remote memory reads and writes over FireWire.
A version of the gdb stub for FireWire is able to read all global data
from a system which is running a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 05:34:11 -07:00
|
|
|
#include <linux/init_ohci1394_dma.h>
|
2005-06-23 01:08:07 -06:00
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <asm/mtrr.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
|
|
#include <asm/system.h>
|
2008-01-30 05:32:39 -07:00
|
|
|
#include <asm/vsyscall.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <asm/io.h>
|
|
|
|
#include <asm/smp.h>
|
|
|
|
#include <asm/msr.h>
|
|
|
|
#include <asm/desc.h>
|
|
|
|
#include <video/edid.h>
|
|
|
|
#include <asm/e820.h>
|
|
|
|
#include <asm/dma.h>
|
x86: disable the GART early, 64-bit
For K8 system: 4G RAM with memory hole remapping enabled, or more than
4G RAM installed.
when try to use kexec second kernel, and the first doesn't include
gart_shutdown. the second kernel could have different aper position than
the first kernel. and second kernel could use that hole as RAM that is
still used by GART set by the first kernel. esp. when try to kexec
2.6.24 with sparse mem enable from previous kernel (from RHEL 5 or SLES
10). the new kernel will use aper by GART (set by first kernel) for
vmemmap. and after new kernel setting one new GART. the position will be
real RAM. the _mapcount set is lost.
Bad page state in process 'swapper'
page:ffffe2000e600020 flags:0x0000000000000000 mapping:0000000000000000 mapcount:1 count:0
Trying to fix it up, but a reboot is needed
Backtrace:
Pid: 0, comm: swapper Not tainted 2.6.24-rc7-smp-gcdf71a10-dirty #13
Call Trace:
[<ffffffff8026401f>] bad_page+0x63/0x8d
[<ffffffff80264169>] __free_pages_ok+0x7c/0x2a5
[<ffffffff80ba75d1>] free_all_bootmem_core+0xd0/0x198
[<ffffffff80ba3a42>] numa_free_all_bootmem+0x3b/0x76
[<ffffffff80ba3461>] mem_init+0x3b/0x152
[<ffffffff80b959d3>] start_kernel+0x236/0x2c2
[<ffffffff80b9511a>] _sinittext+0x11a/0x121
and
[ffffe2000e600000-ffffe2000e7fffff] PMD ->ffff81001c200000 on node 0
phys addr is : 0x1c200000
RHEL 5.1 kernel -53 said:
PCI-DMA: aperture base @ 1c000000 size 65536 KB
new kernel said:
Mapping aperture over 65536 KB of RAM @ 3c000000
So could try to disable that GART if possible.
According to Ingo
> hm, i'm wondering, instead of modifying the GART, why dont we simply
> _detect_ whatever GART settings we have inherited, and propagate that
> into our e820 maps? I.e. if there's inconsistency, then punch that out
> from the memory maps and just dont use that memory.
>
> that way it would not matter whether the GART settings came from a [old
> or crashing] Linux kernel that has not called gart_iommu_shutdown(), or
> whether it's a BIOS that has set up an aperture hole inconsistent with
> the memory map it passed. (or the memory map we _think_ i tried to pass
> us)
>
> it would also be more robust to only read and do a memory map quirk
> based on that, than actively trying to change the GART so early in the
> bootup. Later on we have to re-enable the GART _anyway_ and have to
> punch a hole for it.
>
> and as a bonus, we would have shored up our defenses against crappy
> BIOSes as well.
add e820 modification for gart inconsistent setting.
gart_fix_e820=off could be used to disable e820 fix.
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 05:33:09 -07:00
|
|
|
#include <asm/gart.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
#include <asm/mpspec.h>
|
|
|
|
#include <asm/mmu_context.h>
|
|
|
|
#include <asm/proto.h>
|
|
|
|
#include <asm/setup.h>
|
|
|
|
#include <asm/numa.h>
|
2005-11-05 09:25:53 -07:00
|
|
|
#include <asm/sections.h>
|
2006-03-25 08:30:22 -07:00
|
|
|
#include <asm/dmi.h>
|
2007-10-21 17:42:01 -06:00
|
|
|
#include <asm/cacheflush.h>
|
2008-01-30 05:30:17 -07:00
|
|
|
#include <asm/mce.h>
|
2008-01-30 05:31:09 -07:00
|
|
|
#include <asm/ds.h>
|
2008-01-30 05:33:11 -07:00
|
|
|
#include <asm/topology.h>
|
2008-04-10 15:28:10 -06:00
|
|
|
#include <asm/trampoline.h>
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-03-25 15:10:46 -06:00
|
|
|
#include <mach_apic.h>
|
2008-01-30 05:31:11 -07:00
|
|
|
#ifdef CONFIG_PARAVIRT
|
|
|
|
#include <asm/paravirt.h>
|
|
|
|
#else
|
|
|
|
#define ARCH_SETUP
|
|
|
|
#endif
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* Machine setup..
|
|
|
|
*/
|
|
|
|
|
2005-09-06 16:17:45 -06:00
|
|
|
struct cpuinfo_x86 boot_cpu_data __read_mostly;
|
2006-06-26 05:59:44 -06:00
|
|
|
EXPORT_SYMBOL(boot_cpu_data);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-01-30 05:33:20 -07:00
|
|
|
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
unsigned long mmu_cr4_features;
|
|
|
|
|
|
|
|
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
|
|
|
|
int bootloader_type;
|
|
|
|
|
|
|
|
unsigned long saved_video_mode;
|
|
|
|
|
2007-05-02 11:27:12 -06:00
|
|
|
int force_mwait __cpuinitdata;
|
|
|
|
|
2008-01-30 05:30:39 -07:00
|
|
|
/*
|
2006-03-25 08:30:22 -07:00
|
|
|
* Early DMI memory
|
|
|
|
*/
|
|
|
|
int dmi_alloc_index;
|
|
|
|
char dmi_alloc_data[DMI_MAX_DATA];
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* Setup options
|
|
|
|
*/
|
|
|
|
struct screen_info screen_info;
|
2006-06-26 05:59:44 -06:00
|
|
|
EXPORT_SYMBOL(screen_info);
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
 * Variable-length descriptor: an unsigned short length field immediately
 * followed by the table bytes.
 * NOTE(review): the struct is not referenced in this part of the file;
 * presumably @length counts the bytes stored in @table - confirm.
 */
struct sys_desc_table_struct {
	unsigned short length;
	/* C99 flexible array member; same layout as the old GNU table[0] form. */
	unsigned char table[];
};
|
|
|
|
|
|
|
|
struct edid_info edid_info;
|
2006-06-26 01:26:37 -06:00
|
|
|
EXPORT_SYMBOL_GPL(edid_info);
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
extern int root_mountflags;
|
|
|
|
|
2007-02-12 01:54:25 -07:00
|
|
|
char __initdata command_line[COMMAND_LINE_SIZE];
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
struct resource standard_io_resources[] = {
|
|
|
|
{ .name = "dma1", .start = 0x00, .end = 0x1f,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "pic1", .start = 0x20, .end = 0x21,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "timer0", .start = 0x40, .end = 0x43,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "timer1", .start = 0x50, .end = 0x53,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "keyboard", .start = 0x60, .end = 0x6f,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "dma page reg", .start = 0x80, .end = 0x8f,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "pic2", .start = 0xa0, .end = 0xa1,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "dma2", .start = 0xc0, .end = 0xdf,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
|
|
{ .name = "fpu", .start = 0xf0, .end = 0xff,
|
|
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO }
|
|
|
|
};
|
|
|
|
|
|
|
|
#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
|
|
|
|
|
2008-01-30 05:30:32 -07:00
|
|
|
static struct resource data_resource = {
|
2005-04-16 16:20:36 -06:00
|
|
|
.name = "Kernel data",
|
|
|
|
.start = 0,
|
|
|
|
.end = 0,
|
|
|
|
.flags = IORESOURCE_RAM,
|
|
|
|
};
|
2008-01-30 05:30:32 -07:00
|
|
|
static struct resource code_resource = {
|
2005-04-16 16:20:36 -06:00
|
|
|
.name = "Kernel code",
|
|
|
|
.start = 0,
|
|
|
|
.end = 0,
|
|
|
|
.flags = IORESOURCE_RAM,
|
|
|
|
};
|
2008-01-30 05:30:32 -07:00
|
|
|
static struct resource bss_resource = {
|
2007-10-21 17:42:01 -06:00
|
|
|
.name = "Kernel bss",
|
|
|
|
.start = 0,
|
|
|
|
.end = 0,
|
|
|
|
.flags = IORESOURCE_RAM,
|
|
|
|
};
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-01-30 05:30:16 -07:00
|
|
|
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
|
|
|
|
|
2006-09-26 02:52:32 -06:00
|
|
|
#ifdef CONFIG_PROC_VMCORE
|
|
|
|
/* elfcorehdr= specifies the location of elf core header
|
|
|
|
* stored by the crashed kernel. This option will be passed
|
|
|
|
* by kexec loader to the capture kernel.
|
|
|
|
*/
|
|
|
|
static int __init setup_elfcorehdr(char *arg)
|
2006-03-25 08:29:46 -07:00
|
|
|
{
|
2006-09-26 02:52:32 -06:00
|
|
|
char *end;
|
|
|
|
if (!arg)
|
|
|
|
return -EINVAL;
|
|
|
|
elfcorehdr_addr = memparse(arg, &end);
|
|
|
|
return end > arg ? 0 : -EINVAL;
|
2006-03-25 08:29:46 -07:00
|
|
|
}
|
2006-09-26 02:52:32 -06:00
|
|
|
early_param("elfcorehdr", setup_elfcorehdr);
|
2006-02-25 20:18:46 -07:00
|
|
|
#endif
|
|
|
|
|
2005-06-23 01:08:06 -06:00
|
|
|
#ifndef CONFIG_NUMA
|
2005-06-23 01:08:07 -06:00
|
|
|
static void __init
|
|
|
|
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
|
2005-04-16 16:20:36 -06:00
|
|
|
{
|
2005-06-23 01:08:07 -06:00
|
|
|
unsigned long bootmap_size, bootmap;
|
|
|
|
|
|
|
|
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
|
2008-02-01 09:49:41 -07:00
|
|
|
bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
|
|
|
|
PAGE_SIZE);
|
2005-06-23 01:08:07 -06:00
|
|
|
if (bootmap == -1L)
|
2008-01-30 05:30:39 -07:00
|
|
|
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
2005-06-23 01:08:07 -06:00
|
|
|
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
|
2006-09-27 02:49:52 -06:00
|
|
|
e820_register_active_regions(0, start_pfn, end_pfn);
|
|
|
|
free_bootmem_with_active_regions(0, end_pfn);
|
2008-02-07 01:15:17 -07:00
|
|
|
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
|
2008-01-30 05:30:39 -07:00
|
|
|
}
|
2005-04-16 16:20:36 -06:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
struct edd edd;
#ifdef CONFIG_EDD_MODULE
EXPORT_SYMBOL(edd);
#endif

/**
 * copy_edd() - Copy the BIOS EDD information
 *              from boot_params into a safe place.
 *
 * Fills the global edd with the entry counts, the EDD info buffer and the
 * MBR signature buffer taken from boot_params.
 */
static inline void copy_edd(void)
{
	edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
	edd.edd_info_nr = boot_params.eddbuf_entries;

	memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
	memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
	       sizeof(edd.mbr_signature));
}
#else
/* Neither CONFIG_EDD nor CONFIG_EDD_MODULE is set: nothing to copy. */
static inline void copy_edd(void)
{
}
#endif
|
|
|
|
|
2007-10-19 00:40:59 -06:00
|
|
|
#ifdef CONFIG_KEXEC
|
|
|
|
static void __init reserve_crashkernel(void)
|
|
|
|
{
|
2008-02-07 01:15:19 -07:00
|
|
|
unsigned long long total_mem;
|
2007-10-19 00:40:59 -06:00
|
|
|
unsigned long long crash_size, crash_base;
|
|
|
|
int ret;
|
|
|
|
|
2008-02-07 01:15:19 -07:00
|
|
|
total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
|
2007-10-19 00:40:59 -06:00
|
|
|
|
2008-02-07 01:15:19 -07:00
|
|
|
ret = parse_crashkernel(boot_command_line, total_mem,
|
2007-10-19 00:40:59 -06:00
|
|
|
&crash_size, &crash_base);
|
|
|
|
if (ret == 0 && crash_size) {
|
2008-02-07 01:15:19 -07:00
|
|
|
if (crash_base <= 0) {
|
2007-10-19 00:40:59 -06:00
|
|
|
printk(KERN_INFO "crashkernel reservation failed - "
|
|
|
|
"you have to specify a base address\n");
|
2008-02-07 01:15:19 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reserve_bootmem(crash_base, crash_size,
|
|
|
|
BOOTMEM_EXCLUSIVE) < 0) {
|
|
|
|
printk(KERN_INFO "crashkernel reservation failed - "
|
|
|
|
"memory is in use\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
|
|
|
|
"for crashkernel (System RAM: %ldMB)\n",
|
|
|
|
(unsigned long)(crash_size >> 20),
|
|
|
|
(unsigned long)(crash_base >> 20),
|
|
|
|
(unsigned long)(total_mem >> 20));
|
|
|
|
crashk_res.start = crash_base;
|
|
|
|
crashk_res.end = crash_base + crash_size - 1;
|
2008-02-22 18:07:16 -07:00
|
|
|
insert_resource(&iomem_resource, &crashk_res);
|
2007-10-19 00:40:59 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
static inline void __init reserve_crashkernel(void)
|
|
|
|
{}
|
|
|
|
#endif
|
|
|
|
|
2008-01-30 05:31:11 -07:00
|
|
|
/* Overridden in paravirt.c if CONFIG_PARAVIRT */
|
2008-01-30 05:32:49 -07:00
|
|
|
void __attribute__((weak)) __init memory_setup(void)
|
2008-01-30 05:31:11 -07:00
|
|
|
{
|
|
|
|
machine_specific_memory_setup();
|
|
|
|
}
|
|
|
|
|
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide whether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues in ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
parameter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can even be enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
gdb to talk to kgdb using remote memory reads and writes over FireWire.
A version of the gdb stub for FireWire is able to read all global data
from a system which is running a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 05:34:11 -07:00
|
|
|
/*
 * setup_arch - architecture-specific boot-time initializations
 *
 * Note: On x86_64, fixmaps are ready for use even before this is called.
 */
|
2005-04-16 16:20:36 -06:00
|
|
|
void __init setup_arch(char **cmdline_p)
|
|
|
|
{
|
2008-01-30 05:30:39 -07:00
|
|
|
unsigned i;
|
|
|
|
|
2007-02-12 01:54:25 -07:00
|
|
|
printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
2006-09-26 02:52:32 -06:00
|
|
|
|
2007-10-15 18:13:22 -06:00
|
|
|
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
|
|
|
|
screen_info = boot_params.screen_info;
|
|
|
|
edid_info = boot_params.edid_info;
|
|
|
|
saved_video_mode = boot_params.hdr.vid_mode;
|
|
|
|
bootloader_type = boot_params.hdr.type_of_loader;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
#ifdef CONFIG_BLK_DEV_RAM
|
2007-10-15 18:13:22 -06:00
|
|
|
rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
|
|
|
rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
|
|
|
|
rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
|
2005-04-16 16:20:36 -06:00
|
|
|
#endif
|
2008-01-30 05:31:19 -07:00
|
|
|
#ifdef CONFIG_EFI
|
|
|
|
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
|
|
|
"EL64", 4))
|
|
|
|
efi_enabled = 1;
|
|
|
|
#endif
|
2008-01-30 05:31:11 -07:00
|
|
|
|
|
|
|
ARCH_SETUP
|
|
|
|
|
|
|
|
memory_setup();
|
2005-04-16 16:20:36 -06:00
|
|
|
copy_edd();
|
|
|
|
|
2007-10-15 18:13:22 -06:00
|
|
|
if (!boot_params.hdr.root_flags)
|
2005-04-16 16:20:36 -06:00
|
|
|
root_mountflags &= ~MS_RDONLY;
|
|
|
|
init_mm.start_code = (unsigned long) &_text;
|
|
|
|
init_mm.end_code = (unsigned long) &_etext;
|
|
|
|
init_mm.end_data = (unsigned long) &_edata;
|
|
|
|
init_mm.brk = (unsigned long) &_end;
|
|
|
|
|
Revert "[PATCH] x86: __pa and __pa_symbol address space separation"
This was broken. It adds complexity, for no good reason. Rather than
separate __pa() and __pa_symbol(), we should deprecate __pa_symbol(),
and preferably __pa() too - and just use "virt_to_phys()" instead, which
is more readable and has nicer semantics.
However, right now, just undo the separation, and make __pa_symbol() be
the exact same as __pa(). That fixes the bugs this patch introduced,
and we can do the fairly obvious cleanups later.
Do the new __phys_addr() function (which is now the actual workhorse for
the unified __pa()/__pa_symbol()) as a real external function, that way
all the potential issues with compile/link-time optimizations of
constant symbol addresses go away, and we can also, if we choose to, add
more sanity-checking of the argument.
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Vivek Goyal <vgoyal@in.ibm.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-07 09:44:24 -06:00
|
|
|
code_resource.start = virt_to_phys(&_text);
|
|
|
|
code_resource.end = virt_to_phys(&_etext)-1;
|
|
|
|
data_resource.start = virt_to_phys(&_etext);
|
|
|
|
data_resource.end = virt_to_phys(&_edata)-1;
|
2007-10-21 17:42:01 -06:00
|
|
|
bss_resource.start = virt_to_phys(&__bss_start);
|
|
|
|
bss_resource.end = virt_to_phys(&__bss_stop)-1;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
early_identify_cpu(&boot_cpu_data);
|
|
|
|
|
2007-02-12 01:54:25 -07:00
|
|
|
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
2006-09-26 02:52:32 -06:00
|
|
|
*cmdline_p = command_line;
|
|
|
|
|
|
|
|
parse_early_param();
|
|
|
|
|
x86: early boot debugging via FireWire (ohci1394_dma=early)
This patch adds a new configuration option, which adds support for a new
early_param which gets checked in arch/x86/kernel/setup_{32,64}.c:setup_arch()
to decide whether OHCI-1394 FireWire controllers should be initialized and
enabled for physical DMA access to allow remote debugging of early problems
like issues in ACPI or other subsystems which are executed very early.
If the config option is not enabled, no code is changed, and if the boot
parameter is not given, no new code is executed, and independent of that,
all new code is freed after boot, so the config option can even be enabled
in standard, non-debug kernels.
With specialized tools, it is then possible to get debugging information
from machines which have no serial ports (notebooks) such as the printk
buffer contents, or any data which can be referenced from global pointers,
if it is stored below the 4GB limit and even memory dumps of the physical
RAM region below the 4GB limit can be taken without any cooperation from the
CPU of the host, so the machine can be crashed early, it does not matter.
In the extreme, even kernel debuggers can be accessed in this way. I wrote
a small kgdb module and an accompanying gdb stub for FireWire which allows
gdb to talk to kgdb using remote memory reads and writes over FireWire.
A version of the gdb stub for FireWire is able to read all global data
from a system which is running a normal kernel without any kernel debugger,
without any interruption or support of the system's CPU. That way, e.g. the
task struct and so on can be read and even manipulated when the physical DMA
access is granted.
A HOWTO is included in this patch, in Documentation/debugging-via-ohci1394.txt
and I've put a copy online at
ftp://ftp.suse.de/private/bk/firewire/docs/debugging-via-ohci1394.txt
It also has links to all the tools which are available to make use of it
another copy of it is online at:
ftp://ftp.suse.de/private/bk/firewire/kernel/ohci1394_dma_early-v2.diff
Signed-Off-By: Bernhard Kaindl <bk@suse.de>
Tested-By: Thomas Renninger <trenn@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 05:34:11 -07:00
|
|
|
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
|
|
if (init_ohci1394_dma_early)
|
|
|
|
init_ohci1394_dma_on_all_controllers();
|
|
|
|
#endif
|
|
|
|
|
2006-09-26 02:52:32 -06:00
|
|
|
finish_e820_parsing();
|
2006-09-26 02:52:32 -06:00
|
|
|
|
2008-02-22 18:07:16 -07:00
|
|
|
/* after parse_early_param, so could debug it */
|
|
|
|
insert_resource(&iomem_resource, &code_resource);
|
|
|
|
insert_resource(&iomem_resource, &data_resource);
|
|
|
|
insert_resource(&iomem_resource, &bss_resource);
|
|
|
|
|
x86: disable the GART early, 64-bit
For K8 system: 4G RAM with memory hole remapping enabled, or more than
4G RAM installed.
when try to use kexec second kernel, and the first doesn't include
gart_shutdown. the second kernel could have different aper position than
the first kernel. and second kernel could use that hole as RAM that is
still used by GART set by the first kernel. esp. when try to kexec
2.6.24 with sparse mem enable from previous kernel (from RHEL 5 or SLES
10). the new kernel will use aper by GART (set by first kernel) for
vmemmap. and after new kernel setting one new GART. the position will be
real RAM. the _mapcount set is lost.
Bad page state in process 'swapper'
page:ffffe2000e600020 flags:0x0000000000000000 mapping:0000000000000000 mapcount:1 count:0
Trying to fix it up, but a reboot is needed
Backtrace:
Pid: 0, comm: swapper Not tainted 2.6.24-rc7-smp-gcdf71a10-dirty #13
Call Trace:
[<ffffffff8026401f>] bad_page+0x63/0x8d
[<ffffffff80264169>] __free_pages_ok+0x7c/0x2a5
[<ffffffff80ba75d1>] free_all_bootmem_core+0xd0/0x198
[<ffffffff80ba3a42>] numa_free_all_bootmem+0x3b/0x76
[<ffffffff80ba3461>] mem_init+0x3b/0x152
[<ffffffff80b959d3>] start_kernel+0x236/0x2c2
[<ffffffff80b9511a>] _sinittext+0x11a/0x121
and
[ffffe2000e600000-ffffe2000e7fffff] PMD ->ffff81001c200000 on node 0
phys addr is : 0x1c200000
RHEL 5.1 kernel -53 said:
PCI-DMA: aperture base @ 1c000000 size 65536 KB
new kernel said:
Mapping aperture over 65536 KB of RAM @ 3c000000
So could try to disable that GART if possible.
According to Ingo
> hm, i'm wondering, instead of modifying the GART, why dont we simply
> _detect_ whatever GART settings we have inherited, and propagate that
> into our e820 maps? I.e. if there's inconsistency, then punch that out
> from the memory maps and just dont use that memory.
>
> that way it would not matter whether the GART settings came from a [old
> or crashing] Linux kernel that has not called gart_iommu_shutdown(), or
> whether it's a BIOS that has set up an aperture hole inconsistent with
> the memory map it passed. (or the memory map we _think_ i tried to pass
> us)
>
> it would also be more robust to only read and do a memory map quirk
> based on that, than actively trying to change the GART so early in the
> bootup. Later on we have to re-enable the GART _anyway_ and have to
> punch a hole for it.
>
> and as a bonus, we would have shored up our defenses against crappy
> BIOSes as well.
add e820 modification for gart inconsistent setting.
gart_fix_e820=off could be used to disable e820 fix.
Signed-off-by: Yinghai Lu <yinghai.lu@sun.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 05:33:09 -07:00
|
|
|
early_gart_iommu_check();
|
|
|
|
|
2006-09-27 02:49:52 -06:00
|
|
|
e820_register_active_regions(0, 0, -1UL);
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* partially used pages are not usable - thus
|
|
|
|
* we are rounding upwards:
|
|
|
|
*/
|
|
|
|
end_pfn = e820_end_of_ram();
|
x86, 32-bit: trim memory not covered by wb mtrrs
On some machines, buggy BIOSes don't properly setup WB MTRRs to cover all
available RAM, meaning the last few megs (or even gigs) of memory will be
marked uncached. Since Linux tends to allocate from high memory addresses
first, this causes the machine to be unusably slow as soon as the kernel
starts really using memory (i.e. right around init time).
This patch works around the problem by scanning the MTRRs at boot and
figuring out whether the current end_pfn value (setup by early e820 code)
goes beyond the highest WB MTRR range, and if so, trimming it to match. A
fairly obnoxious KERN_WARNING is printed too, letting the user know that
not all of their memory is available due to a likely BIOS bug.
Something similar could be done on i386 if needed, but the boot ordering
would be slightly different, since the MTRR code on i386 depends on the
boot_cpu_data structure being setup.
This patch fixes a bug in the last patch that caused the code to run on
non-Intel machines (AMD machines apparently don't need it and it's untested
on other non-Intel machines, so best keep it off).
Further enhancements and fixes from:
Yinghai Lu <Yinghai.Lu@Sun.COM>
Andi Kleen <ak@suse.de>
Signed-off-by: Jesse Barnes <jesse.barnes@intel.com>
Tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-01-30 05:33:18 -07:00
|
|
|
/* update e820 for memory not covered by WB MTRRs */
|
|
|
|
mtrr_bp_init();
|
|
|
|
if (mtrr_trim_uncached_memory(end_pfn)) {
|
|
|
|
e820_register_active_regions(0, 0, -1UL);
|
|
|
|
end_pfn = e820_end_of_ram();
|
|
|
|
}
|
|
|
|
|
2006-09-26 02:52:31 -06:00
|
|
|
num_physpages = end_pfn;
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
check_efer();
|
|
|
|
|
2008-03-11 20:53:28 -06:00
|
|
|
max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
|
2008-01-30 05:31:19 -07:00
|
|
|
if (efi_enabled)
|
|
|
|
efi_init();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-02-11 12:16:03 -07:00
|
|
|
vsmp_init();
|
|
|
|
|
2006-03-25 08:30:22 -07:00
|
|
|
dmi_scan_machine();
|
|
|
|
|
2008-01-30 05:30:05 -07:00
|
|
|
io_delay_init();
|
|
|
|
|
2007-10-19 12:35:03 -06:00
|
|
|
#ifdef CONFIG_SMP
|
2008-01-30 05:33:11 -07:00
|
|
|
/* setup to use the early static init tables during kernel startup */
|
2008-01-30 05:33:33 -07:00
|
|
|
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
|
|
|
|
x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
|
2008-01-30 05:33:12 -07:00
|
|
|
#ifdef CONFIG_NUMA
|
2008-01-30 05:33:33 -07:00
|
|
|
x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
|
2007-10-19 12:35:03 -06:00
|
|
|
#endif
|
2008-01-30 05:33:12 -07:00
|
|
|
#endif
|
2007-10-19 12:35:03 -06:00
|
|
|
|
2005-08-24 10:07:20 -06:00
|
|
|
#ifdef CONFIG_ACPI
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
|
|
|
|
* Call this early for SRAT node setup.
|
|
|
|
*/
|
|
|
|
acpi_boot_table_init();
|
|
|
|
#endif
|
|
|
|
|
2006-09-26 02:52:31 -06:00
|
|
|
/* How many end-of-memory variables you have, grandma! */
|
|
|
|
max_low_pfn = end_pfn;
|
|
|
|
max_pfn = end_pfn;
|
|
|
|
high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
|
|
|
|
|
2006-09-27 02:49:52 -06:00
|
|
|
/* Remove active ranges so rediscovery with NUMA-awareness happens */
|
|
|
|
remove_all_active_ranges();
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
#ifdef CONFIG_ACPI_NUMA
|
|
|
|
/*
|
|
|
|
* Parse SRAT to discover nodes.
|
|
|
|
*/
|
|
|
|
acpi_numa_init();
|
|
|
|
#endif
|
|
|
|
|
2005-06-23 01:08:06 -06:00
|
|
|
#ifdef CONFIG_NUMA
|
2008-01-30 05:30:39 -07:00
|
|
|
numa_initmem_init(0, end_pfn);
|
2005-04-16 16:20:36 -06:00
|
|
|
#else
|
2005-06-23 01:08:07 -06:00
|
|
|
contig_initmem_init(0, end_pfn);
|
2005-04-16 16:20:36 -06:00
|
|
|
#endif
|
|
|
|
|
2008-01-30 05:33:17 -07:00
|
|
|
early_res_to_bootmem();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2007-07-28 01:33:16 -06:00
|
|
|
#ifdef CONFIG_ACPI_SLEEP
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
2008-01-30 05:30:39 -07:00
|
|
|
* Reserve low memory region for sleep support.
|
2005-04-16 16:20:36 -06:00
|
|
|
*/
|
2008-01-30 05:30:39 -07:00
|
|
|
acpi_reserve_bootmem();
|
|
|
|
#endif
|
2008-01-30 05:31:19 -07:00
|
|
|
|
2008-01-30 05:34:10 -07:00
|
|
|
if (efi_enabled)
|
2008-01-30 05:31:19 -07:00
|
|
|
efi_reserve_bootmem();
|
|
|
|
|
2008-01-30 05:30:39 -07:00
|
|
|
/*
|
|
|
|
* Find and reserve possible boot-time SMP configuration:
|
|
|
|
*/
|
2005-04-16 16:20:36 -06:00
|
|
|
find_smp_config();
|
|
|
|
#ifdef CONFIG_BLK_DEV_INITRD
|
2007-10-15 18:13:22 -06:00
|
|
|
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
|
|
|
|
unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
|
|
|
|
unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
|
|
|
|
unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
|
|
|
|
unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
|
|
|
|
|
|
|
|
if (ramdisk_end <= end_of_mem) {
|
|
|
|
reserve_bootmem_generic(ramdisk_image, ramdisk_size);
|
|
|
|
initrd_start = ramdisk_image + PAGE_OFFSET;
|
|
|
|
initrd_end = initrd_start+ramdisk_size;
|
|
|
|
} else {
|
2008-01-30 05:33:17 -07:00
|
|
|
/* Assumes everything on node 0 */
|
|
|
|
free_bootmem(ramdisk_image, ramdisk_size);
|
2005-04-16 16:20:36 -06:00
|
|
|
printk(KERN_ERR "initrd extends beyond end of memory "
|
2007-10-15 18:13:22 -06:00
|
|
|
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
|
|
|
ramdisk_end, end_of_mem);
|
2005-04-16 16:20:36 -06:00
|
|
|
initrd_start = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2007-10-19 00:40:59 -06:00
|
|
|
reserve_crashkernel();
|
2008-04-09 20:50:41 -06:00
|
|
|
|
|
|
|
reserve_ibft_region();
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
paging_init();
|
2008-01-30 05:32:39 -07:00
|
|
|
map_vsyscall();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2006-09-26 02:52:30 -06:00
|
|
|
early_quirks();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2005-08-24 10:07:20 -06:00
|
|
|
#ifdef CONFIG_ACPI
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* Read APIC and some other early information from ACPI tables.
|
|
|
|
*/
|
|
|
|
acpi_boot_init();
|
|
|
|
#endif
|
|
|
|
|
2006-01-11 14:45:36 -07:00
|
|
|
init_cpu_to_node();
|
|
|
|
|
2005-04-16 16:20:36 -06:00
|
|
|
/*
|
|
|
|
* get boot-time SMP configuration:
|
|
|
|
*/
|
|
|
|
if (smp_found_config)
|
|
|
|
get_smp_config();
|
|
|
|
init_apic_mappings();
|
2008-01-30 05:30:19 -07:00
|
|
|
ioapic_init_mappings();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/*
|
2007-02-13 05:26:24 -07:00
|
|
|
* We trust e820 completely. No explicit ROM probing in memory.
|
2008-01-30 05:30:39 -07:00
|
|
|
*/
|
2008-02-22 18:07:16 -07:00
|
|
|
e820_reserve_resources();
|
2006-09-26 00:32:46 -06:00
|
|
|
e820_mark_nosave_regions();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
/* request I/O space for devices used on all i[345]86 PCs */
|
2006-09-29 17:47:55 -06:00
|
|
|
for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
2005-04-16 16:20:36 -06:00
|
|
|
request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
|
|
|
2005-04-16 16:25:12 -06:00
|
|
|
e820_setup_gap();
|
2005-04-16 16:20:36 -06:00
|
|
|
|
|
|
|
#ifdef CONFIG_VT
|
|
|
|
#if defined(CONFIG_VGA_CONSOLE)
|
2008-01-30 05:31:19 -07:00
|
|
|
if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
|
|
|
|
conswitchp = &vga_con;
|
2005-04-16 16:20:36 -06:00
|
|
|
#elif defined(CONFIG_DUMMY_CONSOLE)
|
|
|
|
conswitchp = &dummy_con;
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
[PATCH] x86_64: Change init sections for CPU hotplug support
This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.
Caveat: This is done *only* for EM64T CPU Hotplug support, on request from
Andi Kleen. Much of the generic hotplug code in kernel, and none of the other
archs that support CPU hotplug today, i386, ia64, ppc64, s390 and parisc dont
mark sections with __cpuinit, but only mark them as __devinit, and
__devinitdata.
If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break, on other arch's that dont use __cpuinit,
and __cpudevinit.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 15:54:58 -06:00
|
|
|
/*
 * Read the processor name string from CPUID leaves 0x80000002..0x80000004
 * into c->x86_model_id and NUL-terminate it at byte 48.
 *
 * Returns 1 when the string was read, 0 when c->extended_cpuid_level shows
 * that leaf 0x80000004 is not available (c->x86_model_id is left untouched).
 */
static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *regs = (unsigned int *) c->x86_model_id;
	unsigned int leaf;

	if (c->extended_cpuid_level < 0x80000004)
		return 0;

	/* Every leaf fills four consecutive 32-bit words (eax, ebx, ecx, edx). */
	for (leaf = 0x80000002; leaf <= 0x80000004; leaf++, regs += 4)
		cpuid(leaf, &regs[0], &regs[1], &regs[2], &regs[3]);

	c->x86_model_id[48] = 0;
	return 1;
}
|
|
|
|
|
|
|
|
|
[PATCH] x86_64: Change init sections for CPU hotplug support
This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.
Caveat: This is done *only* for EM64T CPU Hotplug support, on request from
Andi Kleen. Much of the generic hotplug code in kernel, and none of the other
archs that support CPU hotplug today, i386, ia64, ppc64, s390 and parisc dont
mark sections with __cpuinit, but only mark them as __devinit, and
__devinitdata.
If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break, on other arch's that dont use __cpuinit,
and __cpudevinit.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 15:54:58 -06:00
|
|
|
/*
 * Print the L1/L2 cache geometry reported by the extended CPUID leaves
 * and record cache size, TLB size and address widths in *c.
 *
 * Each leaf is only queried when c->extended_cpuid_level says it exists;
 * fields belonging to a missing leaf are left unchanged.
 */
static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int n, dummy, eax, ebx, ecx, edx;

	n = c->extended_cpuid_level;

	if (n >= 0x80000005) {
		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
		printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
		       "D cache %dK (%d bytes/line)\n",
		       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
		c->x86_cache_size = (ecx>>24) + (edx>>24);
		/* On K8 L1 TLB is inclusive, so don't count it */
		c->x86_tlbsize = 0;
	}

	if (n >= 0x80000006) {
		/*
		 * A single CPUID execution already returns both ebx and ecx;
		 * there is no need to issue the instruction a second time
		 * just to re-read ecx.
		 */
		cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
		/* The L2 size replaces the L1 sum computed above. */
		c->x86_cache_size = ecx >> 16;
		c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);

		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
		       c->x86_cache_size, ecx & 0xFF);
	}

	if (n >= 0x80000008) {
		/* eax[15:8] = virtual address bits, eax[7:0] = physical address bits */
		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
		c->x86_virt_bits = (eax >> 8) & 0xff;
		c->x86_phys_bits = eax & 0xff;
	}
}
|
|
|
|
|
2005-09-12 10:49:24 -06:00
|
|
|
#ifdef CONFIG_NUMA
/*
 * Find an online NUMA node belonging to an APIC ID close to @apicid.
 *
 * All lower APIC IDs are tried first (nearest first), then all higher
 * ones up to MAX_LOCAL_APIC.  If none of them maps to an online node the
 * first online node is returned.
 */
static int __cpuinit nearby_node(int apicid)
{
	int id, nid;

	/* Walk downwards from the APIC ID just below ours. */
	id = apicid;
	while (--id >= 0) {
		nid = apicid_to_node[id];
		if (nid == NUMA_NO_NODE)
			continue;
		if (node_online(nid))
			return nid;
	}

	/* Nothing below: walk upwards from the APIC ID just above ours. */
	id = apicid + 1;
	while (id < MAX_LOCAL_APIC) {
		nid = apicid_to_node[id];
		if (nid != NUMA_NO_NODE && node_online(nid))
			return nid;
		id++;
	}

	return first_node(node_online_map); /* Shouldn't happen */
}
#endif
|
|
|
|
|
2005-04-16 16:25:16 -06:00
|
|
|
/*
|
|
|
|
 * On an AMD dual core setup the lower bits of the APIC id distinguish the cores.
|
|
|
|
* Assumes number of cores is a power of two.
|
|
|
|
*/
|
2008-01-30 05:33:37 -07:00
|
|
|
static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned shift;
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	int nid = 0;
	unsigned apicid = hard_smp_processor_id();
#endif

	/* Number of low APIC ID bits that select the core within a socket. */
	shift = c->x86_coreid_bits;

	/* Core id: the low bits.  Socket id: whatever remains above them. */
	c->cpu_core_id = c->initial_apicid & ((1 << shift)-1);
	c->phys_proc_id = c->initial_apicid >> shift;

#ifdef CONFIG_NUMA
	/* Default to the socket id unless the APIC ID has a known node. */
	nid = c->phys_proc_id;
	if (apicid_to_node[apicid] != NUMA_NO_NODE)
		nid = apicid_to_node[apicid];

	if (!node_online(nid)) {
		/*
		 * Two possibilities here:
		 *  - The CPU is missing memory and no node was created.
		 *    In that case try picking one from a nearby CPU.
		 *  - The APIC IDs differ from the HyperTransport node IDs
		 *    which the K8 northbridge parsing fills in.
		 *    Assume they are all increased by a constant offset,
		 *    but in the same order as the HT nodeids.
		 *    If that doesn't result in a usable node fall back to
		 *    the path for the previous case.
		 */
		int ht_id = c->initial_apicid;

		if (ht_id >= 0 && apicid_to_node[ht_id] != NUMA_NO_NODE)
			nid = apicid_to_node[ht_id];

		/* Still nothing usable: pick a nearby node. */
		if (!node_online(nid))
			nid = nearby_node(apicid);
	}
	numa_set_node(cpu, nid);

	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, nid);
#endif
#endif
}
|
2005-04-16 16:20:36 -06:00
|
|
|
|
2008-01-30 05:32:40 -07:00
|
|
|
/*
 * Derive the core count and the number of APIC ID bits used for the core
 * id from ecx of CPUID leaf 0x80000008.  Compiled out without CONFIG_SMP;
 * does nothing when the CPU lacks that leaf.
 */
static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	unsigned width, leaf_ecx;

	/* Multi core CPU? */
	if (c->extended_cpuid_level < 0x80000008)
		return;

	leaf_ecx = cpuid_ecx(0x80000008);

	/* ecx[7:0] holds the core count minus one. */
	c->x86_max_cores = (leaf_ecx & 0xff) + 1;

	/* ecx[15:12]: core id width, if the CPU tells us. */
	width = (leaf_ecx >> 12) & 0xF;

	/* Otherwise use the smallest width that can hold x86_max_cores ids. */
	if (width == 0) {
		for (; (1 << width) < c->x86_max_cores; width++)
			;
	}

	c->x86_coreid_bits = width;
#endif
}
|
|
|
|
|
2007-10-12 15:04:07 -06:00
|
|
|
#define ENABLE_C1E_MASK 0x18000000
|
|
|
|
#define CPUID_PROCESSOR_SIGNATURE 1
|
|
|
|
#define CPUID_XFAM 0x0ff00000
|
|
|
|
#define CPUID_XFAM_K8 0x00000000
|
|
|
|
#define CPUID_XFAM_10H 0x00100000
|
|
|
|
#define CPUID_XFAM_11H 0x00200000
|
|
|
|
#define CPUID_XMOD 0x000f0000
|
|
|
|
#define CPUID_XMOD_REV_F 0x00040000
|
|
|
|
|
|
|
|
/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
|
|
|
|
static __cpuinit int amd_apic_timer_broken(void)
|
|
|
|
{
|
2008-01-30 05:30:39 -07:00
|
|
|
u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
|
|
|
|
|
2007-10-12 15:04:07 -06:00
|
|
|
switch (eax & CPUID_XFAM) {
|
|
|
|
case CPUID_XFAM_K8:
|
|
|
|
if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
|
|
|
|
break;
|
|
|
|
case CPUID_XFAM_10H:
|
|
|
|
case CPUID_XFAM_11H:
|
|
|
|
rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
|
|
|
|
if (lo & ENABLE_C1E_MASK)
|
|
|
|
return 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* err on the side of caution */
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-01-30 05:32:40 -07:00
|
|
|
/*
 * Early AMD setup: multi-core topology fields plus the constant-TSC
 * capability derived from c->x86_power.
 */
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
	early_init_amd_mc(c);

	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
	if (!(c->x86_power & (1<<8)))
		return;

	set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
|
|
|
|
|
2006-09-26 02:52:36 -06:00
|
|
|
/*
 * AMD-specific part of identify_cpu(): applies the errata workaround,
 * fixes up capability bits, fills in the model name and cache
 * information, detects the core topology and splits the direct mapping
 * around the TSEG area on the boot CPU.
 */
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
{
	unsigned level;

#ifdef CONFIG_SMP
	unsigned long hwcr;

	/*
	 * Disable TLB flush filter by setting HWCR.FFDIS on K8
	 * bit 6 of msr C001_0015
	 *
	 * Errata 63 for SH-B3 steppings
	 * Errata 122 for all steppings (F+ have it disabled by default)
	 */
	if (c->x86 == 15) {
		rdmsrl(MSR_K8_HWCR, hwcr);
		hwcr |= 1 << 6;
		wrmsrl(MSR_K8_HWCR, hwcr);
	}
#endif

	/*
	 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
	 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
	 */
	clear_cpu_cap(c, 0*32+31);

	/*
	 * On C+ stepping K8 rep microcode works well for copy/memset;
	 * families 0x10 and 0x11 get the flag unconditionally.
	 */
	level = cpuid_eax(1);
	if ((c->x86 == 15 &&
	     ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) ||
	    c->x86 == 0x10 || c->x86 == 0x11)
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);

	/* Enable workaround for FXSAVE leak */
	if (c->x86 >= 6)
		set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);

	/*
	 * No name string from CPUID?  Should distinguish models here,
	 * but this is only a fallback anyway.
	 */
	level = get_model_name(c);
	if (!level && c->x86 == 15)
		strcpy(c->x86_model_id, "Hammer");

	display_cacheinfo(c);

	/* Multi core CPU? */
	if (c->extended_cpuid_level >= 0x80000008)
		amd_detect_cmp(c);

	/* The && keeps leaf 0x80000006 from being read on CPUs lacking it. */
	num_cache_leaves = (c->extended_cpuid_level >= 0x80000006 &&
			    (cpuid_edx(0x80000006) & 0xf000)) ? 4 : 3;

	if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
		set_cpu_cap(c, X86_FEATURE_K8);

	/* MFENCE stops RDTSC speculation */
	set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);

	if (amd_apic_timer_broken())
		disable_apic_timer = 1;

	if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
		unsigned long long tseg;

		/*
		 * Split up direct mapping around the TSEG SMM area.
		 * Don't do it for gbpages because there seems very little
		 * benefit in doing so.
		 */
		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
			if ((tseg >> PMD_SHIFT) <
			    (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
				set_memory_4k((unsigned long)__va(tseg), 1);
		}
	}
}
|
|
|
|
|
2008-01-30 05:31:39 -07:00
|
|
|
/*
 * Detect Hyper-Threading siblings and fill in c->phys_proc_id and
 * c->cpu_core_id from the logical processor count in CPUID leaf 1
 * (ebx bits 23:16).  Updates the global smp_num_siblings.
 *
 * Does nothing without CONFIG_SMP or when the CPU lacks the HT feature
 * flag.  CPUs flagged CMP_LEGACY skip the sibling computation and only
 * get the informational printout.
 */
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	u32 ebx;
	int index_msb, core_bits;

	if (!cpu_has(c, X86_FEATURE_HT))
		return;
	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
		goto out;

	/* Only ebx of leaf 1 is needed here. */
	ebx = cpuid_ebx(1);
	smp_num_siblings = (ebx & 0xff0000) >> 16;

	if (smp_num_siblings == 1) {
		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
	} else if (smp_num_siblings > 1) {

		if (smp_num_siblings > NR_CPUS) {
			/* Terminate the line so later messages are not glued onto it. */
			printk(KERN_WARNING "CPU: Unsupported number of "
			       "siblings %d\n", smp_num_siblings);
			smp_num_siblings = 1;
			return;
		}

		/* Package id: APIC ID with all logical-processor bits shifted out. */
		index_msb = get_count_order(smp_num_siblings);
		c->phys_proc_id = phys_pkg_id(index_msb);

		/* From here on smp_num_siblings counts threads per core. */
		smp_num_siblings = smp_num_siblings / c->x86_max_cores;

		index_msb = get_count_order(smp_num_siblings);

		core_bits = get_count_order(c->x86_max_cores);

		/* Core id: APIC ID without the thread bits, masked to the core width. */
		c->cpu_core_id = phys_pkg_id(index_msb) &
					       ((1 << core_bits) - 1);
	}
out:
	if ((c->x86_max_cores * smp_num_siblings) > 1) {
		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
		       c->phys_proc_id);
		printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
		       c->cpu_core_id);
	}

#endif
}
|
|
|
|
|
2005-04-16 16:25:15 -06:00
|
|
|
/*
|
|
|
|
* find out the number of processor cores on the die
|
|
|
|
*/
|
[PATCH] x86_64: Change init sections for CPU hotplug support
This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.
Caveat: This is done *only* for EM64T CPU Hotplug support, on request from
Andi Kleen. Much of the generic hotplug code in kernel, and none of the other
archs that support CPU hotplug today, i386, ia64, ppc64, s390 and parisc dont
mark sections with __cpuinit, but only mark them as __devinit, and
__devinitdata.
If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break, on other arch's that dont use __cpuinit,
and __cpudevinit.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 15:54:58 -06:00
|
|
|
static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
	unsigned int eax, unused;

	/* Without CPUID leaf 4 assume a single core. */
	if (c->cpuid_level < 4)
		return 1;

	cpuid_count(4, 0, &eax, &unused, &unused, &unused);

	/*
	 * A zero in eax[4:0] means subleaf 0 is not valid, so assume one
	 * core; otherwise eax[31:26] holds the core count minus one.
	 */
	return (eax & 0x1f) ? ((eax >> 26) + 1) : 1;
}
|
|
|
|
|
2008-02-17 05:22:47 -07:00
|
|
|
/*
 * Bind the current CPU to the NUMA node recorded for its APIC ID in
 * apicid_to_node[], falling back to the first online node when that
 * entry is unset or the node is offline.  No-op without CONFIG_NUMA.
 */
static void __cpuinit srat_detect_node(void)
{
#ifdef CONFIG_NUMA
	int cpu = smp_processor_id();
	int apicid = hard_smp_processor_id();
	/*
	 * Don't do the funky fallback heuristics the AMD version employs
	 * for now.
	 */
	unsigned node = apicid_to_node[apicid];

	if (node == NUMA_NO_NODE || !node_online(node))
		node = first_node(node_online_map);

	numa_set_node(cpu, node);
	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
#endif
}
|
|
|
|
|
2008-01-30 05:32:40 -07:00
|
|
|
/*
 * Early Intel setup: flag a constant TSC on family 0xf from model 3 and
 * on family 6 from model 0x0e onwards.
 */
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
	int constant_tsc;

	if (c->x86 == 0xf)
		constant_tsc = (c->x86_model >= 0x03);
	else if (c->x86 == 0x6)
		constant_tsc = (c->x86_model >= 0x0e);
	else
		constant_tsc = 0;

	if (constant_tsc)
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
|
|
|
|
|
[PATCH] x86_64: Change init sections for CPU hotplug support
This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.
Caveat: This is done *only* for EM64T CPU Hotplug support, on request from
Andi Kleen. Much of the generic hotplug code in kernel, and none of the other
archs that support CPU hotplug today, i386, ia64, ppc64, s390 and parisc dont
mark sections with __cpuinit, but only mark them as __devinit, and
__devinitdata.
If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break, on other arch's that dont use __cpuinit,
and __cpudevinit.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 15:54:58 -06:00
|
|
|
/*
 * Intel-specific part of identify_cpu(): cache information, performance
 * monitoring and debug-store capabilities, address widths, cache
 * alignment, core count and NUMA node assignment.
 */
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	init_intel_cacheinfo(c);

	if (c->cpuid_level >= 10) {
		unsigned perfmon = cpuid_eax(10);
		unsigned version = perfmon & 0xff;
		unsigned counters = (perfmon >> 8) & 0xff;

		/* Check for version and the number of counters */
		if (version && counters > 1)
			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
	}

	if (cpu_has_ds) {
		unsigned int misc_lo, misc_hi;

		rdmsr(MSR_IA32_MISC_ENABLE, misc_lo, misc_hi);
		/* BTS and PEBS are flagged when bits 11 and 12 are clear. */
		if ((misc_lo & (1<<11)) == 0)
			set_cpu_cap(c, X86_FEATURE_BTS);
		if ((misc_lo & (1<<12)) == 0)
			set_cpu_cap(c, X86_FEATURE_PEBS);
	}

	if (cpu_has_bts)
		ds_init_intel(c);

	if (c->extended_cpuid_level >= 0x80000008) {
		unsigned widths = cpuid_eax(0x80000008);

		c->x86_virt_bits = (widths >> 8) & 0xff;
		c->x86_phys_bits = widths & 0xff;

		/* CPUID workaround for Intel 0F34 CPU */
		if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 0xF &&
		    c->x86_model == 0x3 && c->x86_mask == 0x4)
			c->x86_phys_bits = 36;
	}

	switch (c->x86) {
	case 15:
		c->x86_cache_alignment = c->x86_clflush_size * 2;
		break;
	case 6:
		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
		break;
	default:
		break;
	}

	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
	c->x86_max_cores = intel_num_cpu_cores(c);

	srat_detect_node();
}
|
|
|
|
|
2008-03-26 10:09:16 -06:00
|
|
|
/*
 * Early Centaur setup: family 6 parts from model 0xf onwards are flagged
 * as having a constant TSC.
 */
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
	/*
	 * Use set_cpu_cap() like every other capability update in this
	 * file instead of handing the address of the x86_capability array
	 * to raw set_bit().
	 */
	if (c->x86 == 0x6 && c->x86_model >= 0xf)
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
|
|
|
|
|
|
|
|
/*
 * Centaur-specific part of identify_cpu(): address widths from CPUID
 * leaf 0x80000008 plus the capability and alignment settings for
 * family 6, model 0xf and later.
 */
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
	if (c->extended_cpuid_level >= 0x80000008) {
		unsigned widths = cpuid_eax(0x80000008);

		c->x86_virt_bits = (widths >> 8) & 0xff;
		c->x86_phys_bits = widths & 0xff;
	}

	if (c->x86 == 0x6) {
		if (c->x86_model >= 0xf) {
			c->x86_cache_alignment = c->x86_clflush_size * 2;
			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
			set_cpu_cap(c, X86_FEATURE_REP_GOOD);
		}
	}

	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
}
|
|
|
|
|
2005-09-10 01:27:21 -06:00
|
|
|
/*
 * Translate the CPUID vendor string in c->x86_vendor_id into one of the
 * X86_VENDOR_* constants and store it in c->x86_vendor.  Unrecognised
 * strings map to X86_VENDOR_UNKNOWN.
 */
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
	const char *id = c->x86_vendor_id;
	int vendor = X86_VENDOR_UNKNOWN;

	if (strcmp(id, "AuthenticAMD") == 0)
		vendor = X86_VENDOR_AMD;
	else if (strcmp(id, "GenuineIntel") == 0)
		vendor = X86_VENDOR_INTEL;
	else if (strcmp(id, "CentaurHauls") == 0)
		vendor = X86_VENDOR_CENTAUR;

	c->x86_vendor = vendor;
}
|
|
|
|
|
|
|
|
/* Do some early cpuid on the boot CPU to get some parameters that are
|
|
|
|
needed before check_bugs. Everything advanced is in identify_cpu
|
|
|
|
below. */
|
2008-01-30 05:30:16 -07:00
|
|
|
static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
{
	u32 tfms, xlvl;

	/* Start from known defaults; CPUID results below override them. */
	c->loops_per_jiffy = loops_per_jiffy;
	c->x86_cache_size = -1;
	c->x86_vendor = X86_VENDOR_UNKNOWN;
	c->x86_model = c->x86_mask = 0;	/* So far unknown... */
	c->x86_vendor_id[0] = '\0'; /* Unset */
	c->x86_model_id[0] = '\0';  /* Unset */
	c->x86_clflush_size = 64;
	c->x86_cache_alignment = c->x86_clflush_size;
	c->x86_max_cores = 1;
	c->x86_coreid_bits = 0;
	c->extended_cpuid_level = 0;
	memset(&c->x86_capability, 0, sizeof c->x86_capability);

	/*
	 * Get vendor name.  The 12 characters come back in ebx, edx, ecx
	 * order, hence the [0], [8], [4] destinations for ebx, ecx, edx.
	 */
	cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
	      (unsigned int *)&c->x86_vendor_id[0],
	      (unsigned int *)&c->x86_vendor_id[8],
	      (unsigned int *)&c->x86_vendor_id[4]);

	get_cpu_vendor(c);

	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Intel-defined flags: level 0x00000001 */
	if (c->cpuid_level >= 0x00000001) {
		__u32 misc;
		cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
		      &c->x86_capability[0]);
		c->x86 = (tfms >> 8) & 0xf;
		c->x86_model = (tfms >> 4) & 0xf;
		c->x86_mask = tfms & 0xf;
		/* Family 0xf adds the extended family field (bits 27:20). */
		if (c->x86 == 0xf)
			c->x86 += (tfms >> 20) & 0xff;
		/* Extended model (bits 19:16) forms the high nibble of the model. */
		if (c->x86 >= 0x6)
			c->x86_model += ((tfms >> 16) & 0xF) << 4;
		if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
	} else {
		/* Have CPUID level 0 only - unheard of */
		c->x86 = 4;
	}

	c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
#ifdef CONFIG_SMP
	c->phys_proc_id = c->initial_apicid;
#endif
	/* AMD-defined flags: level 0x80000001 */
	xlvl = cpuid_eax(0x80000000);
	c->extended_cpuid_level = xlvl;
	if ((xlvl & 0xffff0000) == 0x80000000) {
		if (xlvl >= 0x80000001) {
			c->x86_capability[1] = cpuid_edx(0x80000001);
			c->x86_capability[6] = cpuid_ecx(0x80000001);
		}
		if (xlvl >= 0x80000004)
			get_model_name(c); /* Default name */
	}

	/* Transmeta-defined flags: level 0x80860001 */
	xlvl = cpuid_eax(0x80860000);
	if ((xlvl & 0xffff0000) == 0x80860000) {
		/* Don't set x86_cpuid_level here for now to not confuse. */
		if (xlvl >= 0x80860001)
			c->x86_capability[2] = cpuid_edx(0x80860001);
	}

	/*
	 * c->extended_cpuid_level still holds the value read from leaf
	 * 0x80000000 above, so it is not re-read here.
	 */
	if (c->extended_cpuid_level >= 0x80000007)
		c->x86_power = cpuid_edx(0x80000007);

	/* PAT is cleared for everyone, then re-set below for selected CPUs. */
	clear_cpu_cap(c, X86_FEATURE_PAT);

	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
		early_init_amd(c);
		if (c->x86 >= 0xf && c->x86 <= 0x11)
			set_cpu_cap(c, X86_FEATURE_PAT);
		break;
	case X86_VENDOR_INTEL:
		early_init_intel(c);
		if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
			set_cpu_cap(c, X86_FEATURE_PAT);
		break;
	case X86_VENDOR_CENTAUR:
		early_init_centaur(c);
		break;
	default:
		/* No early vendor-specific setup for other vendors. */
		break;
	}

}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This does the hard work of actually picking apart the CPU stuff...
|
|
|
|
*/
|
|
|
|
void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
{
	const int secondary = (c != &boot_cpu_data);
	int word;

	early_identify_cpu(c);

	init_scattered_cpuid_features(c);

	c->apicid = phys_pkg_id(0);

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (c->x86_vendor == X86_VENDOR_AMD)
		init_amd(c);
	else if (c->x86_vendor == X86_VENDOR_INTEL)
		init_intel(c);
	else if (c->x86_vendor == X86_VENDOR_CENTAUR)
		init_centaur(c);
	else	/* X86_VENDOR_UNKNOWN and anything else */
		display_cacheinfo(c);

	detect_ht(c);

	for (word = 0; word < NCAPINTS; word++) {
		/*
		 * On SMP, boot_cpu_data holds the common feature set
		 * between all CPUs; so make sure that we indicate which
		 * features are common between the CPUs.  The first time
		 * this routine gets executed, c == &boot_cpu_data.
		 */
		if (secondary)
			boot_cpu_data.x86_capability[word] &=
				c->x86_capability[word];

		/*
		 * Clear all flags overridden by options.  This has to be
		 * AND-NOT rather than XOR: XOR would switch on a bit that
		 * is already clear in c->x86_capability.
		 */
		c->x86_capability[word] &= ~cleared_cpu_caps[word];
	}

#ifdef CONFIG_X86_MCE
	mcheck_init(c);
#endif
	select_idle_routine(c);

#ifdef CONFIG_NUMA
	numa_add_cpu(smp_processor_id());
#endif

}
|
|
|
|
|
2008-03-19 11:25:02 -06:00
|
|
|
/*
 * Boot-processor entry point for CPU identification: runs identify_cpu()
 * on the global boot_cpu_data descriptor.
 */
void __cpuinit identify_boot_cpu(void)
{
	identify_cpu(&boot_cpu_data);
}
|
|
|
|
|
|
|
|
/*
 * Identification entry point for a secondary processor.
 *
 * @c: descriptor of the CPU being brought up; must not be boot_cpu_data.
 *
 * Runs identify_cpu() on @c and then calls mtrr_ap_init().
 */
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
{
	/* The boot CPU goes through identify_boot_cpu(), never through here. */
	BUG_ON(&boot_cpu_data == c);

	identify_cpu(c);
	mtrr_ap_init();
}
|
|
|
|
|
2008-01-30 05:33:21 -07:00
|
|
|
static __init int setup_noclflush(char *arg)
|
|
|
|
{
|
|
|
|
setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
__setup("noclflush", setup_noclflush);
|
|
|
|
|
[PATCH] x86_64: Change init sections for CPU hotplug support
This patch adds __cpuinit and __cpuinitdata sections that need to exist past
boot to support cpu hotplug.
Caveat: This is done *only* for EM64T CPU hotplug support, on request from
Andi Kleen. Much of the generic hotplug code in the kernel, and all of the
other archs that support CPU hotplug today (i386, ia64, ppc64, s390 and
parisc), don't mark sections with __cpuinit, but only mark them as __devinit
and __devinitdata.
If someone is motivated to change generic code, we need to make sure all
existing hotplug code does not break on other archs that don't use __cpuinit
and __cpudevinit.
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Andi Kleen <ak@muc.de>
Acked-by: Zwane Mwaikambo <zwane@arm.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-06-25 15:54:58 -06:00
|
|
|
/*
 * Finish the current console line with a short description of @c.
 *
 * Emits the model id string when it is non-empty, followed by the stepping
 * (x86_mask, as two hex digits) when either the stepping is non-zero or
 * cpuid_level is non-negative. Every path ends the line with a newline.
 * All output uses KERN_CONT, i.e. it continues a line started by the caller.
 */
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
	if (c->x86_model_id[0])
		printk(KERN_CONT "%s", c->x86_model_id);

	/* No stepping information to report: just terminate the line. */
	if (!c->x86_mask && c->cpuid_level < 0) {
		printk(KERN_CONT "\n");
		return;
	}

	printk(KERN_CONT " stepping %02x\n", c->x86_mask);
}
|
|
|
|
|
2008-01-30 05:33:21 -07:00
|
|
|
static __init int setup_disablecpuid(char *arg)
|
|
|
|
{
|
|
|
|
int bit;
|
|
|
|
if (get_option(&arg, &bit) && bit < NCAPINTS*32)
|
|
|
|
setup_clear_cpu_cap(bit);
|
|
|
|
else
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
__setup("clearcpuid=", setup_disablecpuid);
|