Merge branch 'x86-stage-3-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-stage-3-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (190 commits)
  Revert "cpuacct: reduce one NULL check in fast-path"
  Revert "x86: don't compile vsmp_64 for 32bit"
  x86: Correct behaviour of irq affinity
  x86: early_ioremap_init(), use __fix_to_virt(), because we are sure it's safe
  x86: use default_cpu_mask_to_apicid for 64bit
  x86: fix set_extra_move_desc calling
  x86, PAT, PCI: Change vma prot in pci_mmap to reflect inherited prot
  x86/dmi: fix dmi_alloc() section mismatches
  x86: e820 fix various signedness issues in setup.c and e820.c
  x86: apic/io_apic.c define msi_ir_chip and ir_ioapic_chip all the time
  x86: irq.c keep CONFIG_X86_LOCAL_APIC interrupts together
  x86: irq.c use same path for show_interrupts
  x86: cpu/cpu.h cleanup
  x86: Fix a couple of sparse warnings in arch/x86/kernel/apic/io_apic.c
  Revert "x86: create a non-zero sized bm_pte only when needed"
  x86: pci-nommu.c cleanup
  x86: io_delay.c cleanup
  x86: rtc.c cleanup
  x86: i8253 cleanup
  x86: kdebugfs.c cleanup
  ...
Commit 019abbc870 by Linus Torvalds, 2009-03-30 11:38:31 -07:00
145 changed files with 6472 additions and 3590 deletions

View file

@ -0,0 +1,101 @@
Mini-HOWTO for using the earlyprintk=dbgp boot option with a
USB2 Debug port key and a debug cable, on x86 systems.
You need two computers, the 'USB debug key' special gadget and
two USB cables, connected like this:
[host/target] <-------> [USB debug key] <-------> [client/console]
1. There are three specific hardware requirements:
a.) Host/target system needs to have USB debug port capability.
You can check this capability by looking at a 'Debug port' bit in
the lspci -vvv output:
# lspci -vvv
...
00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
Subsystem: Lenovo ThinkPad T61
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0
Interrupt: pin D routed to IRQ 19
Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K]
Capabilities: [50] Power Management version 2
Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME+
Capabilities: [58] Debug port: BAR=1 offset=00a0
^^^^^^^^^^^ <==================== [ HERE ]
Kernel driver in use: ehci_hcd
Kernel modules: ehci-hcd
...
(If your system does not list a debug port capability then you probably
won't be able to use the USB debug key.)
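A quick way to check for this capability (the offset and controller will
differ per chipset) is:
# lspci -vvv | grep -i 'debug port'
which should print the 'Capabilities: ... Debug port' line shown above
if your EHCI controller supports it.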
b.) You also need a Netchip USB debug cable/key:
http://www.plxtech.com/products/NET2000/NET20DC/default.asp
This is a small blue plastic connector with two USB connections;
it draws power from its USB connections.
c.) Thirdly, you need a second client/console system with a regular USB port.
2. Software requirements:
a.) On the host/target system:
You need to enable the following kernel config option:
CONFIG_EARLY_PRINTK_DBGP=y
You also need to add "earlyprintk=dbgp" to the kernel boot command line.
(If you are using GRUB, append it to the 'kernel' line in
/etc/grub.conf.)
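For example, a complete 'kernel' line might look like this (the image
name and root device below are placeholders - adjust them to your system):
kernel /boot/vmlinuz-2.6.29 ro root=/dev/sda1 earlyprintk=dbgp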
NOTE: normally the earlyprintk console gets turned off once the
regular console is alive - use "earlyprintk=dbgp,keep" to keep
this channel open beyond early bootup. This can be useful for
debugging crashes under Xorg, etc.
b.) On the client/console system:
You should enable the following kernel config option:
CONFIG_USB_SERIAL_DEBUG=y
On the next bootup with the modified kernel you should
get one or more /dev/ttyUSBx devices.
Now this channel of kernel messages is ready to be used: start
your favorite terminal emulator (minicom, etc.) and set
it up to use /dev/ttyUSB0 - or use a raw 'cat /dev/ttyUSBx' to
see the raw output.
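For instance, a minimal terminal emulator invocation (the device node
may differ on your system) is:
minicom -D /dev/ttyUSB0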
c.) On Nvidia Southbridge based systems: the kernel will try to probe
and find out which port has the debug device connected.
3. Testing that it works fine:
You can test the output by using earlyprintk=dbgp,keep and provoking
kernel messages on the host/target system. For example, you can provoke
a harmless kernel message by doing:
echo h > /proc/sysrq-trigger
On the host/target system you should see this help line in "dmesg" output:
SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z)
On the client/console system do:
cat /dev/ttyUSB0
And you should see the help line above displayed shortly after you've
provoked it on the host system.
If it does not work then please ask about it on the linux-kernel@vger.kernel.org
mailing list or contact the x86 maintainers.

View file

@ -786,6 +786,11 @@ config X86_MCE_AMD
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
config X86_MCE_THRESHOLD
depends on X86_MCE_AMD || X86_MCE_INTEL
bool
default y
config X86_MCE_NONFATAL
tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
depends on X86_32 && X86_MCE
@ -929,6 +934,12 @@ config X86_CPUID
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
/dev/cpu/31/cpuid.
config X86_CPU_DEBUG
tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
---help---
If you select this option, various x86 CPU information will be
provided through debugfs.
choice
prompt "High Memory Support"
default HIGHMEM4G if !X86_NUMAQ
@ -1121,7 +1132,7 @@ config NUMA_EMU
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
range 1 9 if X86_64
range 1 9
default "9" if MAXSMP
default "6" if X86_64
default "4" if X86_NUMAQ
@ -1429,7 +1440,7 @@ config CRASH_DUMP
config KEXEC_JUMP
bool "kexec jump (EXPERIMENTAL)"
depends on EXPERIMENTAL
depends on KEXEC && HIBERNATION && X86_32
depends on KEXEC && HIBERNATION
---help---
Jump between original kernel and kexeced kernel and invoke
code in physical address mode via KEXEC

View file

@ -456,24 +456,9 @@ config CPU_SUP_AMD
If unsure, say N.
config CPU_SUP_CENTAUR_32
config CPU_SUP_CENTAUR
default y
bool "Support Centaur processors" if PROCESSOR_SELECT
depends on !64BIT
---help---
This enables detection, tunings and quirks for Centaur processors
You need this enabled if you want your kernel to run on a
Centaur CPU. Disabling this option on other types of CPUs
makes the kernel a tiny bit smaller. Disabling it on a Centaur
CPU might render the kernel unbootable.
If unsure, say N.
config CPU_SUP_CENTAUR_64
default y
bool "Support Centaur processors" if PROCESSOR_SELECT
depends on 64BIT
---help---
This enables detection, tunings and quirks for Centaur processors

View file

@ -153,34 +153,23 @@ endif
boot := arch/x86/boot
PHONY += zImage bzImage compressed zlilo bzlilo \
zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install
PHONY += bzImage $(BOOT_TARGETS)
# Default kernel to build
all: bzImage
# KBUILD_IMAGE specify target image being built
KBUILD_IMAGE := $(boot)/bzImage
zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage
zImage bzImage: vmlinux
bzImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
compressed: zImage
zlilo bzlilo: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo
zdisk bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk
fdimage fdimage144 fdimage288 isoimage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@
install:
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
PHONY += vdso_install
vdso_install:
@ -205,7 +194,3 @@ define archhelp
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
endef
CLEAN_FILES += arch/x86/boot/fdimage \
arch/x86/boot/image.iso \
arch/x86/boot/mtools.conf
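As an illustration (the arguments and paths below are placeholders, not
part of this change), the consolidated boot targets can be invoked as:
make fdimage144 FDARGS='console=ttyS0,115200'
make isoimage FDINITRD=/path/to/initrd.img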

View file

@ -6,6 +6,7 @@
# for more details.
#
# Copyright (C) 1994 by Linus Torvalds
# Changed by many, many contributors over the years.
#
# ROOT_DEV specifies the default root-device when making the image.
@ -21,11 +22,8 @@ ROOT_DEV := CURRENT
SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
# If you want the RAM disk device, define this to be the size in blocks.
#RAMDISK := -DRAMDISK=512
targets := vmlinux.bin setup.bin setup.elf zImage bzImage
targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed
setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@ -71,17 +69,13 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK)
$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
$(obj)/bzImage: BUILDFLAGS := -b
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
$(obj)/vmlinux.bin $(ROOT_DEV) > $@
cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
$(ROOT_DEV) > $@
$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
$(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
@ -116,9 +110,11 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
# Set this if you want to pass append arguments to the
# bzdisk/fdimage/isoimage kernel
FDARGS =
# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
# Set this if you want an initrd included with the
# bzdisk/fdimage/isoimage kernel
FDINITRD =
image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
@ -127,7 +123,7 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
# This requires write access to /dev/fd0
zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
bzdisk: $(obj)/bzImage $(obj)/mtools.conf
MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
syslinux /dev/fd0 ; sync
echo '$(image_cmdline)' | \
@ -135,10 +131,10 @@ zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync
# These require being root or having syslinux 2.02 or higher installed
fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
syslinux $(obj)/fdimage ; sync
@ -147,9 +143,9 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync
fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
fdimage288: $(obj)/bzImage $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
syslinux $(obj)/fdimage ; sync
@ -158,9 +154,9 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
if [ -f '$(FDINITRD)' ] ; then \
MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \
fi
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync
MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync
isoimage: $(BOOTIMAGE)
isoimage: $(obj)/bzImage
-rm -rf $(obj)/isoimage
mkdir $(obj)/isoimage
for i in lib lib64 share end ; do \
@ -170,7 +166,7 @@ isoimage: $(BOOTIMAGE)
fi ; \
if [ $$i = end ] ; then exit 1 ; fi ; \
done
cp $(BOOTIMAGE) $(obj)/isoimage/linux
cp $(obj)/bzImage $(obj)/isoimage/linux
echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
if [ -f '$(FDINITRD)' ] ; then \
cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \
@ -181,12 +177,13 @@ isoimage: $(BOOTIMAGE)
isohybrid $(obj)/image.iso 2>/dev/null || true
rm -rf $(obj)/isoimage
zlilo: $(BOOTIMAGE)
bzlilo: $(obj)/bzImage
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz
cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
cp System.map $(INSTALL_PATH)/
if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
install:
sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)"
sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"

View file

@ -24,12 +24,8 @@
#include "boot.h"
#include "offsets.h"
SETUPSECTS = 4 /* default nr of setup-sectors */
BOOTSEG = 0x07C0 /* original address of boot-sector */
SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
/* to be loaded */
ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
SYSSEG = 0x1000 /* historical load address >> 4 */
#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA
@ -97,12 +93,12 @@ bugger_off_msg:
.section ".header", "a"
.globl hdr
hdr:
setup_sects: .byte SETUPSECTS
setup_sects: .byte 0 /* Filled in by build.c */
root_flags: .word ROOT_RDONLY
syssize: .long SYSSIZE
ram_size: .word RAMDISK
syssize: .long 0 /* Filled in by build.c */
ram_size: .word 0 /* Obsolete */
vid_mode: .word SVGA_MODE
root_dev: .word ROOT_DEV
root_dev: .word 0 /* Filled in by build.c */
boot_flag: .word 0xAA55
# offset 512, entry point
@ -123,14 +119,15 @@ _start:
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG
start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
# in case something decided to "use" it
.word kernel_version-512 # pointing to kernel version string
# above section of header is compatible
# with loadlin-1.5 (header v1.5). Don't
# change it.
type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin,
# Bootlin, SYSLX, bootsect...)
type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# bootloaders know to change this.
# See Documentation/i386/boot.txt for
# assigned ids
@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the loader also has set
# space behind setup.S can be used for
# heap purposes.
# Only the loader knows what is free
#ifndef __BIG_KERNEL__
.byte 0
#else
.byte LOADED_HIGH
#endif
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size to move, when setup is not
code32_start: # here loaders can put a different
# start address for 32-bit code.
#ifndef __BIG_KERNEL__
.long 0x1000 # 0x1000 = default for zImage
#else
.long 0x100000 # 0x100000 = default for big kernel
#endif
ramdisk_image: .long 0 # address of loaded ramdisk image
# Here the loader puts the 32-bit

View file

@ -32,47 +32,6 @@ static void realmode_switch_hook(void)
}
}
/*
* A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
* A bzImage kernel is loaded and runs at 0x100000.
*/
static void move_kernel_around(void)
{
/* Note: rely on the compile-time option here rather than
the LOADED_HIGH flag. The Qemu kernel loader unconditionally
sets the loadflags to zero. */
#ifndef __BIG_KERNEL__
u16 dst_seg, src_seg;
u32 syssize;
dst_seg = 0x1000 >> 4;
src_seg = 0x10000 >> 4;
syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
while (syssize) {
int paras = (syssize >= 0x1000) ? 0x1000 : syssize;
int dwords = paras << 2;
asm volatile("pushw %%es ; "
"pushw %%ds ; "
"movw %1,%%es ; "
"movw %2,%%ds ; "
"xorw %%di,%%di ; "
"xorw %%si,%%si ; "
"rep;movsl ; "
"popw %%ds ; "
"popw %%es"
: "+c" (dwords)
: "r" (dst_seg), "r" (src_seg)
: "esi", "edi");
syssize -= paras;
dst_seg += paras;
src_seg += paras;
}
#endif
}
/*
* Disable all interrupts at the legacy PIC.
*/
@ -147,9 +106,6 @@ void go_to_protected_mode(void)
/* Hook before leaving real mode, also disables interrupts */
realmode_switch_hook();
/* Move the kernel/setup to their final resting places */
move_kernel_around();
/* Enable the A20 gate */
if (enable_a20()) {
puts("A20 gate not responding, unable to boot...\n");

View file

@ -47,6 +47,7 @@ GLOBAL(protected_mode_jump)
ENDPROC(protected_mode_jump)
.code32
.section ".text32","ax"
GLOBAL(in_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds

View file

@ -17,7 +17,8 @@ SECTIONS
.header : { *(.header) }
.inittext : { *(.inittext) }
.initdata : { *(.initdata) }
.text : { *(.text*) }
.text : { *(.text) }
.text32 : { *(.text32) }
. = ALIGN(16);
.rodata : { *(.rodata*) }

View file

@ -130,7 +130,7 @@ static void die(const char * str, ...)
static void usage(void)
{
die("Usage: build [-b] setup system [rootdev] [> image]");
die("Usage: build setup system [rootdev] [> image]");
}
int main(int argc, char ** argv)
@ -145,11 +145,6 @@ int main(int argc, char ** argv)
void *kernel;
u32 crc = 0xffffffffUL;
if (argc > 2 && !strcmp(argv[1], "-b"))
{
is_big_kernel = 1;
argc--, argv++;
}
if ((argc < 3) || (argc > 4))
usage();
if (argc > 3) {
@ -216,8 +211,6 @@ int main(int argc, char ** argv)
die("Unable to mmap '%s': %m", argv[2]);
/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
sys_size = (sz + 15 + 4) / 16;
if (!is_big_kernel && sys_size > DEF_SYSSIZE)
die("System is too big. Try using bzImage or modules.");
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;

View file

@ -129,16 +129,20 @@ u16 vga_crtc(void)
return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4;
}
static void vga_set_480_scanlines(int end)
static void vga_set_480_scanlines(int lines)
{
u16 crtc;
u8 csel;
u16 crtc; /* CRTC base address */
u8 csel; /* CRTC miscellaneous output register */
u8 ovfw; /* CRTC overflow register */
int end = lines-1;
crtc = vga_crtc();
ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40);
out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */
out_idx(0x0b, crtc, 0x06); /* Vertical total */
out_idx(0x3e, crtc, 0x07); /* Vertical overflow */
out_idx(ovfw, crtc, 0x07); /* Vertical overflow */
out_idx(0xea, crtc, 0x10); /* Vertical sync start */
out_idx(end, crtc, 0x12); /* Vertical display end */
out_idx(0xe7, crtc, 0x15); /* Vertical blank start */
@ -146,24 +150,24 @@ static void vga_set_480_scanlines(int end)
csel = inb(0x3cc);
csel &= 0x0d;
csel |= 0xe2;
outb(csel, 0x3cc);
outb(csel, 0x3c2);
}
static void vga_set_80x30(void)
{
vga_set_480_scanlines(0xdf);
vga_set_480_scanlines(30*16);
}
static void vga_set_80x34(void)
{
vga_set_14font();
vga_set_480_scanlines(0xdb);
vga_set_480_scanlines(34*14);
}
static void vga_set_80x60(void)
{
vga_set_8font();
vga_set_480_scanlines(0xdf);
vga_set_480_scanlines(60*8);
}
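/*
 * Worked example for the conversion above: vga_set_80x30() now passes
 * lines = 30*16 = 480, so end = 479 = 0x1df.  Then
 * (479 >> 7) & 0x02 = 0x02 and (479 >> 3) & 0x40 = 0x00, giving
 * ovfw = 0x3c | 0x02 = 0x3e, while the low byte of end is 0xdf - the same
 * 0x3e and 0xdf values that the old code hard-coded.
 */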
static int vga_set_mode(struct mode_info *mode)

View file

@ -75,7 +75,7 @@ static inline void default_inquire_remote_apic(int apicid)
#define setup_secondary_clock setup_secondary_APIC_clock
#endif
#ifdef CONFIG_X86_VSMP
#ifdef CONFIG_X86_64
extern int is_vsmp_box(void);
#else
static inline int is_vsmp_box(void)
@ -108,6 +108,16 @@ extern void native_apic_icr_write(u32 low, u32 id);
extern u64 native_apic_icr_read(void);
#ifdef CONFIG_X86_X2APIC
/*
* Make previous memory operations globally visible before
* sending the IPI through x2apic wrmsr. We need a serializing instruction or
* mfence for this.
*/
static inline void x2apic_wrmsr_fence(void)
{
asm volatile("mfence" : : : "memory");
}
static inline void native_apic_msr_write(u32 reg, u32 v)
{
if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
@ -184,6 +194,9 @@ static inline int x2apic_enabled(void)
{
return 0;
}
#define x2apic 0
#endif
extern int get_physical_broadcast(void);
@ -379,6 +392,7 @@ static inline u32 safe_apic_wait_icr_idle(void)
static inline void ack_APIC_irq(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
/*
* ack_APIC_irq() actually gets compiled as a single instruction
* ... yummie.
@ -386,6 +400,7 @@ static inline void ack_APIC_irq(void)
/* Docs say use 0 for future compatibility */
apic_write(APIC_EOI, 0);
#endif
}
static inline unsigned default_get_apic_id(unsigned long x)
@ -474,10 +489,19 @@ static inline int default_apic_id_registered(void)
return physid_isset(read_apic_id(), phys_cpu_present_map);
}
static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
}
extern int default_apicid_to_node(int logical_apicid);
#endif
static inline unsigned int
default_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
return cpumask_bits(cpumask)[0];
return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
}
static inline unsigned int
@ -491,15 +515,6 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
return (unsigned int)(mask1 & mask2 & mask3);
}
static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
{
return cpuid_apic >> index_msb;
}
extern int default_apicid_to_node(int logical_apicid);
#endif
static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid)
{
return physid_isset(apicid, bitmap);

View file

@ -53,6 +53,7 @@
#define APIC_ESR_SENDILL 0x00020
#define APIC_ESR_RECVILL 0x00040
#define APIC_ESR_ILLREGA 0x00080
#define APIC_LVTCMCI 0x2f0
#define APIC_ICR 0x300
#define APIC_DEST_SELF 0x40000
#define APIC_DEST_ALLINC 0x80000

View file

@ -1,10 +1,6 @@
#ifndef _ASM_X86_BOOT_H
#define _ASM_X86_BOOT_H
/* Don't touch these, unless you really know what you're doing. */
#define DEF_SYSSEG 0x1000
#define DEF_SYSSIZE 0x7F00
/* Internal svga startup constants */
#define NORMAL_VGA 0xffff /* 80x25 mode */
#define EXTENDED_VGA 0xfffe /* 80x50 mode */

View file

@ -90,6 +90,9 @@ int set_memory_4k(unsigned long addr, int numpages);
int set_memory_array_uc(unsigned long *addr, int addrinarray);
int set_memory_array_wb(unsigned long *addr, int addrinarray);
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wb(struct page **pages, int addrinarray);
/*
* For legacy compatibility with the old APIs, a few functions
* are provided that work on a "struct page".

arch/x86/include/asm/cpu_debug.h Executable file
View file

@ -0,0 +1,226 @@
#ifndef _ASM_X86_CPU_DEBUG_H
#define _ASM_X86_CPU_DEBUG_H
/*
* CPU x86 architecture debug
*
* Copyright(C) 2009 Jaswinder Singh Rajput
*/
/* Register flags */
enum cpu_debug_bit {
/* Model Specific Registers (MSRs) */
CPU_MC_BIT, /* Machine Check */
CPU_MONITOR_BIT, /* Monitor */
CPU_TIME_BIT, /* Time */
CPU_PMC_BIT, /* Performance Monitor */
CPU_PLATFORM_BIT, /* Platform */
CPU_APIC_BIT, /* APIC */
CPU_POWERON_BIT, /* Power-on */
CPU_CONTROL_BIT, /* Control */
CPU_FEATURES_BIT, /* Features control */
CPU_LBRANCH_BIT, /* Last Branch */
CPU_BIOS_BIT, /* BIOS */
CPU_FREQ_BIT, /* Frequency */
CPU_MTTR_BIT, /* MTRR */
CPU_PERF_BIT, /* Performance */
CPU_CACHE_BIT, /* Cache */
CPU_SYSENTER_BIT, /* Sysenter */
CPU_THERM_BIT, /* Thermal */
CPU_MISC_BIT, /* Miscellaneous */
CPU_DEBUG_BIT, /* Debug */
CPU_PAT_BIT, /* PAT */
CPU_VMX_BIT, /* VMX */
CPU_CALL_BIT, /* System Call */
CPU_BASE_BIT, /* BASE Address */
CPU_VER_BIT, /* Version ID */
CPU_CONF_BIT, /* Configuration */
CPU_SMM_BIT, /* System mgmt mode */
CPU_SVM_BIT, /*Secure Virtual Machine*/
CPU_OSVM_BIT, /* OS-Visible Workaround*/
/* Standard Registers */
CPU_TSS_BIT, /* Task Stack Segment */
CPU_CR_BIT, /* Control Registers */
CPU_DT_BIT, /* Descriptor Table */
/* End of Registers flags */
CPU_REG_ALL_BIT, /* Select all Registers */
};
#define CPU_REG_ALL (~0) /* Select all Registers */
#define CPU_MC (1 << CPU_MC_BIT)
#define CPU_MONITOR (1 << CPU_MONITOR_BIT)
#define CPU_TIME (1 << CPU_TIME_BIT)
#define CPU_PMC (1 << CPU_PMC_BIT)
#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT)
#define CPU_APIC (1 << CPU_APIC_BIT)
#define CPU_POWERON (1 << CPU_POWERON_BIT)
#define CPU_CONTROL (1 << CPU_CONTROL_BIT)
#define CPU_FEATURES (1 << CPU_FEATURES_BIT)
#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT)
#define CPU_BIOS (1 << CPU_BIOS_BIT)
#define CPU_FREQ (1 << CPU_FREQ_BIT)
#define CPU_MTRR (1 << CPU_MTTR_BIT)
#define CPU_PERF (1 << CPU_PERF_BIT)
#define CPU_CACHE (1 << CPU_CACHE_BIT)
#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT)
#define CPU_THERM (1 << CPU_THERM_BIT)
#define CPU_MISC (1 << CPU_MISC_BIT)
#define CPU_DEBUG (1 << CPU_DEBUG_BIT)
#define CPU_PAT (1 << CPU_PAT_BIT)
#define CPU_VMX (1 << CPU_VMX_BIT)
#define CPU_CALL (1 << CPU_CALL_BIT)
#define CPU_BASE (1 << CPU_BASE_BIT)
#define CPU_VER (1 << CPU_VER_BIT)
#define CPU_CONF (1 << CPU_CONF_BIT)
#define CPU_SMM (1 << CPU_SMM_BIT)
#define CPU_SVM (1 << CPU_SVM_BIT)
#define CPU_OSVM (1 << CPU_OSVM_BIT)
#define CPU_TSS (1 << CPU_TSS_BIT)
#define CPU_CR (1 << CPU_CR_BIT)
#define CPU_DT (1 << CPU_DT_BIT)
/* Register file flags */
enum cpu_file_bit {
CPU_INDEX_BIT, /* index */
CPU_VALUE_BIT, /* value */
};
#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
/*
* DisplayFamily_DisplayModel Processor Families/Processor Number Series
* -------------------------- ------------------------------------------
* 05_01, 05_02, 05_04 Pentium, Pentium with MMX
*
* 06_01 Pentium Pro
* 06_03, 06_05 Pentium II Xeon, Pentium II
* 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentium III
*
* 06_09, 060D Pentium M
*
* 06_0E Core Duo, Core Solo
*
* 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series,
* Core 2 Quad, Core 2 Extreme, Core 2 Duo,
* Pentium dual-core
* 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650
*
* 06_1C Atom
*
* 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4
* 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D
*
* 0F_06 Xeon 7100, 5000 Series, Xeon MP,
* Pentium 4, Pentium D
*/
/* Register processors bits */
enum cpu_processor_bit {
CPU_NONE,
/* Intel */
CPU_INTEL_PENTIUM_BIT,
CPU_INTEL_P6_BIT,
CPU_INTEL_PENTIUM_M_BIT,
CPU_INTEL_CORE_BIT,
CPU_INTEL_CORE2_BIT,
CPU_INTEL_ATOM_BIT,
CPU_INTEL_XEON_P4_BIT,
CPU_INTEL_XEON_MP_BIT,
/* AMD */
CPU_AMD_K6_BIT,
CPU_AMD_K7_BIT,
CPU_AMD_K8_BIT,
CPU_AMD_0F_BIT,
CPU_AMD_10_BIT,
CPU_AMD_11_BIT,
};
#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT)
#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT)
#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT)
#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT)
#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT)
#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT)
#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT)
#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT)
#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2)
#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM)
#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM)
#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON)
#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON)
#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT)
#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON)
#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON)
#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT)
#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX)
#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE)
#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT)
#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE)
#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT)
#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE)
/* Select all supported Intel CPUs */
#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT)
#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT)
#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT)
#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT)
#define CPU_AMD_10 (1 << CPU_AMD_10_BIT)
#define CPU_AMD_11 (1 << CPU_AMD_11_BIT)
#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11)
#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS)
#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS)
#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS)
/* Select all supported AMD CPUs */
#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS)
/* Select all supported CPUs */
#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL)
#define MAX_CPU_FILES 512
struct cpu_private {
unsigned cpu;
unsigned type;
unsigned reg;
unsigned file;
};
struct cpu_debug_base {
char *name; /* Register name */
unsigned flag; /* Register flag */
unsigned write; /* Register write flag */
};
/*
* Currently it looks similar to cpu_debug_base but once we add more files
* cpu_file_base will go in different direction
*/
struct cpu_file_base {
char *name; /* Register file name */
unsigned flag; /* Register file flag */
unsigned write; /* Register write flag */
};
struct cpu_cpuX_base {
struct dentry *dentry; /* Register dentry */
int init; /* Register index file */
};
struct cpu_debug_range {
unsigned min; /* Register range min */
unsigned max; /* Register range max */
unsigned flag; /* Supported flags */
unsigned model; /* Supported models */
};
#endif /* _ASM_X86_CPU_DEBUG_H */

View file

@ -91,7 +91,6 @@ static inline int desc_empty(const void *ptr)
#define store_gdt(dtr) native_store_gdt(dtr)
#define store_idt(dtr) native_store_idt(dtr)
#define store_tr(tr) (tr = native_store_tr())
#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))
#define load_TLS(t, cpu) native_load_tls(t, cpu)
#define set_ldt native_set_ldt
@ -112,6 +111,8 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
}
#endif /* CONFIG_PARAVIRT */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
{

View file

@ -1,22 +1,15 @@
#ifndef _ASM_X86_DMI_H
#define _ASM_X86_DMI_H
#include <linux/compiler.h>
#include <linux/init.h>
#include <asm/io.h>
#include <asm/setup.h>
#define DMI_MAX_DATA 2048
extern int dmi_alloc_index;
extern char dmi_alloc_data[DMI_MAX_DATA];
/* This is so early that there is no good way to allocate dynamic memory.
Allocate data in a BSS array. */
static inline void *dmi_alloc(unsigned len)
static __always_inline __init void *dmi_alloc(unsigned len)
{
int idx = dmi_alloc_index;
if ((dmi_alloc_index + len) > DMI_MAX_DATA)
return NULL;
dmi_alloc_index += len;
return dmi_alloc_data + idx;
return extend_brk(len, sizeof(int));
}
/* Use early IO mappings for DMI because it's initialized early */

View file

@ -72,7 +72,7 @@ extern int e820_all_mapped(u64 start, u64 end, unsigned type);
extern void e820_add_region(u64 start, u64 size, int type);
extern void e820_print_map(char *who);
extern int
sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
extern u64 e820_update_range(u64 start, u64 size, unsigned old_type,
unsigned new_type);
extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type,

View file

@ -33,6 +33,8 @@ BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
smp_invalidate_interrupt)
#endif
BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR)
/*
* every pentium local APIC has two 'local interrupts', with a
* soft-definable vector attached to both interrupts, one of

View file

@ -12,6 +12,7 @@ typedef struct {
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
#endif
unsigned int generic_irqs; /* arch dependent */
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;

View file

@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
void *kmap_atomic(struct page *page, enum km_type type);
void kunmap_atomic(void *kvaddr, enum km_type type);
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
#ifndef CONFIG_PARAVIRT

View file

@ -27,6 +27,7 @@
/* Interrupt handlers registered during init_IRQ */
extern void apic_timer_interrupt(void);
extern void generic_interrupt(void);
extern void error_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);

View file

@ -0,0 +1,18 @@
#ifndef _ASM_X86_INIT_32_H
#define _ASM_X86_INIT_32_H
#ifdef CONFIG_X86_32
extern void __init early_ioremap_page_table_range_init(void);
#endif
extern unsigned long __init
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask);
extern unsigned long __initdata e820_table_start;
extern unsigned long __meminitdata e820_table_end;
extern unsigned long __meminitdata e820_table_top;
#endif /* _ASM_X86_INIT_32_H */

View file

@ -162,7 +162,8 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq);
extern void ioapic_init_mappings(void);
#ifdef CONFIG_X86_64
extern int save_mask_IO_APIC_setup(void);
extern int save_IO_APIC_setup(void);
extern void mask_IO_APIC_setup(void);
extern void restore_IO_APIC_setup(void);
extern void reinit_intr_remapped_IO_APIC(int);
#endif
@ -172,7 +173,7 @@ extern void probe_nr_irqs_gsi(void);
extern int setup_ioapic_entry(int apic, int irq,
struct IO_APIC_route_entry *entry,
unsigned int destination, int trigger,
int polarity, int vector);
int polarity, int vector, int pin);
extern void ioapic_write_entry(int apic, int pin,
struct IO_APIC_route_entry e);
#else /* !CONFIG_X86_IO_APIC */

View file

@ -36,6 +36,7 @@ static inline int irq_canonicalize(int irq)
extern void fixup_irqs(void);
#endif
extern void (*generic_interrupt_extension)(void);
extern void init_IRQ(void);
extern void native_init_IRQ(void);
extern bool handle_irq(unsigned irq, struct pt_regs *regs);

View file

@ -1,8 +1,6 @@
#ifndef _ASM_X86_IRQ_REMAPPING_H
#define _ASM_X86_IRQ_REMAPPING_H
extern int x2apic;
#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
#endif /* _ASM_X86_IRQ_REMAPPING_H */

View file

@ -111,6 +111,11 @@
*/
#define LOCAL_PERF_VECTOR 0xee
/*
* Generic system vector for platform specific use
*/
#define GENERIC_INTERRUPT_VECTOR 0xed
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority

View file

@ -9,13 +9,13 @@
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
# define PA_TABLE_PAGE 1
# define PAGES_NR 2
# define VA_CONTROL_PAGE 1
# define PA_TABLE_PAGE 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
#endif
#ifdef CONFIG_X86_32
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
#endif
#ifndef __ASSEMBLY__
@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
unsigned int has_pae,
unsigned int preserve_context);
#else
NORET_TYPE void
unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
unsigned long start_address) ATTRIB_NORET;
unsigned long start_address,
unsigned int preserve_context);
#endif
#define ARCH_HAS_KIMAGE_ARCH

View file

@ -1,14 +1,11 @@
#ifndef _ASM_X86_LINKAGE_H
#define _ASM_X86_LINKAGE_H
#include <linux/stringify.h>
#undef notrace
#define notrace __attribute__((no_instrument_function))
#ifdef CONFIG_X86_64
#define __ALIGN .p2align 4,,15
#define __ALIGN_STR ".p2align 4,,15"
#endif
#ifdef CONFIG_X86_32
#define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
/*
@ -50,16 +47,20 @@
__asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
"g" (arg4), "g" (arg5), "g" (arg6))
#endif
#endif /* CONFIG_X86_32 */
#ifdef __ASSEMBLY__
#define GLOBAL(name) \
.globl name; \
name:
#ifdef CONFIG_X86_ALIGNMENT_16
#define __ALIGN .align 16,0x90
#define __ALIGN_STR ".align 16,0x90"
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
#define __ALIGN .p2align 4, 0x90
#define __ALIGN_STR __stringify(__ALIGN)
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_LINKAGE_H */

View file

@ -11,6 +11,8 @@
*/
#define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
#define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */
@ -90,14 +92,29 @@ extern int mce_disabled;
#include <asm/atomic.h>
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, device_mce);
extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
/*
* To support more than 128 would need to escape the predefined
* Linux defined extended banks first.
*/
#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
void cmci_rediscover(int dying);
void cmci_recheck(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
static inline void cmci_reenable(void) {}
static inline void cmci_rediscover(int dying) {}
static inline void cmci_recheck(void) {}
#endif
#ifdef CONFIG_X86_MCE_AMD
@ -106,11 +123,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
extern int mce_available(struct cpuinfo_x86 *c);
void mce_log_therm_throt_event(__u64 status);
extern atomic_t mce_entry;
extern void do_machine_check(struct pt_regs *, long);
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
enum mcp_flags {
MCP_TIMESTAMP = (1 << 0), /* log time stamp */
MCP_UC = (1 << 1), /* log uncorrected errors */
};
extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
extern int mce_notify_user(void);
#endif /* !CONFIG_X86_32 */
@ -120,8 +149,8 @@ extern void mcheck_init(struct cpuinfo_x86 *c);
#else
#define mcheck_init(c) do { } while (0)
#endif
extern void stop_mce(void);
extern void restart_mce(void);
extern void (*mce_threshold_vector)(void);
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */

View file

@ -47,6 +47,7 @@
#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
MSI_ADDR_DEST_ID_MASK)
#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00)
#define MSI_ADDR_IR_EXT_INT (1 << 4)
#define MSI_ADDR_IR_SHV (1 << 3)

View file

@ -81,6 +81,11 @@
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
/* These are consecutive and not in the normal 4er MCE bank block */
#define MSR_IA32_MC0_CTL2 0x00000280
#define CMCI_EN (1ULL << 30)
#define CMCI_THRESHOLD_MASK 0xffffULL
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186

View file

@ -39,6 +39,11 @@
#define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */
/*
* Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
*/
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#ifndef __ASSEMBLY__
/*

View file

@ -40,14 +40,8 @@
#ifndef __ASSEMBLY__
struct pgprot;
extern int page_is_ram(unsigned long pagenr);
extern int devmem_is_allowed(unsigned long pagenr);
extern void map_devmem(unsigned long pfn, unsigned long size,
struct pgprot vma_prot);
extern void unmap_devmem(unsigned long pfn, unsigned long size,
struct pgprot vma_prot);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;

View file

@ -317,8 +317,6 @@ struct pv_mmu_ops {
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);
void (*pmd_clear)(pmd_t *pmdp);
@ -389,7 +387,7 @@ extern struct pv_lock_ops pv_lock_ops;
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
[paravirt_opptr] "m" (op)
[paravirt_opptr] "i" (&(op))
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@ -443,7 +441,7 @@ int paravirt_disable_iospace(void);
* offset into the paravirt_patch_template structure, and can therefore be
* freely converted back into a structure offset.
*/
#define PARAVIRT_CALL "call *%[paravirt_opptr];"
#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
@ -1365,13 +1363,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
pte.pte, pte.pte >> 32);
}
static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
/* 5 arg words */
pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
@ -1388,12 +1379,6 @@ static inline void set_pte_atomic(pte_t *ptep, pte_t pte)
set_pte(ptep, pte);
}
static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
set_pte(ptep, pte);
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{

View file

@ -2,6 +2,7 @@
#define _ASM_X86_PAT_H
#include <linux/types.h>
#include <asm/pgtable_types.h>
#ifdef CONFIG_X86_PAT
extern int pat_enabled;
@ -17,5 +18,9 @@ extern int free_memtype(u64 start, u64 end);
extern int kernel_map_sync_memtype(u64 base, unsigned long size,
unsigned long flag);
extern void map_devmem(unsigned long pfn, unsigned long size,
struct pgprot vma_prot);
extern void unmap_devmem(unsigned long pfn, unsigned long size,
struct pgprot vma_prot);
#endif /* _ASM_X86_PAT_H */

View file

@ -26,13 +26,6 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
native_set_pte(ptep, pte);
}
static inline void native_set_pte_present(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, pte_t pte)
{
native_set_pte(ptep, pte);
}
static inline void native_pmd_clear(pmd_t *pmdp)
{
native_set_pmd(pmdp, __pmd(0));

View file

@ -31,23 +31,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
ptep->pte_low = pte.pte_low;
}
/*
* Since this is only called on user PTEs, and the page fault handler
* must handle the already racy situation of simultaneous page faults,
* we are justified in merely clearing the PTE present bit, followed
* by a set. The ordering here is important.
*/
static inline void native_set_pte_present(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep, pte_t pte)
{
ptep->pte_low = 0;
smp_wmb();
ptep->pte_high = pte.pte_high;
smp_wmb();
ptep->pte_low = pte.pte_low;
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
set_64bit((unsigned long long *)(ptep), native_pte_val(pte));

View file

@ -31,8 +31,6 @@ extern struct list_head pgd_list;
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
#define set_pte_present(mm, addr, ptep, pte) \
native_set_pte_present(mm, addr, ptep, pte)
#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)

View file

@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
*/
#undef TEST_ACCESS_OK
/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else

View file

@ -25,6 +25,11 @@
* area for the same reason. ;)
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
#ifndef __ASSEMBLER__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
#ifdef CONFIG_X86_PAE
#define LAST_PKMAP 512

View file

@ -273,6 +273,7 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern int nx_enabled;
extern void set_nx(void);
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);

View file

@ -75,9 +75,9 @@ struct cpuinfo_x86 {
#else
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
#endif
/* CPUID returned core id bits: */
__u8 x86_coreid_bits;
/* Max extended CPUID function supported: */
@ -391,6 +391,9 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
DECLARE_INIT_PER_CPU(irq_stack_union);
DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern unsigned long kernel_eflags;
extern asmlinkage void ignore_sysret(void);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
DECLARE_PER_CPU(unsigned long, stack_canary);

View file

@ -1 +1,8 @@
#ifndef _ASM_X86_SECTIONS_H
#define _ASM_X86_SECTIONS_H
#include <asm-generic/sections.h>
extern char __brk_base[], __brk_limit[];
#endif /* _ASM_X86_SECTIONS_H */

View file

@ -64,7 +64,7 @@ extern void x86_quirk_time_init(void);
#include <asm/bootparam.h>
/* Interrupt control for vSMPowered x86_64 systems */
#ifdef CONFIG_X86_VSMP
#ifdef CONFIG_X86_64
void vsmp_init(void);
#else
static inline void vsmp_init(void) { }
@ -100,20 +100,51 @@ extern struct boot_params boot_params;
*/
#define LOWMEMSIZE() (0x9f000)
/* exceedingly early brk-like allocator */
extern unsigned long _brk_end;
void *extend_brk(size_t size, size_t align);
/*
* Reserve space in the brk section. The name must be unique within
* the file, and somewhat descriptive. The size is in bytes. Must be
* used at file scope.
*
* (This uses a temp function to wrap the asm so we can pass it the
* size parameter; otherwise we wouldn't be able to. We can't use a
* "section" attribute on a normal variable because it always ends up
* being @progbits, which ends up allocating space in the vmlinux
* executable.)
*/
#define RESERVE_BRK(name,sz) \
static void __section(.discard) __used \
__brk_reservation_fn_##name##__(void) { \
asm volatile ( \
".pushsection .brk_reservation,\"aw\",@nobits;" \
".brk." #name ":" \
" 1:.skip %c0;" \
" .size .brk." #name ", . - 1b;" \
" .popsection" \
: : "i" (sz)); \
}
#ifdef __i386__
void __init i386_start_kernel(void);
extern void probe_roms(void);
extern unsigned long init_pg_tables_start;
extern unsigned long init_pg_tables_end;
#else
void __init x86_64_start_kernel(char *real_mode);
void __init x86_64_start_reservations(char *real_mode_data);
#endif /* __i386__ */
#endif /* _SETUP */
#else
#define RESERVE_BRK(name,sz) \
.pushsection .brk_reservation,"aw",@nobits; \
.brk.name: \
1: .skip sz; \
.size .brk.name,.-1b; \
.popsection
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
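As a minimal sketch of how this facility is meant to be used (the names
example_buf and example_early_init are hypothetical, not part of this
commit), a caller reserves brk space at file scope and then carves
allocations out of it during early boot:

#include <linux/init.h>
#include <linux/string.h>
#include <asm/setup.h>

RESERVE_BRK(example_buf, 4096);	/* reserve 4 KB in the .brk_reservation section */

static void __init example_early_init(void)
{
	/* carve 256 bytes, 16-byte aligned, out of the brk area reserved above */
	void *p = extend_brk(256, 16);

	memset(p, 0, 256);
}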

View file

@ -199,6 +199,10 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
#define SCIR_CPU_ACTIVITY 0x02 /* not idle */
#define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */
/* Loop through all installed blades */
#define for_each_possible_blade(bid) \
for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++)
/*
* Macros for converting between kernel virtual addresses, socket local physical
* addresses, and UV global physical addresses.

View file

@ -296,6 +296,8 @@ HYPERVISOR_get_debugreg(int reg)
static inline int
HYPERVISOR_update_descriptor(u64 ma, u64 desc)
{
if (sizeof(u64) == sizeof(long))
return _hypercall2(int, update_descriptor, ma, desc);
return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
}

View file

@ -70,7 +70,6 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_X86_VSMP) += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module_$(BITS).o
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
@ -111,7 +110,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
@ -120,4 +119,5 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
endif

View file

@ -414,9 +414,17 @@ void __init alternative_instructions(void)
that might execute the to be patched code.
Other CPUs are not running. */
stop_nmi();
#ifdef CONFIG_X86_MCE
stop_mce();
#endif
/*
* Don't stop machine check exceptions while patching.
* MCEs only happen when something got corrupted and in this
* case we must do something about the corruption.
* Ignoring it is worse than an unlikely patching race.
* Also machine checks tend to be broadcast and if one CPU
* goes into machine check the others follow quickly, so we don't
* expect a machine check to cause undue problems during code
* patching.
*/
apply_alternatives(__alt_instructions, __alt_instructions_end);
@ -456,9 +464,6 @@ void __init alternative_instructions(void)
(unsigned long)__smp_locks_end);
restart_nmi();
#ifdef CONFIG_X86_MCE
restart_mce();
#endif
}
/**

View file

@ -46,6 +46,7 @@
#include <asm/idle.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>
unsigned int num_processors;
@ -808,7 +809,7 @@ void clear_local_APIC(void)
u32 v;
/* APIC hasn't been mapped yet */
if (!apic_phys)
if (!x2apic && !apic_phys)
return;
maxlvt = lapic_get_maxlvt();
@ -842,6 +843,14 @@ void clear_local_APIC(void)
apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
}
#endif
#ifdef CONFIG_X86_MCE_INTEL
if (maxlvt >= 6) {
v = apic_read(APIC_LVTCMCI);
if (!(v & APIC_LVT_MASKED))
apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
}
#endif
/*
* Clean APIC state for other OSs:
*/
@ -1241,6 +1250,12 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_LVT1, value);
preempt_enable();
#ifdef CONFIG_X86_MCE_INTEL
/* Recheck CMCI information after local APIC is up on CPU #0 */
if (smp_processor_id() == 0)
cmci_recheck();
#endif
}
void __cpuinit end_local_APIC_setup(void)
@ -1319,15 +1334,16 @@ void __init enable_IR_x2apic(void)
return;
}
local_irq_save(flags);
mask_8259A();
ret = save_mask_IO_APIC_setup();
ret = save_IO_APIC_setup();
if (ret) {
pr_info("Saving IO-APIC state failed: %d\n", ret);
goto end;
}
local_irq_save(flags);
mask_IO_APIC_setup();
mask_8259A();
ret = enable_intr_remapping(1);
if (ret && x2apic_preenabled) {
@ -1352,10 +1368,10 @@ void __init enable_IR_x2apic(void)
else
reinit_intr_remapped_IO_APIC(x2apic_preenabled);
end:
unmask_8259A();
local_irq_restore(flags);
end:
if (!ret) {
if (!x2apic_preenabled)
pr_info("Enabled x2apic and interrupt-remapping\n");
@ -1508,12 +1524,10 @@ void __init early_init_lapic_mapping(void)
*/
void __init init_apic_mappings(void)
{
#ifdef CONFIG_X86_X2APIC
if (x2apic) {
boot_cpu_physical_apicid = read_apic_id();
return;
}
#endif
/*
* If no local APIC can be found then set up a fake all
@ -1957,12 +1971,9 @@ static int lapic_resume(struct sys_device *dev)
local_irq_save(flags);
#ifdef CONFIG_X86_X2APIC
if (x2apic)
enable_x2apic();
else
#endif
{
else {
/*
* Make sure the APICBASE points to the right address
*

View file

@ -159,20 +159,6 @@ static int flat_apic_id_registered(void)
return physid_isset(read_xapic_id(), phys_cpu_present_map);
}
static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
}
static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
const struct cpumask *andmask)
{
unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS;
return mask1 & mask2;
}
static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
{
return hard_smp_processor_id() >> index_msb;
@ -213,8 +199,8 @@ struct apic apic_flat = {
.set_apic_id = set_apic_id,
.apic_id_mask = 0xFFu << 24,
.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
.cpu_mask_to_apicid = default_cpu_mask_to_apicid,
.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
.send_IPI_mask = flat_send_IPI_mask,
.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,

View file

@ -389,6 +389,8 @@ struct io_apic {
unsigned int index;
unsigned int unused[3];
unsigned int data;
unsigned int unused2[11];
unsigned int eoi;
};
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
@ -397,6 +399,12 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
}
static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(vector, &io_apic->eoi);
}
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@ -546,16 +554,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
apic = entry->apic;
pin = entry->pin;
#ifdef CONFIG_INTR_REMAP
/*
* With interrupt-remapping, destination information comes
* from interrupt-remapping table entry.
*/
if (!irq_remapped(irq))
io_apic_write(apic, 0x11 + pin*2, dest);
#else
io_apic_write(apic, 0x11 + pin*2, dest);
#endif
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
@ -588,10 +592,12 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
if (assign_irq_vector(irq, cfg, mask))
return BAD_APICID;
cpumask_and(desc->affinity, cfg->domain, mask);
/* check this before desc->affinity gets updated */
set_extra_move_desc(desc, mask);
return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
cpumask_copy(desc->affinity, mask);
return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
}
static void
@ -849,9 +855,9 @@ __setup("pirq=", ioapic_pirq_setup);
static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
/*
* Saves and masks all the unmasked IO-APIC RTE's
* Saves all the IO-APIC RTE's
*/
int save_mask_IO_APIC_setup(void)
int save_IO_APIC_setup(void)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@ -876,16 +882,9 @@ int save_mask_IO_APIC_setup(void)
}
for (apic = 0; apic < nr_ioapics; apic++)
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
entry = early_ioapic_entries[apic][pin] =
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
early_ioapic_entries[apic][pin] =
ioapic_read_entry(apic, pin);
if (!entry.mask) {
entry.mask = 1;
ioapic_write_entry(apic, pin, entry);
}
}
return 0;
@ -898,6 +897,25 @@ int save_mask_IO_APIC_setup(void)
return -ENOMEM;
}
void mask_IO_APIC_setup(void)
{
int apic, pin;
for (apic = 0; apic < nr_ioapics; apic++) {
if (!early_ioapic_entries[apic])
break;
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
entry = early_ioapic_entries[apic][pin];
if (!entry.mask) {
entry.mask = 1;
ioapic_write_entry(apic, pin, entry);
}
}
}
}
void restore_IO_APIC_setup(void)
{
int apic, pin;
@ -1411,9 +1429,7 @@ void __setup_vector_irq(int cpu)
}
static struct irq_chip ioapic_chip;
#ifdef CONFIG_INTR_REMAP
static struct irq_chip ir_ioapic_chip;
#endif
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
@ -1452,7 +1468,6 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
else
desc->status &= ~IRQ_LEVEL;
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
desc->status |= IRQ_MOVE_PCNTXT;
if (trigger)
@ -1464,7 +1479,7 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
handle_edge_irq, "edge");
return;
}
#endif
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL)
set_irq_chip_and_handler_name(irq, &ioapic_chip,
@ -1478,14 +1493,13 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
int setup_ioapic_entry(int apic_id, int irq,
struct IO_APIC_route_entry *entry,
unsigned int destination, int trigger,
int polarity, int vector)
int polarity, int vector, int pin)
{
/*
* add it to the IO-APIC irq-routing table:
*/
memset(entry,0,sizeof(*entry));
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled) {
struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
struct irte irte;
@ -1504,7 +1518,14 @@ int setup_ioapic_entry(int apic_id, int irq,
irte.present = 1;
irte.dst_mode = apic->irq_dest_mode;
irte.trigger_mode = trigger;
/*
* Trigger mode in the IRTE will always be edge, and the
* actual level or edge trigger will be set up in the IO-APIC
* RTE. This will help simplify level triggered irq migration.
* For more details, see the comments above explaining IO-APIC
* irq migration in the presence of interrupt-remapping.
*/
irte.trigger_mode = 0;
irte.dlvry_mode = apic->irq_delivery_mode;
irte.vector = vector;
irte.dest_id = IRTE_DEST(destination);
@ -1515,18 +1536,21 @@ int setup_ioapic_entry(int apic_id, int irq,
ir_entry->zero = 0;
ir_entry->format = 1;
ir_entry->index = (index & 0x7fff);
} else
#endif
{
/*
* IO-APIC RTE will be configured with virtual vector.
* irq handler will do the explicit EOI to the io-apic.
*/
ir_entry->vector = pin;
} else {
entry->delivery_mode = apic->irq_delivery_mode;
entry->dest_mode = apic->irq_dest_mode;
entry->dest = destination;
entry->vector = vector;
}
entry->mask = 0; /* enable IRQ */
entry->trigger = trigger;
entry->polarity = polarity;
entry->vector = vector;
/* Mask level triggered irqs.
* Use IRQ_DELAYED_DISABLE for edge triggered irqs.
@ -1561,7 +1585,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
dest, trigger, polarity, cfg->vector)) {
dest, trigger, polarity, cfg->vector, pin)) {
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mp_ioapics[apic_id].apicid, pin);
__clear_irq_vector(irq, cfg);
@ -1642,10 +1666,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
{
struct IO_APIC_route_entry entry;
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
return;
#endif
memset(&entry, 0, sizeof(entry));
@ -2040,8 +2062,13 @@ void disable_IO_APIC(void)
* If the i8259 is routed through an IOAPIC
* Put that IOAPIC in virtual wire mode
* so legacy interrupts can be delivered.
*
* With interrupt-remapping, for now we will use virtual wire A mode,
* as virtual wire B is a little more complex (we need to configure both the
* IOAPIC RTE as well as the interrupt-remapping table entry).
* As this gets called during crash dump, keep this simple for now.
*/
if (ioapic_i8259.pin != -1) {
if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
@ -2061,7 +2088,10 @@ void disable_IO_APIC(void)
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
/*
* Use virtual wire A mode when interrupt remapping is enabled.
*/
disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
}
#ifdef CONFIG_X86_32
@ -2303,37 +2333,24 @@ static int ioapic_retrigger_irq(unsigned int irq)
#ifdef CONFIG_SMP
#ifdef CONFIG_INTR_REMAP
static void ir_irq_migration(struct work_struct *work);
static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
/*
* Migrate the IO-APIC irq in the presence of intr-remapping.
*
* For edge triggered, irq migration is a simple atomic update(of vector
* and cpu destination) of IRTE and flush the hardware cache.
* For both level and edge triggered, irq migration is a simple atomic
* update(of vector and cpu destination) of IRTE and flush the hardware cache.
*
* For level triggered, we need to modify the io-apic RTE as well with the updated
* vector information, along with modifying IRTE with vector and destination.
* So irq migration for level triggered is little bit more complex compared to
* edge triggered migration. But the good news is, we use the same algorithm
* for level triggered migration as we have today, only difference being,
* we now initiate the irq migration from process context instead of the
* interrupt context.
*
* In future, when we do a directed EOI (combined with cpu EOI broadcast
* suppression) to the IO-APIC, level triggered irq migration will also be
* as simple as edge triggered migration and we can do the irq migration
* with a simple atomic update to IO-APIC RTE.
* For level triggered, we eliminate the io-apic RTE modification (with the
* updated vector information), by using a virtual vector (io-apic pin number).
* Real vector that is used for interrupting cpu will be coming from
* the interrupt-remapping table entry.
*/
static void
migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct irte irte;
int modify_ioapic_rte;
unsigned int dest;
unsigned long flags;
unsigned int irq;
if (!cpumask_intersects(mask, cpu_online_mask))
@ -2351,13 +2368,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
modify_ioapic_rte = desc->status & IRQ_LEVEL;
if (modify_ioapic_rte) {
spin_lock_irqsave(&ioapic_lock, flags);
__target_IO_APIC_irq(irq, dest, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@ -2372,73 +2382,12 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
cpumask_copy(desc->affinity, mask);
}
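The comment block above states the new model: with interrupt remapping, moving an IRQ (level or edge triggered) is just an atomic rewrite of the interrupt-remapping table entry plus a cache flush, while the IO-APIC RTE keeps a fixed virtual vector (the pin number). A toy standalone model of that idea; the struct layout and the flush_irte() helper are illustrative stand-ins, not the real invalidation path:

#include <stdint.h>
#include <stdio.h>

/* Toy IRTE: only the two fields migration rewrites. */
struct irte {
        uint8_t  vector;
        uint32_t dest_id;
};

static struct irte ir_table[256];

/* Stand-in for flushing the hardware's cached copy of the entry. */
static void flush_irte(unsigned int index)
{
        printf("flushed IRTE %u: vector 0x%02x, dest %u\n",
               index, ir_table[index].vector, ir_table[index].dest_id);
}

/* Migration with remapping: rewrite the IRTE and flush; the IO-APIC RTE is
   never touched because it only carries the pin number as a virtual vector. */
static void migrate_remapped_irq(unsigned int index, uint8_t new_vector,
                                 uint32_t new_dest)
{
        ir_table[index].vector  = new_vector;
        ir_table[index].dest_id = new_dest;
        flush_irte(index);
}

int main(void)
{
        migrate_remapped_irq(9, 0x41, 3);  /* move "pin 9" to vector 0x41 on CPU 3 */
        return 0;
}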
static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
{
int ret = -1;
struct irq_cfg *cfg = desc->chip_data;
mask_IO_APIC_irq_desc(desc);
if (io_apic_level_ack_pending(cfg)) {
/*
* Interrupt in progress. Migrating irq now will change the
* vector information in the IO-APIC RTE and that will confuse
* the EOI broadcast performed by cpu.
* So, delay the irq migration to the next instance.
*/
schedule_delayed_work(&ir_migration_work, 1);
goto unmask;
}
/* everything is clear. we have right of way */
migrate_ioapic_irq_desc(desc, desc->pending_mask);
ret = 0;
desc->status &= ~IRQ_MOVE_PENDING;
cpumask_clear(desc->pending_mask);
unmask:
unmask_IO_APIC_irq_desc(desc);
return ret;
}
static void ir_irq_migration(struct work_struct *work)
{
unsigned int irq;
struct irq_desc *desc;
for_each_irq_desc(irq, desc) {
if (desc->status & IRQ_MOVE_PENDING) {
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
if (!desc->chip->set_affinity ||
!(desc->status & IRQ_MOVE_PENDING)) {
desc->status &= ~IRQ_MOVE_PENDING;
spin_unlock_irqrestore(&desc->lock, flags);
continue;
}
desc->chip->set_affinity(irq, desc->pending_mask);
spin_unlock_irqrestore(&desc->lock, flags);
}
}
}
/*
* Migrates the IRQ destination in the process context.
*/
static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
if (desc->status & IRQ_LEVEL) {
desc->status |= IRQ_MOVE_PENDING;
cpumask_copy(desc->pending_mask, mask);
migrate_irq_remapped_level_desc(desc);
return;
}
migrate_ioapic_irq_desc(desc, mask);
}
static void set_ir_ioapic_affinity_irq(unsigned int irq,
@ -2448,6 +2397,11 @@ static void set_ir_ioapic_affinity_irq(unsigned int irq,
set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#else
static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
}
#endif
asmlinkage void smp_irq_move_cleanup_interrupt(void)
@ -2461,6 +2415,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
me = smp_processor_id();
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
unsigned int irq;
unsigned int irr;
struct irq_desc *desc;
struct irq_cfg *cfg;
irq = __get_cpu_var(vector_irq)[vector];
@ -2480,6 +2435,18 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
goto unlock;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
/*
* Check if the vector that needs to be cleaned up is
* registered at the cpu's IRR. If so, then this is not
* the best time to clean it up. Let's clean it up in the
* next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
* to myself.
*/
if (irr & (1 << (vector % 32))) {
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
goto unlock;
}
__get_cpu_var(vector_irq)[vector] = -1;
cfg->move_cleanup_count--;
unlock:
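The IRR test added above indexes the local APIC's interrupt request register: the IRR is a 256-bit bitmap exposed as eight 32-bit registers spaced 0x10 apart starting at offset APIC_IRR (0x200), so vector V sits in register APIC_IRR + (V / 32) * 0x10 at bit V % 32. A small standalone check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

#define APIC_IRR 0x200          /* first IRR register in the local APIC map */

static void irr_location(unsigned int v, unsigned int *reg, uint32_t *mask)
{
        *reg  = APIC_IRR + (v / 32) * 0x10;     /* one 32-bit register per 32 vectors */
        *mask = 1u << (v % 32);                 /* bit within that register */
}

int main(void)
{
        unsigned int vectors[] = { 0x20, 0x31, 0xef };

        for (unsigned int i = 0; i < 3; i++) {
                unsigned int reg;
                uint32_t mask;

                irr_location(vectors[i], &reg, &mask);
                printf("vector 0x%02x -> register 0x%03x, bit mask 0x%08x\n",
                       vectors[i], reg, mask);
        }
        return 0;
}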
@ -2529,9 +2496,44 @@ static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
#ifdef CONFIG_INTR_REMAP
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
int apic, pin;
struct irq_pin_list *entry;
entry = cfg->irq_2_pin;
for (;;) {
if (!entry)
break;
apic = entry->apic;
pin = entry->pin;
io_apic_eoi(apic, pin);
entry = entry->next;
}
}
static void
eoi_ioapic_irq(struct irq_desc *desc)
{
struct irq_cfg *cfg;
unsigned long flags;
unsigned int irq;
irq = desc->irq;
cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
__eoi_ioapic_irq(irq, cfg);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void ack_x2apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
ack_x2APIC_irq();
eoi_ioapic_irq(desc);
}
static void ack_x2apic_edge(unsigned int irq)
@ -2662,20 +2664,20 @@ static struct irq_chip ioapic_chip __read_mostly = {
.retrigger = ioapic_retrigger_irq,
};
#ifdef CONFIG_INTR_REMAP
static struct irq_chip ir_ioapic_chip __read_mostly = {
.name = "IR-IO-APIC",
.startup = startup_ioapic_irq,
.mask = mask_IO_APIC_irq,
.unmask = unmask_IO_APIC_irq,
#ifdef CONFIG_INTR_REMAP
.ack = ack_x2apic_edge,
.eoi = ack_x2apic_level,
#ifdef CONFIG_SMP
.set_affinity = set_ir_ioapic_affinity_irq,
#endif
#endif
.retrigger = ioapic_retrigger_irq,
};
#endif
static inline void init_IO_APIC_traps(void)
{
@ -2901,10 +2903,8 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
panic("BIOS bug: timer not connected to IO-APIC");
#endif
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@ -2940,10 +2940,8 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
panic("timer doesn't work through Interrupt-remapped IO-APIC");
#endif
local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
@ -3237,9 +3235,7 @@ void destroy_irq(unsigned int irq)
if (desc)
desc->chip_data = cfg;
#ifdef CONFIG_INTR_REMAP
free_irte(irq);
#endif
spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
spin_unlock_irqrestore(&vector_lock, flags);
@ -3265,7 +3261,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
struct irte irte;
int ir_index;
@ -3291,10 +3286,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
MSI_ADDR_IR_SHV |
MSI_ADDR_IR_INDEX1(ir_index) |
MSI_ADDR_IR_INDEX2(ir_index);
} else
#endif
{
} else {
if (x2apic_enabled())
msg->address_hi = MSI_ADDR_BASE_HI |
MSI_ADDR_EXT_DEST_ID(dest);
else
msg->address_hi = MSI_ADDR_BASE_HI;
msg->address_lo =
MSI_ADDR_BASE_LO |
((apic->irq_dest_mode == 0) ?
@ -3394,14 +3392,15 @@ static struct irq_chip msi_chip = {
.retrigger = ioapic_retrigger_irq,
};
#ifdef CONFIG_INTR_REMAP
static struct irq_chip msi_ir_chip = {
.name = "IR-PCI-MSI",
.unmask = unmask_msi_irq,
.mask = mask_msi_irq,
#ifdef CONFIG_INTR_REMAP
.ack = ack_x2apic_edge,
#ifdef CONFIG_SMP
.set_affinity = ir_set_msi_irq_affinity,
#endif
#endif
.retrigger = ioapic_retrigger_irq,
};
@ -3432,7 +3431,6 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
}
return index;
}
#endif
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
@ -3446,7 +3444,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
set_irq_msi(irq, msidesc);
write_msi_msg(irq, &msg);
#ifdef CONFIG_INTR_REMAP
if (irq_remapped(irq)) {
struct irq_desc *desc = irq_to_desc(irq);
/*
@ -3455,7 +3452,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
} else
#endif
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@ -3469,11 +3465,8 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int ret, sub_handle;
struct msi_desc *msidesc;
unsigned int irq_want;
#ifdef CONFIG_INTR_REMAP
struct intel_iommu *iommu = 0;
struct intel_iommu *iommu = NULL;
int index = 0;
#endif
irq_want = nr_irqs_gsi;
sub_handle = 0;
@ -3482,7 +3475,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (irq == 0)
return -1;
irq_want = irq + 1;
#ifdef CONFIG_INTR_REMAP
if (!intr_remapping_enabled)
goto no_ir;
@ -3510,7 +3502,6 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
set_irte_irq(irq, iommu, index, sub_handle);
}
no_ir:
#endif
ret = setup_msi_irq(dev, msidesc, irq);
if (ret < 0)
goto error;
@ -3528,7 +3519,7 @@ void arch_teardown_msi_irq(unsigned int irq)
destroy_irq(irq);
}
#ifdef CONFIG_DMAR
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
#ifdef CONFIG_SMP
static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
@ -3609,7 +3600,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
#endif /* CONFIG_SMP */
struct irq_chip hpet_msi_type = {
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
.unmask = hpet_msi_unmask,
.mask = hpet_msi_mask,
@ -4045,11 +4036,9 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
#ifdef CONFIG_INTR_REMAP
if (intr_remapping_enabled)
set_ir_ioapic_affinity_irq_desc(desc, mask);
else
#endif
set_ioapic_affinity_irq_desc(desc, mask);
}
@ -4142,10 +4131,13 @@ static int __init ioapic_insert_resources(void)
struct resource *r = ioapic_resources;
if (!r) {
if (nr_ioapics > 0) {
printk(KERN_ERR
"IO APIC resources could be not be allocated.\n");
"IO APIC resources couldn't be allocated.\n");
return -1;
}
return 0;
}
for (i = 0; i < nr_ioapics; i++) {
insert_resource(&iomem_resource, r);

View file

@ -68,6 +68,13 @@ void __init default_setup_apic_routing(void)
apic = &apic_physflat;
printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
/*
* Now that apic routing model is selected, configure the
* fault handling for intr remapping.
*/
if (intr_remapping_enabled)
enable_drhd_fault_handling();
}
/* Same for both flat and physical. */

View file

@ -57,6 +57,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
unsigned long query_cpu;
unsigned long flags;
x2apic_wrmsr_fence();
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(
@ -73,6 +75,8 @@ static void
unsigned long query_cpu;
unsigned long flags;
x2apic_wrmsr_fence();
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
if (query_cpu == this_cpu)
@ -90,6 +94,8 @@ static void x2apic_send_IPI_allbutself(int vector)
unsigned long query_cpu;
unsigned long flags;
x2apic_wrmsr_fence();
local_irq_save(flags);
for_each_online_cpu(query_cpu) {
if (query_cpu == this_cpu)

View file

@ -58,6 +58,8 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
unsigned long query_cpu;
unsigned long flags;
x2apic_wrmsr_fence();
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
@ -73,6 +75,8 @@ static void
unsigned long query_cpu;
unsigned long flags;
x2apic_wrmsr_fence();
local_irq_save(flags);
for_each_cpu(query_cpu, mask) {
if (query_cpu != this_cpu)
@ -89,6 +93,8 @@ static void x2apic_send_IPI_allbutself(int vector)
unsigned long query_cpu;
unsigned long flags;
x2apic_wrmsr_fence();
local_irq_save(flags);
for_each_online_cpu(query_cpu) {
if (query_cpu == this_cpu)

View file

@ -83,15 +83,15 @@ void __init setup_bios_corruption_check(void)
u64 size;
addr = find_e820_area_size(addr, &size, PAGE_SIZE);
if (addr == 0)
if (!(addr + 1))
break;
if (addr >= corruption_check_size)
break;
if ((addr + size) > corruption_check_size)
size = corruption_check_size - addr;
if (size == 0)
break;
e820_update_range(addr, size, E820_RAM, E820_RESERVED);
scan_areas[num_scan_areas].addr = addr;
scan_areas[num_scan_areas].size = size;
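The sentinel change above (from addr == 0 to !(addr + 1)) assumes find_e820_area_size() reports "nothing found" as all-ones, i.e. (u64)-1, for which addr + 1 wraps to zero; a plain address of 0 is no longer treated as failure. A two-result demonstration of the test:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t not_found = ~0ULL;     /* assumed "no suitable area" return value */
        uint64_t found     = 0x9f000;   /* an ordinary physical address */

        printf("%d %d\n", !(not_found + 1), !(found + 1));      /* prints: 1 0 */
        return 0;
}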

View file

@ -14,11 +14,12 @@ obj-y += vmware.o hypervisor.o
obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
obj-$(CONFIG_X86_64) += bugs_64.o
obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o
obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o

View file

@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
u32 regs[4];
const struct cpuid_bit *cb;
static const struct cpuid_bit cpuid_bits[] = {
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
{ 0, 0, 0, 0 }
};

View file

@ -5,6 +5,7 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
@ -141,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
}
}
static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
/* calling is from identify_secondary_cpu() ? */
if (c->cpu_index == boot_cpu_id)
return;
/*
* Certain Athlons might work (for various values of 'work') in SMP
* but they are not certified as MP capable.
*/
/* Athlon 660/661 is valid. */
if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
(c->x86_mask == 1)))
goto valid_k7;
/* Duron 670 is valid */
if ((c->x86_model == 7) && (c->x86_mask == 0))
goto valid_k7;
/*
* Athlon 662, Duron 671, and Athlon >model 7 have the MP capability
* bit. It's worth noting that the A5 stepping (662) of some
* Athlon XPs has the MP bit set.
* See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
* more.
*/
if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
((c->x86_model == 7) && (c->x86_mask >= 1)) ||
(c->x86_model > 7))
if (cpu_has_mp)
goto valid_k7;
/* If we get here, not a certified SMP capable AMD system. */
/*
* Don't taint if we are running SMP kernel on a single non-MP
* approved Athlon
*/
WARN_ONCE(1, "WARNING: This combination of AMD"
" processors is not suitable for SMP.\n");
if (!test_taint(TAINT_UNSAFE_SMP))
add_taint(TAINT_UNSAFE_SMP);
valid_k7:
;
#endif
}
static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
{
u32 l, h;
@ -175,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
}
set_cpu_cap(c, X86_FEATURE_K7);
amd_k7_smp_check(c);
}
#endif
@ -450,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
}
#endif
static struct cpu_dev amd_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
#ifdef CONFIG_X86_32

View file

@ -1,11 +1,11 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include "cpu.h"
@ -276,7 +276,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
*/
c->x86_capability[5] = cpuid_edx(0xC0000001);
}
#ifdef CONFIG_X86_32
/* Cyrix III family needs CX8 & PGE explicitly enabled. */
if (c->x86_model >= 6 && c->x86_model <= 9) {
rdmsr(MSR_VIA_FCR, lo, hi);
@ -288,6 +288,11 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
/* Before Nehemiah, the C3's had 3dNOW! */
if (c->x86_model >= 6 && c->x86_model < 9)
set_cpu_cap(c, X86_FEATURE_3DNOW);
#endif
if (c->x86 == 0x6 && c->x86_model >= 0xf) {
c->x86_cache_alignment = c->x86_clflush_size * 2;
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
display_cacheinfo(c);
}
@ -316,16 +321,25 @@ enum {
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
/* Emulate MTRRs using Centaur's MCR. */
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
break;
#endif
case 6:
if (c->x86_model >= 0xf)
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
break;
}
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
}
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
char *name;
u32 fcr_set = 0;
u32 fcr_clr = 0;
@ -337,8 +351,10 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
* 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
*/
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
switch (c->x86_model) {
case 4:
@ -442,16 +458,20 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
}
sprintf(c->x86_model_id, "WinChip %s", name);
break;
#endif
case 6:
init_c3(c);
break;
}
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
}
static unsigned int __cpuinit
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
#ifdef CONFIG_X86_32
/* VIA C3 CPUs (670-68F) need further shifting. */
if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
size >>= 8;
@ -464,11 +484,11 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
if ((c->x86 == 6) && (c->x86_model == 9) &&
(c->x86_mask == 1) && (size == 65))
size -= 1;
#endif
return size;
}
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
.c_early_init = early_init_centaur,

View file

@ -1,37 +0,0 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include "cpu.h"
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x6 && c->x86_model >= 0xf)
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
}
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
early_init_centaur(c);
if (c->x86 == 0x6 && c->x86_model >= 0xf) {
c->x86_cache_alignment = c->x86_clflush_size * 2;
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
}
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
}
static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
.c_early_init = early_init_centaur,
.c_init = init_centaur,
.c_x86_vendor = X86_VENDOR_CENTAUR,
};
cpu_dev_register(centaur_cpu_dev);

View file

@ -1,52 +1,52 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/linkage.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kgdb.h>
#include <linux/topology.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/linkage.h>
#include <linux/string.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kgdb.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <asm/stackprotector.h>
#include <asm/mmu_context.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/pat.h>
#include <asm/asm.h>
#include <asm/numa.h>
#include <asm/smp.h>
#include <asm/cpu.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/topology.h>
#include <asm/cpumask.h>
#include <asm/pgtable.h>
#include <asm/atomic.h>
#include <asm/proto.h>
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mtrr.h>
#include <asm/numa.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
#include <asm/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
#endif
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/atomic.h>
#include <asm/proto.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
#include <asm/stackprotector.h>
#include "cpu.h"
#ifdef CONFIG_X86_64
/* all of these masks are initialized in setup_cpu_local_masks() */
cpumask_var_t cpu_callin_mask;
cpumask_var_t cpu_callout_mask;
cpumask_var_t cpu_initialized_mask;
cpumask_var_t cpu_callout_mask;
cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
@ -62,15 +62,15 @@ void __init setup_cpu_local_masks(void)
#else /* CONFIG_X86_32 */
cpumask_t cpu_callin_map;
cpumask_t cpu_sibling_setup_map;
cpumask_t cpu_callout_map;
cpumask_t cpu_initialized;
cpumask_t cpu_sibling_setup_map;
cpumask_t cpu_callin_map;
#endif /* CONFIG_X86_32 */
static struct cpu_dev *this_cpu __cpuinitdata;
static const struct cpu_dev *this_cpu __cpuinitdata;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@ -79,7 +79,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
* IRET will check the segment types kkeil 2000/10/28
* Also sysret mandates a special GDT layout
*
* The TLS descriptors are currently at a different place compared to i386.
* TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
@ -174,6 +174,7 @@ static inline int flag_is_changeable_p(u32 flag)
"pushfl \n\t"
"popl %0 \n\t"
"popfl \n\t"
: "=&r" (f1), "=&r" (f2)
: "ir" (flag));
@ -188,19 +189,23 @@ static int __cpuinit have_cpuid_p(void)
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
/* Disable processor serial number */
unsigned long lo, hi;
if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
return;
/* Disable processor serial number: */
rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
lo |= 0x200000;
wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
printk(KERN_NOTICE "CPU serial number disabled.\n");
clear_cpu_cap(c, X86_FEATURE_PN);
/* Disabling the serial number may affect the cpuid level */
c->cpuid_level = cpuid_eax(0);
}
}
static int __init x86_serial_nr_setup(char *s)
{
@ -232,6 +237,7 @@ struct cpuid_dependent_feature {
u32 feature;
u32 level;
};
static const struct cpuid_dependent_feature __cpuinitconst
cpuid_dependent_features[] = {
{ X86_FEATURE_MWAIT, 0x00000005 },
@ -243,7 +249,11 @@ cpuid_dependent_features[] = {
static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
{
const struct cpuid_dependent_feature *df;
for (df = cpuid_dependent_features; df->feature; df++) {
if (!cpu_has(c, df->feature))
continue;
/*
* Note: cpuid_level is set to -1 if unavailable, but
* extended_cpuid_level is set to 0 if unavailable
@ -251,32 +261,32 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
* when signed; hence the weird messing around with
* signs here...
*/
if (cpu_has(c, df->feature) &&
((s32)df->level < 0 ?
if (!((s32)df->level < 0 ?
(u32)df->level > (u32)c->extended_cpuid_level :
(s32)df->level > (s32)c->cpuid_level)) {
(s32)df->level > (s32)c->cpuid_level))
continue;
clear_cpu_cap(c, df->feature);
if (warn)
if (!warn)
continue;
printk(KERN_WARNING
"CPU: CPU feature %s disabled "
"due to lack of CPUID level 0x%x\n",
x86_cap_flags[df->feature],
df->level);
}
"CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
x86_cap_flags[df->feature], df->level);
}
}
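The rewritten condition above leans on the sign of the required level: extended CPUID leaves (0x80000000 and up) are negative as s32, so they are compared unsigned against extended_cpuid_level, while basic leaves are compared signed against cpuid_level (which is -1 when CPUID is unavailable). A minimal worked example of that dispatch, with made-up level values:

#include <stdint.h>
#include <stdio.h>

/* Returns 1 when the CPU's reported levels are too low for 'level'. */
static int level_missing(int32_t level, uint32_t extended_cpuid_level,
                         int32_t cpuid_level)
{
        return level < 0
                ? (uint32_t)level > extended_cpuid_level   /* extended leaf */
                : level > cpuid_level;                     /* basic leaf    */
}

int main(void)
{
        /* Basic leaf 0x6 on a CPU whose max basic leaf is 0x5: missing. */
        printf("%d\n", level_missing(0x00000006, 0x80000008, 0x5));             /* 1 */
        /* Extended leaf 0x80000007 with max extended leaf 0x80000008: present. */
        printf("%d\n", level_missing((int32_t)0x80000007, 0x80000008, 0x5));    /* 0 */
        return 0;
}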
/*
* Naming convention should be: <Name> [(<Codename>)]
* This table only is used unless init_<vendor>() below doesn't set it;
* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
*
* in particular, if CPUID levels 0x80000002..4 are supported, this
* isn't used
*/
/* Look up CPU names by table lookup. */
static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
const struct cpu_model_info *info;
if (c->x86_model >= 16)
return NULL; /* Range check */
@ -307,8 +317,10 @@ void load_percpu_segment(int cpu)
load_stack_canary_segment();
}
/* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one. */
/*
* Current gdt points %fs at the "master" per-cpu area: after this,
* it's on the real one.
*/
void switch_to_new_gdt(int cpu)
{
struct desc_ptr gdt_descr;
@ -321,7 +333,7 @@ void switch_to_new_gdt(int cpu)
load_percpu_segment(cpu);
}
static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
@ -340,7 +352,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
#endif
}
static struct cpu_dev __cpuinitdata default_cpu = {
static const struct cpu_dev __cpuinitconst default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
.c_x86_vendor = X86_VENDOR_UNKNOWN,
@ -360,8 +372,10 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
c->x86_model_id[48] = 0;
/* Intel chips right-justify this string for some dumb reason;
undo that brain damage */
/*
* Intel chips right-justify this string for some dumb reason;
* undo that brain damage:
*/
p = q = &c->x86_model_id[0];
while (*p == ' ')
p++;
@ -438,10 +452,14 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1) {
goto out;
}
if (smp_num_siblings <= 1)
goto out;
if (smp_num_siblings > nr_cpu_ids) {
printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
pr_warning("CPU: Unsupported number of siblings %d",
smp_num_siblings);
smp_num_siblings = 1;
return;
@ -458,7 +476,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
}
out:
if ((c->x86_max_cores * smp_num_siblings) > 1) {
@ -473,8 +490,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
int i;
static int printed;
int i;
for (i = 0; i < X86_VENDOR_NUM; i++) {
if (!cpu_devs[i])
@ -483,6 +500,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
(cpu_devs[i]->c_ident[1] &&
!strcmp(v, cpu_devs[i]->c_ident[1]))) {
this_cpu = cpu_devs[i];
c->x86_vendor = this_cpu->c_x86_vendor;
return;
@ -491,7 +509,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
if (!printed) {
printed++;
printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
printk(KERN_ERR
"CPU: vendor_id '%s' unknown, using generic init.\n", v);
printk(KERN_ERR "CPU: Your system may be unstable.\n");
}
@ -511,14 +531,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
/* Intel-defined flags: level 0x00000001 */
if (c->cpuid_level >= 0x00000001) {
u32 junk, tfms, cap0, misc;
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = (tfms >> 8) & 0xf;
c->x86_model = (tfms >> 4) & 0xf;
c->x86_mask = tfms & 0xf;
if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xf) << 4;
if (cap0 & (1<<19)) {
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
c->x86_cache_alignment = c->x86_clflush_size;
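cpu_detect() above pulls the family, model and stepping out of CPUID leaf 1 EAX and then widens them: the extended family (bits 20-27) is added only when the base family is 0xf, and the extended model (bits 16-19) is prepended for family 6 and later. The same decoding can be tried from userspace with the GCC/Clang __get_cpuid() helper from cpuid.h; this sketch simply mirrors the kernel's arithmetic:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;

        unsigned int family = (eax >> 8) & 0xf;
        unsigned int model  = (eax >> 4) & 0xf;
        unsigned int step   = eax & 0xf;

        if (family == 0xf)
                family += (eax >> 20) & 0xff;           /* extended family */
        if (family >= 0x6)
                model += ((eax >> 16) & 0xf) << 4;      /* extended model  */

        printf("family 0x%x, model 0x%x, stepping 0x%x\n", family, model, step);
        return 0;
}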
@ -534,6 +557,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
/* Intel-defined flags: level 0x00000001 */
if (c->cpuid_level >= 0x00000001) {
u32 capability, excap;
cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
c->x86_capability[0] = capability;
c->x86_capability[4] = excap;
@ -542,6 +566,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
c->extended_cpuid_level = xlvl;
if ((xlvl & 0xffff0000) == 0x80000000) {
if (xlvl >= 0x80000001) {
c->x86_capability[1] = cpuid_edx(0x80000001);
@ -549,13 +574,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
}
}
#ifdef CONFIG_X86_64
if (c->extended_cpuid_level >= 0x80000008) {
u32 eax = cpuid_eax(0x80000008);
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
}
#ifdef CONFIG_X86_32
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
c->x86_phys_bits = 36;
#endif
if (c->extended_cpuid_level >= 0x80000007)
@ -602,8 +629,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
c->x86_virt_bits = 48;
#else
c->x86_clflush_size = 32;
c->x86_phys_bits = 32;
c->x86_virt_bits = 32;
#endif
c->x86_cache_alignment = c->x86_clflush_size;
@ -634,12 +665,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
void __init early_cpu_init(void)
{
struct cpu_dev **cdev;
const struct cpu_dev *const *cdev;
int count = 0;
printk("KERNEL supported cpus:\n");
printk(KERN_INFO "KERNEL supported cpus:\n");
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
struct cpu_dev *cpudev = *cdev;
const struct cpu_dev *cpudev = *cdev;
unsigned int j;
if (count >= X86_VENDOR_NUM)
@ -650,7 +681,7 @@ void __init early_cpu_init(void)
for (j = 0; j < 2; j++) {
if (!cpudev->c_ident[j])
continue;
printk(" %s %s\n", cpudev->c_vendor,
printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
cpudev->c_ident[j]);
}
}
@ -726,9 +757,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
c->x86_coreid_bits = 0;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
c->x86_virt_bits = 48;
#else
c->cpuid_level = -1; /* CPUID not detected */
c->x86_clflush_size = 32;
c->x86_phys_bits = 32;
c->x86_virt_bits = 32;
#endif
c->x86_cache_alignment = c->x86_clflush_size;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
@ -759,8 +794,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
squash_the_stupid_serial_number(c);
/*
* The vendor-specific functions might have changed features. Now
* we do "generic changes."
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
*/
/* Filter out anything that depends on CPUID levels we don't have */
@ -768,7 +803,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
/* If the model name is still unset, do table lookup. */
if (!c->x86_model_id[0]) {
char *p;
const char *p;
p = table_lookup_model(c);
if (p)
strcpy(c->x86_model_id, p);
@ -847,7 +882,7 @@ struct msr_range {
unsigned max;
};
static struct msr_range msr_range_array[] __cpuinitdata = {
static const struct msr_range msr_range_array[] __cpuinitconst = {
{ 0x00000000, 0x00000418},
{ 0xc0000000, 0xc000040b},
{ 0xc0010000, 0xc0010142},
@ -856,14 +891,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = {
static void __cpuinit print_cpu_msr(void)
{
unsigned index_min, index_max;
unsigned index;
u64 val;
int i;
unsigned index_min, index_max;
for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
index_min = msr_range_array[i].min;
index_max = msr_range_array[i].max;
for (index = index_min; index < index_max; index++) {
if (rdmsrl_amd_safe(index, &val))
continue;
@ -873,6 +909,7 @@ static void __cpuinit print_cpu_msr(void)
}
static int show_msr __cpuinitdata;
static __init int setup_show_msr(char *arg)
{
int num;
@ -894,12 +931,14 @@ __setup("noclflush", setup_noclflush);
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
{
char *vendor = NULL;
const char *vendor = NULL;
if (c->x86_vendor < X86_VENDOR_NUM)
if (c->x86_vendor < X86_VENDOR_NUM) {
vendor = this_cpu->c_vendor;
else if (c->cpuid_level >= 0)
} else {
if (c->cpuid_level >= 0)
vendor = c->x86_vendor_id;
}
if (vendor && !strstr(c->x86_model_id, vendor))
printk(KERN_CONT "%s ", vendor);
@ -926,10 +965,12 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
static __init int setup_disablecpuid(char *arg)
{
int bit;
if (get_option(&arg, &bit) && bit < NCAPINTS*32)
setup_clear_cpu_cap(bit);
else
return 0;
return 1;
}
__setup("clearcpuid=", setup_disablecpuid);
@ -939,6 +980,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
@ -948,12 +990,21 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
/*
* Special IST stacks which the CPU switches to when it calls
* an IST-marked descriptor entry. Up to 7 stacks (hardware
* limit), all of them are 4K, except the debug stack which
* is 8K.
*/
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
__aligned(PAGE_SIZE);
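The declarations above pack all IST stacks for one CPU into a single page-aligned array, each stack EXCEPTION_STKSZ bytes except the debug stack, which gets DEBUG_STKSZ; cpu_init() later walks this array, advancing by each stack's size and recording the resulting top. A small model of that walk, with illustrative sizes (4 KB and 8 KB) and an illustrative stack count standing in for the kernel constants:

#include <stdio.h>

#define N_STACKS   5            /* stand-in for N_EXCEPTION_STACKS */
#define DEBUG_IDX  3            /* stand-in for DEBUG_STACK - 1    */
#define STKSZ      4096         /* stand-in for EXCEPTION_STKSZ    */
#define DBG_STKSZ  8192         /* stand-in for DEBUG_STKSZ        */

static const unsigned int sizes[N_STACKS] = {
        [0 ... N_STACKS - 1] = STKSZ,   /* GCC range initializer, as in the kernel */
        [DEBUG_IDX] = DBG_STKSZ,
};

static char stacks[(N_STACKS - 1) * STKSZ + DBG_STKSZ];

int main(void)
{
        char *p = stacks;

        for (int v = 0; v < N_STACKS; v++) {
                p += sizes[v];          /* skip past this stack ... */
                /* ... and record its top, as cpu_init() stores it in orig_ist. */
                printf("IST stack %d top at offset %ld\n", v, (long)(p - stacks));
        }
        return 0;
}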
extern asmlinkage void ignore_sysret(void);
/* May not be marked __init: used by software suspend */
void syscall_init(void)
{
@ -983,7 +1034,7 @@ unsigned long kernel_eflags;
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
#else /* x86_64 */
#else /* CONFIG_X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, stack_canary);
@ -995,9 +1046,26 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
regs->gs = __KERNEL_STACK_CANARY;
return regs;
}
#endif /* x86_64 */
#endif /* CONFIG_X86_64 */
/*
* Clear all 6 debug registers:
*/
static void clear_all_debug_regs(void)
{
int i;
for (i = 0; i < 8; i++) {
/* Ignore db4, db5 */
if ((i == 4) || (i == 5))
continue;
set_debugreg(0, i);
}
}
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@ -1007,15 +1075,20 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
* A lot of state is already set up in PDA init for 64 bit
*/
#ifdef CONFIG_X86_64
void __cpuinit cpu_init(void)
{
int cpu = stack_smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
unsigned long v;
struct orig_ist *orig_ist;
struct task_struct *me;
struct tss_struct *t;
unsigned long v;
int cpu;
int i;
cpu = stack_smp_processor_id();
t = &per_cpu(init_tss, cpu);
orig_ist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (cpu != 0 && percpu_read(node_number) == 0 &&
cpu_to_node(cpu) != NUMA_NO_NODE)
@ -1056,19 +1129,17 @@ void __cpuinit cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
static const unsigned int sizes[N_EXCEPTION_STACKS] = {
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += sizes[v];
estacks += exception_stack_sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
}
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
/*
* <= is required because the CPU will access up to
* 8 bits beyond the end of the IO permission bitmap.
@ -1078,8 +1149,7 @@ void __cpuinit cpu_init(void)
atomic_inc(&init_mm.mm_count);
me->active_mm = &init_mm;
if (me->mm)
BUG();
BUG_ON(me->mm);
enter_lazy_tlb(&init_mm, me);
load_sp0(t, &current->thread);
@ -1098,17 +1168,7 @@ void __cpuinit cpu_init(void)
arch_kgdb_ops.correct_hw_break();
else
#endif
{
/*
* Clear all 6 debug registers:
*/
set_debugreg(0UL, 0);
set_debugreg(0UL, 1);
set_debugreg(0UL, 2);
set_debugreg(0UL, 3);
set_debugreg(0UL, 6);
set_debugreg(0UL, 7);
}
clear_all_debug_regs();
fpu_init();
@ -1129,7 +1189,8 @@ void __cpuinit cpu_init(void)
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
for (;;) local_irq_enable();
for (;;)
local_irq_enable();
}
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@ -1145,8 +1206,7 @@ void __cpuinit cpu_init(void)
*/
atomic_inc(&init_mm.mm_count);
curr->active_mm = &init_mm;
if (curr->mm)
BUG();
BUG_ON(curr->mm);
enter_lazy_tlb(&init_mm, curr);
load_sp0(t, thread);
@ -1159,13 +1219,7 @@ void __cpuinit cpu_init(void)
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
set_debugreg(0, 1);
set_debugreg(0, 2);
set_debugreg(0, 3);
set_debugreg(0, 6);
set_debugreg(0, 7);
clear_all_debug_regs();
/*
* Force FPU initialization:
@ -1185,6 +1239,4 @@ void __cpuinit cpu_init(void)
xsave_init();
}
#endif

View file

@ -5,31 +5,32 @@
struct cpu_model_info {
int vendor;
int family;
char *model_names[16];
const char *model_names[16];
};
/* attempt to consolidate cpu attributes */
struct cpu_dev {
char * c_vendor;
const char *c_vendor;
/* some have two possibilities for cpuid string */
char * c_ident[2];
const char *c_ident[2];
struct cpu_model_info c_models[4];
void (*c_early_init)(struct cpuinfo_x86 *c);
void (*c_init)(struct cpuinfo_x86 * c);
void (*c_identify)(struct cpuinfo_x86 * c);
unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
void (*c_early_init)(struct cpuinfo_x86 *);
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
int c_x86_vendor;
};
#define cpu_dev_register(cpu_devX) \
static struct cpu_dev *__cpu_dev_##cpu_devX __used \
static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
__attribute__((__section__(".x86_cpu_dev.init"))) = \
&cpu_devX;
extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
extern void display_cacheinfo(struct cpuinfo_x86 *c);

901 arch/x86/kernel/cpu/cpu_debug.c Executable file
View file

@ -0,0 +1,901 @@
/*
* CPU x86 architecture debug code
*
* Copyright(C) 2009 Jaswinder Singh Rajput
*
* For licencing details see kernel-base/COPYING
*/
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/signal.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <asm/cpu_debug.h>
#include <asm/paravirt.h>
#include <asm/system.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/desc.h>
static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
static DEFINE_PER_CPU(unsigned, cpu_modelflag);
static DEFINE_PER_CPU(int, cpu_priv_count);
static DEFINE_PER_CPU(unsigned, cpu_model);
static DEFINE_MUTEX(cpu_debug_lock);
static struct dentry *cpu_debugfs_dir;
static struct cpu_debug_base cpu_base[] = {
{ "mc", CPU_MC, 0 },
{ "monitor", CPU_MONITOR, 0 },
{ "time", CPU_TIME, 0 },
{ "pmc", CPU_PMC, 1 },
{ "platform", CPU_PLATFORM, 0 },
{ "apic", CPU_APIC, 0 },
{ "poweron", CPU_POWERON, 0 },
{ "control", CPU_CONTROL, 0 },
{ "features", CPU_FEATURES, 0 },
{ "lastbranch", CPU_LBRANCH, 0 },
{ "bios", CPU_BIOS, 0 },
{ "freq", CPU_FREQ, 0 },
{ "mtrr", CPU_MTRR, 0 },
{ "perf", CPU_PERF, 0 },
{ "cache", CPU_CACHE, 0 },
{ "sysenter", CPU_SYSENTER, 0 },
{ "therm", CPU_THERM, 0 },
{ "misc", CPU_MISC, 0 },
{ "debug", CPU_DEBUG, 0 },
{ "pat", CPU_PAT, 0 },
{ "vmx", CPU_VMX, 0 },
{ "call", CPU_CALL, 0 },
{ "base", CPU_BASE, 0 },
{ "ver", CPU_VER, 0 },
{ "conf", CPU_CONF, 0 },
{ "smm", CPU_SMM, 0 },
{ "svm", CPU_SVM, 0 },
{ "osvm", CPU_OSVM, 0 },
{ "tss", CPU_TSS, 0 },
{ "cr", CPU_CR, 0 },
{ "dt", CPU_DT, 0 },
{ "registers", CPU_REG_ALL, 0 },
};
static struct cpu_file_base cpu_file[] = {
{ "index", CPU_REG_ALL, 0 },
{ "value", CPU_REG_ALL, 1 },
};
/* Intel Registers Range */
static struct cpu_debug_range cpu_intel_range[] = {
{ 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL },
{ 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE },
{ 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },
{ 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM },
{ 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE },
{ 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE },
{ 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE },
{ 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON },
{ 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON },
{ 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE },
{ 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE },
{ 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT },
{ 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT },
{ 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM },
{ 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE },
{ 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 },
{ 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE },
{ 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON },
{ 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT },
{ 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT },
{ 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT },
{ 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE },
{ 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 },
{ 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 },
{ 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX },
{ 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 },
{ 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT },
{ 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE },
{ 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE },
{ 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE },
{ 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT },
{ 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE },
{ 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE },
{ 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE },
{ 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE },
{ 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT },
{ 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON },
{ 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE },
{ 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON },
{ 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE },
{ 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 },
{ 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE },
{ 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 },
{ 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE },
{ 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE },
{ 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE },
{ 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE },
{ 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE },
{ 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE },
{ 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON },
{ 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE },
{ 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON },
{ 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT },
{ 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON },
{ 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT },
{ 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON },
{ 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON },
{ 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON },
{ 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON },
{ 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE },
{ 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON },
{ 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE },
{ 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON },
{ 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE },
{ 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON },
{ 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE },
{ 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON },
{ 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE },
{ 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON },
{ 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE },
{ 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE },
{ 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
{ 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
{ 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
{ 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
{ 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
{ 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
{ 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
{ 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
{ 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
};
/* AMD Registers Range */
static struct cpu_debug_range cpu_amd_range[] = {
{ 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, },
{ 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, },
{ 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, },
{ 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS },
{ 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS },
{ 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, },
{ 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, },
{ 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, },
{ 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, },
{ 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, },
{ 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, },
{ 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, },
{ 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, },
{ 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, },
{ 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, },
{ 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, },
{ 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, },
{ 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, },
{ 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, },
{ 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, },
{ 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, },
{ 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, },
{ 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, },
{ 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, },
{ 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, },
{ 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, },
{ 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, },
{ 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, },
{ 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, },
{ 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, },
{ 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, },
{ 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, },
{ 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, },
{ 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, },
{ 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, },
{ 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, },
{ 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, },
{ 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, },
{ 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, },
};
/* Intel */
static int get_intel_modelflag(unsigned model)
{
int flag;
switch (model) {
case 0x0501:
case 0x0502:
case 0x0504:
flag = CPU_INTEL_PENTIUM;
break;
case 0x0601:
case 0x0603:
case 0x0605:
case 0x0607:
case 0x0608:
case 0x060A:
case 0x060B:
flag = CPU_INTEL_P6;
break;
case 0x0609:
case 0x060D:
flag = CPU_INTEL_PENTIUM_M;
break;
case 0x060E:
flag = CPU_INTEL_CORE;
break;
case 0x060F:
case 0x0617:
flag = CPU_INTEL_CORE2;
break;
case 0x061C:
flag = CPU_INTEL_ATOM;
break;
case 0x0F00:
case 0x0F01:
case 0x0F02:
case 0x0F03:
case 0x0F04:
flag = CPU_INTEL_XEON_P4;
break;
case 0x0F06:
flag = CPU_INTEL_XEON_MP;
break;
default:
flag = CPU_NONE;
break;
}
return flag;
}
/* AMD */
static int get_amd_modelflag(unsigned model)
{
int flag;
switch (model >> 8) {
case 0x6:
flag = CPU_AMD_K6;
break;
case 0x7:
flag = CPU_AMD_K7;
break;
case 0x8:
flag = CPU_AMD_K8;
break;
case 0xf:
flag = CPU_AMD_0F;
break;
case 0x10:
flag = CPU_AMD_10;
break;
case 0x11:
flag = CPU_AMD_11;
break;
default:
flag = CPU_NONE;
break;
}
return flag;
}
static int get_cpu_modelflag(unsigned cpu)
{
int flag;
flag = per_cpu(cpu_model, cpu);
switch (flag >> 16) {
case X86_VENDOR_INTEL:
flag = get_intel_modelflag(flag);
break;
case X86_VENDOR_AMD:
flag = get_amd_modelflag(flag & 0xffff);
break;
default:
flag = CPU_NONE;
break;
}
return flag;
}
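get_cpu_modelflag() above decodes a packed per-CPU value; judging from the shifts and the case labels, the vendor lives in bits 16 and up, the family in bits 8-15 and the model in bits 0-7, but the code that fills per_cpu(cpu_model) is not part of this excerpt, so treat that layout as an inference. A tiny pack/unpack illustration under that assumption (X86_VENDOR_INTEL is 0 and X86_VENDOR_AMD is 2 in the kernel's vendor enum):

#include <stdio.h>

#define PACK(vendor, family, model) (((vendor) << 16) | ((family) << 8) | (model))

int main(void)
{
        unsigned int intel_core2 = PACK(0, 0x06, 0x0f); /* 0x060F case in get_intel_modelflag() */
        unsigned int amd_fam10   = PACK(2, 0x10, 0x02); /* family 0x10 case in get_amd_modelflag() */

        printf("vendor %u, family 0x%x, model 0x%x\n",
               intel_core2 >> 16, (intel_core2 >> 8) & 0xff, intel_core2 & 0xff);
        printf("vendor %u, family 0x%x, model 0x%x\n",
               amd_fam10 >> 16, (amd_fam10 >> 8) & 0xff, amd_fam10 & 0xff);
        return 0;
}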
static int get_cpu_range_count(unsigned cpu)
{
int index;
switch (per_cpu(cpu_model, cpu) >> 16) {
case X86_VENDOR_INTEL:
index = ARRAY_SIZE(cpu_intel_range);
break;
case X86_VENDOR_AMD:
index = ARRAY_SIZE(cpu_amd_range);
break;
default:
index = 0;
break;
}
return index;
}
static int is_typeflag_valid(unsigned cpu, unsigned flag)
{
unsigned vendor, modelflag;
int i, index;
/* Standard registers should always be valid */
if (flag >= CPU_TSS)
return 1;
modelflag = per_cpu(cpu_modelflag, cpu);
vendor = per_cpu(cpu_model, cpu) >> 16;
index = get_cpu_range_count(cpu);
for (i = 0; i < index; i++) {
switch (vendor) {
case X86_VENDOR_INTEL:
if ((cpu_intel_range[i].model & modelflag) &&
(cpu_intel_range[i].flag & flag))
return 1;
break;
case X86_VENDOR_AMD:
if ((cpu_amd_range[i].model & modelflag) &&
(cpu_amd_range[i].flag & flag))
return 1;
break;
}
}
/* Invalid */
return 0;
}
static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
int index, unsigned flag)
{
unsigned modelflag;
modelflag = per_cpu(cpu_modelflag, cpu);
*max = 0;
switch (per_cpu(cpu_model, cpu) >> 16) {
case X86_VENDOR_INTEL:
if ((cpu_intel_range[index].model & modelflag) &&
(cpu_intel_range[index].flag & flag)) {
*min = cpu_intel_range[index].min;
*max = cpu_intel_range[index].max;
}
break;
case X86_VENDOR_AMD:
if ((cpu_amd_range[index].model & modelflag) &&
(cpu_amd_range[index].flag & flag)) {
*min = cpu_amd_range[index].min;
*max = cpu_amd_range[index].max;
}
break;
}
return *max;
}
/* This function can also be called with seq = NULL for printk */
static void print_cpu_data(struct seq_file *seq, unsigned type,
u32 low, u32 high)
{
struct cpu_private *priv;
u64 val = high;
if (seq) {
priv = seq->private;
if (priv->file) {
val = (val << 32) | low;
seq_printf(seq, "0x%llx\n", val);
} else
seq_printf(seq, " %08x: %08x_%08x\n",
type, high, low);
} else
printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
}
/* This function can also be called with seq = NULL for printk */
static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
{
unsigned msr, msr_min, msr_max;
struct cpu_private *priv;
u32 low, high;
int i, range;
if (seq) {
priv = seq->private;
if (priv->file) {
if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
&low, &high))
print_cpu_data(seq, priv->reg, low, high);
return;
}
}
range = get_cpu_range_count(cpu);
for (i = 0; i < range; i++) {
if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
continue;
for (msr = msr_min; msr <= msr_max; msr++) {
if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
continue;
print_cpu_data(seq, msr, low, high);
}
}
}
static void print_tss(void *arg)
{
struct pt_regs *regs = task_pt_regs(current);
struct seq_file *seq = arg;
unsigned int seg;
seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
seq_printf(seq, " RSI\t: %016lx\n", regs->si);
seq_printf(seq, " RDI\t: %016lx\n", regs->di);
seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
#ifdef CONFIG_X86_64
seq_printf(seq, " R08\t: %016lx\n", regs->r8);
seq_printf(seq, " R09\t: %016lx\n", regs->r9);
seq_printf(seq, " R10\t: %016lx\n", regs->r10);
seq_printf(seq, " R11\t: %016lx\n", regs->r11);
seq_printf(seq, " R12\t: %016lx\n", regs->r12);
seq_printf(seq, " R13\t: %016lx\n", regs->r13);
seq_printf(seq, " R14\t: %016lx\n", regs->r14);
seq_printf(seq, " R15\t: %016lx\n", regs->r15);
#endif
asm("movl %%cs,%0" : "=r" (seg));
seq_printf(seq, " CS\t: %04x\n", seg);
asm("movl %%ds,%0" : "=r" (seg));
seq_printf(seq, " DS\t: %04x\n", seg);
seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
asm("movl %%es,%0" : "=r" (seg));
seq_printf(seq, " ES\t: %04x\n", seg);
asm("movl %%fs,%0" : "=r" (seg));
seq_printf(seq, " FS\t: %04x\n", seg);
asm("movl %%gs,%0" : "=r" (seg));
seq_printf(seq, " GS\t: %04x\n", seg);
seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
}
static void print_cr(void *arg)
{
struct seq_file *seq = arg;
seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
#ifdef CONFIG_X86_64
seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
#endif
}
static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
{
seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
}
static void print_dt(void *seq)
{
struct desc_ptr dt;
unsigned long ldt;
/* IDT */
store_idt((struct desc_ptr *)&dt);
print_desc_ptr("IDT", seq, dt);
/* GDT */
store_gdt((struct desc_ptr *)&dt);
print_desc_ptr("GDT", seq, dt);
/* LDT */
store_ldt(ldt);
seq_printf(seq, " LDT\t: %016lx\n", ldt);
/* TR */
store_tr(ldt);
seq_printf(seq, " TR\t: %016lx\n", ldt);
}
static void print_dr(void *arg)
{
struct seq_file *seq = arg;
unsigned long dr;
int i;
for (i = 0; i < 8; i++) {
/* Ignore db4, db5 */
if ((i == 4) || (i == 5))
continue;
get_debugreg(dr, i);
seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
}
seq_printf(seq, "\n MSR\t:\n");
}
static void print_apic(void *arg)
{
struct seq_file *seq = arg;
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(seq, " LAPIC\t:\n");
seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
#endif /* CONFIG_X86_LOCAL_APIC */
seq_printf(seq, "\n MSR\t:\n");
}
static int cpu_seq_show(struct seq_file *seq, void *v)
{
struct cpu_private *priv = seq->private;
if (priv == NULL)
return -EINVAL;
switch (cpu_base[priv->type].flag) {
case CPU_TSS:
smp_call_function_single(priv->cpu, print_tss, seq, 1);
break;
case CPU_CR:
smp_call_function_single(priv->cpu, print_cr, seq, 1);
break;
case CPU_DT:
smp_call_function_single(priv->cpu, print_dt, seq, 1);
break;
case CPU_DEBUG:
if (priv->file == CPU_INDEX_BIT)
smp_call_function_single(priv->cpu, print_dr, seq, 1);
print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
break;
case CPU_APIC:
if (priv->file == CPU_INDEX_BIT)
smp_call_function_single(priv->cpu, print_apic, seq, 1);
print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
break;
default:
print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
break;
}
seq_printf(seq, "\n");
return 0;
}
static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
if (*pos == 0) /* One time is enough ;-) */
return seq;
return NULL;
}
static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
(*pos)++;
return cpu_seq_start(seq, pos);
}
static void cpu_seq_stop(struct seq_file *seq, void *v)
{
}
static const struct seq_operations cpu_seq_ops = {
.start = cpu_seq_start,
.next = cpu_seq_next,
.stop = cpu_seq_stop,
.show = cpu_seq_show,
};
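/*
 * Aside: the one-shot ->start/->next pair above (yield a token only at
 * *pos == 0) is the hand-rolled form of seq_file's single-record idiom.
 * A minimal sketch of the same thing using the single_open() helper,
 * with a hypothetical show routine (not part of this patch):
 */
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/module.h>

static int one_record_show(struct seq_file *seq, void *unused)
{
	/* seq->private carries whatever was passed to single_open() */
	seq_printf(seq, "one record per read\n");
	return 0;
}

static int one_record_open(struct inode *inode, struct file *file)
{
	return single_open(file, one_record_show, inode->i_private);
}

static const struct file_operations one_record_fops = {
	.owner   = THIS_MODULE,
	.open    = one_record_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};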
static int cpu_seq_open(struct inode *inode, struct file *file)
{
struct cpu_private *priv = inode->i_private;
struct seq_file *seq;
int err;
err = seq_open(file, &cpu_seq_ops);
if (!err) {
seq = file->private_data;
seq->private = priv;
}
return err;
}
static int write_msr(struct cpu_private *priv, u64 val)
{
u32 low, high;
high = (val >> 32) & 0xffffffff;
low = val & 0xffffffff;
if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
return 0;
return -EPERM;
}
static int write_cpu_register(struct cpu_private *priv, const char *buf)
{
int ret = -EPERM;
u64 val;
ret = strict_strtoull(buf, 0, &val);
if (ret < 0)
return ret;
/* Supporting only MSRs */
if (priv->type < CPU_TSS_BIT)
return write_msr(priv, val);
return ret;
}
static ssize_t cpu_write(struct file *file, const char __user *ubuf,
size_t count, loff_t *off)
{
struct seq_file *seq = file->private_data;
struct cpu_private *priv = seq->private;
char buf[19];
if ((priv == NULL) || (count >= sizeof(buf)))
return -EINVAL;
if (copy_from_user(&buf, ubuf, count))
return -EFAULT;
buf[count] = 0;
if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
if (!write_cpu_register(priv, buf))
return count;
return -EACCES;
}
static const struct file_operations cpu_fops = {
.owner = THIS_MODULE,
.open = cpu_seq_open,
.read = seq_read,
.write = cpu_write,
.llseek = seq_lseek,
.release = seq_release,
};
static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
unsigned file, struct dentry *dentry)
{
struct cpu_private *priv = NULL;
/* Already initialized */
if (file == CPU_INDEX_BIT)
if (per_cpu(cpu_arr[type].init, cpu))
return 0;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (priv == NULL)
return -ENOMEM;
priv->cpu = cpu;
priv->type = type;
priv->reg = reg;
priv->file = file;
mutex_lock(&cpu_debug_lock);
per_cpu(priv_arr[type], cpu) = priv;
per_cpu(cpu_priv_count, cpu)++;
mutex_unlock(&cpu_debug_lock);
if (file)
debugfs_create_file(cpu_file[file].name, S_IRUGO,
dentry, (void *)priv, &cpu_fops);
else {
debugfs_create_file(cpu_base[type].name, S_IRUGO,
per_cpu(cpu_arr[type].dentry, cpu),
(void *)priv, &cpu_fops);
mutex_lock(&cpu_debug_lock);
per_cpu(cpu_arr[type].init, cpu) = 1;
mutex_unlock(&cpu_debug_lock);
}
return 0;
}
static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
struct dentry *dentry)
{
unsigned file;
int err = 0;
for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
err = cpu_create_file(cpu, type, reg, file, dentry);
if (err)
return err;
}
return err;
}
static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
{
struct dentry *cpu_dentry = NULL;
unsigned reg, reg_min, reg_max;
int i, range, err = 0;
char reg_dir[12];
u32 low, high;
range = get_cpu_range_count(cpu);
for (i = 0; i < range; i++) {
if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
cpu_base[type].flag))
continue;
for (reg = reg_min; reg <= reg_max; reg++) {
if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
continue;
sprintf(reg_dir, "0x%x", reg);
cpu_dentry = debugfs_create_dir(reg_dir, dentry);
err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
if (err)
return err;
}
}
return err;
}
static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
{
struct dentry *cpu_dentry = NULL;
unsigned type;
int err = 0;
for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
if (!is_typeflag_valid(cpu, cpu_base[type].flag))
continue;
cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
if (type < CPU_TSS_BIT)
err = cpu_init_msr(cpu, type, cpu_dentry);
else
err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
cpu_dentry);
if (err)
return err;
}
return err;
}
static int cpu_init_cpu(void)
{
struct dentry *cpu_dentry = NULL;
struct cpuinfo_x86 *cpui;
char cpu_dir[12];
unsigned cpu;
int err = 0;
for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
cpui = &cpu_data(cpu);
if (!cpu_has(cpui, X86_FEATURE_MSR))
continue;
per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
(cpui->x86 << 8) |
(cpui->x86_model));
per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
sprintf(cpu_dir, "cpu%d", cpu);
cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
err = cpu_init_allreg(cpu, cpu_dentry);
pr_info("cpu%d(%d) debug files %d\n",
cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
pr_err("Register files count %d exceeds limit %d\n",
per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
err = -ENFILE;
}
if (err)
return err;
}
return err;
}
static int __init cpu_debug_init(void)
{
cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
return cpu_init_cpu();
}
static void __exit cpu_debug_exit(void)
{
int i, cpu;
if (cpu_debugfs_dir)
debugfs_remove_recursive(cpu_debugfs_dir);
for (cpu = 0; cpu < nr_cpu_ids; cpu++)
for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
kfree(per_cpu(priv_arr[i], cpu));
}
module_init(cpu_debug_init);
module_exit(cpu_debug_exit);
MODULE_AUTHOR("Jaswinder Singh Rajput");
MODULE_DESCRIPTION("CPU Debug module");
MODULE_LICENSE("GPL");


@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
*/
static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
static char Cx86_model[][9] __cpuinitdata = {
static const char __cpuinitconst Cx86_model[][9] = {
"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
"M II ", "Unknown"
};
static char Cx486_name[][5] __cpuinitdata = {
static const char __cpuinitconst Cx486_name[][5] = {
"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
"SRx2", "DRx2"
};
static char Cx486S_name[][4] __cpuinitdata = {
static const char __cpuinitconst Cx486S_name[][4] = {
"S", "S2", "Se", "S2e"
};
static char Cx486D_name[][4] __cpuinitdata = {
static const char __cpuinitconst Cx486D_name[][4] = {
"DX", "DX2", "?", "?", "?", "DX4"
};
static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
static char cyrix_model_mult1[] __cpuinitdata = "12??43";
static char cyrix_model_mult2[] __cpuinitdata = "12233445";
static const char __cpuinitconst cyrix_model_mult1[] = "12??43";
static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
/*
* Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
}
}
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
.c_early_init = early_init_cyrix,
@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
cpu_dev_register(cyrix_cpu_dev);
static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst nsc_cpu_dev = {
.c_vendor = "NSC",
.c_ident = { "Geode by NSC" },
.c_init = init_nsc,


@ -14,6 +14,7 @@
#include <asm/uaccess.h>
#include <asm/ds.h>
#include <asm/bugs.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_64
#include <asm/topology.h>
@ -54,6 +55,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
c->x86_cache_alignment = 128;
#endif
/* CPUID workaround for 0F33/0F34 CPU */
if (c->x86 == 0xF && c->x86_model == 0x3
&& (c->x86_mask == 0x3 || c->x86_mask == 0x4))
c->x86_phys_bits = 36;
/*
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
* with P/T states and does not stop in deep C-states.
@ -116,6 +122,28 @@ static void __cpuinit trap_init_f00f_bug(void)
}
#endif
static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
/* Are we being called from identify_secondary_cpu()? */
if (c->cpu_index == boot_cpu_id)
return;
/*
* Mask B, Pentium, but not Pentium MMX
*/
if (c->x86 == 5 &&
c->x86_mask >= 1 && c->x86_mask <= 4 &&
c->x86_model <= 3) {
/*
* Remember we have B step Pentia with bugs
*/
WARN_ONCE(1, "WARNING: SMP operation may be unreliable "
"with B stepping processors.\n");
}
#endif
}
static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@ -192,6 +220,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_NUMAQ
numaq_tsc_disable();
#endif
intel_smp_check(c);
}
#else
static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
@ -391,7 +421,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
}
#endif
static struct cpu_dev intel_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
#ifdef CONFIG_X86_32


@ -32,7 +32,7 @@ struct _cache_table
};
/* all the cache descriptor types we care about (no TLB or trace cache entries) */
static struct _cache_table cache_table[] __cpuinitdata =
static const struct _cache_table __cpuinitconst cache_table[] =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
{ 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@ -206,15 +206,15 @@ union l3_cache {
unsigned val;
};
static unsigned short assocs[] __cpuinitdata = {
static const unsigned short __cpuinitconst assocs[] = {
[1] = 1, [2] = 2, [4] = 4, [6] = 8,
[8] = 16, [0xa] = 32, [0xb] = 48,
[0xc] = 64,
[0xf] = 0xffff // ??
};
static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,


@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o


@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c)
}
}
static unsigned long old_cr4 __initdata;
void __init stop_mce(void)
{
old_cr4 = read_cr4();
clear_in_cr4(X86_CR4_MCE);
}
void __init restart_mce(void)
{
if (old_cr4 & X86_CR4_MCE)
set_in_cr4(X86_CR4_MCE);
}
static int __init mcheck_disable(char *str)
{
mce_disabled = 1;


@ -3,6 +3,8 @@
* K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
* Rest from unknown author(s).
* 2004 Andi Kleen. Rewrote most of it.
* Copyright 2008 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/init.h>
@ -24,6 +26,9 @@
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/kdebug.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/ratelimit.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
@ -32,7 +37,6 @@
#include <asm/idle.h>
#define MISC_MCELOG_MINOR 227
#define NR_SYSFS_BANKS 6
atomic_t mce_entry;
@ -47,7 +51,7 @@ static int mce_dont_init;
*/
static int tolerant = 1;
static int banks;
static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
static u64 *bank;
static unsigned long notify_user;
static int rip_msr;
static int mce_bootlog = -1;
@ -58,6 +62,19 @@ static char *trigger_argv[2] = { trigger, NULL };
static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
/* Do initial initialization of a struct mce */
void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = smp_processor_id();
rdtscll(m->tsc);
}
/*
* Lockless MCE logging infrastructure.
* This avoids deadlocks on printk locks without having to break locks. Also
@ -119,11 +136,11 @@ static void print_mce(struct mce *m)
print_symbol("{%s}", m->ip);
printk("\n");
}
printk(KERN_EMERG "TSC %Lx ", m->tsc);
printk(KERN_EMERG "TSC %llx ", m->tsc);
if (m->addr)
printk("ADDR %Lx ", m->addr);
printk("ADDR %llx ", m->addr);
if (m->misc)
printk("MISC %Lx ", m->misc);
printk("MISC %llx ", m->misc);
printk("\n");
printk(KERN_EMERG "This is not a software problem!\n");
printk(KERN_EMERG "Run through mcelog --ascii to decode "
@ -149,8 +166,10 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
panic(msg);
}
static int mce_available(struct cpuinfo_x86 *c)
int mce_available(struct cpuinfo_x86 *c)
{
if (mce_dont_init)
return 0;
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}
@ -172,7 +191,77 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
}
/*
* The actual machine check handler
* Poll for corrected events or events that happened before reset.
* Those are just logged through /dev/mcelog.
*
* This is executed in standard interrupt context.
*/
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce m;
int i;
mce_setup(&m);
rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
for (i = 0; i < banks; i++) {
if (!bank[i] || !test_bit(i, *b))
continue;
m.misc = 0;
m.addr = 0;
m.bank = i;
m.tsc = 0;
barrier();
rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
if (!(m.status & MCI_STATUS_VAL))
continue;
/*
* Uncorrected events are handled by the exception handler
* when it is enabled. But when the exception is disabled log
* everything.
*
* TBD do the same check for MCI_STATUS_EN here?
*/
if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
continue;
if (m.status & MCI_STATUS_MISCV)
rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
if (m.status & MCI_STATUS_ADDRV)
rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
mce_log(&m);
add_taint(TAINT_MACHINE_CHECK);
/*
* Clear state for this bank.
*/
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
}
/*
* Don't clear MCG_STATUS here because it's only defined for
* exceptions.
*/
}
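/*
 * For reference, the MCi_STATUS bits the poller above tests, decoded
 * from a raw 64-bit value in a small userspace sketch (architectural
 * bit positions: VAL=63, OVER=62, UC=61, EN=60, MISCV=59, ADDRV=58,
 * PCC=57; the example value below is made up):
 */
#include <stdio.h>
#include <stdint.h>

static void decode_mci_status(uint64_t status)
{
	printf("VAL=%d OVER=%d UC=%d EN=%d MISCV=%d ADDRV=%d PCC=%d mca_code=0x%04x\n",
	       (int)(status >> 63 & 1), (int)(status >> 62 & 1),
	       (int)(status >> 61 & 1), (int)(status >> 60 & 1),
	       (int)(status >> 59 & 1), (int)(status >> 58 & 1),
	       (int)(status >> 57 & 1), (unsigned)(status & 0xffff));
}

int main(void)
{
	decode_mci_status(0xb000000000000000ULL);	/* VAL | UC | EN */
	return 0;
}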
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*
* This is executed in NMI context not subject to normal locking rules. This
* implies that most kernel services cannot be safely used. Don't even
* think about putting a printk in there!
*/
void do_machine_check(struct pt_regs * regs, long error_code)
{
@ -190,17 +279,18 @@ void do_machine_check(struct pt_regs * regs, long error_code)
* error.
*/
int kill_it = 0;
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
atomic_inc(&mce_entry);
if ((regs
&& notify_die(DIE_NMI, "machine check", regs, error_code,
if (notify_die(DIE_NMI, "machine check", regs, error_code,
18, SIGKILL) == NOTIFY_STOP)
|| !banks)
goto out2;
if (!banks)
goto out2;
memset(&m, 0, sizeof(struct mce));
m.cpu = smp_processor_id();
mce_setup(&m);
rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
/* if the restart IP is not valid, we're done for */
if (!(m.mcgstatus & MCG_STATUS_RIPV))
@ -210,18 +300,32 @@ void do_machine_check(struct pt_regs * regs, long error_code)
barrier();
for (i = 0; i < banks; i++) {
if (i < NR_SYSFS_BANKS && !bank[i])
__clear_bit(i, toclear);
if (!bank[i])
continue;
m.misc = 0;
m.addr = 0;
m.bank = i;
m.tsc = 0;
rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
if ((m.status & MCI_STATUS_VAL) == 0)
continue;
/*
* Non uncorrected errors are handled by machine_check_poll
* Leave them alone.
*/
if ((m.status & MCI_STATUS_UC) == 0)
continue;
/*
* Set taint even when machine check was not enabled.
*/
add_taint(TAINT_MACHINE_CHECK);
__set_bit(i, toclear);
if (m.status & MCI_STATUS_EN) {
/* if PCC was set, there's no way out */
no_way_out |= !!(m.status & MCI_STATUS_PCC);
@ -235,6 +339,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
no_way_out = 1;
kill_it = 1;
}
} else {
/*
* Machine check event was not enabled. Clear, but
* ignore.
*/
continue;
}
if (m.status & MCI_STATUS_MISCV)
@ -243,9 +353,6 @@ void do_machine_check(struct pt_regs * regs, long error_code)
rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
mce_get_rip(&m, regs);
if (error_code >= 0)
rdtscll(m.tsc);
if (error_code != -2)
mce_log(&m);
/* Did this bank cause the exception? */
@ -255,14 +362,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
panicm = m;
panicm_found = 1;
}
add_taint(TAINT_MACHINE_CHECK);
}
/* Never do anything final in the polling timer */
if (!regs)
goto out;
/* If we didn't find an uncorrectable error, pick
the last one (shouldn't happen, just being safe). */
if (!panicm_found)
@ -309,10 +410,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
/* notify userspace ASAP */
set_thread_flag(TIF_MCE_NOTIFY);
out:
/* the last thing we do is clear state */
for (i = 0; i < banks; i++)
for (i = 0; i < banks; i++) {
if (test_bit(i, toclear))
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
}
wrmsrl(MSR_IA32_MCG_STATUS, 0);
out2:
atomic_dec(&mce_entry);
@ -332,15 +434,13 @@ void do_machine_check(struct pt_regs * regs, long error_code)
* and historically has been the register value of the
* MSR_IA32_THERMAL_STATUS (Intel) msr.
*/
void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
void mce_log_therm_throt_event(__u64 status)
{
struct mce m;
memset(&m, 0, sizeof(m));
m.cpu = cpu;
mce_setup(&m);
m.bank = MCE_THERMAL_BANK;
m.status = status;
rdtscll(m.tsc);
mce_log(&m);
}
#endif /* CONFIG_X86_MCE_INTEL */
@ -353,18 +453,18 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
static int check_interval = 5 * 60; /* 5 minutes */
static int next_interval; /* in jiffies */
static void mcheck_timer(struct work_struct *work);
static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer);
static void mcheck_timer(unsigned long);
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_check_cpu(void *info)
static void mcheck_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
WARN_ON(smp_processor_id() != data);
if (mce_available(&current_cpu_data))
do_machine_check(NULL, 0);
}
static void mcheck_timer(struct work_struct *work)
{
on_each_cpu(mcheck_check_cpu, NULL, 1);
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
/*
* Alert userspace if needed. If we logged an MCE, reduce the
@ -377,31 +477,41 @@ static void mcheck_timer(struct work_struct *work)
(int)round_jiffies_relative(check_interval*HZ));
}
schedule_delayed_work(&mcheck_work, next_interval);
t->expires = jiffies + next_interval;
add_timer(t);
}
static void mce_do_trigger(struct work_struct *work)
{
call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
}
static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
/*
* This is only called from process context. This is where we do
* anything we need to alert userspace about new MCEs. This is called
* directly from the poller and also from entry.S and idle, thanks to
* TIF_MCE_NOTIFY.
* Notify the user(s) about new machine check events.
* Can be called from interrupt context, but not from machine check/NMI
* context.
*/
int mce_notify_user(void)
{
/* Not more than two messages every minute */
static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
clear_thread_flag(TIF_MCE_NOTIFY);
if (test_and_clear_bit(0, &notify_user)) {
static unsigned long last_print;
unsigned long now = jiffies;
wake_up_interruptible(&mce_wait);
if (trigger[0])
call_usermodehelper(trigger, trigger_argv, NULL,
UMH_NO_WAIT);
if (time_after_eq(now, last_print + (check_interval*HZ))) {
last_print = now;
/*
* There is no risk of missing notifications because
* work_pending is always cleared before the function is
* executed.
*/
if (trigger[0] && !work_pending(&mce_trigger_work))
schedule_work(&mce_trigger_work);
if (__ratelimit(&ratelimit))
printk(KERN_INFO "Machine check events logged\n");
}
return 1;
}
@ -425,63 +535,78 @@ static struct notifier_block mce_idle_notifier = {
static __init int periodic_mcheck_init(void)
{
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
round_jiffies_relative(next_interval));
idle_notifier_register(&mce_idle_notifier);
return 0;
}
__initcall(periodic_mcheck_init);
/*
* Initialize Machine Checks for a CPU.
*/
static void mce_init(void *dummy)
static int mce_cap_init(void)
{
u64 cap;
int i;
unsigned b;
rdmsrl(MSR_IA32_MCG_CAP, cap);
banks = cap & 0xff;
if (banks > MCE_EXTENDED_BANK) {
banks = MCE_EXTENDED_BANK;
printk(KERN_INFO "MCE: warning: using only %d banks\n",
MCE_EXTENDED_BANK);
b = cap & 0xff;
if (b > MAX_NR_BANKS) {
printk(KERN_WARNING
"MCE: Using only %u machine check banks out of %u\n",
MAX_NR_BANKS, b);
b = MAX_NR_BANKS;
}
/* Don't support asymmetric configurations today */
WARN_ON(banks != 0 && b != banks);
banks = b;
if (!bank) {
bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
if (!bank)
return -ENOMEM;
memset(bank, 0xff, banks * sizeof(u64));
}
/* Use accurate RIP reporting if available. */
if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
rip_msr = MSR_IA32_MCG_EIP;
/* Log the machine checks left over from the previous reset.
This also clears all registers */
do_machine_check(NULL, mce_bootlog ? -1 : -2);
return 0;
}
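/*
 * A userspace cross-check of what mce_cap_init() reads: IA32_MCG_CAP
 * (MSR 0x179) via the msr driver.  Sketch only -- assumes the msr
 * module is loaded, /dev/cpu/0/msr exists and the caller is root.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t cap;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &cap, sizeof(cap), 0x179) != sizeof(cap)) {
		perror("IA32_MCG_CAP");
		return 1;
	}
	printf("banks=%u MCG_CTL_P=%d MCG_EXT_P=%d MCG_CMCI_P=%d ext_cnt=%u\n",
	       (unsigned)(cap & 0xff), (int)(cap >> 8 & 1),
	       (int)(cap >> 9 & 1), (int)(cap >> 10 & 1),
	       (unsigned)(cap >> 16 & 0xff));
	close(fd);
	return 0;
}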
static void mce_init(void *dummy)
{
u64 cap;
int i;
mce_banks_t all_banks;
/*
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
machine_check_poll(MCP_UC, &all_banks);
set_in_cr4(X86_CR4_MCE);
rdmsrl(MSR_IA32_MCG_CAP, cap);
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
for (i = 0; i < banks; i++) {
if (i < NR_SYSFS_BANKS)
wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
else
wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
}
}
/* Add per CPU specific workarounds here */
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
static void mce_cpu_quirks(struct cpuinfo_x86 *c)
{
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
if(c->x86 == 15)
if (c->x86 == 15 && banks > 4)
/* disable GART TBL walk error reporting, which trips off
incorrectly with the IOMMU & 3ware & Cerberus. */
clear_bit(10, &bank[4]);
clear_bit(10, (unsigned long *)&bank[4]);
if(c->x86 <= 17 && mce_bootlog < 0)
/* Lots of broken BIOS around that don't clear them
by default and leave crap in there. Don't log. */
@ -504,20 +629,38 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
static void mce_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
/* data race harmless because everyone sets to the same value */
if (!next_interval)
next_interval = check_interval * HZ;
if (!next_interval)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + next_interval);
add_timer(t);
}
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off.
*/
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
{
mce_cpu_quirks(c);
if (mce_dont_init ||
!mce_available(c))
if (!mce_available(c))
return;
if (mce_cap_init() < 0) {
mce_dont_init = 1;
return;
}
mce_cpu_quirks(c);
mce_init(NULL);
mce_cpu_features(c);
mce_init_timer();
}
/*
@ -573,7 +716,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
{
unsigned long *cpu_tsc;
static DEFINE_MUTEX(mce_read_mutex);
unsigned next;
unsigned prev, next;
char __user *buf = ubuf;
int i, err;
@ -592,25 +735,32 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
}
err = 0;
for (i = 0; i < next; i++) {
prev = 0;
do {
for (i = prev; i < next; i++) {
unsigned long start = jiffies;
while (!mcelog.entry[i].finished) {
if (time_after_eq(jiffies, start + 2)) {
memset(mcelog.entry + i,0, sizeof(struct mce));
memset(mcelog.entry + i, 0,
sizeof(struct mce));
goto timeout;
}
cpu_relax();
}
smp_rmb();
err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
err |= copy_to_user(buf, mcelog.entry + i,
sizeof(struct mce));
buf += sizeof(struct mce);
timeout:
;
}
memset(mcelog.entry, 0, next * sizeof(struct mce));
mcelog.next = 0;
memset(mcelog.entry + prev, 0,
(next - prev) * sizeof(struct mce));
prev = next;
next = cmpxchg(&mcelog.next, prev, 0);
} while (next != prev);
synchronize_sched();
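/*
 * The drain loop above in isolation (userspace sketch using a GCC
 * builtin in place of cmpxchg, with the entries reduced to a plain
 * counter): keep copying until the compare-and-swap of the index back
 * to 0 succeeds, i.e. no producer appended entries while we copied.
 */
static volatile unsigned int log_next;	/* producers increment this */

static void drain(void)
{
	unsigned int prev = 0;
	unsigned int next = log_next;	/* snapshot of the producer index */

	do {
		unsigned int i;

		for (i = prev; i < next; i++)
			;		/* copy entry i out here */
		prev = next;
		/* reset to 0 only if nothing was appended meanwhile */
		next = __sync_val_compare_and_swap(&log_next, prev, 0);
	} while (next != prev);
}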
@ -680,20 +830,6 @@ static struct miscdevice mce_log_device = {
&mce_chrdev_ops,
};
static unsigned long old_cr4 __initdata;
void __init stop_mce(void)
{
old_cr4 = read_cr4();
clear_in_cr4(X86_CR4_MCE);
}
void __init restart_mce(void)
{
if (old_cr4 & X86_CR4_MCE)
set_in_cr4(X86_CR4_MCE);
}
/*
* Old style boot options parsing. Only for compatibility.
*/
@ -703,8 +839,7 @@ static int __init mcheck_disable(char *str)
return 1;
}
/* mce=off disables machine check. Note you can re-enable it later
using sysfs.
/* mce=off disables machine check.
mce=TOLERANCELEVEL (number, see above)
mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
mce=nobootlog Don't log MCEs from before booting. */
@ -728,6 +863,29 @@ __setup("mce=", mcheck_enable);
* Sysfs support
*/
/*
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
static int mce_disable(void)
{
int i;
for (i = 0; i < banks; i++)
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
return 0;
}
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
return mce_disable();
}
static int mce_shutdown(struct sys_device *dev)
{
return mce_disable();
}
/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
Only one CPU is active at this time; the others get re-added later using
CPU hotplug. */
@ -738,20 +896,24 @@ static int mce_resume(struct sys_device *dev)
return 0;
}
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
if (mce_available(&current_cpu_data))
mce_init(NULL);
mce_init_timer();
}
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
on_each_cpu(mce_init, NULL, 1);
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
round_jiffies_relative(next_interval));
on_each_cpu(mce_cpu_restart, NULL, 1);
}
static struct sysdev_class mce_sysclass = {
.suspend = mce_suspend,
.shutdown = mce_shutdown,
.resume = mce_resume,
.name = "machinecheck",
};
@ -778,16 +940,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit
} \
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
/*
* TBD should generate these dynamically based on number of available banks.
* Have only 6 control banks in /sysfs until then.
*/
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
ACCESSOR(bank3ctl,bank[3],mce_restart())
ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(bank5ctl,bank[5],mce_restart())
static struct sysdev_attribute *bank_attrs;
static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
char *buf)
{
u64 b = bank[attr - bank_attrs];
return sprintf(buf, "%llx\n", b);
}
static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
const char *buf, size_t siz)
{
char *end;
u64 new = simple_strtoull(buf, &end, 0);
if (end == buf)
return -EINVAL;
bank[attr - bank_attrs] = new;
mce_restart();
return end-buf;
}
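/*
 * Userspace view of the per-bank files created below (path assumed
 * from the "machinecheck" sysdev class registered in this file, i.e.
 * /sys/devices/system/machinecheck/machinecheckN/bankN): read the
 * current MCi_CTL mask and write a new one, which triggers
 * mce_restart().  Hypothetical usage sketch, root required.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long ctl;
	FILE *f = fopen("/sys/devices/system/machinecheck/machinecheck0/bank0", "r+");

	if (!f) {
		perror("bank0");
		return 1;
	}
	if (fscanf(f, "%llx", &ctl) == 1)
		printf("bank0 ctl = 0x%llx\n", ctl);
	rewind(f);
	fprintf(f, "0xffffffffffffffff\n");	/* enable all error types */
	fclose(f);
	return 0;
}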
static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
char *buf)
@ -814,8 +986,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
ACCESSOR(check_interval,check_interval,mce_restart())
static struct sysdev_attribute *mce_attributes[] = {
&attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
&attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
&attr_tolerant.attr, &attr_check_interval, &attr_trigger,
NULL
};
@ -845,11 +1015,22 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (err)
goto error;
}
for (i = 0; i < banks; i++) {
err = sysdev_create_file(&per_cpu(device_mce, cpu),
&bank_attrs[i]);
if (err)
goto error2;
}
cpu_set(cpu, mce_device_initialized);
return 0;
error2:
while (--i >= 0) {
sysdev_remove_file(&per_cpu(device_mce, cpu),
&bank_attrs[i]);
}
error:
while (i--) {
while (--i >= 0) {
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
}
@ -868,15 +1049,46 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
for (i = 0; mce_attributes[i]; i++)
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
for (i = 0; i < banks; i++)
sysdev_remove_file(&per_cpu(device_mce, cpu),
&bank_attrs[i]);
sysdev_unregister(&per_cpu(device_mce,cpu));
cpu_clear(cpu, mce_device_initialized);
}
/* Make sure there are no machine checks on offlined CPUs. */
static void mce_disable_cpu(void *h)
{
int i;
unsigned long action = *(unsigned long *)h;
if (!mce_available(&current_cpu_data))
return;
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++)
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
}
static void mce_reenable_cpu(void *h)
{
int i;
unsigned long action = *(unsigned long *)h;
if (!mce_available(&current_cpu_data))
return;
if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
for (i = 0; i < banks; i++)
wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct timer_list *t = &per_cpu(mce_timer, cpu);
switch (action) {
case CPU_ONLINE:
@ -891,6 +1103,21 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
threshold_cpu_callback(action, cpu);
mce_remove_device(cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
del_timer_sync(t);
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
t->expires = round_jiffies(jiffies + next_interval);
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
case CPU_POST_DEAD:
/* intentionally ignoring frozen here */
cmci_rediscover(cpu);
break;
}
return NOTIFY_OK;
}
@ -899,6 +1126,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
.notifier_call = mce_cpu_callback,
};
static __init int mce_init_banks(void)
{
int i;
bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
GFP_KERNEL);
if (!bank_attrs)
return -ENOMEM;
for (i = 0; i < banks; i++) {
struct sysdev_attribute *a = &bank_attrs[i];
a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
if (!a->attr.name)
goto nomem;
a->attr.mode = 0644;
a->show = show_bank;
a->store = set_bank;
}
return 0;
nomem:
while (--i >= 0)
kfree(bank_attrs[i].attr.name);
kfree(bank_attrs);
bank_attrs = NULL;
return -ENOMEM;
}
static __init int mce_init_device(void)
{
int err;
@ -906,6 +1161,11 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
err = mce_init_banks();
if (err)
return err;
err = sysdev_class_register(&mce_sysclass);
if (err)
return err;


@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANKS] = {
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
static void amd_threshold_interrupt(void);
/*
* CPU Initialization
*/
@ -90,7 +92,8 @@ struct thresh_restart {
};
/* must be called with correct cpu affinity */
static long threshold_restart_bank(void *_tr)
/* Called via smp_call_function_single() */
static void threshold_restart_bank(void *_tr)
{
struct thresh_restart *tr = _tr;
u32 mci_misc_hi, mci_misc_lo;
@ -117,7 +120,6 @@ static long threshold_restart_bank(void *_tr)
mci_misc_hi |= MASK_COUNT_EN_HI;
wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
return 0;
}
/* cpu init entry point, called from mce.c with preempt off */
@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
tr.reset = 0;
tr.old_limit = 0;
threshold_restart_bank(&tr);
mce_threshold_vector = amd_threshold_interrupt;
}
}
}
@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
* the interrupt goes off when error_count reaches threshold_limit.
* the handler will simply log mcelog w/ software defined bank number.
*/
asmlinkage void mce_threshold_interrupt(void)
static void amd_threshold_interrupt(void)
{
unsigned int bank, block;
struct mce m;
u32 low = 0, high = 0, address = 0;
ack_APIC_irq();
exit_idle();
irq_enter();
memset(&m, 0, sizeof(m));
rdtscll(m.tsc);
m.cpu = smp_processor_id();
mce_setup(&m);
/* assume first bank caused it */
for (bank = 0; bank < NR_BANKS; ++bank) {
@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt(void)
/* Log the machine check that caused the threshold
event. */
do_machine_check(NULL, 0);
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
if (high & MASK_OVERFLOW_HI) {
rdmsrl(address, m.misc);
@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt(void)
+ bank * NR_BLOCKS
+ block;
mce_log(&m);
goto out;
return;
}
}
}
out:
inc_irq_stat(irq_threshold_count);
irq_exit();
}
/*
@ -283,7 +279,7 @@ static ssize_t store_interrupt_enable(struct threshold_block *b,
tr.b = b;
tr.reset = 0;
tr.old_limit = 0;
work_on_cpu(b->cpu, threshold_restart_bank, &tr);
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
return end - buf;
}
@ -305,23 +301,32 @@ static ssize_t store_threshold_limit(struct threshold_block *b,
tr.b = b;
tr.reset = 0;
work_on_cpu(b->cpu, threshold_restart_bank, &tr);
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
return end - buf;
}
static long local_error_count(void *_b)
struct threshold_block_cross_cpu {
struct threshold_block *tb;
long retval;
};
static void local_error_count_handler(void *_tbcc)
{
struct threshold_block *b = _b;
struct threshold_block_cross_cpu *tbcc = _tbcc;
struct threshold_block *b = tbcc->tb;
u32 low, high;
rdmsr(b->address, low, high);
return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit);
}
static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b));
struct threshold_block_cross_cpu tbcc = { .tb = b, };
smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1);
return sprintf(buf, "%lx\n", tbcc.retval);
}
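/*
 * The "read a value on another CPU" idiom used above, in isolation
 * (sketch; the 42 stands in for e.g. an rdmsr() result): the callback
 * returns void, so the result travels back through a caller-owned
 * struct, and wait=1 guarantees it is filled in before we look at it.
 */
#include <linux/smp.h>

struct remote_result {
	int cpu;
	unsigned long value;
};

static void read_on_cpu(void *info)
{
	struct remote_result *r = info;

	r->cpu = smp_processor_id();
	r->value = 42;
}

static unsigned long sample_cpu(int cpu)
{
	struct remote_result r = { .cpu = -1, .value = 0 };

	smp_call_function_single(cpu, read_on_cpu, &r, 1);
	return r.value;
}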
static ssize_t store_error_count(struct threshold_block *b,
@ -329,7 +334,7 @@ static ssize_t store_error_count(struct threshold_block *b,
{
struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 };
work_on_cpu(b->cpu, threshold_restart_bank, &tr);
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
return 1;
}
@ -398,7 +403,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
return 0;
if (rdmsr_safe(address, &low, &high))
if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
return 0;
if (!(high & MASK_VALID_HI)) {
@ -462,12 +467,11 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
return err;
}
static __cpuinit long local_allocate_threshold_blocks(void *_bank)
static __cpuinit long
local_allocate_threshold_blocks(int cpu, unsigned int bank)
{
unsigned int *bank = _bank;
return allocate_threshold_blocks(smp_processor_id(), *bank, 0,
MSR_IA32_MC0_MISC + *bank * 4);
return allocate_threshold_blocks(cpu, bank, 0,
MSR_IA32_MC0_MISC + bank * 4);
}
/* symlinks sibling shared banks to first core. first core owns dir/files. */
@ -530,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
per_cpu(threshold_banks, cpu)[bank] = b;
err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank);
err = local_allocate_threshold_blocks(cpu, bank);
if (err)
goto out_free;


@ -1,6 +1,8 @@
/*
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/init.h>
@ -13,6 +15,7 @@
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
#include <asm/apic.h>
asmlinkage void smp_thermal_interrupt(void)
{
@ -25,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(void)
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
if (therm_throt_process(msr_val & 1))
mce_log_therm_throt_event(smp_processor_id(), msr_val);
mce_log_therm_throt_event(msr_val);
inc_irq_stat(irq_thermal_count);
irq_exit();
@ -85,7 +88,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
return;
}
/*
* Support for Intel Corrected Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static DEFINE_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
static int cmci_supported(int *banks)
{
u64 cap;
/*
* Vendor check is not strictly needed, but the initialization
* is vendor keyed and this
* makes sure none of the backdoors are entered otherwise.
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
if (!cpu_has_apic || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
return !!(cap & MCG_CMCI_P);
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static void intel_threshold_interrupt(void)
{
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
mce_notify_user();
}
static void print_update(char *type, int *hdr, int num)
{
if (*hdr == 0)
printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
*hdr = 1;
printk(KERN_CONT " %s:%d", type, num);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static void cmci_discover(int banks, int boot)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
int hdr = 0;
int i;
spin_lock(&cmci_discover_lock);
for (i = 0; i < banks; i++) {
u64 val;
if (test_bit(i, owned))
continue;
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
/* Already owned by someone else? */
if (val & CMCI_EN) {
if (test_and_clear_bit(i, owned) || boot)
print_update("SHD", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
continue;
}
val |= CMCI_EN | CMCI_THRESHOLD;
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
/* Did the enable bit stick? -- the bank supports CMCI */
if (val & CMCI_EN) {
if (!test_and_set_bit(i, owned) || boot)
print_update("CMCI", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
} else {
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
spin_unlock(&cmci_discover_lock);
if (hdr)
printk(KERN_CONT "\n");
}
/*
* Just in case we missed an event during initialization, check
* all the CMCI owned banks.
*/
void cmci_recheck(void)
{
unsigned long flags;
int banks;
if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
return;
local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
local_irq_restore(flags);
}
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
*/
void cmci_clear(void)
{
int i;
int banks;
u64 val;
if (!cmci_supported(&banks))
return;
spin_lock(&cmci_discover_lock);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
spin_unlock(&cmci_discover_lock);
}
/*
* After a CPU went down cycle through all the others and rediscover
* Must run in process context.
*/
void cmci_rediscover(int dying)
{
int banks;
int cpu;
cpumask_var_t old;
if (!cmci_supported(&banks))
return;
if (!alloc_cpumask_var(&old, GFP_KERNEL))
return;
cpumask_copy(old, &current->cpus_allowed);
for_each_online_cpu (cpu) {
if (cpu == dying)
continue;
if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
continue;
/* Recheck banks in case CPUs don't all have the same number of banks */
if (cmci_supported(&banks))
cmci_discover(banks, 0);
}
set_cpus_allowed_ptr(current, old);
free_cpumask_var(old);
}
/*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
void cmci_reenable(void)
{
int banks;
if (cmci_supported(&banks))
cmci_discover(banks, 0);
}
static void intel_init_cmci(void)
{
int banks;
if (!cmci_supported(&banks))
return;
mce_threshold_vector = intel_threshold_interrupt;
cmci_discover(banks, 1);
/*
* For CPU #0 this runs while the APIC is still disabled, but that's
* ok because only the vector is set up. We still do another
* check for the banks later for CPU #0 just to make sure
* we don't miss any events.
*/
apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
cmci_recheck();
}
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
intel_init_cmci();
}


@ -0,0 +1,29 @@
/*
* Common corrected MCE threshold handler code:
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <asm/irq_vectors.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
static void default_threshold_interrupt(void)
{
printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
THRESHOLD_APIC_VECTOR);
}
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
asmlinkage void mce_threshold_interrupt(void)
{
exit_idle();
irq_enter();
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
irq_exit();
/* Ack only at the end to avoid potential reentry */
ack_APIC_irq();
}
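/*
 * How a vendor driver hooks this shared vector (sketch; the two
 * functions below are illustrative, only mce_threshold_vector is from
 * this patch): install a handler at feature-init time, and the common
 * entry point above calls through the pointer inside
 * irq_enter()/irq_exit().
 */
static void example_threshold_handler(void)
{
	/* poll the banks this vendor owns and log any events */
}

static void example_feature_init(void)
{
	mce_threshold_vector = example_threshold_handler;
}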


@ -1,3 +1,3 @@
obj-y := main.o if.o generic.o state.o
obj-y := main.o if.o generic.o state.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o

File diff suppressed because it is too large


@ -33,13 +33,31 @@ u64 mtrr_tom2;
struct mtrr_state_type mtrr_state = {};
EXPORT_SYMBOL_GPL(mtrr_state);
static int __initdata mtrr_show;
static int __init mtrr_debug(char *opt)
/**
* BIOS is expected to clear MtrrFixDramModEn bit, see for example
* "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
* Opteron Processors" (26094 Rev. 3.30 February 2006), section
* "13.2.1.2 SYSCFG Register": "The MtrrFixDramModEn bit should be set
* to 1 during BIOS initialization of the fixed MTRRs, then cleared to
* 0 for operation."
*/
static inline void k8_check_syscfg_dram_mod_en(void)
{
mtrr_show = 1;
return 0;
u32 lo, hi;
if (!((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
(boot_cpu_data.x86 >= 0x0f)))
return;
rdmsr(MSR_K8_SYSCFG, lo, hi);
if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
" not cleared by BIOS, clearing this bit\n",
smp_processor_id());
lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
}
}
early_param("mtrr.show", mtrr_debug);
/*
* Returns the effective MTRR type for the region
@ -174,6 +192,8 @@ get_fixed_ranges(mtrr_type * frs)
unsigned int *p = (unsigned int *) frs;
int i;
k8_check_syscfg_dram_mod_en();
rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
for (i = 0; i < 2; i++)
@ -188,18 +208,94 @@ void mtrr_save_fixed_ranges(void *info)
get_fixed_ranges(mtrr_state.fixed_ranges);
}
static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
static unsigned __initdata last_fixed_start;
static unsigned __initdata last_fixed_end;
static mtrr_type __initdata last_fixed_type;
static void __init print_fixed_last(void)
{
if (!last_fixed_end)
return;
printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start,
last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
last_fixed_end = 0;
}
static void __init update_fixed_last(unsigned base, unsigned end,
mtrr_type type)
{
last_fixed_start = base;
last_fixed_end = end;
last_fixed_type = type;
}
static void __init print_fixed(unsigned base, unsigned step,
const mtrr_type *types)
{
unsigned i;
for (i = 0; i < 8; ++i, ++types, base += step)
printk(KERN_INFO "MTRR %05X-%05X %s\n",
base, base + step - 1, mtrr_attrib_to_str(*types));
for (i = 0; i < 8; ++i, ++types, base += step) {
if (last_fixed_end == 0) {
update_fixed_last(base, base + step, *types);
continue;
}
if (last_fixed_end == base && last_fixed_type == *types) {
last_fixed_end = base + step;
continue;
}
/* new segments: gap or different type */
print_fixed_last();
update_fixed_last(base, base + step, *types);
}
}
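/*
 * What print_fixed()/print_fixed_last() above achieve, as a standalone
 * sketch with made-up data: adjacent fixed ranges of the same memory
 * type are merged so each run prints as a single line.
 */
#include <stdio.h>

struct fixed_range { unsigned int base, end, type; };

static void print_merged(const struct fixed_range *r, int n)
{
	unsigned int start = r[0].base, end = r[0].end, type = r[0].type;
	int i;

	for (i = 1; i < n; i++) {
		if (r[i].base == end && r[i].type == type) {
			end = r[i].end;			/* extend the current run */
			continue;
		}
		printf("  %05X-%05X type %u\n", start, end - 1, type);
		start = r[i].base;
		end = r[i].end;
		type = r[i].type;
	}
	printf("  %05X-%05X type %u\n", start, end - 1, type);
}

int main(void)
{
	struct fixed_range fixed[] = {
		{ 0x00000, 0x10000, 6 },	/* write-back */
		{ 0x10000, 0x20000, 6 },	/* merged with the previous one */
		{ 0x20000, 0x30000, 0 },	/* uncachable: starts a new run */
	};

	print_merged(fixed, 3);
	return 0;
}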
static void prepare_set(void);
static void post_set(void);
static void __init print_mtrr_state(void)
{
unsigned int i;
int high_width;
printk(KERN_DEBUG "MTRR default type: %s\n",
mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n",
mtrr_state.enabled & 1 ? "en" : "dis");
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
for (i = 0; i < 8; ++i)
print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
/* tail */
print_fixed_last();
}
printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
i,
high_width,
mtrr_state.var_ranges[i].base_hi,
mtrr_state.var_ranges[i].base_lo >> 12,
high_width,
mtrr_state.var_ranges[i].mask_hi,
mtrr_state.var_ranges[i].mask_lo >> 12,
mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
else
printk(KERN_DEBUG " %u disabled\n", i);
}
if (mtrr_tom2) {
printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
mtrr_tom2, mtrr_tom2>>20);
}
}
/* Grab all of the MTRR state for this CPU into *state */
void __init get_mtrr_state(void)
{
@ -231,41 +327,9 @@ void __init get_mtrr_state(void)
mtrr_tom2 |= low;
mtrr_tom2 &= 0xffffff800000ULL;
}
if (mtrr_show) {
int high_width;
printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type));
if (mtrr_state.have_fixed) {
printk(KERN_INFO "MTRR fixed ranges %sabled:\n",
mtrr_state.enabled & 1 ? "en" : "dis");
print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
for (i = 0; i < 2; ++i)
print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8);
for (i = 0; i < 8; ++i)
print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8);
}
printk(KERN_INFO "MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n",
i,
high_width,
mtrr_state.var_ranges[i].base_hi,
mtrr_state.var_ranges[i].base_lo >> 12,
high_width,
mtrr_state.var_ranges[i].mask_hi,
mtrr_state.var_ranges[i].mask_lo >> 12,
mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
else
printk(KERN_INFO "MTRR %u disabled\n", i);
}
if (mtrr_tom2) {
printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
mtrr_tom2, mtrr_tom2>>20);
}
}
print_mtrr_state();
mtrr_state_set = 1;
/* PAT setup for BP. We need to go through sync steps here */
@ -307,28 +371,11 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
smp_processor_id(), msr, a, b);
}
/**
* Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
* see AMD publication no. 24593, chapter 3.2.1 for more information
*/
static inline void k8_enable_fixed_iorrs(void)
{
unsigned lo, hi;
rdmsr(MSR_K8_SYSCFG, lo, hi);
mtrr_wrmsr(MSR_K8_SYSCFG, lo
| K8_MTRRFIXRANGE_DRAM_ENABLE
| K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
}
/**
* set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have
* @msr: MSR address of the MTRR which should be checked and updated
* @changed: pointer which indicates whether the MTRR needed to be changed
* @msrwords: pointer to the MSR values which the MSR should have
*
* If K8 extensions are wanted, update the K8 SYSCFG MSR also.
* See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
*/
static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
{
@ -337,10 +384,6 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
rdmsr(msr, lo, hi);
if (lo != msrwords[0] || hi != msrwords[1]) {
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
(boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
k8_enable_fixed_iorrs();
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
*changed = true;
}
@ -376,22 +419,31 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
{
unsigned int mask_lo, mask_hi, base_lo, base_hi;
unsigned int tmp, hi;
int cpu;
/*
* get_mtrr doesn't need to update mtrr_state; it can also be called
* from any CPU, so print the result out directly.
*/
cpu = get_cpu();
rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
if ((mask_lo & 0x800) == 0) {
/* Invalid (i.e. free) range */
*base = 0;
*size = 0;
*type = 0;
return;
goto out_put_cpu;
}
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask. */
/* Work out the shifted address mask: */
tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
mask_lo = size_or_mask | tmp;
/* Expand tmp with high bits to all 1s*/
/* Expand tmp with high bits to all 1s: */
hi = fls(tmp);
if (hi > 0) {
tmp |= ~((1<<(hi - 1)) - 1);
@ -402,11 +454,19 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
}
}
/* This works correctly if size is a power of two, i.e. a
contiguous range. */
/*
* This works correctly if size is a power of two, i.e. a
* contiguous range:
*/
*size = -mask_lo;
*base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
*type = base_lo & 0xff;
printk(KERN_DEBUG " get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n",
cpu, reg, *base, *size,
mtrr_attrib_to_str(*type & 0xff));
out_put_cpu:
put_cpu();
}
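/*
 * Worked example of the "*size = -mask_lo" step above (numbers assume
 * PAGE_SHIFT == 12 and 36 physical address bits): for a power-of-two
 * range every mask bit above the size is set, so the two's-complement
 * negation of the page-granular mask is the size in pages.
 */
#include <stdio.h>

int main(void)
{
	unsigned int size_or_mask = 0xff000000;	/* ~((1 << (36 - 12)) - 1) */
	unsigned int mask_pages   = 0x00ff0000;	/* PHYSMASK >> PAGE_SHIFT for a 256MB range */
	unsigned int mask_lo = size_or_mask | mask_pages;
	unsigned int size_pages = -mask_lo;	/* 0x10000 pages */

	printf("size = 0x%x pages = %u MB\n", size_pages, size_pages >> 8);
	return 0;
}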
/**
@ -419,6 +479,8 @@ static int set_fixed_ranges(mtrr_type * frs)
bool changed = false;
int block=-1, range;
k8_check_syscfg_dram_mod_en();
while (fixed_range_blocks[++block].ranges)
for (range=0; range < fixed_range_blocks[block].ranges; range++)
set_fixed_range(fixed_range_blocks[block].base_msr + range,

File diff suppressed because it is too large


@ -79,6 +79,7 @@ extern struct mtrr_ops * mtrr_if;
extern unsigned int num_var_ranges;
extern u64 mtrr_tom2;
extern struct mtrr_state_type mtrr_state;
void mtrr_state_warn(void);
const char *mtrr_attrib_to_str(int x);
@ -88,3 +89,6 @@ void mtrr_wrmsr(unsigned, unsigned, unsigned);
int amd_init_mtrr(void);
int cyrix_init_mtrr(void);
int centaur_init_mtrr(void);
extern int changed_by_mtrr_cleanup;
extern int mtrr_cleanup(unsigned address_bits);


@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
#endif
}
static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = {
.c_vendor = "Transmeta",
.c_ident = { "GenuineTMx86", "TransmetaCPU" },
.c_early_init = early_init_transmeta,


@ -8,7 +8,7 @@
* so no special init takes place.
*/
static struct cpu_dev umc_cpu_dev __cpuinitdata = {
static const struct cpu_dev __cpuinitconst umc_cpu_dev = {
.c_vendor = "UMC",
.c_ident = { "UMC UMC UMC" },
.c_models = {


@ -110,19 +110,50 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
/*
* Add a memory region to the kernel e820 map.
*/
void __init e820_add_region(u64 start, u64 size, int type)
static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
int type)
{
int x = e820.nr_map;
int x = e820x->nr_map;
if (x == ARRAY_SIZE(e820.map)) {
if (x == ARRAY_SIZE(e820x->map)) {
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
return;
}
e820.map[x].addr = start;
e820.map[x].size = size;
e820.map[x].type = type;
e820.nr_map++;
e820x->map[x].addr = start;
e820x->map[x].size = size;
e820x->map[x].type = type;
e820x->nr_map++;
}
void __init e820_add_region(u64 start, u64 size, int type)
{
__e820_add_region(&e820, start, size, type);
}
static void __init e820_print_type(u32 type)
{
switch (type) {
case E820_RAM:
case E820_RESERVED_KERN:
printk(KERN_CONT "(usable)");
break;
case E820_RESERVED:
printk(KERN_CONT "(reserved)");
break;
case E820_ACPI:
printk(KERN_CONT "(ACPI data)");
break;
case E820_NVS:
printk(KERN_CONT "(ACPI NVS)");
break;
case E820_UNUSABLE:
printk(KERN_CONT "(unusable)");
break;
default:
printk(KERN_CONT "type %u", type);
break;
}
}
void __init e820_print_map(char *who)
@ -134,27 +165,8 @@ void __init e820_print_map(char *who)
(unsigned long long) e820.map[i].addr,
(unsigned long long)
(e820.map[i].addr + e820.map[i].size));
switch (e820.map[i].type) {
case E820_RAM:
case E820_RESERVED_KERN:
printk(KERN_CONT "(usable)\n");
break;
case E820_RESERVED:
printk(KERN_CONT "(reserved)\n");
break;
case E820_ACPI:
printk(KERN_CONT "(ACPI data)\n");
break;
case E820_NVS:
printk(KERN_CONT "(ACPI NVS)\n");
break;
case E820_UNUSABLE:
printk("(unusable)\n");
break;
default:
printk(KERN_CONT "type %u\n", e820.map[i].type);
break;
}
e820_print_type(e820.map[i].type);
printk(KERN_CONT "\n");
}
}
@ -221,7 +233,7 @@ void __init e820_print_map(char *who)
*/
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
int *pnr_map)
u32 *pnr_map)
{
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
@ -417,11 +429,12 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
return __append_e820_map(biosmap, nr_map);
}
static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
u64 size, unsigned old_type,
unsigned new_type)
{
int i;
u64 end;
unsigned int i;
u64 real_updated_size = 0;
BUG_ON(old_type == new_type);
@ -429,27 +442,55 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
if (size > (ULLONG_MAX - start))
size = ULLONG_MAX - start;
for (i = 0; i < e820.nr_map; i++) {
end = start + size;
printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
(unsigned long long) start,
(unsigned long long) end);
e820_print_type(old_type);
printk(KERN_CONT " ==> ");
e820_print_type(new_type);
printk(KERN_CONT "\n");
for (i = 0; i < e820x->nr_map; i++) {
struct e820entry *ei = &e820x->map[i];
u64 final_start, final_end;
u64 ei_end;
if (ei->type != old_type)
continue;
/* totally covered? */
if (ei->addr >= start &&
(ei->addr + ei->size) <= (start + size)) {
ei_end = ei->addr + ei->size;
/* totally covered by new range? */
if (ei->addr >= start && ei_end <= end) {
ei->type = new_type;
real_updated_size += ei->size;
continue;
}
/* new range is totally covered? */
if (ei->addr < start && ei_end > end) {
__e820_add_region(e820x, start, size, new_type);
__e820_add_region(e820x, end, ei_end - end, ei->type);
ei->size = start - ei->addr;
real_updated_size += size;
continue;
}
/* partially covered */
final_start = max(start, ei->addr);
final_end = min(start + size, ei->addr + ei->size);
final_end = min(end, ei_end);
if (final_start >= final_end)
continue;
e820_add_region(final_start, final_end - final_start,
__e820_add_region(e820x, final_start, final_end - final_start,
new_type);
real_updated_size += final_end - final_start;
/*
* The leftover range could be the head or the tail, so the
* size must be updated first.
*/
ei->size -= final_end - final_start;
if (ei->addr < final_start)
continue;
@ -461,13 +502,13 @@ static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
unsigned new_type)
{
return e820_update_range_map(&e820, start, size, old_type, new_type);
return __e820_update_range(&e820, start, size, old_type, new_type);
}
static u64 __init e820_update_range_saved(u64 start, u64 size,
unsigned old_type, unsigned new_type)
{
return e820_update_range_map(&e820_saved, start, size, old_type,
return __e820_update_range(&e820_saved, start, size, old_type,
new_type);
}
@ -511,7 +552,7 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
void __init update_e820(void)
{
int nr_map;
u32 nr_map;
nr_map = e820.nr_map;
if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
@ -522,7 +563,7 @@ void __init update_e820(void)
}
static void __init update_e820_saved(void)
{
int nr_map;
u32 nr_map;
nr_map = e820_saved.nr_map;
if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
@ -1020,8 +1061,8 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
continue;
return addr;
}
return -1UL;
return -1ULL;
}
/*
@ -1034,13 +1075,22 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
u64 start;
start = startt;
while (size < sizet)
while (size < sizet && (start + 1))
start = find_e820_area_size(start, &size, align);
if (size < sizet)
return 0;
#ifdef CONFIG_X86_32
if (start >= MAXMEM)
return 0;
if (start + size > MAXMEM)
size = MAXMEM - start;
#endif
addr = round_down(start + size - sizet, align);
if (addr < start)
return 0;
e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
printk(KERN_INFO "update e820 for early_reserve_e820\n");
@ -1253,7 +1303,7 @@ early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
{
if (userdef) {
int nr = e820.nr_map;
u32 nr = e820.nr_map;
if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
early_panic("Invalid user supplied memory map");
@ -1336,7 +1386,7 @@ void __init e820_reserve_resources_late(void)
char *__init default_machine_specific_memory_setup(void)
{
char *who = "BIOS-e820";
int new_nr;
u32 new_nr;
/*
* Try to copy the BIOS-supplied E820-map.
*

View file

@ -250,7 +250,7 @@ static int dbgp_wait_until_complete(void)
return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
}
static void dbgp_mdelay(int ms)
static void __init dbgp_mdelay(int ms)
{
int i;
@ -311,7 +311,7 @@ static void dbgp_set_data(const void *buf, int size)
writel(hi, &ehci_debug->data47);
}
static void dbgp_get_data(void *buf, int size)
static void __init dbgp_get_data(void *buf, int size)
{
unsigned char *bytes = buf;
u32 lo, hi;
@ -355,7 +355,7 @@ static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
return ret;
}
static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
int size)
{
u32 pids, addr, ctrl;
@ -386,8 +386,8 @@ static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
return ret;
}
static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
int value, int index, void *data, int size)
static int __init dbgp_control_msg(unsigned devnum, int requesttype,
int request, int value, int index, void *data, int size)
{
u32 pids, addr, ctrl;
struct usb_ctrlrequest req;
@ -489,7 +489,7 @@ static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
return 0;
}
static int ehci_reset_port(int port)
static int __init ehci_reset_port(int port)
{
u32 portsc;
u32 delay_time, delay;
@ -532,7 +532,7 @@ static int ehci_reset_port(int port)
return -EBUSY;
}
static int ehci_wait_for_port(int port)
static int __init ehci_wait_for_port(int port)
{
u32 status;
int ret, reps;
@ -557,13 +557,13 @@ static inline void dbgp_printk(const char *fmt, ...) { }
typedef void (*set_debug_port_t)(int port);
static void default_set_debug_port(int port)
static void __init default_set_debug_port(int port)
{
}
static set_debug_port_t set_debug_port = default_set_debug_port;
static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
static void nvidia_set_debug_port(int port)
static void __init nvidia_set_debug_port(int port)
{
u32 dword;
dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,

View file

@ -442,8 +442,7 @@ sysenter_past_esp:
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz sysenter_audit
sysenter_do_call:
cmpl $(nr_syscalls), %eax
@ -454,7 +453,7 @@ sysenter_do_call:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
testl $_TIF_ALLWORK_MASK, %ecx
jne sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
@ -468,7 +467,7 @@ sysenter_exit:
#ifdef CONFIG_AUDITSYSCALL
sysenter_audit:
testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
jnz syscall_trace_entry
addl $4,%esp
CFI_ADJUST_CFA_OFFSET -4
@ -485,7 +484,7 @@ sysenter_audit:
jmp sysenter_do_call
sysexit_audit:
testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
jne syscall_exit_work
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
@ -498,7 +497,7 @@ sysexit_audit:
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
jne syscall_exit_work
movl PT_EAX(%esp),%eax /* reload syscall return value */
jmp sysenter_exit
@ -523,8 +522,7 @@ ENTRY(system_call)
SAVE_ALL
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
@ -538,7 +536,7 @@ syscall_exit:
# between sampling and the iret
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
testl $_TIF_ALLWORK_MASK, %ecx # current->work
jne syscall_exit_work
restore_all:
@ -673,7 +671,7 @@ END(syscall_trace_entry)
# perform syscall exit tracing
ALIGN
syscall_exit_work:
testb $_TIF_WORK_SYSCALL_EXIT, %cl
testl $_TIF_WORK_SYSCALL_EXIT, %ecx
jz work_pending
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
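The testw -> testl conversions in this file come down to operand width: a 16-bit test such as testw $mask, %cx only looks at the low 16 bits of %ecx, so any thread-info work flag defined at bit 16 or above would be silently missed, while testl examines the full 32-bit register. A small C illustration of the effect, using a made-up flag value rather than a real _TIF_* constant:

#include <stdint.h>
#include <stdio.h>

#define TIF_EXAMPLE_HIGH (1u << 17)   /* hypothetical work flag above bit 15 */

int main(void)
{
        uint32_t flags = TIF_EXAMPLE_HIGH;

        /* what a 16-bit test (testw on %cx) effectively operates on */
        uint16_t low16 = (uint16_t)flags;

        printf("32-bit test sees pending work: %s\n",
               (flags & TIF_EXAMPLE_HIGH) ? "yes" : "no");
        printf("16-bit test sees pending work: %s\n",
               (low16 & TIF_EXAMPLE_HIGH) ? "yes" : "no");
        return 0;
}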

View file

@ -368,6 +368,7 @@ ENTRY(save_rest)
END(save_rest)
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
XCPT_FRAME 1 RDI+8
cld
@ -396,6 +397,7 @@ ENTRY(save_paranoid)
1: ret
CFI_ENDPROC
END(save_paranoid)
.popsection
/*
* A newly forked process directly context switches into this address.
@ -416,7 +418,6 @@ ENTRY(ret_from_fork)
GET_THREAD_INFO(%rcx)
CFI_REMEMBER_STATE
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
@ -428,7 +429,6 @@ ENTRY(ret_from_fork)
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
CFI_RESTORE_STATE
CFI_ENDPROC
END(ret_from_fork)
@ -984,6 +984,8 @@ apicinterrupt UV_BAU_MESSAGE \
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
apicinterrupt GENERIC_INTERRUPT_VECTOR \
generic_interrupt smp_generic_interrupt
#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \

View file

@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
{
reserve_trampoline_memory();
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
reserve_early(init_pg_tables_start, init_pg_tables_end,
"INIT_PG_TABLE");
reserve_ebda_region();
/*

View file

@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
reserve_trampoline_memory();
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */

View file

@ -38,42 +38,40 @@
#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
/*
* This is how much memory *in addition to the memory covered up to
* and including _end* we need mapped initially.
* This is how much memory in addition to the memory covered up to
* and including _end we need mapped initially.
* We need:
* - one bit for each possible page, but only in low memory, which means
* 2^32/4096/8 = 128K worst case (4G/4G split.)
* - enough space to map all low memory, which means
* (2^32/4096) / 1024 pages (worst case, non PAE)
* (2^32/4096) / 512 + 4 pages (worst case for PAE)
* - a few pages for allocator use before the kernel pagetable has
* been set up
* (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
* (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
*
* Modulo rounding, each megabyte assigned here requires a kilobyte of
* memory, which is currently unreclaimed.
*
* This should be a multiple of a page.
*
* KERNEL_IMAGE_SIZE should be greater than pa(_end)
* and smaller than max_low_pfn, otherwise some page table entries will be wasted
*/
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
/*
* To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
* pagetables from above the 16MB DMA limit, so we'll have to set
* up pagetables 16MB more (worst-case):
*/
#ifdef CONFIG_DEBUG_PAGEALLOC
LOW_PAGES = LOW_PAGES + 0x1000000
#endif
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif
BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4
INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
/* Enough space to fit pagetables for the low memory linear map */
MAPPING_BEYOND_END = \
PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
/*
* Worst-case size of the kernel mapping we need to make:
* the worst-case size of the kernel itself, plus the extra we need
* to map for the linear map.
*/
KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
RESERVE_BRK(pagetables, INIT_MAP_SIZE)
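As a rough sanity check of the sizing above, the following standalone program evaluates the non-PAE formulas with commonly used 32-bit defaults (PAGE_OFFSET = 0xC0000000, 4 KB pages, 1024 PGD entries, KERNEL_IMAGE_SIZE = 512 MB). These constants are assumptions for illustration only; the real values depend on the kernel configuration, and the PAE variant of PAGE_TABLE_SIZE differs as shown in the hunk.

#include <stdio.h>

#define PAGE_SHIFT        12
#define PAGE_SIZE         (1UL << PAGE_SHIFT)
#define PTRS_PER_PGD      1024UL                 /* non-PAE            */
#define PAGE_OFFSET       0xC0000000UL           /* assumed 3G/1G split */
#define KERNEL_IMAGE_SIZE (512UL << 20)          /* assumed 512 MB      */

/* non-PAE variant of PAGE_TABLE_SIZE(pages) from the hunk above */
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)

int main(void)
{
        unsigned long lowmem_pages = (0x100000000ULL - PAGE_OFFSET) >> PAGE_SHIFT;
        unsigned long mapping_beyond_end = PAGE_TABLE_SIZE(lowmem_pages) << PAGE_SHIFT;
        unsigned long kernel_pages = (KERNEL_IMAGE_SIZE + mapping_beyond_end) >> PAGE_SHIFT;
        unsigned long init_map_size = PAGE_TABLE_SIZE(kernel_pages) * PAGE_SIZE;

        printf("MAPPING_BEYOND_END = %lu KB\n", mapping_beyond_end >> 10);
        printf("INIT_MAP_SIZE      = %lu KB reserved via RESERVE_BRK\n",
               init_map_size >> 10);
        return 0;
}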
/*
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
@ -166,10 +164,10 @@ num_subarch_entries = (. - subarch_entries) / 4
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond _end. The variable
* init_pg_tables_end is set up to point to the first "safe" location.
* tables, which are located immediately beyond __brk_base. The variable
* _brk_end is set up to point to the first "safe" location.
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
* and PAGE_OFFSET for up to _end.
*
* Note that the stack is not yet set up!
*/
@ -190,8 +188,7 @@ default_entry:
xorl %ebx,%ebx /* %ebx is kept at zero */
movl $pa(pg0), %edi
movl %edi, pa(init_pg_tables_start)
movl $pa(__brk_base), %edi
movl $pa(swapper_pg_pmd), %edx
movl $PTE_IDENT_ATTR, %eax
10:
@ -209,14 +206,14 @@ default_entry:
loop 11b
/*
* End condition: we must map up to and including INIT_MAP_BEYOND_END
* bytes beyond the end of our own page tables.
* End condition: we must map up to the end + MAPPING_BEYOND_END.
*/
leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
1:
movl %edi,pa(init_pg_tables_end)
addl $__PAGE_OFFSET, %edi
movl %edi, pa(_brk_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@ -227,8 +224,7 @@ default_entry:
page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(pg0), %edi
movl %edi, pa(init_pg_tables_start)
movl $pa(__brk_base), %edi
movl $pa(swapper_pg_dir), %edx
movl $PTE_IDENT_ATTR, %eax
10:
@ -242,14 +238,13 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
addl $0x1000,%eax
loop 11b
/*
* End condition: we must map up to and including INIT_MAP_BEYOND_END
* bytes beyond the end of our own page tables; the +0x007 is
* the attribute bits
* End condition: we must map up to the end + MAPPING_BEYOND_END.
*/
leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
cmpl %ebp,%eax
jb 10b
movl %edi,pa(init_pg_tables_end)
addl $__PAGE_OFFSET, %edi
movl %edi, pa(_brk_end)
shrl $12, %eax
movl %eax, pa(max_pfn_mapped)
@ -636,6 +631,7 @@ swapper_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
/*
* This starts the data section.
*/

View file

@ -3,17 +3,17 @@
*
*/
#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/io.h>
#include <asm/smp.h>
#include <asm/delay.h>
#include <asm/i8253.h>
#include <asm/io.h>
#include <asm/hpet.h>
#include <asm/smp.h>
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
* registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
* !using_apic_timer decisions in do_timer_interrupt_hook()
*/
static struct clock_event_device pit_clockevent = {
static struct clock_event_device pit_ce = {
.name = "pit",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode = init_pit_timer,
@ -114,15 +114,13 @@ void __init setup_pit_timer(void)
* Start pit with the boot cpu mask and make it global after the
* IO_APIC has been initialized.
*/
pit_clockevent.cpumask = cpumask_of(smp_processor_id());
pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
pit_clockevent.shift);
pit_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFF, &pit_clockevent);
pit_clockevent.min_delta_ns =
clockevent_delta2ns(0xF, &pit_clockevent);
clockevents_register_device(&pit_clockevent);
global_clock_event = &pit_clockevent;
pit_ce.cpumask = cpumask_of(smp_processor_id());
pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift);
pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce);
pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce);
clockevents_register_device(&pit_ce);
global_clock_event = &pit_ce;
}
#ifndef CONFIG_X86_64
@ -133,11 +131,11 @@ void __init setup_pit_timer(void)
*/
static cycle_t pit_read(void)
{
static int old_count;
static u32 old_jifs;
unsigned long flags;
int count;
u32 jifs;
static int old_count;
static u32 old_jifs;
spin_lock_irqsave(&i8253_lock, flags);
/*
@ -179,9 +177,9 @@ static cycle_t pit_read(void)
* Previous attempts to handle these cases intelligently were
* buggy, so we just do the simple thing now.
*/
if (count > old_count && jifs == old_jifs) {
if (count > old_count && jifs == old_jifs)
count = old_count;
}
old_count = count;
old_jifs = jifs;
@ -192,7 +190,7 @@ static cycle_t pit_read(void)
return (cycle_t)(jifs * LATCH) + count;
}
static struct clocksource clocksource_pit = {
static struct clocksource pit_cs = {
.name = "pit",
.rating = 110,
.read = pit_read,
@ -206,9 +204,9 @@ static void pit_disable_clocksource(void)
/*
* Use mult to check whether it is registered or not
*/
if (clocksource_pit.mult) {
clocksource_unregister(&clocksource_pit);
clocksource_pit.mult = 0;
if (pit_cs.mult) {
clocksource_unregister(&pit_cs);
pit_cs.mult = 0;
}
}
@ -222,13 +220,13 @@ static int __init init_pit_clocksource(void)
* - when local APIC timer is active (PIT is switched off)
*/
if (num_possible_cpus() > 1 || is_hpet_enabled() ||
pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
pit_ce.mode != CLOCK_EVT_MODE_PERIODIC)
return 0;
clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
clocksource_pit.shift);
return clocksource_register(&clocksource_pit);
pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift);
return clocksource_register(&pit_cs);
}
arch_initcall(init_pit_clocksource);
#endif
#endif /* !CONFIG_X86_64 */

View file

@ -7,10 +7,10 @@
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/dmi.h>
#include <asm/io.h>
#include <linux/io.h>
int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE;
@ -47,8 +47,7 @@ EXPORT_SYMBOL(native_io_delay);
static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id)
{
if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) {
printk(KERN_NOTICE "%s: using 0xed I/O delay port\n",
id->ident);
pr_notice("%s: using 0xed I/O delay port\n", id->ident);
io_delay_type = CONFIG_IO_DELAY_TYPE_0XED;
}

View file

@ -15,6 +15,9 @@
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
void (*generic_interrupt_extension)(void) = NULL;
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
@ -42,55 +45,60 @@ void ack_bad_irq(unsigned int irq)
/*
* /proc/interrupts printing:
*/
static int show_other_interrupts(struct seq_file *p)
static int show_other_interrupts(struct seq_file *p, int prec)
{
int j;
seq_printf(p, "NMI: ");
seq_printf(p, "%*s: ", prec, "NMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
seq_printf(p, " Non-maskable interrupts\n");
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
seq_printf(p, "%*s: ", prec, "LOC");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
seq_printf(p, " Local timer interrupts\n");
seq_printf(p, "%*s: ", prec, "SPU");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
seq_printf(p, " Spurious interrupts\n");
#endif
if (generic_interrupt_extension) {
seq_printf(p, "PLT: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
seq_printf(p, " Platform interrupts\n");
}
#ifdef CONFIG_SMP
seq_printf(p, "RES: ");
seq_printf(p, "%*s: ", prec, "RES");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
seq_printf(p, " Rescheduling interrupts\n");
seq_printf(p, "CAL: ");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_printf(p, " Function call interrupts\n");
seq_printf(p, "TLB: ");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
seq_printf(p, " TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "TRM: ");
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
# ifdef CONFIG_X86_64
seq_printf(p, "THR: ");
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
# endif
#endif
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "SPU: ");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
seq_printf(p, " Spurious interrupts\n");
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
#endif
return 0;
}
@ -98,19 +106,22 @@ static int show_other_interrupts(struct seq_file *p)
int show_interrupts(struct seq_file *p, void *v)
{
unsigned long flags, any_count = 0;
int i = *(loff_t *) v, j;
int i = *(loff_t *) v, j, prec;
struct irqaction *action;
struct irq_desc *desc;
if (i > nr_irqs)
return 0;
for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
j *= 10;
if (i == nr_irqs)
return show_other_interrupts(p);
return show_other_interrupts(p, prec);
/* print header */
if (i == 0) {
seq_printf(p, " ");
seq_printf(p, "%*s", prec + 8, "");
for_each_online_cpu(j)
seq_printf(p, "CPU%-8d", j);
seq_putc(p, '\n');
@ -121,23 +132,15 @@ int show_interrupts(struct seq_file *p, void *v)
return 0;
spin_lock_irqsave(&desc->lock, flags);
#ifndef CONFIG_SMP
any_count = kstat_irqs(i);
#else
for_each_online_cpu(j)
any_count |= kstat_irqs_cpu(i, j);
#endif
action = desc->action;
if (!action && !any_count)
goto out;
seq_printf(p, "%3d: ", i);
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
seq_printf(p, "%*d: ", prec, i);
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
seq_printf(p, " %8s", desc->chip->name);
seq_printf(p, "-%-8s", desc->name);
@ -162,7 +165,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_X86_LOCAL_APIC
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
#endif
if (generic_interrupt_extension)
sum += irq_stats(cpu)->generic_irqs;
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
@ -173,9 +179,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
# ifdef CONFIG_X86_64
sum += irq_stats(cpu)->irq_threshold_count;
#endif
#endif
#ifdef CONFIG_X86_LOCAL_APIC
sum += irq_stats(cpu)->irq_spurious_count;
#endif
return sum;
}
@ -226,4 +229,27 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
return 1;
}
/*
* Handler for GENERIC_INTERRUPT_VECTOR.
*/
void smp_generic_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
ack_APIC_irq();
exit_idle();
irq_enter();
inc_irq_stat(generic_irqs);
if (generic_interrupt_extension)
generic_interrupt_extension();
irq_exit();
set_irq_regs(old_regs);
}
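smp_generic_interrupt above is deliberately thin: it acknowledges the vector, bumps a per-cpu counter and dispatches through the generic_interrupt_extension function pointer if some platform code has filled it in. The dispatch pattern in isolation, with the handler and its installation invented purely for illustration (this is not how any particular platform in the tree hooks it up):

#include <stdio.h>

/* single global hook, NULL until a "platform" installs a handler */
static void (*generic_interrupt_extension)(void);

static void demo_platform_handler(void)
{
        printf("platform handler ran\n");
}

/* stripped-down analogue of the vector handler's dispatch step */
static void generic_interrupt_dispatch(void)
{
        /* ack + irq statistics would happen here in the real handler */
        if (generic_interrupt_extension)
                generic_interrupt_extension();
        else
                printf("no handler registered, nothing to do\n");
}

int main(void)
{
        generic_interrupt_dispatch();                      /* before registration */
        generic_interrupt_extension = demo_platform_handler;
        generic_interrupt_dispatch();                      /* after registration  */
        return 0;
}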
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
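Earlier in this file, show_interrupts computes a column width with for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) j *= 10; which is simply the digit count of nr_irqs, floored at 3 and capped at 10, so the per-IRQ and summary rows line up. The same computation in isolation, with sample inputs chosen for the example:

#include <stdio.h>

/* column width: at least 3, at most 10, otherwise digits in nr_irqs */
static int irq_column_width(int nr_irqs)
{
        int prec, j;

        for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
                j *= 10;
        return prec;
}

int main(void)
{
        int samples[] = { 16, 224, 4096, 123456 };
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("nr_irqs = %6d -> column width %d\n",
                       samples[i], irq_column_width(samples[i]));
        return 0;
}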

View file

@ -175,6 +175,9 @@ void __init native_init_IRQ(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
/* generic IPI for platform specific use */
alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);

View file

@ -147,6 +147,9 @@ static void __init apic_intr_init(void)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
/* generic IPI for platform specific use */
alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);

View file

@ -8,11 +8,11 @@
*/
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/stat.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <asm/setup.h>
@ -26,9 +26,8 @@ struct setup_data_node {
u32 len;
};
static ssize_t
setup_data_read(struct file *file, char __user *user_buf, size_t count,
loff_t *ppos)
static ssize_t setup_data_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct setup_data_node *node = file->private_data;
unsigned long remain;
@ -39,20 +38,21 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count,
if (pos < 0)
return -EINVAL;
if (pos >= node->len)
return 0;
if (count > node->len - pos)
count = node->len - pos;
pa = node->paddr + sizeof(struct setup_data) + pos;
pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
if (PageHighMem(pg)) {
p = ioremap_cache(pa, count);
if (!p)
return -ENXIO;
} else {
} else
p = __va(pa);
}
remain = copy_to_user(user_buf, p, count);
@ -70,6 +70,7 @@ setup_data_read(struct file *file, char __user *user_buf, size_t count,
static int setup_data_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
@ -84,57 +85,50 @@ create_setup_data_node(struct dentry *parent, int no,
{
struct dentry *d, *type, *data;
char buf[16];
int error;
sprintf(buf, "%d", no);
d = debugfs_create_dir(buf, parent);
if (!d) {
error = -ENOMEM;
goto err_return;
}
if (!d)
return -ENOMEM;
type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
if (!type) {
error = -ENOMEM;
if (!type)
goto err_dir;
}
data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
if (!data) {
error = -ENOMEM;
if (!data)
goto err_type;
}
return 0;
err_type:
debugfs_remove(type);
err_dir:
debugfs_remove(d);
err_return:
return error;
return -ENOMEM;
}
static int __init create_setup_data_nodes(struct dentry *parent)
{
struct setup_data_node *node;
struct setup_data *data;
int error, no = 0;
int error = -ENOMEM;
struct dentry *d;
struct page *pg;
u64 pa_data;
int no = 0;
d = debugfs_create_dir("setup_data", parent);
if (!d) {
error = -ENOMEM;
goto err_return;
}
if (!d)
return -ENOMEM;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
node = kmalloc(sizeof(*node), GFP_KERNEL);
if (!node) {
error = -ENOMEM;
if (!node)
goto err_dir;
}
pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
if (PageHighMem(pg)) {
data = ioremap_cache(pa_data, sizeof(*data));
@ -143,9 +137,8 @@ static int __init create_setup_data_nodes(struct dentry *parent)
error = -ENXIO;
goto err_dir;
}
} else {
} else
data = __va(pa_data);
}
node->paddr = pa_data;
node->type = data->type;
@ -159,11 +152,11 @@ static int __init create_setup_data_nodes(struct dentry *parent)
goto err_dir;
no++;
}
return 0;
err_dir:
debugfs_remove(d);
err_return:
return error;
}
@ -175,28 +168,26 @@ static struct debugfs_blob_wrapper boot_params_blob = {
static int __init boot_params_kdebugfs_init(void)
{
struct dentry *dbp, *version, *data;
int error;
int error = -ENOMEM;
dbp = debugfs_create_dir("boot_params", NULL);
if (!dbp) {
error = -ENOMEM;
goto err_return;
}
if (!dbp)
return -ENOMEM;
version = debugfs_create_x16("version", S_IRUGO, dbp,
&boot_params.hdr.version);
if (!version) {
error = -ENOMEM;
if (!version)
goto err_dir;
}
data = debugfs_create_blob("data", S_IRUGO, dbp,
&boot_params_blob);
if (!data) {
error = -ENOMEM;
if (!data)
goto err_version;
}
error = create_setup_data_nodes(dbp);
if (error)
goto err_data;
return 0;
err_data:
@ -205,10 +196,9 @@ static int __init boot_params_kdebugfs_init(void)
debugfs_remove(version);
err_dir:
debugfs_remove(dbp);
err_return:
return error;
}
#endif
#endif /* CONFIG_DEBUG_BOOT_PARAMS */
static int __init arch_kdebugfs_init(void)
{

View file

@ -193,7 +193,7 @@ static int __kprobes can_boost(kprobe_opcode_t *opcodes)
kprobe_opcode_t opcode;
kprobe_opcode_t *orig_opcodes = opcodes;
if (search_exception_tables(opcodes))
if (search_exception_tables((unsigned long)opcodes))
return 0; /* Page fault may occur on this address. */
retry:

View file

@ -138,12 +138,6 @@ static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
kvm_mmu_write(ptep, pte_val(pte));
}
static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
kvm_mmu_write(ptep, pte_val(pte));
}
static void kvm_pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
@ -220,7 +214,6 @@ static void paravirt_ops_setup(void)
#if PAGETABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
pv_mmu_ops.set_pte_present = kvm_set_pte_present;
pv_mmu_ops.pte_clear = kvm_pte_clear;
pv_mmu_ops.pmd_clear = kvm_pmd_clear;
#endif

View file

@ -14,12 +14,12 @@
#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
/* We need to put APICs in legacy mode so that we can
/*
* We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
* paths already have calls to disable_IO_APIC() in
* one form or other. kexec jump path also need
@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);
/* The segment registers are funny things, they have both a
/*
* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@ -237,7 +239,8 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
/* The gdt & idt are now invalid.
/*
* The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
set_gdt(phys_to_virt(0), 0);

View file

@ -12,11 +12,47 @@
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/io.h>
#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
{
pud_t *pud;
pmd_t *pmd;
struct page *page;
int result = -ENOMEM;
addr &= PMD_MASK;
pgd += pgd_index(addr);
if (!pgd_present(*pgd)) {
page = kimage_alloc_control_pages(image, 0);
if (!page)
goto out;
pud = (pud_t *)page_address(page);
memset(pud, 0, PAGE_SIZE);
set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
}
pud = pud_offset(pgd, addr);
if (!pud_present(*pud)) {
page = kimage_alloc_control_pages(image, 0);
if (!page)
goto out;
pmd = (pmd_t *)page_address(page);
memset(pmd, 0, PAGE_SIZE);
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
}
pmd = pmd_offset(pud, addr);
if (!pmd_present(*pmd))
set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
result = 0;
out:
return result;
}
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
}
level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
if (result) {
if (result)
goto out;
}
set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
addr += PGDIR_SIZE;
}
@ -154,6 +189,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
int result;
level4p = (pgd_t *)__va(start_pgtable);
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
if (result)
return result;
/*
* image->start may be outside 0 ~ max_pfn, for example when
* jumping back to the original kernel from the kexeced kernel
*/
result = init_one_level2_page(image, level4p, image->start);
if (result)
return result;
return init_transition_pgtable(image, level4p);
@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
int save_ftrace_enabled;
tracer_disable();
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context)
save_processor_state();
#endif
save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
/*
* We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
* paths already have calls to disable_IO_APIC() in
* one form or other. kexec jump path also need
* one.
*/
disable_IO_APIC();
#endif
}
control_page = page_address(image->control_code_page) + PAGE_SIZE;
memcpy(control_page, relocate_kernel, PAGE_SIZE);
memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));
/* The segment registers are funny things, they have both a
if (image->type == KEXEC_TYPE_DEFAULT)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);
/*
* The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
/* The gdt & idt are now invalid.
/*
* The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
set_gdt(phys_to_virt(0), 0);
set_idt(phys_to_virt(0), 0);
/* now call it */
relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
image->start);
image->start = relocate_kernel((unsigned long)image->head,
(unsigned long)page_list,
image->start,
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
if (kexec_image->preserve_context)
restore_processor_state();
#endif
__ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)

View file

@ -226,7 +226,7 @@ static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d)
return 0;
}
static struct dmi_system_id __devinitdata mmconf_dmi_table[] = {
static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = {
{
.callback = set_check_enable_amd_mmconf,
.ident = "Sun Microsystems Machine",

View file

@ -109,9 +109,6 @@ static void __init MP_bus_info(struct mpc_bus *m)
} else
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
}
#endif
#ifdef CONFIG_X86_IO_APIC
static int bad_ioapic(unsigned long address)
{
@ -224,8 +221,12 @@ static void __init MP_intsrc_info(struct mpc_intsrc *m)
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!!\n");
}
#else /* CONFIG_X86_IO_APIC */
static inline void __init MP_bus_info(struct mpc_bus *m) {}
static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
#endif /* CONFIG_X86_IO_APIC */
#endif
static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
{
@ -275,6 +276,20 @@ static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
return 1;
}
static void skip_entry(unsigned char **ptr, int *count, int size)
{
*ptr += size;
*count += size;
}
static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
{
printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
"type %x\n", *mpt);
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1, mpc, mpc->length, 1);
}
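skip_entry above replaces the per-case pointer and byte-count bumps that used to be open-coded in smp_read_mpc and replace_intsrc_all: the walk advances a cursor through a packed table of typed, variable-size records until the declared length is consumed. The pattern stripped of all MP-table specifics, with record layouts invented for the sketch:

#include <stdio.h>
#include <string.h>

/* advance both the cursor and the consumed-byte count, as skip_entry does */
static void skip_record(unsigned char **ptr, int *count, int size)
{
        *ptr += size;
        *count += size;
}

struct rec_a { unsigned char type; unsigned char payload[3]; };  /* type 0 */
struct rec_b { unsigned char type; unsigned char payload[7]; };  /* type 1 */

int main(void)
{
        unsigned char table[12];
        unsigned char *p = table;
        int count = 0, length = sizeof(table);

        memset(table, 0, sizeof(table));
        table[0] = 0;                      /* one rec_a ...             */
        table[sizeof(struct rec_a)] = 1;   /* ... followed by one rec_b */

        while (count < length) {
                switch (*p) {
                case 0:
                        printf("rec_a at offset %d\n", count);
                        skip_record(&p, &count, sizeof(struct rec_a));
                        break;
                case 1:
                        printf("rec_b at offset %d\n", count);
                        skip_record(&p, &count, sizeof(struct rec_b));
                        break;
                default:
                        printf("unknown record type, stopping\n");
                        count = length;
                        break;
                }
        }
        return 0;
}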
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@ -310,61 +325,30 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
while (count < mpc->length) {
switch (*mpt) {
case MP_PROCESSOR:
{
struct mpc_cpu *m = (struct mpc_cpu *)mpt;
/* ACPI may have already provided this data */
if (!acpi_lapic)
MP_processor_info(m);
mpt += sizeof(*m);
count += sizeof(*m);
MP_processor_info((struct mpc_cpu *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
break;
}
case MP_BUS:
{
struct mpc_bus *m = (struct mpc_bus *)mpt;
#ifdef CONFIG_X86_IO_APIC
MP_bus_info(m);
#endif
mpt += sizeof(*m);
count += sizeof(*m);
MP_bus_info((struct mpc_bus *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_bus));
break;
}
case MP_IOAPIC:
{
#ifdef CONFIG_X86_IO_APIC
struct mpc_ioapic *m = (struct mpc_ioapic *)mpt;
MP_ioapic_info(m);
#endif
mpt += sizeof(struct mpc_ioapic);
count += sizeof(struct mpc_ioapic);
MP_ioapic_info((struct mpc_ioapic *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
break;
}
case MP_INTSRC:
{
#ifdef CONFIG_X86_IO_APIC
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
MP_intsrc_info(m);
#endif
mpt += sizeof(struct mpc_intsrc);
count += sizeof(struct mpc_intsrc);
MP_intsrc_info((struct mpc_intsrc *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
break;
}
case MP_LINTSRC:
{
struct mpc_lintsrc *m =
(struct mpc_lintsrc *)mpt;
MP_lintsrc_info(m);
mpt += sizeof(*m);
count += sizeof(*m);
MP_lintsrc_info((struct mpc_lintsrc *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
break;
}
default:
/* wrong mptable */
printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
printk(KERN_ERR "type %x\n", *mpt);
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1, mpc, mpc->length, 1);
smp_dump_mptable(mpc, mpt);
count = mpc->length;
break;
}
@ -558,6 +542,68 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
static struct mpf_intel *mpf_found;
static unsigned long __init get_mpc_size(unsigned long physptr)
{
struct mpc_table *mpc;
unsigned long size;
mpc = early_ioremap(physptr, PAGE_SIZE);
size = mpc->length;
early_iounmap(mpc, PAGE_SIZE);
apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size);
return size;
}
static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
{
struct mpc_table *mpc;
unsigned long size;
size = get_mpc_size(mpf->physptr);
mpc = early_ioremap(mpf->physptr, size);
/*
* Read the physical hardware table. Anything here will
* override the defaults.
*/
if (!smp_read_mpc(mpc, early)) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"
"... disabling SMP support. (tell your hw vendor)\n");
early_iounmap(mpc, size);
return -1;
}
early_iounmap(mpc, size);
if (early)
return -1;
#ifdef CONFIG_X86_IO_APIC
/*
* If there are no explicit MP IRQ entries, then we are
* broken. We set up most of the low 16 IO-APIC pins to
* ISA defaults and hope it will work.
*/
if (!mp_irq_entries) {
struct mpc_bus bus;
printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
"using default mptable. (tell your hw vendor)\n");
bus.type = MP_BUS;
bus.busid = 0;
memcpy(bus.bustype, "ISA ", 6);
MP_bus_info(&bus);
construct_default_ioirq_mptable(0);
}
#endif
return 0;
}
/*
* Scan the memory blocks for an SMP configuration block.
*/
@ -611,45 +657,8 @@ static void __init __get_smp_config(unsigned int early)
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
/*
* Read the physical hardware table. Anything here will
* override the defaults.
*/
if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
#ifdef CONFIG_X86_LOCAL_APIC
smp_found_config = 0;
#endif
printk(KERN_ERR
"BIOS bug, MP table errors detected!...\n");
printk(KERN_ERR "... disabling SMP support. "
"(tell your hw vendor)\n");
if (check_physptr(mpf, early))
return;
}
if (early)
return;
#ifdef CONFIG_X86_IO_APIC
/*
* If there are no explicit MP IRQ entries, then we are
* broken. We set up most of the low 16 IO-APIC pins to
* ISA defaults and hope it will work.
*/
if (!mp_irq_entries) {
struct mpc_bus bus;
printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
"using default mptable. "
"(tell your hw vendor)\n");
bus.type = MP_BUS;
bus.busid = 0;
memcpy(bus.bustype, "ISA ", 6);
MP_bus_info(&bus);
construct_default_ioirq_mptable(0);
}
#endif
} else
BUG();
@ -670,6 +679,31 @@ void __init get_smp_config(void)
__get_smp_config(0);
}
static void smp_reserve_bootmem(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
#ifdef CONFIG_X86_32
/*
* We cannot access the MPC table to compute the table size yet,
* as only a few megabytes from the bottom are mapped at this point.
* The PC-9800's MPC table is placed at the very end of physical
* memory, so simply reserving PAGE_SIZE from mpf->physptr would
* trigger a BUG() in reserve_bootmem.
* We also need to make sure physptr is below max_low_pfn; there is
* no need to reserve the area above max_low_pfn.
*/
unsigned long end = max_low_pfn * PAGE_SIZE;
if (mpf->physptr < end) {
if (mpf->physptr + size > end)
size = end - mpf->physptr;
reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
}
#else
reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
#endif
}
static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned reserve)
{
@ -697,36 +731,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
if (!reserve)
return 1;
reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
BOOTMEM_DEFAULT);
if (mpf->physptr) {
unsigned long size = PAGE_SIZE;
#ifdef CONFIG_X86_32
/*
* We cannot access to MPC table to compute
* table size yet, as only few megabytes from
* the bottom is mapped now.
* PC-9800's MPC table places on the very last
* of physical memory; so that simply reserving
* PAGE_SIZE from mpf->physptr yields BUG()
* in reserve_bootmem.
* also need to make sure physptr is below than
* max_low_pfn
* we don't need reserve the area above max_low_pfn
*/
unsigned long end = max_low_pfn * PAGE_SIZE;
if (mpf->physptr < end) {
if (mpf->physptr + size > end)
size = end - mpf->physptr;
reserve_bootmem_generic(mpf->physptr, size,
BOOTMEM_DEFAULT);
}
#else
reserve_bootmem_generic(mpf->physptr, size,
BOOTMEM_DEFAULT);
#endif
}
if (mpf->physptr)
smp_reserve_bootmem(mpf);
return 1;
}
@ -829,7 +837,57 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
#define SPARE_SLOT_NUM 20
static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
#endif
static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
{
int i;
apic_printk(APIC_VERBOSE, "OLD ");
print_MP_intsrc_info(m);
i = get_MP_intsrc_index(m);
if (i > 0) {
assign_to_mpc_intsrc(&mp_irqs[i], m);
apic_printk(APIC_VERBOSE, "NEW ");
print_mp_irq_info(&mp_irqs[i]);
return;
}
if (!i) {
/* legacy, do nothing */
return;
}
if (*nr_m_spare < SPARE_SLOT_NUM) {
/*
* not found (-1), or duplicated (-2) are invalid entries,
* we need to use the slot later
*/
m_spare[*nr_m_spare] = m;
*nr_m_spare += 1;
}
}
#else /* CONFIG_X86_IO_APIC */
static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
#endif /* CONFIG_X86_IO_APIC */
static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
int count)
{
if (!mpc_new_phys) {
pr_info("No spare slots, try to append...take your risk, "
"new mpc_length %x\n", count);
} else {
if (count <= mpc_new_length)
pr_info("No spare slots, try to append..., "
"new mpc_length %x\n", count);
else {
pr_err("mpc_new_length %lx is too small\n",
mpc_new_length);
return -1;
}
}
return 0;
}
static int __init replace_intsrc_all(struct mpc_table *mpc,
unsigned long mpc_new_phys,
@ -837,77 +895,33 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
{
#ifdef CONFIG_X86_IO_APIC
int i;
int nr_m_spare = 0;
#endif
int count = sizeof(*mpc);
int nr_m_spare = 0;
unsigned char *mpt = ((unsigned char *)mpc) + count;
printk(KERN_INFO "mpc_length %x\n", mpc->length);
while (count < mpc->length) {
switch (*mpt) {
case MP_PROCESSOR:
{
struct mpc_cpu *m = (struct mpc_cpu *)mpt;
mpt += sizeof(*m);
count += sizeof(*m);
skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
break;
}
case MP_BUS:
{
struct mpc_bus *m = (struct mpc_bus *)mpt;
mpt += sizeof(*m);
count += sizeof(*m);
skip_entry(&mpt, &count, sizeof(struct mpc_bus));
break;
}
case MP_IOAPIC:
{
mpt += sizeof(struct mpc_ioapic);
count += sizeof(struct mpc_ioapic);
skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
break;
}
case MP_INTSRC:
{
#ifdef CONFIG_X86_IO_APIC
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
printk(KERN_INFO "OLD ");
print_MP_intsrc_info(m);
i = get_MP_intsrc_index(m);
if (i > 0) {
assign_to_mpc_intsrc(&mp_irqs[i], m);
printk(KERN_INFO "NEW ");
print_mp_irq_info(&mp_irqs[i]);
} else if (!i) {
/* legacy, do nothing */
} else if (nr_m_spare < SPARE_SLOT_NUM) {
/*
* not found (-1), or duplicated (-2)
* are invalid entries,
* we need to use the slot later
*/
m_spare[nr_m_spare] = m;
nr_m_spare++;
}
#endif
mpt += sizeof(struct mpc_intsrc);
count += sizeof(struct mpc_intsrc);
check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare);
skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
break;
}
case MP_LINTSRC:
{
struct mpc_lintsrc *m =
(struct mpc_lintsrc *)mpt;
mpt += sizeof(*m);
count += sizeof(*m);
skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
break;
}
default:
/* wrong mptable */
printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
printk(KERN_ERR "type %x\n", *mpt);
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
1, mpc, mpc->length, 1);
smp_dump_mptable(mpc, mpt);
goto out;
}
}
@ -924,23 +938,15 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
continue;
if (nr_m_spare > 0) {
printk(KERN_INFO "*NEW* found ");
apic_printk(APIC_VERBOSE, "*NEW* found\n");
nr_m_spare--;
assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
m_spare[nr_m_spare] = NULL;
} else {
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
count += sizeof(struct mpc_intsrc);
if (!mpc_new_phys) {
printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
} else {
if (count <= mpc_new_length)
printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
else {
printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
if (!check_slot(mpc_new_phys, mpc_new_length, count))
goto out;
}
}
assign_to_mpc_intsrc(&mp_irqs[i], m);
mpc->length = count;
mpt += sizeof(struct mpc_intsrc);

Some files were not shown because too many files have changed in this diff.