80a8c9fffa
Vegard Nossum reported oprofile + hibernation problems: > Now some warnings: > > ------------[ cut here ]------------ > WARNING: at /uio/arkimedes/s29/vegardno/git-working/linux-2.6/kernel/smp.c:328 s > mp_call_function_mask+0x194/0x1a0() The usual problem: the suspend function when interrupts are already disabled calls smp_call_function which is not allowed with interrupt off. But at this point all the other CPUs should be already down anyways, so it should be enough to just drop that. This patch should fix that problem at least by fixing cpu hotplug& suspend support. [ mingo@elte.hu: fixed 5 coding style errors. ] Signed-off-by: Andi Kleen <ak@linux.intel.com> Tested-by: Vegard Nossum <vegard.nossum@gmail.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
514 lines
11 KiB
C
514 lines
11 KiB
C
/**
|
|
* @file nmi_int.c
|
|
*
|
|
* @remark Copyright 2002 OProfile authors
|
|
* @remark Read the file COPYING
|
|
*
|
|
* @author John Levon <levon@movementarian.org>
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/oprofile.h>
|
|
#include <linux/sysdev.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/cpu.h>
|
|
#include <asm/nmi.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/apic.h>
|
|
|
|
#include "op_counter.h"
|
|
#include "op_x86_model.h"
|
|
|
|
static struct op_x86_model_spec const *model;
|
|
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
|
|
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
|
|
|
|
static int nmi_start(void);
|
|
static void nmi_stop(void);
|
|
static void nmi_cpu_start(void *dummy);
|
|
static void nmi_cpu_stop(void *dummy);
|
|
|
|
/* 0 == registered but off, 1 == registered and on */
|
|
static int nmi_enabled = 0;
|
|
|
|
#ifdef CONFIG_SMP
|
|
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
|
|
void *data)
|
|
{
|
|
int cpu = (unsigned long)data;
|
|
switch (action) {
|
|
case CPU_DOWN_FAILED:
|
|
case CPU_ONLINE:
|
|
smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
|
|
break;
|
|
case CPU_DOWN_PREPARE:
|
|
smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
|
|
break;
|
|
}
|
|
return NOTIFY_DONE;
|
|
}
|
|
|
|
static struct notifier_block oprofile_cpu_nb = {
|
|
.notifier_call = oprofile_cpu_notifier
|
|
};
|
|
#endif
|
|
|
|
#ifdef CONFIG_PM
|
|
|
|
/*
 * sysdev suspend hook: stop the counters on the calling CPU when
 * profiling is set up.  Always returns 0.
 */
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	if (nmi_enabled != 1)
		return 0;

	/* Only one CPU left, just stop that one */
	nmi_cpu_stop(NULL);
	return 0;
}
|
|
|
|
static int nmi_resume(struct sys_device *dev)
|
|
{
|
|
if (nmi_enabled == 1)
|
|
nmi_cpu_start(NULL);
|
|
return 0;
|
|
}
|
|
|
|
static struct sysdev_class oprofile_sysclass = {
|
|
.name = "oprofile",
|
|
.resume = nmi_resume,
|
|
.suspend = nmi_suspend,
|
|
};
|
|
|
|
static struct sys_device device_oprofile = {
|
|
.id = 0,
|
|
.cls = &oprofile_sysclass,
|
|
};
|
|
|
|
/*
 * Register the oprofile sysdev class and, if that succeeds, its device.
 * Returns 0 on success or the error code of the registration that failed.
 */
static int __init init_sysfs(void)
{
	int rc = sysdev_class_register(&oprofile_sysclass);

	if (rc)
		return rc;

	return sysdev_register(&device_oprofile);
}
|
|
|
|
static void exit_sysfs(void)
|
|
{
|
|
sysdev_unregister(&device_oprofile);
|
|
sysdev_class_unregister(&oprofile_sysclass);
|
|
}
|
|
|
|
#else
|
|
/* Without CONFIG_PM there is nothing to register: both hooks are no-ops. */
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
|
|
#endif /* CONFIG_PM */
|
|
|
|
static int profile_exceptions_notify(struct notifier_block *self,
|
|
unsigned long val, void *data)
|
|
{
|
|
struct die_args *args = (struct die_args *)data;
|
|
int ret = NOTIFY_DONE;
|
|
int cpu = smp_processor_id();
|
|
|
|
switch (val) {
|
|
case DIE_NMI:
|
|
if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
|
|
ret = NOTIFY_STOP;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static void nmi_cpu_save_registers(struct op_msrs *msrs)
|
|
{
|
|
unsigned int const nr_ctrs = model->num_counters;
|
|
unsigned int const nr_ctrls = model->num_controls;
|
|
struct op_msr *counters = msrs->counters;
|
|
struct op_msr *controls = msrs->controls;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < nr_ctrs; ++i) {
|
|
if (counters[i].addr) {
|
|
rdmsr(counters[i].addr,
|
|
counters[i].saved.low,
|
|
counters[i].saved.high);
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < nr_ctrls; ++i) {
|
|
if (controls[i].addr) {
|
|
rdmsr(controls[i].addr,
|
|
controls[i].saved.low,
|
|
controls[i].saved.high);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void nmi_save_registers(void *dummy)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
|
nmi_cpu_save_registers(msrs);
|
|
}
|
|
|
|
static void free_msrs(void)
|
|
{
|
|
int i;
|
|
for_each_possible_cpu(i) {
|
|
kfree(per_cpu(cpu_msrs, i).counters);
|
|
per_cpu(cpu_msrs, i).counters = NULL;
|
|
kfree(per_cpu(cpu_msrs, i).controls);
|
|
per_cpu(cpu_msrs, i).controls = NULL;
|
|
}
|
|
}
|
|
|
|
static int allocate_msrs(void)
|
|
{
|
|
int success = 1;
|
|
size_t controls_size = sizeof(struct op_msr) * model->num_controls;
|
|
size_t counters_size = sizeof(struct op_msr) * model->num_counters;
|
|
|
|
int i;
|
|
for_each_possible_cpu(i) {
|
|
per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
|
|
GFP_KERNEL);
|
|
if (!per_cpu(cpu_msrs, i).counters) {
|
|
success = 0;
|
|
break;
|
|
}
|
|
per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
|
|
GFP_KERNEL);
|
|
if (!per_cpu(cpu_msrs, i).controls) {
|
|
success = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!success)
|
|
free_msrs();
|
|
|
|
return success;
|
|
}
|
|
|
|
static void nmi_cpu_setup(void *dummy)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
|
|
spin_lock(&oprofilefs_lock);
|
|
model->setup_ctrs(msrs);
|
|
spin_unlock(&oprofilefs_lock);
|
|
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
|
|
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
|
}
|
|
|
|
static struct notifier_block profile_exceptions_nb = {
|
|
.notifier_call = profile_exceptions_notify,
|
|
.next = NULL,
|
|
.priority = 0
|
|
};
|
|
|
|
static int nmi_setup(void)
|
|
{
|
|
int err = 0;
|
|
int cpu;
|
|
|
|
if (!allocate_msrs())
|
|
return -ENOMEM;
|
|
|
|
err = register_die_notifier(&profile_exceptions_nb);
|
|
if (err) {
|
|
free_msrs();
|
|
return err;
|
|
}
|
|
|
|
/* We need to serialize save and setup for HT because the subset
|
|
* of msrs are distinct for save and setup operations
|
|
*/
|
|
|
|
/* Assume saved/restored counters are the same on all CPUs */
|
|
model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
|
|
for_each_possible_cpu(cpu) {
|
|
if (cpu != 0) {
|
|
memcpy(per_cpu(cpu_msrs, cpu).counters,
|
|
per_cpu(cpu_msrs, 0).counters,
|
|
sizeof(struct op_msr) * model->num_counters);
|
|
|
|
memcpy(per_cpu(cpu_msrs, cpu).controls,
|
|
per_cpu(cpu_msrs, 0).controls,
|
|
sizeof(struct op_msr) * model->num_controls);
|
|
}
|
|
|
|
}
|
|
on_each_cpu(nmi_save_registers, NULL, 1);
|
|
on_each_cpu(nmi_cpu_setup, NULL, 1);
|
|
nmi_enabled = 1;
|
|
return 0;
|
|
}
|
|
|
|
static void nmi_restore_registers(struct op_msrs *msrs)
|
|
{
|
|
unsigned int const nr_ctrs = model->num_counters;
|
|
unsigned int const nr_ctrls = model->num_controls;
|
|
struct op_msr *counters = msrs->counters;
|
|
struct op_msr *controls = msrs->controls;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < nr_ctrls; ++i) {
|
|
if (controls[i].addr) {
|
|
wrmsr(controls[i].addr,
|
|
controls[i].saved.low,
|
|
controls[i].saved.high);
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < nr_ctrs; ++i) {
|
|
if (counters[i].addr) {
|
|
wrmsr(counters[i].addr,
|
|
counters[i].saved.low,
|
|
counters[i].saved.high);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void nmi_cpu_shutdown(void *dummy)
|
|
{
|
|
unsigned int v;
|
|
int cpu = smp_processor_id();
|
|
struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
|
|
|
|
/* restoring APIC_LVTPC can trigger an apic error because the delivery
|
|
* mode and vector nr combination can be illegal. That's by design: on
|
|
* power on apic lvt contain a zero vector nr which are legal only for
|
|
* NMI delivery mode. So inhibit apic err before restoring lvtpc
|
|
*/
|
|
v = apic_read(APIC_LVTERR);
|
|
apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
|
|
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
|
|
apic_write(APIC_LVTERR, v);
|
|
nmi_restore_registers(msrs);
|
|
}
|
|
|
|
static void nmi_shutdown(void)
|
|
{
|
|
struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
|
|
nmi_enabled = 0;
|
|
on_each_cpu(nmi_cpu_shutdown, NULL, 1);
|
|
unregister_die_notifier(&profile_exceptions_nb);
|
|
model->shutdown(msrs);
|
|
free_msrs();
|
|
put_cpu_var(cpu_msrs);
|
|
}
|
|
|
|
static void nmi_cpu_start(void *dummy)
|
|
{
|
|
struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
|
|
model->start(msrs);
|
|
}
|
|
|
|
static int nmi_start(void)
|
|
{
|
|
on_each_cpu(nmi_cpu_start, NULL, 1);
|
|
return 0;
|
|
}
|
|
|
|
static void nmi_cpu_stop(void *dummy)
|
|
{
|
|
struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
|
|
model->stop(msrs);
|
|
}
|
|
|
|
static void nmi_stop(void)
|
|
{
|
|
on_each_cpu(nmi_cpu_stop, NULL, 1);
|
|
}
|
|
|
|
/* Per-counter settings; exposed through oprofilefs by nmi_create_files(). */
struct op_counter_config counter_config[OP_MAX_COUNTER];
|
|
|
|
static int nmi_create_files(struct super_block *sb, struct dentry *root)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < model->num_counters; ++i) {
|
|
struct dentry *dir;
|
|
char buf[4];
|
|
|
|
/* quick little hack to _not_ expose a counter if it is not
|
|
* available for use. This should protect userspace app.
|
|
* NOTE: assumes 1:1 mapping here (that counters are organized
|
|
* sequentially in their struct assignment).
|
|
*/
|
|
if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
|
|
continue;
|
|
|
|
snprintf(buf, sizeof(buf), "%d", i);
|
|
dir = oprofilefs_mkdir(sb, root, buf);
|
|
oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
|
|
oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
|
|
oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
|
|
oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
|
|
oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
|
|
oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int p4force;
|
|
module_param(p4force, int, 0);
|
|
|
|
/*
 * Select the Pentium 4 model spec and CPU type string.
 *
 * @cpu_type: receives the oprofile CPU type name on success
 *
 * Returns 1 when a spec was selected, 0 when the CPU model is rejected
 * (unless p4force is set) or, on SMP kernels, when the sibling count is
 * neither 1 nor 2.
 */
static int __init p4_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;

	if (!p4force) {
		if (cpu_model == 5 || cpu_model > 6)
			return 0;
	}

#ifdef CONFIG_SMP
	if (smp_num_siblings == 1) {
		*cpu_type = "i386/p4";
		model = &op_p4_spec;
		return 1;
	}

	if (smp_num_siblings == 2) {
		*cpu_type = "i386/p4-ht";
		model = &op_p4_ht2_spec;
		return 1;
	}

	printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
	printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
	return 0;
#else
	*cpu_type = "i386/p4";
	model = &op_p4_spec;
	return 1;
#endif
}
|
|
|
|
/*
 * Select the P6-family model spec and map the CPU model number to an
 * oprofile CPU type string.
 *
 * @cpu_type: receives the oprofile CPU type name on success
 *
 * Returns 1 on success, 0 for an unknown model (in which case neither
 * *cpu_type nor the model spec is touched).
 */
static int __init ppro_init(char **cpu_type)
{
	__u8 cpu_model = boot_cpu_data.x86_model;
	char *name;

	if (cpu_model <= 2)
		name = "i386/ppro";
	else if (cpu_model <= 5)
		name = "i386/pii";
	else if (cpu_model <= 8)
		name = "i386/piii";
	else if (cpu_model == 9)
		name = "i386/p6_mobile";
	else if (cpu_model <= 13)
		name = "i386/p6";
	else if (cpu_model == 14)
		name = "i386/core";
	else if (cpu_model == 15 || cpu_model == 23 || cpu_model == 26)
		name = "i386/core_2";
	else
		return 0;	/* Unknown */

	*cpu_type = name;
	model = &op_ppro_spec;
	return 1;
}
|
|
|
|
/*
 * Set once op_nmi_init() has selected NMI profiling, so that op_nmi_exit()
 * only undoes the sysfs and hotplug-notifier registrations when they were
 * actually made.
 */
static int using_nmi;
|
|
|
|
/*
 * Probe the boot CPU and, if it is supported, set up @ops for NMI-based
 * profiling.
 *
 * @ops: oprofile operations table to fill in
 *
 * Returns 0 on success, or -ENODEV when the CPU has no APIC or its
 * vendor/family/model is not supported.
 */
int __init op_nmi_init(struct oprofile_operations *ops)
{
	__u8 vendor = boot_cpu_data.x86_vendor;
	__u8 family = boot_cpu_data.x86;
	char *cpu_type;

	if (!cpu_has_apic)
		return -ENODEV;

	if (vendor == X86_VENDOR_AMD) {
		/* Needs to be at least an Athlon (or hammer in 32bit mode) */
		switch (family) {
		case 6:
			cpu_type = "i386/athlon";
			break;
		case 0xf:
			/*
			 * Actually it could be i386/hammer too, but give
			 * user space a consistent name.
			 */
			cpu_type = "x86-64/hammer";
			break;
		case 0x10:
			cpu_type = "x86-64/family10";
			break;
		default:
			return -ENODEV;
		}
		model = &op_athlon_spec;
	} else if (vendor == X86_VENDOR_INTEL) {
		int supported;

		switch (family) {
		case 0xf:
			/* Pentium IV */
			supported = p4_init(&cpu_type);
			break;
		case 6:
			/* A P6-class processor */
			supported = ppro_init(&cpu_type);
			break;
		default:
			return -ENODEV;
		}
		if (!supported)
			return -ENODEV;
	} else {
		return -ENODEV;
	}

	init_sysfs();
#ifdef CONFIG_SMP
	register_cpu_notifier(&oprofile_cpu_nb);
#endif
	using_nmi = 1;

	ops->create_files = nmi_create_files;
	ops->setup = nmi_setup;
	ops->shutdown = nmi_shutdown;
	ops->start = nmi_start;
	ops->stop = nmi_stop;
	ops->cpu_type = cpu_type;

	printk(KERN_INFO "oprofile: using NMI interrupt.\n");
	return 0;
}
|
|
|
|
void op_nmi_exit(void)
|
|
{
|
|
if (using_nmi) {
|
|
exit_sysfs();
|
|
#ifdef CONFIG_SMP
|
|
unregister_cpu_notifier(&oprofile_cpu_nb);
|
|
#endif
|
|
}
|
|
}
|