diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 537b2d840e69..d698cddcfbdd 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,4 +6,4 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o backtrace.o hwsampler.o diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c new file mode 100644 index 000000000000..3d48f4db246d --- /dev/null +++ b/arch/s390/oprofile/hwsampler.c @@ -0,0 +1,1256 @@ +/** + * arch/s390/oprofile/hwsampler.c + * + * Copyright IBM Corp. 2010 + * Author: Heinz Graalfs + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hwsampler.h" + +#define MAX_NUM_SDB 511 +#define MIN_NUM_SDB 1 + +#define ALERT_REQ_MASK 0x4000000000000000ul +#define BUFFER_FULL_MASK 0x8000000000000000ul + +#define EI_IEA (1 << 31) /* invalid entry address */ +#define EI_ISE (1 << 30) /* incorrect SDBT entry */ +#define EI_PRA (1 << 29) /* program request alert */ +#define EI_SACA (1 << 23) /* sampler authorization change alert */ +#define EI_LSDA (1 << 22) /* loss of sample data alert */ + +DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); + +struct hws_execute_parms { + void *buffer; + signed int rc; +}; + +DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); +EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer); + +static DEFINE_MUTEX(hws_sem); +static DEFINE_MUTEX(hws_sem_oom); + +static unsigned char hws_flush_all; +static unsigned int hws_oom; +static struct workqueue_struct *hws_wq; + +static unsigned int hws_state; +enum { + HWS_INIT = 1, + HWS_DEALLOCATED, + HWS_STOPPED, + HWS_STARTED, + HWS_STOPPING }; + +/* set to 1 if called by kernel during memory allocation */ +static unsigned char oom_killer_was_active; +/* size of SDBT and SDB as of allocate API */ +static unsigned long num_sdbt = 100; +static unsigned long num_sdb = 511; +/* sampling interval (machine cycles) */ +static unsigned long interval; + +static unsigned long min_sampler_rate; +static unsigned long max_sampler_rate; + +static int ssctl(void *buffer) +{ + int cc; + + /* set in order to detect a program check */ + cc = 1; + + asm volatile( + "0: .insn s,0xB2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (buffer) + : "m" (*((struct hws_ssctl_request_block *)buffer)) + : "cc", "memory"); + + return cc ? -EINVAL : 0 ; +} + +static int qsi(void *buffer) +{ + int cc; + cc = 1; + + asm volatile( + "0: .insn s,0xB2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (buffer) + : "m" (*((struct hws_qsi_info_block *)buffer)) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +static void execute_qsi(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = qsi(ep->buffer); +} + +static void execute_ssctl(void *parms) +{ + struct hws_execute_parms *ep = parms; + + ep->rc = ssctl(ep->buffer); +} + +static int smp_ctl_ssctl_stop(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 0; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) { + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + dump_stack(); + } + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.es || cb->qsi.cs) { + printk(KERN_EMERG "CPUMF sampling did not stop properly.\n"); + dump_stack(); + } + + return rc; +} + +static int smp_ctl_ssctl_deactivate(int cpu) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.es = 1; + cb->ssctl.cs = 0; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + if (cb->qsi.cs) + printk(KERN_EMERG "CPUMF sampling was not set inactive.\n"); + + return rc; +} + +static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval) +{ + int rc; + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + cb->ssctl.h = 1; + cb->ssctl.tear = cb->first_sdbt; + cb->ssctl.dear = *(unsigned long *) cb->first_sdbt; + cb->ssctl.interval = interval; + cb->ssctl.es = 1; + cb->ssctl.cs = 1; + + ep.buffer = &cb->ssctl; + smp_call_function_single(cpu, execute_ssctl, &ep, 1); + rc = ep.rc; + if (rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + if (ep.rc) + printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu); + + return rc; +} + +static int smp_ctl_qsi(int cpu) +{ + struct hws_execute_parms ep; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + ep.buffer = &cb->qsi; + smp_call_function_single(cpu, execute_qsi, &ep, 1); + + return ep.rc; +} + +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* prototypes for external interrupt handler and worker */ +static void hws_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64); + +static void worker(struct work_struct *work); + +static void add_samples_to_oprofile(unsigned cpu, unsigned long *, + unsigned long *dear); + +static void init_all_cpu_buffers(void) +{ + int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + memset(cb, 0, sizeof(struct hws_cpu_buffer)); + } +} + +static int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 1 : 0; +} + +static unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} + +static int prepare_cpu_buffers(void) +{ + int cpu; + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + atomic_set(&cb->ext_params, 0); + cb->worker_entry = 0; + cb->sample_overflow = 0; + cb->req_alert = 0; + cb->incorrect_sdbt_entry = 0; + cb->invalid_entry_address = 0; + cb->loss_of_sample_data = 0; + cb->sample_auth_change_alert = 0; + cb->finish = 0; + cb->oom = 0; + cb->stop_mode = 0; + } + + return rc; +} + +/* + * allocate_sdbt() - allocate sampler memory + * @cpu: the cpu for which sampler memory is allocated + * + * A 4K page is allocated for each requested SDBT. + * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs. + * Set ALERT_REQ mask in each SDBs trailer. + * Returns zero if successful, <0 otherwise. + */ +static int allocate_sdbt(int cpu) +{ + int j, k, rc; + unsigned long *sdbt; + unsigned long sdb; + unsigned long *tail; + unsigned long *trailer; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->first_sdbt) + return -EINVAL; + + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdbt) { + if (sdbt) + free_page((unsigned long)sdbt); + + goto allocate_sdbt_error; + } + if (cb->first_sdbt == 0) + cb->first_sdbt = (unsigned long)sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *)sdbt + 1; + + mutex_unlock(&hws_sem_oom); + + for (k = 0; k < num_sdb; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + + mutex_lock(&hws_sem_oom); + /* OOM killer might have been activated */ + barrier(); + if (oom_killer_was_active || !sdb) { + if (sdb) + free_page(sdb); + + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = ALERT_REQ_MASK; + sdbt++; + mutex_unlock(&hws_sem_oom); + } + tail = sdbt; + } + mutex_lock(&hws_sem_oom); + if (oom_killer_was_active) + goto allocate_sdbt_error; + + rc = 0; + if (tail) + *tail = (unsigned long) + ((void *)cb->first_sdbt) + 1; + +allocate_sdbt_exit: + mutex_unlock(&hws_sem_oom); + return rc; + +allocate_sdbt_error: + rc = -ENOMEM; + goto allocate_sdbt_exit; +} + +/* + * deallocate_sdbt() - deallocate all sampler memory + * + * For each online CPU all SDBT trees are deallocated. + * Returns the number of freed pages. + */ +static int deallocate_sdbt(void) +{ + int cpu; + int counter; + + counter = 0; + + for_each_online_cpu(cpu) { + unsigned long start; + unsigned long sdbt; + unsigned long *curr; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->first_sdbt) + continue; + + sdbt = cb->first_sdbt; + curr = (unsigned long *) sdbt; + start = sdbt; + + /* we'll free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the start */ + if ((unsigned long) curr == start) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + counter++; + } + cb->first_sdbt = 0; + } + return counter; +} + +static int start_sampling(int cpu) +{ + int rc; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu); + goto start_exit; + } + + rc = -EINVAL; + if (!cb->qsi.es) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu); + goto start_exit; + } + + if (!cb->qsi.cs) { + printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu); + goto start_exit; + } + + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n", + cpu, interval); + + rc = 0; + +start_exit: + return rc; +} + +static int stop_sampling(int cpu) +{ + unsigned long v; + int rc; + struct hws_cpu_buffer *cb; + + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (!rc && !cb->qsi.es) + printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu); + + rc = smp_ctl_ssctl_stop(cpu); + if (rc) { + printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n", + cpu, rc); + goto stop_exit; + } + + printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu); + +stop_exit: + v = cb->req_alert; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert," + " count=%lu.\n", cpu, v); + + v = cb->loss_of_sample_data; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data," + " count=%lu.\n", cpu, v); + + v = cb->invalid_entry_address; + if (v) + printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address," + " count=%lu.\n", cpu, v); + + v = cb->incorrect_sdbt_entry; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Incorrect SDBT address," + " count=%lu.\n", cpu, v); + + v = cb->sample_auth_change_alert; + if (v) + printk(KERN_ERR + "hwsampler: CPU %d CPUMF Sample authorization change," + " count=%lu.\n", cpu, v); + + return rc; +} + +static int check_hardware_prerequisites(void) +{ + unsigned long long facility_bits[2]; + + memcpy(facility_bits, S390_lowcore.stfle_fac_list, 32); + if (!(facility_bits[1] & (1ULL << 59))) + return -EOPNOTSUPP; + + return 0; +} +/* + * hws_oom_callback() - the OOM callback function + * + * In case the callback is invoked during memory allocation for the + * hw sampler, all obtained memory is deallocated and a flag is set + * so main sampler memory allocation can exit with a failure code. + * In case the callback is invoked during sampling the hw sampler + * is deactivated for all CPUs. + */ +static int hws_oom_callback(struct notifier_block *nfb, + unsigned long dummy, void *parm) +{ + unsigned long *freed; + int cpu; + struct hws_cpu_buffer *cb; + + freed = parm; + + mutex_lock(&hws_sem_oom); + + if (hws_state == HWS_DEALLOCATED) { + /* during memory allocation */ + if (oom_killer_was_active == 0) { + oom_killer_was_active = 1; + *freed += deallocate_sdbt(); + } + } else { + int i; + cpu = get_cpu(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (!cb->oom) { + for_each_online_cpu(i) { + smp_ctl_ssctl_deactivate(i); + cb->oom = 1; + } + cb->finish = 1; + + printk(KERN_INFO + "hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n", + cpu); + } + } + + mutex_unlock(&hws_sem_oom); + + return NOTIFY_OK; +} + +static struct notifier_block hws_oom_notifier = { + .notifier_call = hws_oom_callback +}; + +static int hws_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + /* We do not have sampler space available for all possible CPUs. + All CPUs should be online when hw sampling is activated. */ + return NOTIFY_BAD; +} + +static struct notifier_block hws_cpu_notifier = { + .notifier_call = hws_cpu_callback +}; + +/** + * hwsampler_deactivate() - set hardware sampling temporarily inactive + * @cpu: specifies the CPU to be set inactive. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deactivate(unsigned int cpu) +{ + /* + * Deactivate hw sampling temporarily and flush the buffer + * by pushing all the pending samples to oprofile buffer. + * + * This function can be called under one of the following conditions: + * Memory unmap, task is exiting. + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.cs) { + rc = smp_ctl_ssctl_deactivate(cpu); + if (rc) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu); + cb->finish = 1; + hws_state = HWS_STOPPING; + } else { + hws_flush_all = 1; + /* Add work to queue to read pending samples.*/ + queue_work_on(cpu, hws_wq, &cb->worker); + } + } + } + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + return rc; +} + +/** + * hwsampler_activate() - activate/resume hardware sampling which was deactivated + * @cpu: specifies the CPU to be set active. + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_activate(unsigned int cpu) +{ + /* + * Re-activate hw sampling. This should be called in pair with + * hwsampler_deactivate(). + */ + int rc; + struct hws_cpu_buffer *cb; + + rc = 0; + mutex_lock(&hws_sem); + + cb = &per_cpu(sampler_cpu_buffer, cpu); + if (hws_state == HWS_STARTED) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (!cb->qsi.cs) { + hws_flush_all = 0; + rc = smp_ctl_ssctl_enable_activate(cpu, interval); + if (rc) { + printk(KERN_ERR + "CPU %d, CPUMF activate sampling failed.\n", + cpu); + } + } + } + + mutex_unlock(&hws_sem); + + return rc; +} + +static void hws_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) +{ + int cpu; + struct hws_cpu_buffer *cb; + + cpu = smp_processor_id(); + cb = &per_cpu(sampler_cpu_buffer, cpu); + + atomic_xchg( + &cb->ext_params, + atomic_read(&cb->ext_params) + | S390_lowcore.ext_params); + + if (hws_wq) + queue_work(hws_wq, &cb->worker); +} + +static int check_qsi_on_setup(void) +{ + int rc; + unsigned int cpu; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (rc) + return -EOPNOTSUPP; + + if (!cb->qsi.as) { + printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n"); + return -EINVAL; + } + + if (cb->qsi.es) { + printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n"); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + return -EINVAL; + + printk(KERN_INFO + "CPU %d, CPUMF Sampling stopped now.\n", cpu); + } + } + return 0; +} + +static int check_qsi_on_start(void) +{ + unsigned int cpu; + int rc; + struct hws_cpu_buffer *cb; + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + + if (!cb->qsi.as) + return -EINVAL; + + if (cb->qsi.es) + return -EINVAL; + + if (cb->qsi.cs) + return -EINVAL; + } + return 0; +} + +static void worker_on_start(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->worker_entry = cb->first_sdbt; +} + +static int worker_check_error(unsigned int cpu, int ext_params) +{ + int rc; + unsigned long *sdbt; + struct hws_cpu_buffer *cb; + + rc = 0; + cb = &per_cpu(sampler_cpu_buffer, cpu); + sdbt = (unsigned long *) cb->worker_entry; + + if (!sdbt || !*sdbt) + return -EINVAL; + + if (ext_params & EI_IEA) + cb->req_alert++; + + if (ext_params & EI_LSDA) + cb->loss_of_sample_data++; + + if (ext_params & EI_IEA) { + cb->invalid_entry_address++; + rc = -EINVAL; + } + + if (ext_params & EI_ISE) { + cb->incorrect_sdbt_entry++; + rc = -EINVAL; + } + + if (ext_params & EI_SACA) { + cb->sample_auth_change_alert++; + rc = -EINVAL; + } + + return rc; +} + +static void worker_on_finish(unsigned int cpu) +{ + int rc, i; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + if (cb->finish) { + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (cb->qsi.es) { + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n", + cpu); + rc = smp_ctl_ssctl_stop(cpu); + if (rc) + printk(KERN_INFO + "hwsampler: CPU %d, CPUMF Deactivation failed.\n", + cpu); + + for_each_online_cpu(i) { + if (i == cpu) + continue; + if (!cb->finish) { + cb->finish = 1; + queue_work_on(i, hws_wq, + &cb->worker); + } + } + } + } +} + +static void worker_on_interrupt(unsigned int cpu) +{ + unsigned long *sdbt; + unsigned char done; + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + sdbt = (unsigned long *) cb->worker_entry; + + done = 0; + /* do not proceed if stop was entered, + * forget the buffers not yet processed */ + while (!done && !cb->stop_mode) { + unsigned long *trailer; + struct hws_trailer_entry *te; + unsigned long *dear = 0; + + trailer = trailer_entry_ptr(*sdbt); + /* leave loop if no more work to do */ + if (!(*trailer & BUFFER_FULL_MASK)) { + done = 1; + if (!hws_flush_all) + continue; + } + + te = (struct hws_trailer_entry *)trailer; + cb->sample_overflow += te->overflow; + + add_samples_to_oprofile(cpu, sdbt, dear); + + /* reset trailer */ + xchg((unsigned char *) te, 0x40); + + /* advance to next sdb slot in current sdbt */ + sdbt++; + /* in case link bit is set use address w/o link bit */ + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + cb->worker_entry = (unsigned long)sdbt; + } +} + +static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, + unsigned long *dear) +{ + struct hws_data_entry *sample_data_ptr; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + if (dear) { + if (dear > trailer) + return; + trailer = dear; + } + + sample_data_ptr = (struct hws_data_entry *)(*sdbt); + + while ((unsigned long *)sample_data_ptr < trailer) { + struct pt_regs *regs = NULL; + struct task_struct *tsk = NULL; + + /* + * Check sampling mode, 1 indicates basic (=customer) sampling + * mode. + */ + if (sample_data_ptr->def != 1) { + /* sample slot is not yet written */ + break; + } else { + /* make sure we don't use it twice, + * the next time the sampler will set it again */ + sample_data_ptr->def = 0; + } + + /* Get pt_regs. */ + if (sample_data_ptr->P == 1) { + /* userspace sample */ + unsigned int pid = sample_data_ptr->prim_asn; + rcu_read_lock(); + tsk = pid_task(find_vpid(pid), PIDTYPE_PID); + if (tsk) + regs = task_pt_regs(tsk); + rcu_read_unlock(); + } else { + /* kernelspace sample */ + regs = task_pt_regs(current); + } + + mutex_lock(&hws_sem); + oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0, + !sample_data_ptr->P, tsk); + mutex_unlock(&hws_sem); + + sample_data_ptr++; + } +} + +static void worker(struct work_struct *work) +{ + unsigned int cpu; + int ext_params; + struct hws_cpu_buffer *cb; + + cb = container_of(work, struct hws_cpu_buffer, worker); + cpu = smp_processor_id(); + ext_params = atomic_xchg(&cb->ext_params, 0); + + if (!cb->worker_entry) + worker_on_start(cpu); + + if (worker_check_error(cpu, ext_params)) + return; + + if (!cb->finish) + worker_on_interrupt(cpu); + + if (cb->finish) + worker_on_finish(cpu); +} + +/** + * hwsampler_allocate() - allocate memory for the hardware sampler + * @sdbt: number of SDBTs per online CPU (must be > 0) + * @sdb: number of SDBs per SDBT (minimum 1, maximum 511) + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb) +{ + int cpu, rc; + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_DEALLOCATED) + goto allocate_exit; + + if (sdbt < 1) + goto allocate_exit; + + if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB) + goto allocate_exit; + + num_sdbt = sdbt; + num_sdb = sdb; + + oom_killer_was_active = 0; + register_oom_notifier(&hws_oom_notifier); + + for_each_online_cpu(cpu) { + if (allocate_sdbt(cpu)) { + unregister_oom_notifier(&hws_oom_notifier); + goto allocate_error; + } + } + unregister_oom_notifier(&hws_oom_notifier); + if (oom_killer_was_active) + goto allocate_error; + + hws_state = HWS_STOPPED; + rc = 0; + +allocate_exit: + mutex_unlock(&hws_sem); + return rc; + +allocate_error: + rc = -ENOMEM; + printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n"); + goto allocate_exit; +} + +/** + * hwsampler_deallocate() - deallocate hardware sampler memory + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_deallocate() +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto deallocate_exit; + + smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + deallocate_sdbt(); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +deallocate_exit: + mutex_unlock(&hws_sem); + + return rc; +} + +long hwsampler_query_min_interval(void) +{ + if (min_sampler_rate) + return min_sampler_rate; + else + return -EINVAL; +} + +long hwsampler_query_max_interval(void) +{ + if (max_sampler_rate) + return max_sampler_rate; + else + return -EINVAL; +} + +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) +{ + struct hws_cpu_buffer *cb; + + cb = &per_cpu(sampler_cpu_buffer, cpu); + + return cb->sample_overflow; +} + +int hwsampler_setup() +{ + int rc; + int cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state) + goto setup_exit; + + hws_state = HWS_INIT; + + init_all_cpu_buffers(); + + rc = check_hardware_prerequisites(); + if (rc) + goto setup_exit; + + rc = check_qsi_on_setup(); + if (rc) + goto setup_exit; + + rc = -EINVAL; + hws_wq = create_workqueue("hwsampler"); + if (!hws_wq) + goto setup_exit; + + register_cpu_notifier(&hws_cpu_notifier); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + INIT_WORK(&cb->worker, worker); + rc = smp_ctl_qsi(cpu); + WARN_ON(rc); + if (min_sampler_rate != cb->qsi.min_sampl_rate) { + if (min_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different min sampler rate values.\n"); + if (min_sampler_rate < cb->qsi.min_sampl_rate) + min_sampler_rate = + cb->qsi.min_sampl_rate; + } else + min_sampler_rate = cb->qsi.min_sampl_rate; + } + if (max_sampler_rate != cb->qsi.max_sampl_rate) { + if (max_sampler_rate) { + printk(KERN_WARNING + "hwsampler: different max sampler rate values.\n"); + if (max_sampler_rate > cb->qsi.max_sampl_rate) + max_sampler_rate = + cb->qsi.max_sampl_rate; + } else + max_sampler_rate = cb->qsi.max_sampl_rate; + } + } + register_external_interrupt(0x1407, hws_ext_handler); + + hws_state = HWS_DEALLOCATED; + rc = 0; + +setup_exit: + mutex_unlock(&hws_sem); + return rc; +} + +int hwsampler_shutdown() +{ + int rc; + + mutex_lock(&hws_sem); + + rc = -EINVAL; + if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) { + mutex_unlock(&hws_sem); + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + + if (hws_state == HWS_STOPPED) { + smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + deallocate_sdbt(); + } + if (hws_wq) { + destroy_workqueue(hws_wq); + hws_wq = NULL; + } + + unregister_external_interrupt(0x1407, hws_ext_handler); + hws_state = HWS_INIT; + rc = 0; + } + mutex_unlock(&hws_sem); + + unregister_cpu_notifier(&hws_cpu_notifier); + + return rc; +} + +/** + * hwsampler_start_all() - start hardware sampling on all online CPUs + * @rate: specifies the used interval when samples are taken + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_start_all(unsigned long rate) +{ + int rc, cpu; + + mutex_lock(&hws_sem); + + hws_oom = 0; + + rc = -EINVAL; + if (hws_state != HWS_STOPPED) + goto start_all_exit; + + interval = rate; + + /* fail if rate is not valid */ + if (interval < min_sampler_rate || interval > max_sampler_rate) + goto start_all_exit; + + rc = check_qsi_on_start(); + if (rc) + goto start_all_exit; + + rc = prepare_cpu_buffers(); + if (rc) + goto start_all_exit; + + for_each_online_cpu(cpu) { + rc = start_sampling(cpu); + if (rc) + break; + } + if (rc) { + for_each_online_cpu(cpu) { + stop_sampling(cpu); + } + goto start_all_exit; + } + hws_state = HWS_STARTED; + rc = 0; + +start_all_exit: + mutex_unlock(&hws_sem); + + if (rc) + return rc; + + register_oom_notifier(&hws_oom_notifier); + hws_oom = 1; + hws_flush_all = 0; + /* now let them in, 1407 CPUMF external interrupts */ + smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */ + + return 0; +} + +/** + * hwsampler_stop_all() - stop hardware sampling on all online CPUs + * + * Returns 0 on success, !0 on failure. + */ +int hwsampler_stop_all() +{ + int tmp_rc, rc, cpu; + struct hws_cpu_buffer *cb; + + mutex_lock(&hws_sem); + + rc = 0; + if (hws_state == HWS_INIT) { + mutex_unlock(&hws_sem); + return rc; + } + hws_state = HWS_STOPPING; + mutex_unlock(&hws_sem); + + for_each_online_cpu(cpu) { + cb = &per_cpu(sampler_cpu_buffer, cpu); + cb->stop_mode = 1; + tmp_rc = stop_sampling(cpu); + if (tmp_rc) + rc = tmp_rc; + } + + if (hws_wq) + flush_workqueue(hws_wq); + + mutex_lock(&hws_sem); + if (hws_oom) { + unregister_oom_notifier(&hws_oom_notifier); + hws_oom = 0; + } + hws_state = HWS_STOPPED; + mutex_unlock(&hws_sem); + + return rc; +} diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h new file mode 100644 index 000000000000..8c72b59316b5 --- /dev/null +++ b/arch/s390/oprofile/hwsampler.h @@ -0,0 +1,113 @@ +/* + * CPUMF HW sampler functions and internal structures + * + * Copyright IBM Corp. 2010 + * Author(s): Heinz Graalfs + */ + +#ifndef HWSAMPLER_H_ +#define HWSAMPLER_H_ + +#include + +struct hws_qsi_info_block /* QUERY SAMPLING information block */ +{ /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: sampling authorisation control*/ + unsigned int b15_21:7; /* 15-21: zeros */ + unsigned int es:1; /* 22: sampling enable control */ + unsigned int b23_29:7; /* 23-29: zeros */ + unsigned int cs:1; /* 30: sampling activation control */ + unsigned int:1; /* 31: reserved */ + unsigned int bsdes:16; /* 4-5: size of sampling entry */ + unsigned int:16; /* 6-7: reserved */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +}; + +struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ +{ /* bytes 0 - 7 Bit(s) */ + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long b2_53:52; /* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +}; + +struct hws_cpu_buffer { + unsigned long first_sdbt; /* @ of 1st SDB-Table for this CP*/ + unsigned long worker_entry; + unsigned long sample_overflow; /* taken from SDB ... */ + struct hws_qsi_info_block qsi; + struct hws_ssctl_request_block ssctl; + struct work_struct worker; + atomic_t ext_params; + unsigned long req_alert; + unsigned long loss_of_sample_data; + unsigned long invalid_entry_address; + unsigned long incorrect_sdbt_entry; + unsigned long sample_auth_change_alert; + unsigned int finish:1; + unsigned int oom:1; + unsigned int stop_mode:1; +}; + +struct hws_data_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long lpp; /* Logical-Partition Program Param. */ + unsigned long long vpp; /* Virtual-Machine Program Param. */ +}; + +struct hws_trailer_entry { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned long:62; /* 2 - 63: Reserved */ + unsigned long overflow; /* 64 - sample Overflow count */ + unsigned long timestamp; /* 16 - time-stamp */ + unsigned long timestamp1; /* */ + unsigned long reserved1; /* 32 -Reserved */ + unsigned long reserved2; /* */ + unsigned long progusage1; /* 48 - reserved for programming use */ + unsigned long progusage2; /* */ +}; + +int hwsampler_setup(void); +int hwsampler_shutdown(void); +int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); +int hwsampler_deallocate(void); +long hwsampler_query_min_interval(void); +long hwsampler_query_max_interval(void); +int hwsampler_start_all(unsigned long interval); +int hwsampler_stop_all(void); +int hwsampler_deactivate(unsigned int cpu); +int hwsampler_activate(unsigned int cpu); +unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu); + +#endif /*HWSAMPLER_H_*/ diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 7a995113b918..16c76def4a9d 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -4,23 +4,182 @@ * S390 Version * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Thomas Spatzier (tspat@de.ibm.com) + * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com) + * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com) * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2011 OProfile authors */ #include #include #include +#include +#include +#include +#include "../../../drivers/oprofile/oprof.h" +#include "hwsampler.h" + +#define DEFAULT_INTERVAL 4096 + +#define DEFAULT_SDBT_BLOCKS 1 +#define DEFAULT_SDB_BLOCKS 511 + +static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; +static unsigned long oprofile_min_interval; +static unsigned long oprofile_max_interval; + +static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; +static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; + +static int hwsampler_file; +static int hwsampler_running; /* start_mutex must be held to change */ + +static struct oprofile_operations timer_ops; extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); -int __init oprofile_arch_init(struct oprofile_operations* ops) +static int oprofile_hwsampler_start(void) +{ + int retval; + + hwsampler_running = hwsampler_file; + + if (!hwsampler_running) + return timer_ops.start(); + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + +static void oprofile_hwsampler_stop(void) +{ + if (!hwsampler_running) { + timer_ops.stop(); + return; + } + + hwsampler_stop_all(); + hwsampler_deallocate(); + return; +} + +static ssize_t hwsampler_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset); +} + +static ssize_t hwsampler_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + if (oprofile_started) + /* + * save to do without locking as we set + * hwsampler_running in start() when start_mutex is + * held + */ + return -EBUSY; + + hwsampler_file = val; + + return count; +} + +static const struct file_operations hwsampler_fops = { + .read = hwsampler_read, + .write = hwsampler_write, +}; + +static int oprofile_create_hwsampling_files(struct super_block *sb, + struct dentry *root) +{ + struct dentry *hw_dir; + + /* reinitialize default values */ + hwsampler_file = 1; + + hw_dir = oprofilefs_mkdir(sb, root, "hwsampling"); + if (!hw_dir) + return -EINVAL; + + oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops); + oprofilefs_create_ulong(sb, hw_dir, "hw_interval", + &oprofile_hw_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval", + &oprofile_min_interval); + oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval", + &oprofile_max_interval); + oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks", + &oprofile_sdbt_blocks); + + return 0; +} + +static int oprofile_hwsampler_init(struct oprofile_operations *ops) +{ + if (hwsampler_setup()) + return -ENODEV; + + /* + * create hwsampler files only if hwsampler_setup() succeeds. + */ + oprofile_min_interval = hwsampler_query_min_interval(); + if (oprofile_min_interval < 0) { + oprofile_min_interval = 0; + return -ENODEV; + } + oprofile_max_interval = hwsampler_query_max_interval(); + if (oprofile_max_interval < 0) { + oprofile_max_interval = 0; + return -ENODEV; + } + + if (oprofile_timer_init(ops)) + return -ENODEV; + + printk(KERN_INFO "oprofile: using hardware sampling\n"); + + memcpy(&timer_ops, ops, sizeof(timer_ops)); + + ops->start = oprofile_hwsampler_start; + ops->stop = oprofile_hwsampler_stop; + ops->create_files = oprofile_create_hwsampling_files; + + return 0; +} + +static void oprofile_hwsampler_exit(void) +{ + oprofile_timer_exit(); + hwsampler_shutdown(); +} + +int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; - return -ENODEV; + + return oprofile_hwsampler_init(ops); } void oprofile_arch_exit(void) { + oprofile_hwsampler_exit(); } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 59f55441e075..b8ef8ddcc292 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -258,8 +258,10 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf, */ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, - unsigned long backtrace, int is_kernel, unsigned long event) + unsigned long backtrace, int is_kernel, unsigned long event, + struct task_struct *task) { + struct task_struct *tsk = task ? task : current; cpu_buf->sample_received++; if (pc == ESCAPE_CODE) { @@ -267,7 +269,7 @@ log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, return 0; } - if (op_add_code(cpu_buf, backtrace, is_kernel, current)) + if (op_add_code(cpu_buf, backtrace, is_kernel, tsk)) goto fail; if (op_add_sample(cpu_buf, pc, event)) @@ -292,7 +294,8 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) static inline void __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, - unsigned long event, int is_kernel) + unsigned long event, int is_kernel, + struct task_struct *task) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); unsigned long backtrace = oprofile_backtrace_depth; @@ -301,7 +304,7 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, * if log_sample() fail we can't backtrace since we lost the * source of this event */ - if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event)) + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task)) /* failed */ return; @@ -313,10 +316,17 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, oprofile_end_trace(cpu_buf); } +void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task) +{ + __oprofile_add_ext_sample(pc, regs, event, is_kernel, task); +} + void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, unsigned long event, int is_kernel) { - __oprofile_add_ext_sample(pc, regs, event, is_kernel); + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); } void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) @@ -332,7 +342,7 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) pc = ESCAPE_CODE; /* as this causes an early return. */ } - __oprofile_add_ext_sample(pc, regs, event, is_kernel); + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); } /* @@ -403,7 +413,7 @@ int oprofile_write_commit(struct op_entry *entry) void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); - log_sample(cpu_buf, pc, 0, is_kernel, event); + log_sample(cpu_buf, pc, 0, is_kernel, event, NULL); } void oprofile_add_trace(unsigned long pc) diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 010725117dbb..3ef44624f510 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -97,7 +97,7 @@ static struct notifier_block __refdata oprofile_cpu_notifier = { .notifier_call = oprofile_cpu_notify, }; -int __init oprofile_timer_init(struct oprofile_operations *ops) +int oprofile_timer_init(struct oprofile_operations *ops) { int rc; @@ -113,7 +113,7 @@ int __init oprofile_timer_init(struct oprofile_operations *ops) return 0; } -void __exit oprofile_timer_exit(void) +void oprofile_timer_exit(void) { unregister_hotcpu_notifier(&oprofile_cpu_notifier); } diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1ca64113efe8..7f5cfd3b37dd 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -106,6 +106,13 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event); void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, unsigned long event, int is_kernel); +/** + * Add an hardware sample. + */ +void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task); + /* Use this instead when the PC value is not from the regs. Doesn't * backtrace. */ void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);