From 92d6b71ab906be706f3679353b30a8d2c3831144 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Thu, 11 Mar 2010 14:08:56 -0800 Subject: [PATCH 1/6] genirq: Expose irq_desc->node in proc/irq Expose irq_desc->node as /proc/irq/*/node. This file provides device hardware locality information for apps desiring to include hardware locality in irq mapping decisions. Signed-off-by: Dimitri Sivanich Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- Documentation/filesystems/proc.txt | 4 ++++ kernel/irq/proc.c | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a4f30faa4f1f..6507d2ae5236 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -566,6 +566,10 @@ The default_smp_affinity mask applies to all non-active IRQs, which are the IRQs which have not yet been allocated/activated, and hence which lack a /proc/irq/[0-9]* directory. +The node file on an SMP system shows the node to which the device using the IRQ +reports itself as being attached. This hardware locality information does not +include information about any possible driver locality preference. + prof_cpu_mask specifies which CPUs are to be profiled by the system wide profiler. Default value is ffffffff (all cpus). diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 6f50eccc79c0..e346e08f5c34 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -146,6 +146,26 @@ static const struct file_operations default_affinity_proc_fops = { .release = single_release, .write = default_affinity_write, }; + +static int irq_node_proc_show(struct seq_file *m, void *v) +{ + struct irq_desc *desc = irq_to_desc((long) m->private); + + seq_printf(m, "%d\n", desc->node); + return 0; +} + +static int irq_node_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_node_proc_show, PDE(inode)->data); +} + +static const struct file_operations irq_node_proc_fops = { + .open = irq_node_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif static int irq_spurious_proc_show(struct seq_file *m, void *v) @@ -230,6 +250,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) /* create /proc/irq//smp_affinity */ proc_create_data("smp_affinity", 0600, desc->dir, &irq_affinity_proc_fops, (void *)(long)irq); + + proc_create_data("node", 0444, desc->dir, + &irq_node_proc_fops, (void *)(long)irq); #endif proc_create_data("spurious", 0444, desc->dir, From ae731f8d0785ccd3380f511bae888933b6562e45 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2010 22:56:33 +0000 Subject: [PATCH 2/6] genirq: Introduce request_any_context_irq() Now that we enjoy threaded interrupts, we're starting to see irq_chip implementations (wm831x, pca953x) that make use of threaded interrupts for the controller, and nested interrupts for the client interrupt. It all works very well, with one drawback: Drivers requesting an IRQ must now know whether the handler will run in a thread context or not, and call request_threaded_irq() or request_irq() accordingly. The problem is that the requesting driver sometimes doesn't know about the nature of the interrupt, specially when the interrupt controller is a discrete chip (typically a GPIO expander connected over I2C) that can be connected to a wide variety of otherwise perfectly supported hardware. This patch introduces the request_any_context_irq() function that mostly mimics the usual request_irq(), except that it checks whether the irq level is configured as nested or not, and calls the right backend. On success, it also returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. [ tglx: Made return value an enum, simplified code and made the export of request_any_context_irq GPL ] Signed-off-by: Marc Zyngier Cc: LKML-Reference: <927ea285bd0c68934ddae1a47e44a9ba@localhost> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 23 +++++++++++++++++++++++ kernel/irq/manage.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 75f3f00ac1e5..d7e7a7660c6c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -77,6 +77,18 @@ enum { IRQTF_AFFINITY, }; +/** + * These values can be returned by request_any_context_irq() and + * describe the context the interrupt will be run in. + * + * IRQC_IS_HARDIRQ - interrupt runs in hardirq context + * IRQC_IS_NESTED - interrupt runs in a nested threaded context + */ +enum { + IRQC_IS_HARDIRQ = 0, + IRQC_IS_NESTED, +}; + typedef irqreturn_t (*irq_handler_t)(int, void *); /** @@ -120,6 +132,10 @@ request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, return request_threaded_irq(irq, handler, NULL, flags, name, dev); } +extern int __must_check +request_any_context_irq(unsigned int irq, irq_handler_t handler, + unsigned long flags, const char *name, void *dev_id); + extern void exit_irq_thread(void); #else @@ -141,6 +157,13 @@ request_threaded_irq(unsigned int irq, irq_handler_t handler, return request_irq(irq, handler, flags, name, dev); } +static inline int __must_check +request_any_context_irq(unsigned int irq, irq_handler_t handler, + unsigned long flags, const char *name, void *dev_id) +{ + return request_irq(irq, handler, flags, name, dev_id); +} + static inline void exit_irq_thread(void) { } #endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 704e488730a5..84f32278ff1f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1120,3 +1120,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, return retval; } EXPORT_SYMBOL(request_threaded_irq); + +/** + * request_any_context_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @flags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. It selects either a + * hardirq or threaded handling method depending on the + * context. + * + * On failure, it returns a negative value. On success, + * it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. + */ +int request_any_context_irq(unsigned int irq, irq_handler_t handler, + unsigned long flags, const char *name, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + int ret; + + if (!desc) + return -EINVAL; + + if (desc->status & IRQ_NESTED_THREAD) { + ret = request_threaded_irq(irq, NULL, handler, + flags, name, dev_id); + return !ret ? IRQC_IS_NESTED : ret; + } + + ret = request_irq(irq, handler, flags, name, dev_id); + return !ret ? IRQC_IS_HARDIRQ : ret; +} +EXPORT_SYMBOL_GPL(request_any_context_irq); From e58aa3d2d0cc01ad8d6f7f640a0670433f794922 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 26 Mar 2010 00:06:51 +0000 Subject: [PATCH 3/6] genirq: Run irq handlers with interrupts disabled Running interrupt handlers with interrupts enabled can cause stack overflows. That has been observed with multiqueue NICs delivering all their interrupts to a single core. We might band aid that somehow by checking the interrupt stacks, but the real safe fix is to run the irq handlers with interrupts disabled. Drivers for whacky hardware still can reenable them in the handler itself, if the need arises. (They do already due to lockdep) The risk of doing this is rather low: - lockdep already enforces this - CONFIG_NOHZ has shaken out the drivers which relied on jiffies updates - time keeping is not longer sensitive to the timer interrupt being delayed Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Alan Cox Cc: Andi Kleen Cc: David Miller Cc: Greg Kroah-Hartman Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds LKML-Reference: <20100326000405.758579387@linutronix.de> --- kernel/irq/handle.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 76d5a671bfe1..27e5c6911223 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) irqreturn_t ret, retval = IRQ_NONE; unsigned int status = 0; - if (!(action->flags & IRQF_DISABLED)) - local_irq_enable_in_hardirq(); - do { trace_irq_handler_entry(irq, action); ret = action->handler(irq, action->dev_id); From 6932bf37bed45ce8ed531928b1b0f98162fe6df6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 26 Mar 2010 00:06:55 +0000 Subject: [PATCH 4/6] genirq: Remove IRQF_DISABLED from core code Remove all code which is related to IRQF_DISABLED from the core kernel code. IRQF_DISABLED still exists as a flag, but becomes a NOOP and will be removed after a grace period. That way we can easily revert to the previous behaviour by just restoring the core code. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Alan Cox Cc: Andi Kleen Cc: David Miller Cc: Greg Kroah-Hartman Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds LKML-Reference: <20100326000405.991244690@linutronix.de> --- Documentation/feature-removal-schedule.txt | 7 +++++ include/linux/interrupt.h | 3 ++- kernel/irq/manage.c | 30 ---------------------- 3 files changed, 9 insertions(+), 31 deletions(-) diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ed511af0f79a..9c31c2e63684 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -589,3 +589,10 @@ Why: Useful in 2003, implementation is a hack. Generally invoked by accident today. Seen as doing more harm than good. Who: Len Brown + +---------------------------- + +What: IRQF_DISABLED +When: 2.6.36 +Why: The flag is a NOOP as we run interrupt handlers with interrupts disabled +Who: Thomas Gleixner diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d7e7a7660c6c..e6d2f4441fda 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -39,7 +39,8 @@ * These flags used only by the kernel as part of the * irq handling routines. * - * IRQF_DISABLED - keep irqs disabled when calling the action handler + * IRQF_DISABLED - keep irqs disabled when calling the action handler. + * DEPRECATED. This flag is a NOOP and scheduled to be removed * IRQF_SAMPLE_RANDOM - irq is used to feed the random generator * IRQF_SHARED - allow sharing the irq among several devices * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 84f32278ff1f..444d5a81a209 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -757,16 +757,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_ONESHOT) desc->status |= IRQ_ONESHOT; - /* - * Force MSI interrupts to run with interrupts - * disabled. The multi vector cards can cause stack - * overflows due to nested interrupts when enough of - * them are directed to a core and fire at the same - * time. - */ - if (desc->msi_desc) - new->flags |= IRQF_DISABLED; - if (!(desc->status & IRQ_NOAUTOEN)) { desc->depth = 0; desc->status &= ~IRQ_DISABLED; @@ -1027,7 +1017,6 @@ EXPORT_SYMBOL(free_irq); * Flags: * * IRQF_SHARED Interrupt is shared - * IRQF_DISABLED Disable local interrupts while processing * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy * IRQF_TRIGGER_* Specify active edge(s) or level * @@ -1040,25 +1029,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, struct irq_desc *desc; int retval; - /* - * handle_IRQ_event() always ignores IRQF_DISABLED except for - * the _first_ irqaction (sigh). That can cause oopsing, but - * the behavior is classified as "will not fix" so we need to - * start nudging drivers away from using that idiom. - */ - if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) == - (IRQF_SHARED|IRQF_DISABLED)) { - pr_warning( - "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n", - irq, devname); - } - -#ifdef CONFIG_LOCKDEP - /* - * Lockdep wants atomic interrupt handlers: - */ - irqflags |= IRQF_DISABLED; -#endif /* * Sanity-check: shared interrupts must pass in a real dev-ID, * otherwise we'll have trouble later trying to figure out From e7a297b0d7d6049bd4e423ac1e17da31e4c401b8 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Fri, 30 Apr 2010 14:44:50 -0700 Subject: [PATCH 5/6] genirq: Add CPU mask affinity hint This patch adds a cpumask affinity hint to the irq_desc structure, along with a registration function and a read-only proc entry for each interrupt. This affinity_hint handle for each interrupt can be used by underlying drivers that need a better mechanism to control interrupt affinity. The underlying driver can register a cpumask for the interrupt, which will allow the driver to provide the CPU mask for the interrupt to anything that requests it. The intent is to extend the userspace daemon, irqbalance, to help hint to it a preferred CPU mask to balance the interrupt into. [ tglx: Fixed compile warnings, added WARN_ON, made SMP only ] Signed-off-by: Peter P Waskiewicz Jr Cc: davem@davemloft.net Cc: arjan@linux.jf.intel.com Cc: bhutchings@solarflare.com LKML-Reference: <20100430214445.3992.41647.stgit@ppwaskie-hc2.jf.intel.com> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 6 ++++++ include/linux/irq.h | 1 + kernel/irq/manage.c | 22 ++++++++++++++++++++++ kernel/irq/proc.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index e6d2f4441fda..5137db3317f9 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -233,6 +233,7 @@ extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); +extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); #else /* CONFIG_SMP */ static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) @@ -247,6 +248,11 @@ static inline int irq_can_set_affinity(unsigned int irq) static inline int irq_select_affinity(unsigned int irq) { return 0; } +static inline int irq_set_affinity_hint(unsigned int irq, + const struct cpumask *m) +{ + return -EINVAL; +} #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ #ifdef CONFIG_GENERIC_HARDIRQS diff --git a/include/linux/irq.h b/include/linux/irq.h index 707ab122e2e6..c03243ad84b4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -195,6 +195,7 @@ struct irq_desc { raw_spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; + const struct cpumask *affinity_hint; unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 444d5a81a209..3164ba7ce151 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) return 0; } +int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + + if (!desc) + return -EINVAL; + + raw_spin_lock_irqsave(&desc->lock, flags); + desc->affinity_hint = m; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_set_affinity_hint); + #ifndef CONFIG_AUTO_IRQ_AFFINITY /* * Generic version of the affinity autoselector. @@ -906,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) desc->chip->disable(irq); } +#ifdef CONFIG_SMP + /* make sure affinity_hint is cleaned up */ + if (WARN_ON_ONCE(desc->affinity_hint)) + desc->affinity_hint = NULL; +#endif + raw_spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index fe92dc5190dd..4f9427a30e14 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -32,6 +32,29 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v) return 0; } +static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) +{ + struct irq_desc *desc = irq_to_desc((long)m->private); + unsigned long flags; + cpumask_var_t mask; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + raw_spin_lock_irqsave(&desc->lock, flags); + if (desc->affinity_hint) + cpumask_copy(mask, desc->affinity_hint); + else + cpumask_setall(mask); + raw_spin_unlock_irqrestore(&desc->lock, flags); + + seq_cpumask(m, mask); + seq_putc(m, '\n'); + free_cpumask_var(mask); + + return 0; +} + #ifndef is_affinity_mask_valid #define is_affinity_mask_valid(val) 1 #endif @@ -84,6 +107,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file) return single_open(file, irq_affinity_proc_show, PDE(inode)->data); } +static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data); +} + static const struct file_operations irq_affinity_proc_fops = { .open = irq_affinity_proc_open, .read = seq_read, @@ -92,6 +120,13 @@ static const struct file_operations irq_affinity_proc_fops = { .write = irq_affinity_proc_write, }; +static const struct file_operations irq_affinity_hint_proc_fops = { + .open = irq_affinity_hint_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int default_affinity_show(struct seq_file *m, void *v) { seq_cpumask(m, irq_default_affinity); @@ -252,6 +287,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("smp_affinity", 0600, desc->dir, &irq_affinity_proc_fops, (void *)(long)irq); + /* create /proc/irq//affinity_hint */ + proc_create_data("affinity_hint", 0400, desc->dir, + &irq_affinity_hint_proc_fops, (void *)(long)irq); + proc_create_data("node", 0444, desc->dir, &irq_node_proc_fops, (void *)(long)irq); #endif From 4308ad801193f14ff42cb746da37cf07e35f0d08 Mon Sep 17 00:00:00 2001 From: Peter P Waskiewicz Jr Date: Wed, 5 May 2010 13:56:42 -0700 Subject: [PATCH 6/6] genirq: Clear CPU mask in affinity_hint when none is provided When an interrupt is disabled and torn down, the CPU mask returned through affinity_hint right now is all CPUs. Also, for drivers that don't provide an affinity_hint mask, this can be misleading. There should be no hint at all, meaning an empty CPU mask. [ tglx: use zalloc_cpumask_var instead of clearing it under the lock ] Signed-off-by: Peter P Waskiewicz Jr Cc: davem@davemloft.net Cc: arjan@linux.jf.intel.com Cc: bhutchings@solarflare.com LKML-Reference: <20100505205638.5426.87189.stgit@ppwaskie-hc2.jf.intel.com> Signed-off-by: Thomas Gleixner --- kernel/irq/proc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 4f9427a30e14..09a2ee540bd2 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -38,14 +38,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) unsigned long flags; cpumask_var_t mask; - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; raw_spin_lock_irqsave(&desc->lock, flags); if (desc->affinity_hint) cpumask_copy(mask, desc->affinity_hint); - else - cpumask_setall(mask); raw_spin_unlock_irqrestore(&desc->lock, flags); seq_cpumask(m, mask);