genirq: Implement a sane sparse_irq allocator
The current sparse_irq allocator has several short comings due to failures in the design or the lack of it: - Requires iteration over the number of active irqs to find a free slot (Some architectures have grown their own workarounds for this) - Removal of entries is not possible - Racy between create_irq_nr and destroy_irq (plugged by horrible callbacks) - Migration of active irq descriptors is not possible - No bulk allocation of irq ranges - Sprinkeled irq_desc references all over the place outside of kernel/irq/ (The previous chip functions series is addressing this issue) Implement a sane allocator which fixes the above short comings (though migration of active descriptors needs a full tree wide cleanup of the direct and mostly unlocked access to irq_desc). The new allocator still uses a radix_tree, but uses a bitmap for keeping track of allocated irq numbers. That allows: - Fast lookup of a free slot - Allows the removal of descriptors - Prevents the create/destroy race - Bulk allocation of consecutive irq ranges - Basic design is ready for migration of life descriptors after further cleanups The bitmap is also used in the SPARSE_IRQ=n case for lookup and raceless (de)allocation of irq numbers. So it removes the requirement for looping through the descriptor array to find slots. Right now it uses sparse_irq_lock to protect the bitmap and the radix tree, but after cleaning up all users we should be able convert that to a mutex and to switch the radix_tree and decriptor allocations to GFP_KERNEL. [ Folded in a bugfix from Yinghai Lu ] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
1318a481fc
commit
1f5a5b87f7
2 changed files with 246 additions and 8 deletions
|
@ -398,6 +398,29 @@ static inline struct irq_2_iommu *irq_data_get_iommu(struct irq_data *d)
|
|||
}
|
||||
#endif
|
||||
|
||||
int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
|
||||
void irq_free_descs(unsigned int irq, unsigned int cnt);
|
||||
|
||||
static inline int irq_alloc_desc(int node)
|
||||
{
|
||||
return irq_alloc_descs(-1, 0, 1, node);
|
||||
}
|
||||
|
||||
static inline int irq_alloc_desc_at(unsigned int at, int node)
|
||||
{
|
||||
return irq_alloc_descs(at, at, 1, node);
|
||||
}
|
||||
|
||||
static inline int irq_alloc_desc_from(unsigned int from, int node)
|
||||
{
|
||||
return irq_alloc_descs(-1, from, 1, node);
|
||||
}
|
||||
|
||||
static inline void irq_free_desc(unsigned int irq)
|
||||
{
|
||||
irq_free_descs(irq, 1);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_GENERIC_HARDIRQS */
|
||||
|
||||
#endif /* !CONFIG_S390 */
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/bitmap.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
|
@ -33,9 +34,54 @@ static void __init init_irq_default_affinity(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
|
||||
{
|
||||
if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
|
||||
return -ENOMEM;
|
||||
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
|
||||
free_cpumask_var(desc->irq_data.affinity);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void desc_smp_init(struct irq_desc *desc, int node)
|
||||
{
|
||||
desc->node = node;
|
||||
cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline int
|
||||
alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; }
|
||||
static inline void desc_smp_init(struct irq_desc *desc, int node) { }
|
||||
#endif
|
||||
|
||||
static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
|
||||
{
|
||||
desc->irq_data.irq = irq;
|
||||
desc->irq_data.chip = &no_irq_chip;
|
||||
desc->irq_data.chip_data = NULL;
|
||||
desc->irq_data.handler_data = NULL;
|
||||
desc->irq_data.msi_desc = NULL;
|
||||
desc->status = IRQ_DEFAULT_INIT_FLAGS;
|
||||
desc->handle_irq = handle_bad_irq;
|
||||
desc->depth = 1;
|
||||
desc->name = NULL;
|
||||
memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
|
||||
desc_smp_init(desc, node);
|
||||
}
|
||||
|
||||
int nr_irqs = NR_IRQS;
|
||||
EXPORT_SYMBOL_GPL(nr_irqs);
|
||||
|
||||
DEFINE_RAW_SPINLOCK(sparse_irq_lock);
|
||||
static DECLARE_BITMAP(allocated_irqs, NR_IRQS);
|
||||
|
||||
#ifdef CONFIG_SPARSE_IRQ
|
||||
|
||||
static struct irq_desc irq_desc_init = {
|
||||
|
@ -85,14 +131,9 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
|
|||
arch_init_chip_data(desc, node);
|
||||
}
|
||||
|
||||
/*
|
||||
* Protect the sparse_irqs:
|
||||
*/
|
||||
DEFINE_RAW_SPINLOCK(sparse_irq_lock);
|
||||
|
||||
static RADIX_TREE(irq_desc_tree, GFP_ATOMIC);
|
||||
|
||||
static void set_irq_desc(unsigned int irq, struct irq_desc *desc)
|
||||
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
|
||||
{
|
||||
radix_tree_insert(&irq_desc_tree, irq, desc);
|
||||
}
|
||||
|
@ -111,6 +152,94 @@ void replace_irq_desc(unsigned int irq, struct irq_desc *desc)
|
|||
radix_tree_replace_slot(ptr, desc);
|
||||
}
|
||||
|
||||
static void delete_irq_desc(unsigned int irq)
|
||||
{
|
||||
radix_tree_delete(&irq_desc_tree, irq);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void free_masks(struct irq_desc *desc)
|
||||
{
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
free_cpumask_var(desc->pending_mask);
|
||||
#endif
|
||||
free_cpumask_var(desc->affinity);
|
||||
}
|
||||
#else
|
||||
static inline void free_masks(struct irq_desc *desc) { }
|
||||
#endif
|
||||
|
||||
static struct irq_desc *alloc_desc(int irq, int node)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
gfp_t gfp = GFP_KERNEL;
|
||||
|
||||
desc = kzalloc_node(sizeof(*desc), gfp, node);
|
||||
if (!desc)
|
||||
return NULL;
|
||||
/* allocate based on nr_cpu_ids */
|
||||
desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs),
|
||||
gfp, node);
|
||||
if (!desc->kstat_irqs)
|
||||
goto err_desc;
|
||||
|
||||
if (alloc_masks(desc, gfp, node))
|
||||
goto err_kstat;
|
||||
|
||||
raw_spin_lock_init(&desc->lock);
|
||||
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
|
||||
|
||||
desc_set_defaults(irq, desc, node);
|
||||
|
||||
return desc;
|
||||
|
||||
err_kstat:
|
||||
kfree(desc->kstat_irqs);
|
||||
err_desc:
|
||||
kfree(desc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void free_desc(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_to_desc(irq);
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&sparse_irq_lock, flags);
|
||||
delete_irq_desc(irq);
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
|
||||
free_masks(desc);
|
||||
kfree(desc->kstat_irqs);
|
||||
kfree(desc);
|
||||
}
|
||||
|
||||
static int alloc_descs(unsigned int start, unsigned int cnt, int node)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < cnt; i++) {
|
||||
desc = alloc_desc(start + i, node);
|
||||
if (!desc)
|
||||
goto err;
|
||||
raw_spin_lock_irqsave(&sparse_irq_lock, flags);
|
||||
irq_insert_desc(start + i, desc);
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
}
|
||||
return start;
|
||||
|
||||
err:
|
||||
for (i--; i >= 0; i--)
|
||||
free_desc(start + i);
|
||||
|
||||
raw_spin_lock_irqsave(&sparse_irq_lock, flags);
|
||||
bitmap_clear(allocated_irqs, start, cnt);
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
|
||||
[0 ... NR_IRQS_LEGACY-1] = {
|
||||
.status = IRQ_DEFAULT_INIT_FLAGS,
|
||||
|
@ -155,7 +284,7 @@ int __init early_irq_init(void)
|
|||
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
|
||||
alloc_desc_masks(&desc[i], node, true);
|
||||
init_desc_masks(&desc[i]);
|
||||
set_irq_desc(i, &desc[i]);
|
||||
irq_insert_desc(i, &desc[i]);
|
||||
}
|
||||
|
||||
return arch_early_irq_init();
|
||||
|
@ -192,7 +321,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
|
|||
}
|
||||
init_one_irq_desc(irq, desc, node);
|
||||
|
||||
set_irq_desc(irq, desc);
|
||||
irq_insert_desc(irq, desc);
|
||||
|
||||
out_unlock:
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
|
@ -245,8 +374,94 @@ struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
|
|||
{
|
||||
return irq_to_desc(irq);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline int desc_node(struct irq_desc *desc)
|
||||
{
|
||||
return desc->irq_data.node;
|
||||
}
|
||||
#else
|
||||
static inline int desc_node(struct irq_desc *desc) { return 0; }
|
||||
#endif
|
||||
|
||||
static void free_desc(unsigned int irq)
|
||||
{
|
||||
struct irq_desc *desc = irq_to_desc(irq);
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&desc->lock, flags);
|
||||
desc_set_defaults(irq, desc, desc_node(desc));
|
||||
raw_spin_unlock_irqrestore(&desc->lock, flags);
|
||||
}
|
||||
|
||||
static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
|
||||
{
|
||||
return start;
|
||||
}
|
||||
#endif /* !CONFIG_SPARSE_IRQ */
|
||||
|
||||
/* Dynamic interrupt handling */
|
||||
|
||||
/**
|
||||
* irq_free_descs - free irq descriptors
|
||||
* @from: Start of descriptor range
|
||||
* @cnt: Number of consecutive irqs to free
|
||||
*/
|
||||
void irq_free_descs(unsigned int from, unsigned int cnt)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
if (from >= nr_irqs || (from + cnt) > nr_irqs)
|
||||
return;
|
||||
|
||||
for (i = 0; i < cnt; i++)
|
||||
free_desc(from + i);
|
||||
|
||||
raw_spin_lock_irqsave(&sparse_irq_lock, flags);
|
||||
bitmap_clear(allocated_irqs, from, cnt);
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* irq_alloc_descs - allocate and initialize a range of irq descriptors
|
||||
* @irq: Allocate for specific irq number if irq >= 0
|
||||
* @from: Start the search from this irq number
|
||||
* @cnt: Number of consecutive irqs to allocate.
|
||||
* @node: Preferred node on which the irq descriptor should be allocated
|
||||
*
|
||||
* Returns the first irq number or error code
|
||||
*/
|
||||
int __ref
|
||||
irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
|
||||
{
|
||||
unsigned long flags;
|
||||
int start, ret;
|
||||
|
||||
if (!cnt)
|
||||
return -EINVAL;
|
||||
|
||||
raw_spin_lock_irqsave(&sparse_irq_lock, flags);
|
||||
|
||||
start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
|
||||
ret = -EEXIST;
|
||||
if (irq >=0 && start != irq)
|
||||
goto err;
|
||||
|
||||
ret = -ENOMEM;
|
||||
if (start >= nr_irqs)
|
||||
goto err;
|
||||
|
||||
bitmap_set(allocated_irqs, start, cnt);
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
return alloc_descs(start, cnt, node);
|
||||
|
||||
err:
|
||||
raw_spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Statistics access */
|
||||
void clear_kstat_irqs(struct irq_desc *desc)
|
||||
{
|
||||
memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
|
||||
|
|
Loading…
Reference in a new issue