IB/ehca: Fix race condition/locking issues in scaling code
Fix a race condition in find_next_online_cpu() and some other locking issues in ehca scaling code.

Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
parent
78d8d5f9ef
commit
8b16cef3df
1 changed file with 33 additions and 35 deletions
|
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)
|
|||
|
||||
static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
|
||||
{
|
||||
unsigned long flags_last_cpu;
|
||||
int cpu;
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON_ONCE(!in_interrupt());
|
||||
if (ehca_debug_level)
|
||||
ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
|
||||
|
||||
spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
|
||||
pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
|
||||
if (pool->last_cpu == NR_CPUS)
|
||||
pool->last_cpu = first_cpu(cpu_online_map);
|
||||
spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
|
||||
spin_lock_irqsave(&pool->last_cpu_lock, flags);
|
||||
cpu = next_cpu(pool->last_cpu, cpu_online_map);
|
||||
if (cpu == NR_CPUS)
|
||||
cpu = first_cpu(cpu_online_map);
|
||||
pool->last_cpu = cpu;
|
||||
spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
|
||||
|
||||
return pool->last_cpu;
|
||||
return cpu;
|
||||
}
|
||||
|
||||
static void __queue_comp_task(struct ehca_cq *__cq,
|
||||
struct ehca_cpu_comp_task *cct)
|
||||
{
|
||||
unsigned long flags_cct;
|
||||
unsigned long flags_cq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&cct->task_lock, flags_cct);
|
||||
spin_lock_irqsave(&__cq->task_lock, flags_cq);
|
||||
spin_lock_irqsave(&cct->task_lock, flags);
|
||||
spin_lock(&__cq->task_lock);
|
||||
|
||||
if (__cq->nr_callbacks == 0) {
|
||||
__cq->nr_callbacks++;
|
||||
|
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
|
|||
else
|
||||
__cq->nr_callbacks++;
|
||||
|
||||
spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
|
||||
spin_unlock_irqrestore(&cct->task_lock, flags_cct);
|
||||
spin_unlock(&__cq->task_lock);
|
||||
spin_unlock_irqrestore(&cct->task_lock, flags);
|
||||
}
|
||||
|
||||
static void queue_comp_task(struct ehca_cq *__cq)
|
||||
|
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
|
|||
|
||||
cpu = get_cpu();
|
||||
cpu_id = find_next_online_cpu(pool);
|
||||
|
||||
BUG_ON(!cpu_online(cpu_id));
|
||||
|
||||
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
|
||||
BUG_ON(!cct);
|
||||
|
||||
if (cct->cq_jobs > 0) {
|
||||
cpu_id = find_next_online_cpu(pool);
|
||||
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
|
||||
BUG_ON(!cct);
|
||||
}
|
||||
|
||||
__queue_comp_task(__cq, cct);
|
||||
|
||||
put_cpu();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void run_comp_task(struct ehca_cpu_comp_task* cct)
|
||||
{
|
||||
struct ehca_cq *cq;
|
||||
unsigned long flags_cct;
|
||||
unsigned long flags_cq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&cct->task_lock, flags_cct);
|
||||
spin_lock_irqsave(&cct->task_lock, flags);
|
||||
|
||||
while (!list_empty(&cct->cq_list)) {
|
||||
cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
|
||||
spin_unlock_irqrestore(&cct->task_lock, flags_cct);
|
||||
spin_unlock_irqrestore(&cct->task_lock, flags);
|
||||
comp_event_callback(cq);
|
||||
spin_lock_irqsave(&cct->task_lock, flags_cct);
|
||||
spin_lock_irqsave(&cct->task_lock, flags);
|
||||
|
||||
spin_lock_irqsave(&cq->task_lock, flags_cq);
|
||||
spin_lock(&cq->task_lock);
|
||||
cq->nr_callbacks--;
|
||||
if (cq->nr_callbacks == 0) {
|
||||
list_del_init(cct->cq_list.next);
|
||||
cct->cq_jobs--;
|
||||
}
|
||||
spin_unlock_irqrestore(&cq->task_lock, flags_cq);
|
||||
|
||||
spin_unlock(&cq->task_lock);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&cct->task_lock, flags_cct);
|
||||
|
||||
return;
|
||||
spin_unlock_irqrestore(&cct->task_lock, flags);
|
||||
}
|
||||
|
||||
static int comp_task(void *__cct)
|
||||
{
|
||||
struct ehca_cpu_comp_task* cct = __cct;
|
||||
int cql_empty;
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
while(!kthread_should_stop()) {
|
||||
add_wait_queue(&cct->wait_queue, &wait);
|
||||
|
||||
if (list_empty(&cct->cq_list))
|
||||
spin_lock_irq(&cct->task_lock);
|
||||
cql_empty = list_empty(&cct->cq_list);
|
||||
spin_unlock_irq(&cct->task_lock);
|
||||
if (cql_empty)
|
||||
schedule();
|
||||
else
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
remove_wait_queue(&cct->wait_queue, &wait);
|
||||
|
||||
if (!list_empty(&cct->cq_list))
|
||||
spin_lock_irq(&cct->task_lock);
|
||||
cql_empty = list_empty(&cct->cq_list);
|
||||
spin_unlock_irq(&cct->task_lock);
|
||||
if (!cql_empty)
|
||||
run_comp_task(__cct);
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
@@ -693,8 +695,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
|
|||
|
||||
if (task)
|
||||
kthread_stop(task);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void take_over_work(struct ehca_comp_pool *pool,
|
||||
|
@@ -815,6 +815,4 @@ void ehca_destroy_comp_pool(void)
|
|||
free_percpu(pool->cpu_comp_tasks);
|
||||
kfree(pool);
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue